react-native-gemma-agent 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +457 -0
- package/package.json +52 -0
- package/skills/calculator.ts +47 -0
- package/skills/deviceLocation.ts +180 -0
- package/skills/index.ts +3 -0
- package/skills/queryWikipedia.ts +96 -0
- package/skills/readCalendar.ts +74 -0
- package/skills/webSearch.ts +75 -0
- package/src/AgentOrchestrator.ts +315 -0
- package/src/BM25Scorer.ts +118 -0
- package/src/FunctionCallParser.ts +113 -0
- package/src/GemmaAgentProvider.tsx +101 -0
- package/src/InferenceEngine.ts +301 -0
- package/src/ModelManager.ts +244 -0
- package/src/SkillRegistry.ts +60 -0
- package/src/SkillSandbox.tsx +155 -0
- package/src/index.ts +52 -0
- package/src/types.ts +197 -0
- package/src/useGemmaAgent.ts +222 -0
- package/src/useModelDownload.ts +80 -0
- package/src/useSkillRegistry.ts +58 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Shashank Gupta
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
# react-native-gemma-agent
|
|
2
|
+
|
|
3
|
+
The first React Native SDK for building **on-device AI agents** powered by Google's Gemma 4. Run a complete agent loop — inference, tool calling, and skill execution — entirely on the user's phone with zero cloud dependency.
|
|
4
|
+
|
|
5
|
+
## Why This Exists
|
|
6
|
+
|
|
7
|
+
Every major AI framework (LangChain, CrewAI, AutoGen) assumes a cloud LLM. But mobile apps need agents that work **offline**, respect **privacy**, and cost **zero per inference**. This SDK brings the agentic pattern — model thinks, picks a tool, executes it, responds — entirely on-device using Gemma 4's native function calling.
|
|
8
|
+
|
|
9
|
+
Inspired by [Google AI Edge Gallery's Agent Skills](https://github.com/google-ai-edge/gallery), rebuilt as a React Native SDK that any developer can drop into their app.
|
|
10
|
+
|
|
11
|
+
## What It Does
|
|
12
|
+
|
|
13
|
+
- Runs **Gemma 4 E2B** (2.3B effective params, 4.6B total MoE) on-device via llama.rn
|
|
14
|
+
- **Agent loop**: model generates -> detects tool calls -> executes skills -> feeds results back -> responds naturally
|
|
15
|
+
- **JS Skill Sandbox**: execute skills in a hidden WebView (same pattern as Google AI Edge Gallery)
|
|
16
|
+
- **Native Skills**: run skills directly in React Native context with full access to device APIs (GPS, calendar, health, gallery, etc.)
|
|
17
|
+
- **BM25 Skill Routing** (opt-in): smart pre-filtering when you have many skills — only sends the most relevant tools to the model per query
|
|
18
|
+
- **React Hooks API**: `useGemmaAgent()`, `useModelDownload()`, `useSkillRegistry()`
|
|
19
|
+
- **Streaming**: token-by-token output for real-time UI updates
|
|
20
|
+
- Fully typed with TypeScript
|
|
21
|
+
|
|
22
|
+
## Quick Start
|
|
23
|
+
|
|
24
|
+
```tsx
|
|
25
|
+
import {
|
|
26
|
+
GemmaAgentProvider,
|
|
27
|
+
useGemmaAgent,
|
|
28
|
+
useModelDownload,
|
|
29
|
+
} from 'react-native-gemma-agent';
|
|
30
|
+
import { calculatorSkill, queryWikipediaSkill } from 'react-native-gemma-agent/skills';
|
|
31
|
+
|
|
32
|
+
// 1. Wrap your app
|
|
33
|
+
function App() {
|
|
34
|
+
return (
|
|
35
|
+
<GemmaAgentProvider
|
|
36
|
+
model={{
|
|
37
|
+
repoId: 'unsloth/gemma-4-E2B-it-GGUF',
|
|
38
|
+
filename: 'gemma-4-E2B-it-Q4_K_M.gguf',
|
|
39
|
+
}}
|
|
40
|
+
skills={[calculatorSkill, queryWikipediaSkill]}
|
|
41
|
+
systemPrompt="You are a helpful assistant."
|
|
42
|
+
>
|
|
43
|
+
<ChatScreen />
|
|
44
|
+
</GemmaAgentProvider>
|
|
45
|
+
);
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// 2. Use the hooks
|
|
49
|
+
function ChatScreen() {
|
|
50
|
+
const { sendMessage, messages, streamingText, isProcessing, activeSkill, loadModel } = useGemmaAgent();
|
|
51
|
+
const { download, progress } = useModelDownload();
|
|
52
|
+
|
|
53
|
+
// Download model (3.1 GB, one-time)
|
|
54
|
+
// await download();
|
|
55
|
+
|
|
56
|
+
// Load into memory
|
|
57
|
+
// await loadModel();
|
|
58
|
+
|
|
59
|
+
// Chat with agent
|
|
60
|
+
// const response = await sendMessage("What is 234 * 567?");
|
|
61
|
+
// Agent calls calculator skill -> returns "132678"
|
|
62
|
+
}
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Requirements
|
|
66
|
+
|
|
67
|
+
| Requirement | Minimum |
|
|
68
|
+
|---|---|
|
|
69
|
+
| React Native | 0.76+ (New Architecture required) |
|
|
70
|
+
| Android | API 26 (8.0) |
|
|
71
|
+
| Device RAM | 8 GB+ recommended |
|
|
72
|
+
| Disk Space | ~3.5 GB (for model file) |
|
|
73
|
+
| llama.rn | 0.12.0-rc.3+ |
|
|
74
|
+
|
|
75
|
+
## Installation
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
npm install react-native-gemma-agent
|
|
79
|
+
|
|
80
|
+
# Peer dependencies
|
|
81
|
+
npm install llama.rn react-native-fs react-native-webview
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
**Android Setup:**
|
|
85
|
+
|
|
86
|
+
Add to `android/app/src/main/AndroidManifest.xml`:
|
|
87
|
+
```xml
|
|
88
|
+
<application android:largeHeap="true" ...>
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## API Reference
|
|
92
|
+
|
|
93
|
+
### GemmaAgentProvider
|
|
94
|
+
|
|
95
|
+
Wrap your app to initialize the SDK. Creates all internal instances and renders the hidden WebView sandbox for JS skill execution.
|
|
96
|
+
|
|
97
|
+
```tsx
|
|
98
|
+
<GemmaAgentProvider
|
|
99
|
+
model={{ repoId: string, filename: string, expectedSize?: number }}
|
|
100
|
+
skills={SkillManifest[]} // Skills to register on mount
|
|
101
|
+
systemPrompt={string} // Base system prompt
|
|
102
|
+
engineConfig={InferenceEngineConfig} // Optional engine tuning
|
|
103
|
+
agentConfig={AgentConfig} // Optional agent config
|
|
104
|
+
>
|
|
105
|
+
{children}
|
|
106
|
+
</GemmaAgentProvider>
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### useGemmaAgent()
|
|
110
|
+
|
|
111
|
+
Main hook for chat interactions. Returns everything you need to build a chat UI.
|
|
112
|
+
|
|
113
|
+
```tsx
|
|
114
|
+
const {
|
|
115
|
+
sendMessage, // (text: string, onEvent?) => Promise<string>
|
|
116
|
+
messages, // ReadonlyArray<Message> - conversation history
|
|
117
|
+
streamingText, // string - tokens streamed so far (live typing effect)
|
|
118
|
+
isProcessing, // boolean - is the agent thinking/executing?
|
|
119
|
+
isModelLoaded, // boolean - model loaded and ready?
|
|
120
|
+
modelStatus, // ModelStatus - lifecycle state
|
|
121
|
+
activeSkill, // string | null - skill currently executing
|
|
122
|
+
error, // string | null - last error
|
|
123
|
+
contextUsage, // { used, total, percent } - context window consumption
|
|
124
|
+
loadModel, // (onProgress?) => Promise<number> - returns load time ms
|
|
125
|
+
unloadModel, // () => Promise<void>
|
|
126
|
+
reset, // () => void - clear conversation history
|
|
127
|
+
} = useGemmaAgent();
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
**How it works under the hood:**
|
|
131
|
+
1. You call `sendMessage("What is 234 * 567?")`
|
|
132
|
+
2. SDK sends the message to Gemma 4 running on-device
|
|
133
|
+
3. Model decides to call the `calculator` skill with `{expression: "234 * 567"}`
|
|
134
|
+
4. SDK executes the skill, gets the result `"132678"`
|
|
135
|
+
5. SDK feeds the result back to the model
|
|
136
|
+
6. Model generates a natural response: "234 * 567 equals 132,678"
|
|
137
|
+
7. `messages` updates with the full conversation, `sendMessage` returns the response text
|
|
138
|
+
|
|
139
|
+
During this process, `streamingText` updates token-by-token, `activeSkill` shows which skill is running, and `isProcessing` stays true until done.
|
|
140
|
+
|
|
141
|
+
### useModelDownload()
|
|
142
|
+
|
|
143
|
+
Hook for model download management. The model is ~3.1 GB and needs to be downloaded once.
|
|
144
|
+
|
|
145
|
+
```tsx
|
|
146
|
+
const {
|
|
147
|
+
download, // () => Promise<string> - returns file path
|
|
148
|
+
cancelDownload, // () => void
|
|
149
|
+
checkModel, // () => Promise<boolean> - is model on device?
|
|
150
|
+
setModelPath, // (path: string) => Promise<void> - custom path
|
|
151
|
+
deleteModel, // () => Promise<void>
|
|
152
|
+
progress, // DownloadProgress | null - { bytesDownloaded, totalBytes, percent }
|
|
153
|
+
status, // ModelStatus
|
|
154
|
+
checkStorage, // () => Promise<{ available, required, sufficient }>
|
|
155
|
+
} = useModelDownload();
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
Downloads support **resume** — if the app is killed mid-download, calling `download()` again continues from where it left off.
|
|
159
|
+
|
|
160
|
+
### useSkillRegistry()
|
|
161
|
+
|
|
162
|
+
Hook for dynamic skill management at runtime.
|
|
163
|
+
|
|
164
|
+
```tsx
|
|
165
|
+
const {
|
|
166
|
+
registerSkill, // (skill: SkillManifest) => void
|
|
167
|
+
unregisterSkill, // (name: string) => void
|
|
168
|
+
skills, // SkillManifest[] - currently registered skills
|
|
169
|
+
hasSkill, // (name: string) => boolean
|
|
170
|
+
clear, // () => void - remove all skills
|
|
171
|
+
} = useSkillRegistry();
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## Creating Custom Skills
|
|
175
|
+
|
|
176
|
+
The SDK supports two skill types: **native** (runs in React Native context) and **js** (runs in a sandboxed WebView).
|
|
177
|
+
|
|
178
|
+
### Native Skill (runs in RN context, fully offline)
|
|
179
|
+
|
|
180
|
+
Native skills have **full access to everything React Native can access** — GPS, camera, calendar, health data, file system, Bluetooth, etc. Use these when your skill needs device APIs.
|
|
181
|
+
|
|
182
|
+
```typescript
|
|
183
|
+
import type { SkillManifest } from 'react-native-gemma-agent';
|
|
184
|
+
|
|
185
|
+
const locationSkill: SkillManifest = {
|
|
186
|
+
name: 'get_current_location',
|
|
187
|
+
description: 'Get the user GPS coordinates and city name',
|
|
188
|
+
version: '1.0.0',
|
|
189
|
+
type: 'native',
|
|
190
|
+
requiresNetwork: false,
|
|
191
|
+
parameters: {
|
|
192
|
+
accuracy: { type: 'string', description: 'high or low accuracy', enum: ['high', 'low'] },
|
|
193
|
+
},
|
|
194
|
+
execute: async (params) => {
|
|
195
|
+
// Use any React Native library here
|
|
196
|
+
// e.g., react-native-geolocation, expo-location, etc.
|
|
197
|
+
const pos = await getCurrentPosition(params.accuracy);
|
|
198
|
+
return { result: JSON.stringify({ lat: pos.lat, lng: pos.lng, city: pos.city }) };
|
|
199
|
+
},
|
|
200
|
+
};
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
**Use cases for native skills:**
|
|
204
|
+
- **Travel app**: GPS location -> find nearby attractions
|
|
205
|
+
- **Fitness app**: HealthKit/Google Fit data -> AI coaching
|
|
206
|
+
- **Calendar app**: Calendar events -> AI scheduling
|
|
207
|
+
- **Photo app**: Camera Roll access -> AI-powered organization
|
|
208
|
+
- **Smart home**: Bluetooth/WiFi device control -> voice commands
|
|
209
|
+
|
|
210
|
+
The model calls these skills naturally when the user's question matches the skill description. You just register them — the SDK handles everything else.
|
|
211
|
+
|
|
212
|
+
### JS Skill (runs in sandboxed WebView, can use fetch)
|
|
213
|
+
|
|
214
|
+
JS skills run in an isolated WebView — they can make HTTP requests but can't access device APIs. Use these for web-based data fetching.
|
|
215
|
+
|
|
216
|
+
```typescript
|
|
217
|
+
const weatherSkill: SkillManifest = {
|
|
218
|
+
name: 'get_weather',
|
|
219
|
+
description: 'Get current weather for a location',
|
|
220
|
+
version: '1.0.0',
|
|
221
|
+
type: 'js',
|
|
222
|
+
requiresNetwork: true,
|
|
223
|
+
parameters: {
|
|
224
|
+
location: { type: 'string', description: 'City name' },
|
|
225
|
+
},
|
|
226
|
+
requiredParameters: ['location'],
|
|
227
|
+
html: `<!DOCTYPE html>
|
|
228
|
+
<html><body><script>
|
|
229
|
+
window['ai_edge_gallery_get_result'] = async function(jsonData) {
|
|
230
|
+
const params = JSON.parse(jsonData);
|
|
231
|
+
const res = await fetch('https://wttr.in/' + params.location + '?format=j1');
|
|
232
|
+
const data = await res.json();
|
|
233
|
+
return JSON.stringify({
|
|
234
|
+
result: data.current_condition[0].weatherDesc[0].value +
|
|
235
|
+
', ' + data.current_condition[0].temp_C + ' C'
|
|
236
|
+
});
|
|
237
|
+
};
|
|
238
|
+
</script></body></html>`,
|
|
239
|
+
};
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
### SkillManifest Reference
|
|
243
|
+
|
|
244
|
+
```typescript
|
|
245
|
+
type SkillManifest = {
|
|
246
|
+
name: string; // Unique identifier (used in tool calls)
|
|
247
|
+
description: string; // What it does (model reads this to decide when to use it)
|
|
248
|
+
version: string;
|
|
249
|
+
type: 'native' | 'js';
|
|
250
|
+
requiresNetwork?: boolean; // SDK checks connectivity before execution
|
|
251
|
+
parameters: Record<string, SkillParameter>;
|
|
252
|
+
requiredParameters?: string[];
|
|
253
|
+
html?: string; // Required for 'js' skills
|
|
254
|
+
execute?: (params) => Promise<SkillResult>; // Required for 'native' skills
|
|
255
|
+
instructions?: string; // Extra instructions for the model
|
|
256
|
+
};
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
## Built-in Demo Skills
|
|
260
|
+
|
|
261
|
+
| Skill | Type | Network | Description |
|
|
262
|
+
|---|---|---|---|
|
|
263
|
+
| `calculatorSkill` | native | No | Evaluate math expressions (fully offline) |
|
|
264
|
+
| `queryWikipediaSkill` | js | Yes | Search and summarize Wikipedia articles |
|
|
265
|
+
| `webSearchSkill` | js | Yes | Web search via SearXNG |
|
|
266
|
+
| `deviceLocationSkill` | native | No | GPS location with offline city lookup (60 cities) |
|
|
267
|
+
| `readCalendarSkill` | native | No | Read device calendar events for any day |
|
|
268
|
+
|
|
269
|
+
```typescript
|
|
270
|
+
// Core skills (no extra dependencies)
|
|
271
|
+
import { calculatorSkill, queryWikipediaSkill, webSearchSkill } from 'react-native-gemma-agent/skills';
|
|
272
|
+
|
|
273
|
+
// Device skills (require additional packages)
|
|
274
|
+
import { deviceLocationSkill } from 'react-native-gemma-agent/skills/deviceLocation';
|
|
275
|
+
// requires: @react-native-community/geolocation
|
|
276
|
+
|
|
277
|
+
import { readCalendarSkill } from 'react-native-gemma-agent/skills/readCalendar';
|
|
278
|
+
// requires: react-native-calendar-events
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
## BM25 Skill Routing (Opt-in)
|
|
282
|
+
|
|
283
|
+
When you have more than ~10 skills, sending all tool definitions to the model on every query wastes context tokens and reduces accuracy. The SDK includes an opt-in **BM25 pre-filter** that scores skills against the user's query and only sends the top-N most relevant ones.
|
|
284
|
+
|
|
285
|
+
```tsx
|
|
286
|
+
<GemmaAgentProvider
|
|
287
|
+
agentConfig={{
|
|
288
|
+
skillRouting: 'bm25', // 'all' (default) or 'bm25'
|
|
289
|
+
maxToolsPerInvocation: 5, // Only with 'bm25'. Default: 5
|
|
290
|
+
}}
|
|
291
|
+
>
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
| Mode | Behavior | Best for |
|
|
295
|
+
|---|---|---|
|
|
296
|
+
| `'all'` (default) | All registered skills sent every time | <10 skills |
|
|
297
|
+
| `'bm25'` | Top-N skills selected per query using BM25 scoring | 10+ skills |
|
|
298
|
+
|
|
299
|
+
BM25 is a standard information retrieval algorithm (term frequency + inverse document frequency). It runs in <1ms, uses negligible extra memory, and needs no ML model.
|
|
300
|
+
|
|
301
|
+
## How the Agent Loop Works
|
|
302
|
+
|
|
303
|
+
```
|
|
304
|
+
User: "What is the population of Tokyo?"
|
|
305
|
+
|
|
|
306
|
+
v
|
|
307
|
+
[Gemma 4 on-device inference]
|
|
308
|
+
|
|
|
309
|
+
v
|
|
310
|
+
Model outputs tool_call: query_wikipedia({ query: "Tokyo population" })
|
|
311
|
+
|
|
|
312
|
+
v
|
|
313
|
+
[SkillSandbox executes Wikipedia skill in hidden WebView]
|
|
314
|
+
|
|
|
315
|
+
v
|
|
316
|
+
Skill returns: "Tokyo has a population of approximately 14 million"
|
|
317
|
+
|
|
|
318
|
+
v
|
|
319
|
+
[Model re-invoked with skill result in context]
|
|
320
|
+
|
|
|
321
|
+
v
|
|
322
|
+
"The population of Tokyo is approximately 14 million people."
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
The agent can chain multiple skills in sequence (max depth configurable, default 5). For example: "Look up Tokyo's population on Wikipedia, then calculate 15% of it" calls Wikipedia first, then calculator.
|
|
326
|
+
|
|
327
|
+
## Architecture
|
|
328
|
+
|
|
329
|
+
```
|
|
330
|
+
GemmaAgentProvider
|
|
331
|
+
|-- ModelManager (download, store, locate GGUF models)
|
|
332
|
+
|-- InferenceEngine (llama.rn wrapper, streaming, tool call passthrough)
|
|
333
|
+
|-- SkillRegistry (register/manage skills, convert to OpenAI tool format)
|
|
334
|
+
|-- AgentOrchestrator (agent loop: infer -> tool call -> skill exec -> re-invoke)
|
|
335
|
+
|-- SkillSandbox (hidden WebView for JS skill execution)
|
|
336
|
+
|-- BM25Scorer (opt-in skill pre-filtering by query relevance)
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
## Configuration
|
|
340
|
+
|
|
341
|
+
### InferenceEngineConfig
|
|
342
|
+
|
|
343
|
+
Control model loading and inference behavior.
|
|
344
|
+
|
|
345
|
+
```typescript
|
|
346
|
+
{
|
|
347
|
+
contextSize: 4096, // Context window in tokens (default: 4096, max: 128K)
|
|
348
|
+
batchSize: 512, // Batch size for prompt processing
|
|
349
|
+
threads: 4, // CPU threads for inference
|
|
350
|
+
flashAttn: 'auto', // Flash attention: 'auto' | 'on' | 'off'
|
|
351
|
+
useMlock: true, // Lock model in memory (prevents swapping)
|
|
352
|
+
gpuLayers: -1, // GPU layers to offload (-1 = all available)
|
|
353
|
+
}
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
### AgentConfig
|
|
357
|
+
|
|
358
|
+
Control agent behavior.
|
|
359
|
+
|
|
360
|
+
```typescript
|
|
361
|
+
{
|
|
362
|
+
maxChainDepth: 5, // Max sequential skill calls per message
|
|
363
|
+
skillTimeout: 30000, // Timeout per skill execution (ms)
|
|
364
|
+
systemPrompt: '...', // Base system prompt
|
|
365
|
+
skillRouting: 'all', // 'all' or 'bm25'
|
|
366
|
+
maxToolsPerInvocation: 5, // Top-N skills per query (bm25 only)
|
|
367
|
+
}
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
## Context Window & Memory
|
|
371
|
+
|
|
372
|
+
The model's "memory" is its **context window** — a rolling buffer of the current conversation. Understanding this is key to building good experiences:
|
|
373
|
+
|
|
374
|
+
| Setting | Default | Range | Tradeoff |
|
|
375
|
+
|---|---|---|---|
|
|
376
|
+
| `contextSize` | 4096 tokens | 2048 - 131072 | More context = more RAM + slower prompt eval |
|
|
377
|
+
|
|
378
|
+
**Practical limits at 4096 tokens (~3000 words):**
|
|
379
|
+
- ~15-20 back-and-forth exchanges before the oldest messages get pushed out
|
|
380
|
+
- Each registered skill costs ~50-100 tokens (tool definitions in prompt)
|
|
381
|
+
- With 3 skills: ~200 tokens used, ~3900 left for conversation
|
|
382
|
+
- With 10 skills: ~700 tokens used, ~3400 left
|
|
383
|
+
- With 30 skills: ~2100 tokens used, only ~2000 left for conversation
|
|
384
|
+
|
|
385
|
+
**No persistent memory**: The model only remembers the current conversation. It does not remember across app restarts. If you need cross-session memory, build a native skill that persists notes to device storage and injects them into the system prompt.
|
|
386
|
+
|
|
387
|
+
**Increasing context**: You can set `contextSize: 8192` or higher; Gemma 4 E2B supports up to 128K tokens. However, more context means more RAM usage and slower prompt processing. On a phone with 8 GB RAM, 4096-8192 is the sweet spot.
|
|
388
|
+
|
|
389
|
+
## Model Setup
|
|
390
|
+
|
|
391
|
+
**Option A: Push via ADB (development)**
|
|
392
|
+
```bash
|
|
393
|
+
# Download model
|
|
394
|
+
huggingface-cli download unsloth/gemma-4-E2B-it-GGUF \
|
|
395
|
+
gemma-4-E2B-it-Q4_K_M.gguf --local-dir ./models
|
|
396
|
+
|
|
397
|
+
# Push to device
|
|
398
|
+
adb push ./models/gemma-4-E2B-it-Q4_K_M.gguf /data/local/tmp/
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
**Option B: In-app download**
|
|
402
|
+
```tsx
|
|
403
|
+
const { download, progress, checkStorage } = useModelDownload();
|
|
404
|
+
|
|
405
|
+
// Check storage first
|
|
406
|
+
const storage = await checkStorage();
|
|
407
|
+
if (!storage.sufficient) {
|
|
408
|
+
alert(`Need ${storage.required} bytes, only ${storage.available} available`);
|
|
409
|
+
return;
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
// Download with progress
|
|
413
|
+
await download();
|
|
414
|
+
// progress.percent updates 0-100
|
|
415
|
+
```
|
|
416
|
+
|
|
417
|
+
## Performance
|
|
418
|
+
|
|
419
|
+
Tested on Medium Phone API 36 emulator (CPU-only, 8 GB RAM):
|
|
420
|
+
|
|
421
|
+
| Metric | Value |
|
|
422
|
+
|---|---|
|
|
423
|
+
| Model | Gemma 4 E2B Q4_K_M (3.09 GB, 4.6B params) |
|
|
424
|
+
| Cold load | 6.7s |
|
|
425
|
+
| Warm load | 2.2s |
|
|
426
|
+
| Generation speed | 30.0 tok/s (CPU-only) |
|
|
427
|
+
| Prompt eval | 60.2 tok/s |
|
|
428
|
+
|
|
429
|
+
Physical devices with GPU offloading (Snapdragon 8 Elite, Dimensity 9300, etc.) should see **60-120+ tok/s** generation speed.
|
|
430
|
+
|
|
431
|
+
## Supported Models
|
|
432
|
+
|
|
433
|
+
Currently tested with:
|
|
434
|
+
- **Gemma 4 E2B-it Q4_K_M** (3.09 GB) — recommended
|
|
435
|
+
- **Gemma 4 E2B-it Q3_K_M** (~2.3 GB) — for lower-RAM devices (6GB)
|
|
436
|
+
|
|
437
|
+
Any GGUF model compatible with llama.rn should work, but function calling (tool use) has only been tested with Gemma 4.
|
|
438
|
+
|
|
439
|
+
## Roadmap
|
|
440
|
+
|
|
441
|
+
- [x] Context usage monitoring API (`contextUsage` in `useGemmaAgent()`)
|
|
442
|
+
- [x] BM25 skill routing (opt-in pre-filter)
|
|
443
|
+
- [x] Network awareness (`requiresNetwork` flag on skills)
|
|
444
|
+
- [x] GPS and calendar device skills
|
|
445
|
+
- [ ] On-device knowledge base skill (persistent local notes)
|
|
446
|
+
- [ ] Skill categories for grouping and selective loading
|
|
447
|
+
- [ ] Semantic vector routing (embedding-based tool selection, 97%+ accuracy)
|
|
448
|
+
- [ ] iOS support
|
|
449
|
+
- [ ] TurboQuant KV cache (6x longer conversations)
|
|
450
|
+
- [ ] Multimodal vision skills (camera input)
|
|
451
|
+
- [ ] Audio input (Gemma 4 supports audio)
|
|
452
|
+
- [ ] Skill marketplace
|
|
453
|
+
- [ ] Expo plugin
|
|
454
|
+
|
|
455
|
+
## License
|
|
456
|
+
|
|
457
|
+
MIT
|
package/package.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "react-native-gemma-agent",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "React Native SDK for on-device AI agents powered by Google Gemma 4",
|
|
5
|
+
"main": "lib/commonjs/index.js",
|
|
6
|
+
"module": "lib/module/index.js",
|
|
7
|
+
"types": "lib/typescript/index.d.ts",
|
|
8
|
+
"react-native": "src/index.ts",
|
|
9
|
+
"source": "src/index.ts",
|
|
10
|
+
"files": [
|
|
11
|
+
"src",
|
|
12
|
+
"lib",
|
|
13
|
+
"skills",
|
|
14
|
+
"!**/__tests__"
|
|
15
|
+
],
|
|
16
|
+
"scripts": {
|
|
17
|
+
"build": "tsc --project tsconfig.build.json",
|
|
18
|
+
"lint": "eslint src/",
|
|
19
|
+
"typecheck": "tsc --noEmit",
|
|
20
|
+
"test": "jest"
|
|
21
|
+
},
|
|
22
|
+
"keywords": [
|
|
23
|
+
"react-native",
|
|
24
|
+
"gemma",
|
|
25
|
+
"on-device-ai",
|
|
26
|
+
"llm",
|
|
27
|
+
"agent",
|
|
28
|
+
"skills",
|
|
29
|
+
"llama.rn"
|
|
30
|
+
],
|
|
31
|
+
"repository": {
|
|
32
|
+
"type": "git",
|
|
33
|
+
"url": "https://github.com/shashankg-dev404/react-native-gemma-agent"
|
|
34
|
+
},
|
|
35
|
+
"author": "Shashank Gupta",
|
|
36
|
+
"license": "MIT",
|
|
37
|
+
"peerDependencies": {
|
|
38
|
+
"llama.rn": ">=0.12.0-rc.3",
|
|
39
|
+
"react": ">=18.0.0",
|
|
40
|
+
"react-native": ">=0.76.0",
|
|
41
|
+
"react-native-fs": ">=2.20.0",
|
|
42
|
+
"react-native-webview": ">=13.0.0"
|
|
43
|
+
},
|
|
44
|
+
"devDependencies": {
|
|
45
|
+
"@types/jest": "^30.0.0",
|
|
46
|
+
"@types/react": "^19.2.14",
|
|
47
|
+
"@types/react-native": "^0.72.8",
|
|
48
|
+
"jest": "^30.3.0",
|
|
49
|
+
"ts-jest": "^29.4.9",
|
|
50
|
+
"typescript": "^5.4.0"
|
|
51
|
+
}
|
|
52
|
+
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import type { SkillManifest } from '../src/types';
|
|
2
|
+
|
|
3
|
+
/**
 * Built-in native skill that evaluates arithmetic expressions on-device.
 *
 * Accepted input consists solely of digits, whitespace, decimal points,
 * parentheses and the operators `+ - * / % ^`. `^` is rewritten to `**`
 * (exponentiation) and `%` is evaluated as JavaScript's remainder operator,
 * not as "percent". Anything else is rejected before evaluation.
 *
 * `execute` never throws: it resolves to `{ result }` containing the numeric
 * answer as a string, or to `{ error }` with a human-readable message.
 */
export const calculatorSkill: SkillManifest = {
  name: 'calculator',
  description: 'Evaluate mathematical expressions accurately.',
  version: '1.0.0',
  type: 'native',
  requiresNetwork: false,
  parameters: {
    expression: {
      type: 'string',
      description: 'Mathematical expression to evaluate (e.g. "2 + 3 * 4")',
    },
  },
  requiredParameters: ['expression'],
  instructions:
    'Use this when the user asks for calculations, math, unit conversions, or percentages. Pass the expression as a string.',
  execute: async (params) => {
    try {
      const raw = String(params.expression ?? '');

      // Reject blank input up front; otherwise a whitespace-only string passes
      // the whitelist below and surfaces as an opaque parser error.
      if (raw.trim() === '') {
        return { error: 'Invalid expression: expression is empty.' };
      }

      // Replace ^ with ** for exponentiation
      const expr = raw.replace(/\^/g, '**');

      // Only allow safe characters: digits, operators, parens, decimal, spaces
      if (!/^[\d\s+\-*/().%]+$/.test(expr)) {
        return {
          error:
            'Invalid expression: only numbers and basic operators (+, -, *, /, ^, %, parentheses) are allowed.',
        };
      }

      // SECURITY: `new Function` executes model-supplied text. The whitelist
      // above is the only barrier (no letters, quotes or brackets can reach
      // this point), so never loosen that regex without replacing this with a
      // real expression parser.
      // eslint-disable-next-line no-new-func
      const value: unknown = new Function(`"use strict"; return (${expr});`)();

      if (typeof value !== 'number' || !Number.isFinite(value)) {
        return { error: 'Expression did not produce a valid number.' };
      }

      // Strip binary floating-point noise (0.1 + 0.2 -> 0.3). Integers are left
      // untouched so values above 15 significant digits are not truncated.
      const cleaned = Number.isInteger(value)
        ? value
        : Number(value.toPrecision(15));

      return { result: String(cleaned) };
    } catch (err) {
      // Syntax errors from malformed (but whitelisted) input land here.
      return {
        error: `Calculator error: ${err instanceof Error ? err.message : 'Invalid expression'}`,
      };
    }
  },
};
|