expo-ai-kit 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +67 -5
- package/build/index.d.ts +99 -1
- package/build/index.d.ts.map +1 -1
- package/build/index.js +279 -0
- package/build/index.js.map +1 -1
- package/build/structured.d.ts +36 -0
- package/build/structured.d.ts.map +1 -0
- package/build/structured.js +190 -0
- package/build/structured.js.map +1 -0
- package/build/tools.d.ts +41 -0
- package/build/tools.d.ts.map +1 -0
- package/build/tools.js +86 -0
- package/build/tools.js.map +1 -0
- package/build/types.d.ts +173 -0
- package/build/types.d.ts.map +1 -1
- package/build/types.js.map +1 -1
- package/package.json +7 -2
- package/src/index.ts +347 -0
- package/src/structured.ts +202 -0
- package/src/tools.ts +126 -0
- package/src/types.ts +202 -0
package/README.md
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
# expo-ai-kit
|
|
2
2
|
|
|
3
|
-
On-device AI for Expo & React Native — run LLMs locally
|
|
3
|
+
On-device AI for Expo & React Native — run LLMs locally. No API keys, no cloud, no cost.
|
|
4
4
|
|
|
5
5
|
[npm package](https://www.npmjs.com/package/expo-ai-kit)
|
|
6
6
|
[MIT License](https://opensource.org/licenses/MIT)
|
|
7
7
|
|
|
8
8
|
Runs **Apple Foundation Models** (iOS 26+), **ML Kit** (Android), and downloadable
|
|
9
9
|
**Gemma 4** (E2B / E4B, iOS + Android via [LiteRT-LM](https://ai.google.dev/edge/litert-lm))
|
|
10
|
-
— with streaming, cancellation, and runtime model switching,
|
|
10
|
+
— with streaming, structured output, tool calling, cancellation, and runtime model switching,
|
|
11
|
+
all on-device.
|
|
11
12
|
|
|
12
13
|
## Install
|
|
13
14
|
|
|
@@ -17,7 +18,7 @@ npx expo install expo-ai-kit
|
|
|
17
18
|
|
|
18
19
|
Bare RN: run `npx pod-install`. Android needs `minSdkVersion 26`. Requires Expo SDK 54+.
|
|
19
20
|
|
|
20
|
-
##
|
|
21
|
+
## Text
|
|
21
22
|
|
|
22
23
|
```tsx
|
|
23
24
|
import { isAvailable, sendMessage, streamMessage } from 'expo-ai-kit';
|
|
@@ -36,6 +37,67 @@ if (await isAvailable()) {
|
|
|
36
37
|
`messages` is `{ role: 'system' | 'user' | 'assistant'; content: string }[]`. On-device
|
|
37
38
|
models are stateless — pass the full history each call.
|
|
38
39
|
|
|
40
|
+
## Structured output
|
|
41
|
+
|
|
42
|
+
Get a typed object back instead of a string. Pass a JSON Schema; expo-ai-kit prompts the
|
|
43
|
+
model, extracts the JSON (tolerating prose and code fences), validates it against the
|
|
44
|
+
schema, and repairs on a mismatch. Works on every backend.
|
|
45
|
+
|
|
46
|
+
```tsx
|
|
47
|
+
import { generateObject } from 'expo-ai-kit';
|
|
48
|
+
|
|
49
|
+
type Recipe = { title: string; minutes: number; ingredients: string[] };
|
|
50
|
+
|
|
51
|
+
const { object } = await generateObject<Recipe>(
|
|
52
|
+
[{ role: 'user', content: 'A quick weeknight pasta.' }],
|
|
53
|
+
{
|
|
54
|
+
type: 'object',
|
|
55
|
+
properties: {
|
|
56
|
+
title: { type: 'string' },
|
|
57
|
+
minutes: { type: 'integer' },
|
|
58
|
+
ingredients: { type: 'array', items: { type: 'string' } },
|
|
59
|
+
},
|
|
60
|
+
required: ['title', 'minutes', 'ingredients'],
|
|
61
|
+
},
|
|
62
|
+
);
|
|
63
|
+
|
|
64
|
+
object.title; // typed Recipe
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Throws `INFERENCE_FAILED` if the model can't produce schema-valid JSON after the repair
|
|
68
|
+
attempts (`maxRepairAttempts`, default 2). Keep schemas small and shallow for best results.
|
|
69
|
+
|
|
70
|
+
## Tool calling
|
|
71
|
+
|
|
72
|
+
Let the model call functions you provide. It proposes a call, expo-ai-kit validates the
|
|
73
|
+
arguments against the tool's schema, runs your `execute`, feeds the result back, and loops
|
|
74
|
+
until it produces an answer (bounded by `maxSteps`, default 5). Works on every backend.
|
|
75
|
+
|
|
76
|
+
```tsx
|
|
77
|
+
import { generateText } from 'expo-ai-kit';
|
|
78
|
+
|
|
79
|
+
const { text } = await generateText(
|
|
80
|
+
[{ role: 'user', content: 'What should I wear in Paris today?' }],
|
|
81
|
+
{
|
|
82
|
+
tools: {
|
|
83
|
+
getWeather: {
|
|
84
|
+
description: 'Get the current weather for a city.',
|
|
85
|
+
parameters: {
|
|
86
|
+
type: 'object',
|
|
87
|
+
properties: { city: { type: 'string' } },
|
|
88
|
+
required: ['city'],
|
|
89
|
+
},
|
|
90
|
+
execute: async ({ city }: { city: string }) => fetchWeather(city),
|
|
91
|
+
},
|
|
92
|
+
},
|
|
93
|
+
},
|
|
94
|
+
);
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Omit a tool's `execute` to gate it yourself: the loop stops with `finishReason: 'tool-calls'`
|
|
98
|
+
and hands you the proposed call to confirm before running. Keep tool sets small and parameter
|
|
99
|
+
schemas flat — on-device models pick tools more reliably that way.
|
|
100
|
+
|
|
39
101
|
## Downloadable Gemma 4
|
|
40
102
|
|
|
41
103
|
```tsx
|
|
@@ -45,13 +107,13 @@ const best = await getRecommendedModel(); // E4B on high-RAM phones, el
|
|
|
45
107
|
if (best) {
|
|
46
108
|
await downloadModel(best.id, { onProgress: (p) => console.log(p) });
|
|
47
109
|
await setModel(best.id, { generation: { temperature: 0.7 } });
|
|
48
|
-
// sendMessage / streamMessage now use it; unloadModel() reverts to the OS model
|
|
110
|
+
// sendMessage / streamMessage / generateObject / generateText now use it; unloadModel() reverts to the OS model
|
|
49
111
|
}
|
|
50
112
|
```
|
|
51
113
|
|
|
52
114
|
## API
|
|
53
115
|
|
|
54
|
-
Inference: `isAvailable`, `sendMessage`, `streamMessage`.
|
|
116
|
+
Inference: `isAvailable`, `sendMessage`, `streamMessage`, `generateObject`, `generateText`.
|
|
55
117
|
Models: `getBuiltInModels`, `getDownloadableModels`, `getRecommendedModel`,
|
|
56
118
|
`downloadModel`, `cancelDownload`, `deleteModel`, `setModel`, `unloadModel`, `getActiveModel`.
|
|
57
119
|
|
package/build/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { LLMMessage, LLMSendOptions, LLMResponse, LLMStreamOptions, LLMStreamCallback, LLMStreamHandle, BuiltInModel, DownloadableModel, SetModelOptions } from './types';
|
|
1
|
+
import { LLMMessage, LLMSendOptions, LLMResponse, LLMStreamOptions, LLMStreamCallback, LLMStreamHandle, BuiltInModel, DownloadableModel, SetModelOptions, JSONSchema, GenerateObjectOptions, GenerateObjectResult, GenerateTextOptions, GenerateTextResult } from './types';
|
|
2
2
|
export * from './types';
|
|
3
3
|
export * from './models';
|
|
4
4
|
/**
|
|
@@ -76,6 +76,104 @@ export declare function sendMessage(messages: LLMMessage[], options?: LLMSendOpt
|
|
|
76
76
|
* ```
|
|
77
77
|
*/
|
|
78
78
|
export declare function streamMessage(messages: LLMMessage[], onToken: LLMStreamCallback, options?: LLMStreamOptions): LLMStreamHandle;
|
|
79
|
+
/**
|
|
80
|
+
* Generate a typed object instead of free text.
|
|
81
|
+
*
|
|
82
|
+
* You describe the shape you want with a JSON Schema. expo-ai-kit appends a
|
|
83
|
+
* strict instruction to the system prompt, runs the on-device model, extracts
|
|
84
|
+
* the JSON from its output (tolerating prose and ```json fences), validates it
|
|
85
|
+
* against the schema, and — on a parse error or schema mismatch — feeds the
|
|
86
|
+
* error back and re-prompts up to `maxRepairAttempts` times.
|
|
87
|
+
*
|
|
88
|
+
* Works on every backend (Apple Foundation Models, ML Kit, Gemma) because it is
|
|
89
|
+
* orchestrated over {@link sendMessage}: it honors the same single-flight guard,
|
|
90
|
+
* `AbortSignal`, and `systemPrompt` semantics. Keep schemas small and shallow —
|
|
91
|
+
* on-device models follow flat shapes far more reliably than deeply nested ones.
|
|
92
|
+
*
|
|
93
|
+
* @param messages - The conversation, same shape as {@link sendMessage}.
|
|
94
|
+
* @param schema - A JSON Schema describing the desired result.
|
|
95
|
+
* @param options - Optional settings (systemPrompt, signal, maxRepairAttempts).
|
|
96
|
+
* @returns `{ object, text }` — the validated value and the raw output.
|
|
97
|
+
* @throws {ModelError} INFERENCE_FAILED if no schema-valid JSON is produced
|
|
98
|
+
* after the repair attempts. Also propagates INFERENCE_BUSY / INFERENCE_CANCELLED
|
|
99
|
+
* from the underlying generation.
|
|
100
|
+
*
|
|
101
|
+
* @example
|
|
102
|
+
* ```ts
|
|
103
|
+
* type Recipe = { title: string; minutes: number; ingredients: string[] };
|
|
104
|
+
*
|
|
105
|
+
* const { object } = await generateObject<Recipe>(
|
|
106
|
+
* [{ role: 'user', content: 'A quick weeknight pasta.' }],
|
|
107
|
+
* {
|
|
108
|
+
* type: 'object',
|
|
109
|
+
* properties: {
|
|
110
|
+
* title: { type: 'string' },
|
|
111
|
+
* minutes: { type: 'integer' },
|
|
112
|
+
* ingredients: { type: 'array', items: { type: 'string' } },
|
|
113
|
+
* },
|
|
114
|
+
* required: ['title', 'minutes', 'ingredients'],
|
|
115
|
+
* },
|
|
116
|
+
* );
|
|
117
|
+
* object.title; // typed Recipe
|
|
118
|
+
* ```
|
|
119
|
+
*/
|
|
120
|
+
export declare function generateObject<T = unknown>(messages: LLMMessage[], schema: JSONSchema, options?: GenerateObjectOptions): Promise<GenerateObjectResult<T>>;
|
|
121
|
+
/**
|
|
122
|
+
* Generate text, optionally letting the model call tools (functions) you provide.
|
|
123
|
+
*
|
|
124
|
+
* Unlike {@link generateObject} (where the JSON *is* the answer), tool calling is
|
|
125
|
+
* a loop: the model proposes a call, expo-ai-kit validates the arguments against
|
|
126
|
+
* the tool's `parameters`, runs your `execute`, feeds the result back, and lets
|
|
127
|
+
* the model continue — until it produces a plain-text answer or the `maxSteps`
|
|
128
|
+
* budget is reached. With no `tools`, this is a single text generation.
|
|
129
|
+
*
|
|
130
|
+
* Orchestrated in JS over {@link sendMessage}, so it works on every backend
|
|
131
|
+
* (Apple Foundation Models, ML Kit, Gemma) and inherits the single-flight guard,
|
|
132
|
+
* `AbortSignal`, and `systemPrompt` semantics. On-device models are imperfect at
|
|
133
|
+
* tool selection, so the loop is defensive: malformed calls, unknown tool names,
|
|
134
|
+
* and schema-invalid arguments are re-prompted up to `maxRepairAttempts` times,
|
|
135
|
+
* and a tool with no `execute` stops the loop and returns the proposed call for
|
|
136
|
+
* you to gate. Keep tool sets small and `parameters` flat for best reliability.
|
|
137
|
+
*
|
|
138
|
+
* @param messages - The conversation, same shape as {@link sendMessage}.
|
|
139
|
+
* @param options - Tools, `maxSteps`, `systemPrompt`, `signal`, `maxRepairAttempts`.
|
|
140
|
+
* @returns `{ text, steps, toolCalls, toolResults, finishReason }`.
|
|
141
|
+
* @throws {ModelError} INFERENCE_FAILED if the model keeps proposing an unknown
|
|
142
|
+
* tool or schema-invalid arguments after the repair attempts. Also propagates
|
|
143
|
+
* INFERENCE_BUSY / INFERENCE_CANCELLED from the underlying generation.
|
|
144
|
+
*
|
|
145
|
+
* @example
|
|
146
|
+
* ```ts
|
|
147
|
+
* const { text } = await generateText(
|
|
148
|
+
* [{ role: 'user', content: 'What should I wear in Paris today?' }],
|
|
149
|
+
* {
|
|
150
|
+
* tools: {
|
|
151
|
+
* getWeather: {
|
|
152
|
+
* description: 'Get the current weather for a city.',
|
|
153
|
+
* parameters: {
|
|
154
|
+
* type: 'object',
|
|
155
|
+
* properties: { city: { type: 'string' } },
|
|
156
|
+
* required: ['city'],
|
|
157
|
+
* },
|
|
158
|
+
* execute: async ({ city }: { city: string }) => fetchWeather(city),
|
|
159
|
+
* },
|
|
160
|
+
* },
|
|
161
|
+
* },
|
|
162
|
+
* );
|
|
163
|
+
* ```
|
|
164
|
+
*
|
|
165
|
+
* @example
|
|
166
|
+
* ```ts
|
|
167
|
+
* // Human-in-the-loop: omit `execute` to gate the call yourself.
|
|
168
|
+
* const res = await generateText(messages, {
|
|
169
|
+
* tools: { deleteAccount: { description: '…', parameters: { type: 'object' } } },
|
|
170
|
+
* });
|
|
171
|
+
* if (res.finishReason === 'tool-calls') {
|
|
172
|
+
* const call = res.toolCalls[0]; // confirm with the user before running
|
|
173
|
+
* }
|
|
174
|
+
* ```
|
|
175
|
+
*/
|
|
176
|
+
export declare function generateText(messages: LLMMessage[], options?: GenerateTextOptions): Promise<GenerateTextResult>;
|
|
79
177
|
/**
|
|
80
178
|
* Get all built-in models available on the current platform.
|
|
81
179
|
*
|
package/build/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAEA,OAAO,EACL,UAAU,EACV,cAAc,EACd,WAAW,EACX,gBAAgB,EAEhB,iBAAiB,EACjB,eAAe,EACf,YAAY,EACZ,iBAAiB,EAIjB,eAAe,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAEA,OAAO,EACL,UAAU,EACV,cAAc,EACd,WAAW,EACX,gBAAgB,EAEhB,iBAAiB,EACjB,eAAe,EACf,YAAY,EACZ,iBAAiB,EAIjB,eAAe,EACf,UAAU,EACV,qBAAqB,EACrB,oBAAoB,EACpB,mBAAmB,EACnB,kBAAkB,EAInB,MAAM,SAAS,CAAC;AAiBjB,cAAc,SAAS,CAAC;AACxB,cAAc,UAAU,CAAC;AA0HzB;;;GAGG;AACH,wBAAsB,WAAW,IAAI,OAAO,CAAC,OAAO,CAAC,CAKpD;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AACH,wBAAsB,WAAW,CAC/B,QAAQ,EAAE,UAAU,EAAE,EACtB,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,WAAW,CAAC,CAmEtB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,wBAAgB,aAAa,CAC3B,QAAQ,EAAE,UAAU,EAAE,EACtB,OAAO,EAAE,iBAAiB,EAC1B,OAAO,CAAC,EAAE,gBAAgB,GACzB,eAAe,CAwFjB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAwCG;AACH,wBAAsB,cAAc,CAAC,CAAC,GAAG,OAAO,EAC9C,QAAQ,EAAE,UAAU,EAAE,EACtB,MAAM,EAAE,UAAU,EAClB,OAAO,CAAC,EAAE,qBAAqB,GAC9B,OAAO,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,CAoElC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsDG;AACH,wBAAsB,YAAY,CAChC,QAAQ,EAAE,UAAU,EAAE,EACtB,OAAO,CAAC,EAAE,mBAAmB,GAC5B,OAAO,CAAC,kBAAkB,CAAC,CAmJ7B;AAMD;;;;;;;GAOG;AACH,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC,CAKhE;AAED;;;;;;;GAOG;AACH,wBAAsB,qBAAqB,IAAI,OAAO,CAAC,iBAAiB,EAAE,CAAC,CAiC1E;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAsB,mBAAmB,IAAI,OAAO,CAAC,iBAAiB,GAAG,IAAI,CAAC,CAM7E;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,aAAa,CACjC,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE;IAAE,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAA;CAAE,GACpD,OAAO,CAAC,IAAI,CAAC,CA+Cf;AAED;;;;;;;GAOG;AACH,wBAAsB,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAKnE;AAED;;;;;;;GAOG;AACH,wBAAsB,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAOhE;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAsB,QAAQ,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC,CAQxF;AAED;;;;GAIG;AACH,wBAAgB,cAAc,IAAI,MAAM,CAEvC;AAED;;;;;GAKG;AACH,wBAAsB,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC,CAEjD"}
|
package/build/index.js
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
import ExpoAiKitModule from './ExpoAiKitModule';
|
|
2
2
|
import { Platform } from 'react-native';
|
|
3
3
|
import { ModelError, } from './types';
|
|
4
|
+
import { buildSchemaInstruction, buildSchemaRepair, extractJson, validateAgainstSchema, REPAIR_INVALID_JSON, } from './structured';
|
|
5
|
+
import { buildToolInstruction, parseToolCall, buildUnknownToolRepair, buildToolArgsRepair, formatToolResult, } from './tools';
|
|
4
6
|
import { MODEL_REGISTRY, getRegistryEntry } from './models';
|
|
5
7
|
export * from './types';
|
|
6
8
|
export * from './models';
|
|
7
9
|
const DEFAULT_SYSTEM_PROMPT = 'You are a helpful, friendly assistant. Answer the user directly and concisely.';
|
|
10
|
+
const DEFAULT_OBJECT_SYSTEM_PROMPT = 'You output structured data as JSON. Follow the provided JSON Schema exactly.';
|
|
8
11
|
let streamIdCounter = 0;
|
|
9
12
|
function generateSessionId() {
|
|
10
13
|
return `gen_${Date.now()}_${++streamIdCounter}`;
|
|
@@ -318,6 +321,282 @@ export function streamMessage(messages, onToken, options) {
|
|
|
318
321
|
};
|
|
319
322
|
return { promise, stop };
|
|
320
323
|
}
|
|
324
|
+
/**
 * Generate a typed object instead of free text.
 *
 * You describe the shape you want with a JSON Schema. expo-ai-kit appends a
 * strict instruction to the system prompt, runs the on-device model, extracts
 * the JSON from its output (tolerating prose and ```json fences), validates it
 * against the schema, and — on a parse error or schema mismatch — feeds the
 * error back and re-prompts up to `maxRepairAttempts` times.
 *
 * Works on every backend (Apple Foundation Models, ML Kit, Gemma) because it is
 * orchestrated over {@link sendMessage}: it honors the same single-flight guard,
 * `AbortSignal`, and `systemPrompt` semantics. Keep schemas small and shallow —
 * on-device models follow flat shapes far more reliably than deeply nested ones.
 *
 * System prompt handling: if `messages` already contains a system message, the
 * schema instruction is appended to that message and `options.systemPrompt` is
 * not used. Otherwise the instruction is appended to `options.systemPrompt`
 * (or a built-in default) and passed to {@link sendMessage} on every attempt.
 *
 * @param messages - The conversation, same shape as {@link sendMessage}. Must be non-empty.
 * @param schema - A JSON Schema describing the desired result.
 * @param options - Optional settings (systemPrompt, signal, maxRepairAttempts).
 *   `maxRepairAttempts` defaults to 2, is clamped to a minimum of 0, and falls
 *   back to the default when it is not a number (NaN).
 * @returns `{ object, text }` — the validated value and the raw output.
 * @throws {ModelError} DEVICE_NOT_SUPPORTED when not running on iOS or Android.
 * @throws {Error} If `messages` is empty or `schema` is not an object.
 * @throws {ModelError} INFERENCE_FAILED if no schema-valid JSON is produced
 *   after the repair attempts. Also propagates INFERENCE_BUSY / INFERENCE_CANCELLED
 *   from the underlying generation.
 *
 * @example
 * ```ts
 * type Recipe = { title: string; minutes: number; ingredients: string[] };
 *
 * const { object } = await generateObject<Recipe>(
 *   [{ role: 'user', content: 'A quick weeknight pasta.' }],
 *   {
 *     type: 'object',
 *     properties: {
 *       title: { type: 'string' },
 *       minutes: { type: 'integer' },
 *       ingredients: { type: 'array', items: { type: 'string' } },
 *     },
 *     required: ['title', 'minutes', 'ingredients'],
 *   },
 * );
 * object.title; // typed Recipe
 * ```
 */
export async function generateObject(messages, schema, options) {
    if (Platform.OS !== 'ios' && Platform.OS !== 'android') {
        throw new ModelError('DEVICE_NOT_SUPPORTED', '', 'generateObject is only available on iOS and Android');
    }
    if (!messages || messages.length === 0) {
        throw new Error('messages array cannot be empty');
    }
    if (!schema || typeof schema !== 'object') {
        throw new Error('schema must be a JSON Schema object');
    }
    // Math.max(0, NaN) is NaN, and `attempt <= NaN` is always false: a NaN
    // option would fail with INFERENCE_FAILED without ever calling the model.
    // Fall back to the default of 2 in that case.
    const requestedRepairs = Number(options?.maxRepairAttempts ?? 2);
    const maxRepairAttempts = Number.isNaN(requestedRepairs) ? 2 : Math.max(0, requestedRepairs);
    const instruction = buildSchemaInstruction(schema);
    // Inject the schema instruction. If the caller supplied a system message we
    // append to it (sendMessage reads system from the array); otherwise we carry
    // the instruction via the systemPrompt option, which sendMessage applies when
    // the array has no system message — including on the repair turns we append.
    const sysIdx = messages.findIndex((m) => m.role === 'system');
    let working;
    let systemPrompt;
    if (sysIdx >= 0) {
        working = messages.map((m, i) => i === sysIdx ? { role: m.role, content: `${m.content}\n\n${instruction}` } : m);
        systemPrompt = undefined; // the array carries the system message
    }
    else {
        working = [...messages];
        systemPrompt = `${options?.systemPrompt ?? DEFAULT_OBJECT_SYSTEM_PROMPT}\n\n${instruction}`;
    }
    let lastText = '';
    for (let attempt = 0; attempt <= maxRepairAttempts; attempt++) {
        const { text } = await sendMessage(working, { systemPrompt, signal: options?.signal });
        lastText = text;
        const parsed = extractJson(text);
        // `null` means "no JSON could be extracted"; an array (possibly empty)
        // holds the schema validation errors for the extracted value.
        const errors = parsed.ok ? validateAgainstSchema(parsed.value, schema) : null;
        if (errors !== null && errors.length === 0) {
            return { object: parsed.value, text };
        }
        // Only append a repair turn when another attempt will actually run.
        if (attempt < maxRepairAttempts) {
            working = [
                ...working,
                { role: 'assistant', content: text },
                { role: 'user', content: errors === null ? REPAIR_INVALID_JSON : buildSchemaRepair(errors) },
            ];
        }
    }
    throw new ModelError('INFERENCE_FAILED', getActiveModel(), `generateObject: model did not return schema-valid JSON after ${maxRepairAttempts + 1} attempt(s). ` +
        `Last output: ${lastText.slice(0, 200)}`);
}
|
|
421
|
+
/**
 * Generate text, optionally letting the model call tools (functions) you provide.
 *
 * Unlike {@link generateObject} (where the JSON *is* the answer), tool calling is
 * a loop: the model proposes a call, expo-ai-kit validates the arguments against
 * the tool's `parameters`, runs your `execute`, feeds the result back, and lets
 * the model continue — until it produces a plain-text answer or the `maxSteps`
 * budget is reached. With no `tools`, this is a single text generation.
 *
 * Orchestrated in JS over {@link sendMessage}, so it works on every backend
 * (Apple Foundation Models, ML Kit, Gemma) and inherits the single-flight guard,
 * `AbortSignal`, and `systemPrompt` semantics. On-device models are imperfect at
 * tool selection, so the loop is defensive: malformed calls, unknown tool names,
 * and schema-invalid arguments are re-prompted up to `maxRepairAttempts` times,
 * and a tool with no `execute` stops the loop and returns the proposed call for
 * you to gate. Keep tool sets small and `parameters` flat for best reliability.
 *
 * An error thrown by a tool's `execute` is not rethrown: it is recorded as
 * `{ error: message }` in the tool result and fed back to the model.
 *
 * @param messages - The conversation, same shape as {@link sendMessage}. Must be non-empty.
 * @param options - Tools, `maxSteps`, `systemPrompt`, `signal`, `maxRepairAttempts`.
 *   `maxSteps` defaults to 5 (minimum 1) and `maxRepairAttempts` defaults to 2
 *   (minimum 0); either falls back to its default when it is not a number (NaN).
 *   The repair budget applies per step.
 * @returns `{ text, steps, toolCalls, toolResults, finishReason }`. `finishReason`
 *   is `'stop'` for a plain-text answer, `'tool-calls'` when a tool without
 *   `execute` was proposed, and `'max-steps'` (with empty `text`) when the step
 *   budget ran out while the model was still calling tools.
 * @throws {ModelError} DEVICE_NOT_SUPPORTED when not running on iOS or Android.
 * @throws {Error} If `messages` is empty.
 * @throws {ModelError} INFERENCE_FAILED if the model keeps proposing an unknown
 *   tool or schema-invalid arguments after the repair attempts. Also propagates
 *   INFERENCE_BUSY / INFERENCE_CANCELLED from the underlying generation.
 *
 * @example
 * ```ts
 * const { text } = await generateText(
 *   [{ role: 'user', content: 'What should I wear in Paris today?' }],
 *   {
 *     tools: {
 *       getWeather: {
 *         description: 'Get the current weather for a city.',
 *         parameters: {
 *           type: 'object',
 *           properties: { city: { type: 'string' } },
 *           required: ['city'],
 *         },
 *         execute: async ({ city }: { city: string }) => fetchWeather(city),
 *       },
 *     },
 *   },
 * );
 * ```
 *
 * @example
 * ```ts
 * // Human-in-the-loop: omit `execute` to gate the call yourself.
 * const res = await generateText(messages, {
 *   tools: { deleteAccount: { description: '…', parameters: { type: 'object' } } },
 * });
 * if (res.finishReason === 'tool-calls') {
 *   const call = res.toolCalls[0]; // confirm with the user before running
 * }
 * ```
 */
export async function generateText(messages, options) {
    if (Platform.OS !== 'ios' && Platform.OS !== 'android') {
        throw new ModelError('DEVICE_NOT_SUPPORTED', '', 'generateText is only available on iOS and Android');
    }
    if (!messages || messages.length === 0) {
        throw new Error('messages array cannot be empty');
    }
    const tools = options?.tools ?? {};
    const toolNames = Object.keys(tools);
    // Math.max(x, NaN) is NaN. A NaN maxSteps would skip the loop entirely
    // (`step < NaN` is false) and a NaN maxRepairAttempts would make the repair
    // budget check (`repair >= NaN`) never fire, leaving the inner loop
    // unbounded. Fall back to the defaults instead.
    const requestedSteps = Number(options?.maxSteps ?? 5);
    const maxSteps = Number.isNaN(requestedSteps) ? 5 : Math.max(1, requestedSteps);
    const requestedRepairs = Number(options?.maxRepairAttempts ?? 2);
    const maxRepairAttempts = Number.isNaN(requestedRepairs) ? 2 : Math.max(0, requestedRepairs);
    // Inject the tool instruction the same way generateObject injects its schema
    // instruction: into the array's system message if present, else via the
    // systemPrompt option. With no tools, this is a plain single-shot generation.
    const instruction = toolNames.length > 0 ? buildToolInstruction(tools) : '';
    const sysIdx = messages.findIndex((m) => m.role === 'system');
    let working;
    let systemPrompt;
    if (instruction === '') {
        working = [...messages];
        systemPrompt = options?.systemPrompt;
    }
    else if (sysIdx >= 0) {
        working = messages.map((m, i) => i === sysIdx ? { role: m.role, content: `${m.content}\n\n${instruction}` } : m);
        systemPrompt = undefined; // the array carries the system message
    }
    else {
        working = [...messages];
        systemPrompt = `${options?.systemPrompt ?? DEFAULT_SYSTEM_PROMPT}\n\n${instruction}`;
    }
    // Append the model's turn plus our follow-up (repair prompt or tool result)
    // to the working conversation without mutating the previous array.
    const appendExchange = (assistantText, userText) => {
        working = [
            ...working,
            { role: 'assistant', content: assistantText },
            { role: 'user', content: userText },
        ];
    };
    const steps = [];
    const allToolCalls = [];
    const allToolResults = [];
    for (let step = 0; step < maxSteps; step++) {
        // One model round-trip, with an inner repair loop for malformed/invalid calls.
        let call = null;
        let text = '';
        for (let repair = 0;; repair++) {
            const r = await sendMessage(working, { systemPrompt, signal: options?.signal });
            text = r.text;
            if (toolNames.length === 0)
                break; // no tools → this is the final answer
            const parsed = parseToolCall(text, toolNames);
            if (parsed.kind === 'text')
                break; // plain answer, no tool call
            if (parsed.kind === 'unknown-tool') {
                if (repair >= maxRepairAttempts) {
                    throw new ModelError('INFERENCE_FAILED', getActiveModel(), `generateText: model called unknown tool "${parsed.toolName}" after ${maxRepairAttempts + 1} attempt(s).`);
                }
                appendExchange(text, buildUnknownToolRepair(parsed.toolName, toolNames));
                continue;
            }
            // parsed.kind === 'tool' — validate the proposed args before executing.
            const errors = validateAgainstSchema(parsed.args, tools[parsed.toolName].parameters);
            if (errors.length === 0) {
                call = { toolName: parsed.toolName, args: parsed.args };
                break;
            }
            if (repair >= maxRepairAttempts) {
                throw new ModelError('INFERENCE_FAILED', getActiveModel(), `generateText: arguments for "${parsed.toolName}" failed schema validation after ` +
                    `${maxRepairAttempts + 1} attempt(s): ${errors.slice(0, 4).join('; ')}`);
            }
            appendExchange(text, buildToolArgsRepair(parsed.toolName, errors));
        }
        // No tool call this step → the model produced its final text answer.
        if (!call) {
            steps.push({ text, toolCalls: [], toolResults: [] });
            return {
                text,
                steps,
                toolCalls: allToolCalls,
                toolResults: allToolResults,
                finishReason: 'stop',
            };
        }
        allToolCalls.push(call);
        const tool = tools[call.toolName];
        // No execute → hand the proposed call back to the caller (human-in-the-loop).
        if (typeof tool.execute !== 'function') {
            steps.push({ text, toolCalls: [call], toolResults: [] });
            return {
                text,
                steps,
                toolCalls: allToolCalls,
                toolResults: allToolResults,
                finishReason: 'tool-calls',
            };
        }
        // Run the tool. A thrown error is fed back as the result so the model can recover.
        let result;
        try {
            result = await tool.execute(call.args);
        }
        catch (e) {
            result = { error: String(e?.message ?? e) };
        }
        const toolResult = { toolName: call.toolName, args: call.args, result };
        allToolResults.push(toolResult);
        steps.push({ text, toolCalls: [call], toolResults: [toolResult] });
        // Feed the call + result back into the conversation for the next step.
        appendExchange(text, formatToolResult(call.toolName, result));
    }
    // Step budget exhausted while still calling tools — no final answer was
    // produced. Signal it via finishReason so the caller can raise maxSteps.
    return {
        text: '',
        steps,
        toolCalls: allToolCalls,
        toolResults: allToolResults,
        finishReason: 'max-steps',
    };
}
|
|
321
600
|
// ============================================================================
|
|
322
601
|
// Model Management API
|
|
323
602
|
// ============================================================================
|