@trymirai/uzu 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +207 -27
- package/package.json +3 -3
- package/uzu.d.ts +17 -7
- package/uzu.node +0 -0
package/README.md
CHANGED
|
@@ -1,38 +1,218 @@
|
|
|
1
|
-
|
|
1
|
+
<p align="center">
|
|
2
|
+
<picture>
|
|
3
|
+
<img alt="Mirai" src="https://artifacts.trymirai.com/social/github/uzu-typescript.jpg" style="max-width: 100%;">
|
|
4
|
+
</picture>
|
|
5
|
+
</p>
|
|
2
6
|
|
|
3
|
-
|
|
7
|
+
<a href="https://artifacts.trymirai.com/social/about_us.mp3"><img src="https://img.shields.io/badge/Listen-Podcast-red" alt="Listen to our podcast"></a>
|
|
8
|
+
<a href="https://docsend.com/v/76bpr/mirai2025"><img src="https://img.shields.io/badge/View-Deck-red" alt="View our deck"></a>
|
|
9
|
+
<a href="mailto:alexey@getmirai.co,dima@getmirai.co,aleksei@getmirai.co?subject=Interested%20in%20Mirai"><img src="https://img.shields.io/badge/Send-Email-green" alt="Contact us"></a>
|
|
10
|
+
<a href="https://docs.trymirai.com/components/inference-engine"><img src="https://img.shields.io/badge/Read-Docs-blue" alt="Read docs"></a>
|
|
11
|
+
[License](LICENSE)
|
|
4
12
|
|
|
5
|
-
|
|
6
|
-
## Prerequisites
|
|
13
|
+
# uzu-ts
|
|
7
14
|
|
|
8
|
-
|
|
9
|
-
```bash
|
|
10
|
-
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
|
11
|
-
```
|
|
12
|
-
2. **Node.js**
|
|
13
|
-
```bash
|
|
14
|
-
brew install node
|
|
15
|
-
```
|
|
16
|
-
3. **pnpm** package manager –
|
|
17
|
-
```bash
|
|
18
|
-
brew install pnpm
|
|
19
|
-
```
|
|
15
|
+
Node package for [uzu](https://github.com/trymirai/uzu), a **high-performance** inference engine for AI models on Apple Silicon. It allows you to deploy AI directly in your app with **zero latency**, **full data privacy**, and **no inference costs**. You don’t need an ML team or weeks of setup - one developer can handle everything in minutes. Key features:
|
|
20
16
|
|
|
21
|
-
|
|
17
|
+
- Simple, high-level API
|
|
18
|
+
- Specialized configurations with significant performance boosts for common use cases like classification and summarization
|
|
19
|
+
- [Broad model support](https://trymirai.com/models)
|
|
20
|
+
- Observable model manager
|
|
22
21
|
|
|
23
|
-
|
|
24
|
-
## Getting started
|
|
22
|
+
## Quick Start
|
|
25
23
|
|
|
26
24
|
```bash
|
|
27
|
-
# 1 / Install dependencies
|
|
28
|
-
cd bindings/ts
|
|
29
|
-
pnpm install
|
|
30
|
-
|
|
31
|
-
# 2 / Build the native addon in release mode
|
|
32
|
-
pnpm run build
|
|
33
|
-
|
|
34
|
-
# 3 / Set your API key in `examples/api_key.ts`, then run the examples
|
|
25
|
+
# Set your API key in `examples/api_key.ts`, then run the examples
|
|
35
26
|
pnpm run chat
|
|
36
27
|
pnpm run summarize
|
|
37
28
|
pnpm run classify
|
|
38
29
|
```
|
|
30
|
+
|
|
31
|
+
### Setup
|
|
32
|
+
|
|
33
|
+
Add the `uzu-ts` dependency to your project (for example, `pnpm add @trymirai/uzu`):
|
|
34
|
+
|
|
35
|
+
Set up your project via [Platform](https://platform.trymirai.com), obtain an `API_KEY`, and initialize the engine:
|
|
36
|
+
|
|
37
|
+
```ts
|
|
38
|
+
import { Engine } from './uzu'
|
|
39
|
+
|
|
40
|
+
const engine = new Engine()
|
|
41
|
+
const licenseStatus = await engine.activate('API_KEY')
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Refresh the model registry / list cloud models
|
|
45
|
+
|
|
46
|
+
```ts
|
|
47
|
+
const registry = await engine.updateRegistry()
|
|
48
|
+
const modelIdentifiers = registry.map((m) => m.identifier)
|
|
49
|
+
|
|
50
|
+
// To explore available cloud models:
|
|
51
|
+
const cloudModels = await engine.getCloudModels()
|
|
52
|
+
console.log('Cloud models:', cloudModels)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Download with progress handle
|
|
56
|
+
|
|
57
|
+
```ts
|
|
58
|
+
const modelIdentifier = 'Meta-Llama-3.2-1B-Instruct-bfloat16'
|
|
59
|
+
|
|
60
|
+
const handle = engine.downloadHandle(modelIdentifier)
|
|
61
|
+
handle.start()
|
|
62
|
+
|
|
63
|
+
for await (const downloadProgress of handle.progress()) {
|
|
64
|
+
console.log(`Progress: ${Math.round(downloadProgress.progress * 100)}%`)
|
|
65
|
+
}
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Alternatively, you may use the engine to control and observe the model download:
|
|
69
|
+
|
|
70
|
+
```ts
|
|
71
|
+
engine.download(modelIdentifier)
|
|
72
|
+
engine.pause(modelIdentifier)
|
|
73
|
+
engine.resume(modelIdentifier)
|
|
74
|
+
engine.delete(modelIdentifier)
|
|
75
|
+
|
|
76
|
+
// ... later you can query state
|
|
77
|
+
const state = engine.getState(modelIdentifier)
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Possible model state values:
|
|
81
|
+
|
|
82
|
+
- `.notDownloaded`
|
|
83
|
+
- `.downloading(progress: Double)`
|
|
84
|
+
- `.paused(progress: Double)`
|
|
85
|
+
- `.downloaded`
|
|
86
|
+
- `.error(message: String)`
|
|
87
|
+
|
|
88
|
+
### Session
|
|
89
|
+
|
|
90
|
+
`Session` is the core entity used to communicate with the model:
|
|
91
|
+
|
|
92
|
+
```ts
|
|
93
|
+
import { type ModelID } from './uzu'
|
|
94
|
+
|
|
95
|
+
const modelIdentifier = 'Meta-Llama-3.2-1B-Instruct-bfloat16'
|
|
96
|
+
|
|
97
|
+
// Choose one of the two options below by commenting/uncommenting:
|
|
98
|
+
// 1) Local model (default)
|
|
99
|
+
// const modelId: ModelID = { type: 'Local', id: modelIdentifier }
|
|
100
|
+
|
|
101
|
+
// 2) Cloud model (uncomment and set your repoId; you can list with engine.getCloudModels())
|
|
102
|
+
const cloudRepoId = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct'
|
|
103
|
+
const modelId: ModelID = { type: 'Cloud', id: cloudRepoId }
|
|
104
|
+
|
|
105
|
+
const session = engine.createSession(modelId)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
`Session` offers different configuration presets that can provide significant performance boosts for common use cases like classification and summarization:
|
|
109
|
+
|
|
110
|
+
```ts
|
|
111
|
+
import { type SessionConfig } from './uzu'
|
|
112
|
+
|
|
113
|
+
const config: SessionConfig = {
|
|
114
|
+
preset: { type: 'General' },
|
|
115
|
+
samplingSeed: { type: 'Default' },
|
|
116
|
+
contextLength: { type: 'Default' },
|
|
117
|
+
}
|
|
118
|
+
session.load(config)
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Once loaded, the same `Session` can be reused for multiple requests until you drop it. Each model may consume a significant amount of RAM, so it's important to keep only one session loaded at a time.
|
|
122
|
+
|
|
123
|
+
### Inference
|
|
124
|
+
|
|
125
|
+
After loading, you can run the `Session` with a specific prompt or a list of messages:
|
|
126
|
+
|
|
127
|
+
```ts
|
|
128
|
+
import { SessionMessageRole, type SessionInput } from './uzu'
|
|
129
|
+
|
|
130
|
+
const input: SessionInput = {
|
|
131
|
+
type: 'Messages',
|
|
132
|
+
messages: [
|
|
133
|
+
{ role: SessionMessageRole.System, content: 'You are a helpful assistant' },
|
|
134
|
+
{ role: SessionMessageRole.User, content: 'Tell about London' },
|
|
135
|
+
],
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
const output = session.run(
|
|
139
|
+
input,
|
|
140
|
+
{ tokensLimit: 128, samplingConfig: { type: 'Argmax' } },
|
|
141
|
+
(partialOutput) => {
|
|
142
|
+
// Access the current text using partialOutput.text
|
|
143
|
+
return true // Return true to continue generation
|
|
144
|
+
},
|
|
145
|
+
)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
`SessionOutput` also includes generation metrics such as prefill duration and tokens per second. It’s important to note that you should run a **release** build to obtain accurate metrics.
|
|
149
|
+
|
|
150
|
+
### Presets
|
|
151
|
+
|
|
152
|
+
#### Summarization
|
|
153
|
+
|
|
154
|
+
In this example, we will extract a summary of the input text:
|
|
155
|
+
|
|
156
|
+
```ts
|
|
157
|
+
import { type SessionConfig, type SessionInput } from './uzu'
|
|
158
|
+
|
|
159
|
+
const textToSummarize =
|
|
160
|
+
'A Large Language Model (LLM) is a type of artificial intelligence that processes and generates human-like text. It is trained on vast datasets containing books, articles, and web content, allowing it to understand and predict language patterns. LLMs use deep learning, particularly transformer-based architectures, to analyze text, recognize context, and generate coherent responses. These models have a wide range of applications, including chatbots, content creation, translation, and code generation. One of the key strengths of LLMs is their ability to generate contextually relevant text based on prompts. They utilize self-attention mechanisms to weigh the importance of words within a sentence, improving accuracy and fluency. Examples of popular LLMs include OpenAI's GPT series, Google's BERT, and Meta's LLaMA. As these models grow in size and sophistication, they continue to enhance human-computer interactions, making AI-powered communication more natural and effective.'
|
|
161
|
+
const text = `Text is: "${textToSummarize}". Write only summary itself.`
|
|
162
|
+
|
|
163
|
+
const config: SessionConfig = {
|
|
164
|
+
preset: { type: 'Summarization' },
|
|
165
|
+
samplingSeed: { type: 'Default' },
|
|
166
|
+
contextLength: { type: 'Default' },
|
|
167
|
+
}
|
|
168
|
+
session.load(config)
|
|
169
|
+
|
|
170
|
+
const input: SessionInput = { type: 'Text', text }
|
|
171
|
+
|
|
172
|
+
const output = session.run(
|
|
173
|
+
input,
|
|
174
|
+
{ tokensLimit: 1024, samplingConfig: { type: 'Argmax' } },
|
|
175
|
+
() => true,
|
|
176
|
+
)
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
This will generate 34 output tokens with only 5 model runs during the generation phase, instead of 34 runs.
|
|
180
|
+
|
|
181
|
+
#### Classification
|
|
182
|
+
|
|
183
|
+
Let’s look at a case where you need to classify input text based on a specific feature, such as `sentiment`:
|
|
184
|
+
|
|
185
|
+
```ts
|
|
186
|
+
import { type SessionClassificationFeature, type SessionConfig, type SessionInput } from './uzu'
|
|
187
|
+
|
|
188
|
+
const feature: SessionClassificationFeature = {
|
|
189
|
+
name: 'sentiment',
|
|
190
|
+
values: ['Happy', 'Sad', 'Angry', 'Fearful', 'Surprised', 'Disgusted'],
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
const textToDetectFeature = "Today's been awesome! Everything just feels right, and I can't stop smiling."
|
|
194
|
+
const text = `Text is: "${textToDetectFeature}". Choose ${feature.name} from the list: ${feature.values.join(
|
|
195
|
+
', ',
|
|
196
|
+
)}. Answer with one word. Don't add a dot at the end.`
|
|
197
|
+
|
|
198
|
+
const config: SessionConfig = {
|
|
199
|
+
preset: { type: 'Classification', feature },
|
|
200
|
+
samplingSeed: { type: 'Default' },
|
|
201
|
+
contextLength: { type: 'Default' },
|
|
202
|
+
}
|
|
203
|
+
session.load(config)
|
|
204
|
+
|
|
205
|
+
const input: SessionInput = { type: 'Text', text }
|
|
206
|
+
|
|
207
|
+
const output = session.run(
|
|
208
|
+
input,
|
|
209
|
+
{ tokensLimit: 32, samplingConfig: { type: 'Argmax' } },
|
|
210
|
+
() => true,
|
|
211
|
+
)
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
In this example, you will get the answer `Happy` immediately after the prefill step, and the actual generation won't even start.
|
|
215
|
+
|
|
216
|
+
## License
|
|
217
|
+
|
|
218
|
+
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
|
package/package.json
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@trymirai/uzu",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.4",
|
|
4
4
|
"private": false,
|
|
5
5
|
"main": "uzu.node",
|
|
6
6
|
"types": "uzu.d.ts",
|
|
7
7
|
"packageManager": "pnpm@10.14.0",
|
|
8
8
|
"scripts": {
|
|
9
|
-
"build": "./build.sh",
|
|
10
9
|
"chat": "ts-node examples/chat_example.ts",
|
|
11
10
|
"summarize": "ts-node examples/summarization_example.ts",
|
|
12
|
-
"classify": "ts-node examples/classification_example.ts"
|
|
11
|
+
"classify": "ts-node examples/classification_example.ts",
|
|
12
|
+
"cloud:list": "ts-node examples/list_cloud_models.ts"
|
|
13
13
|
},
|
|
14
14
|
"devDependencies": {
|
|
15
15
|
"@napi-rs/cli": "3.1.2",
|
package/uzu.d.ts
CHANGED
|
@@ -13,19 +13,21 @@ export declare class DownloadHandle {
|
|
|
13
13
|
|
|
14
14
|
/** Thin FFI wrapper around `ModelStorage`. */
|
|
15
15
|
export declare class Engine {
|
|
16
|
+
getCloudModels(): Promise<Array<CloudModel>>
|
|
17
|
+
fetchCloudModels(): Promise<Array<CloudModel>>
|
|
16
18
|
/** Returns a `DownloadHandle` for the given model identifier. */
|
|
17
19
|
downloadHandle(identifier: string): DownloadHandle
|
|
18
|
-
|
|
20
|
+
getLocalModels(): Array<LocalModel>
|
|
19
21
|
getState(identifier: string): ModelDownloadState
|
|
20
22
|
download(identifier: string): void
|
|
21
23
|
pause(identifier: string): void
|
|
22
24
|
resume(identifier: string): void
|
|
23
25
|
stop(identifier: string): void
|
|
24
26
|
delete(identifier: string): void
|
|
25
|
-
updateRegistry(): Promise<Array<
|
|
27
|
+
updateRegistry(): Promise<Array<LocalModel>>
|
|
26
28
|
activate(apiKey: string): Promise<LicenseStatus>
|
|
27
29
|
constructor()
|
|
28
|
-
createSession(modelId:
|
|
30
|
+
createSession(modelId: ModelID): Session
|
|
29
31
|
}
|
|
30
32
|
|
|
31
33
|
export declare class ProgressStream {
|
|
@@ -42,12 +44,16 @@ export declare class ProgressUpdate {
|
|
|
42
44
|
}
|
|
43
45
|
|
|
44
46
|
export declare class Session {
|
|
45
|
-
constructor(modelDir: string)
|
|
46
|
-
/** Loads the model configuration. Must be called before `run`. */
|
|
47
47
|
load(config: SessionConfig): void
|
|
48
48
|
run(input: SessionInput, runConfig: SessionRunConfig, progressCallback?: (arg: SessionOutput) => boolean | undefined | null): SessionOutput
|
|
49
49
|
}
|
|
50
50
|
|
|
51
|
+
export interface CloudModel {
|
|
52
|
+
readonly repoId: string
|
|
53
|
+
readonly name: string
|
|
54
|
+
readonly vendor: string
|
|
55
|
+
}
|
|
56
|
+
|
|
51
57
|
export type ContextLength =
|
|
52
58
|
| { type: 'Default' }
|
|
53
59
|
| { type: 'Custom', length: number }
|
|
@@ -78,7 +84,7 @@ export type LicenseStatus =
|
|
|
78
84
|
| { type: 'Timeout' }
|
|
79
85
|
| { type: 'HttpError', code: number }
|
|
80
86
|
|
|
81
|
-
export interface
|
|
87
|
+
export interface LocalModel {
|
|
82
88
|
/** Unique identifier of the model in the form `<vendor>-<name>-<precision>`. */
|
|
83
89
|
readonly identifier: string
|
|
84
90
|
/** Vendor/author of the model (e.g. "Llama"). */
|
|
@@ -104,6 +110,10 @@ export interface ModelDownloadState {
|
|
|
104
110
|
readonly error?: string
|
|
105
111
|
}
|
|
106
112
|
|
|
113
|
+
export type ModelID =
|
|
114
|
+
| { type: 'Local', id: string }
|
|
115
|
+
| { type: 'Cloud', id: string }
|
|
116
|
+
|
|
107
117
|
export declare const enum Phase {
|
|
108
118
|
NotDownloaded = 0,
|
|
109
119
|
Downloading = 1,
|
|
@@ -187,7 +197,7 @@ export interface SessionOutputTotalStats {
|
|
|
187
197
|
|
|
188
198
|
export type SessionPreset =
|
|
189
199
|
| { type: 'General' }
|
|
190
|
-
| { type: 'Classification',
|
|
200
|
+
| { type: 'Classification', feature: SessionClassificationFeature }
|
|
191
201
|
| { type: 'Summarization' }
|
|
192
202
|
|
|
193
203
|
export interface SessionRunConfig {
|
package/uzu.node
CHANGED
|
Binary file
|