@trymirai/uzu 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +105 -119
- package/README.orig.md +215 -0
- package/README.src.md +146 -0
- package/package.json +9 -6
- package/uzu.node +0 -0
package/README.md
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
<a href="https://docsend.com/v/76bpr/mirai2025"><img src="https://img.shields.io/badge/View-Deck-red" alt="View our deck"></a>
|
|
9
9
|
<a href="mailto:alexey@getmirai.co,dima@getmirai.co,aleksei@getmirai.co?subject=Interested%20in%20Mirai"><img src="https://img.shields.io/badge/Send-Email-green" alt="Contact us"></a>
|
|
10
10
|
<a href="https://docs.trymirai.com/components/inference-engine"><img src="https://img.shields.io/badge/Read-Docs-blue" alt="Read docs"></a>
|
|
11
|
+
[](https://www.npmjs.com/package/@trymirai/uzu)
|
|
11
12
|
[](LICENSE)
|
|
12
13
|
|
|
13
14
|
# uzu-ts
|
|
@@ -19,200 +20,185 @@ Node package for [uzu](https://github.com/trymirai/uzu), a **high-performance**
|
|
|
19
20
|
- [Broad model support](https://trymirai.com/models)
|
|
20
21
|
- Observable model manager
|
|
21
22
|
|
|
22
|
-
##
|
|
23
|
+
## Setup
|
|
23
24
|
|
|
24
|
-
|
|
25
|
-
# Set your API key in `examples/api_key.ts`, then run the examples
|
|
26
|
-
pnpm run chat
|
|
27
|
-
pnpm run summarize
|
|
28
|
-
pnpm run classify
|
|
29
|
-
```
|
|
30
|
-
|
|
31
|
-
### Setup
|
|
25
|
+
Add the `uzu` dependency to your project's `package.json`:
|
|
32
26
|
|
|
33
|
-
|
|
27
|
+
```json
|
|
28
|
+
"dependencies": {
|
|
29
|
+
"@trymirai/uzu": "0.1.5"
|
|
30
|
+
}
|
|
31
|
+
```
|
|
34
32
|
|
|
35
|
-
|
|
33
|
+
Create and activate engine:
|
|
36
34
|
|
|
37
35
|
```ts
|
|
38
|
-
import { Engine } from './uzu'
|
|
39
|
-
|
|
40
36
|
const engine = new Engine()
|
|
41
|
-
const licenseStatus = await engine.activate(
|
|
37
|
+
const licenseStatus: LicenseStatus = await engine.activate(apiKey)
|
|
42
38
|
```
|
|
43
39
|
|
|
44
40
|
### Refresh models registry / list cloud models:
|
|
45
41
|
|
|
46
42
|
```ts
|
|
47
|
-
const
|
|
48
|
-
const
|
|
49
|
-
|
|
50
|
-
// To explore available cloud models:
|
|
51
|
-
const cloudModels = await engine.getCloudModels()
|
|
52
|
-
console.log('Cloud models:', cloudModels)
|
|
43
|
+
const localModels: LocalModel[] = await engine.updateRegistry()
|
|
44
|
+
const localModelId = 'Meta-Llama-3.2-1B-Instruct-bfloat16'
|
|
53
45
|
```
|
|
54
46
|
|
|
55
47
|
### Download with progress handle
|
|
56
48
|
|
|
57
49
|
```ts
|
|
58
|
-
const
|
|
59
|
-
|
|
60
|
-
const handle = engine.downloadHandle(modelIdentifier)
|
|
61
|
-
handle.start()
|
|
50
|
+
const downloadHandle = engine.downloadHandle(localModelId)
|
|
51
|
+
downloadHandle.start()
|
|
62
52
|
|
|
63
|
-
for await (const
|
|
64
|
-
|
|
53
|
+
for await (const downloadProgress of downloadHandle.progress()) {
|
|
54
|
+
handleProgress(downloadProgress)
|
|
65
55
|
}
|
|
66
56
|
```
|
|
67
57
|
|
|
68
58
|
Alternatively, you may use the engine to control and observe the model download:
|
|
69
59
|
|
|
70
60
|
```ts
|
|
71
|
-
engine.download(
|
|
72
|
-
engine.pause(
|
|
73
|
-
engine.resume(
|
|
74
|
-
engine.
|
|
61
|
+
// engine.download(localModelId)
|
|
62
|
+
// engine.pause(localModelId)
|
|
63
|
+
// engine.resume(localModelId)
|
|
64
|
+
// engine.stop(localModelId)
|
|
65
|
+
// engine.delete(localModelId)
|
|
75
66
|
|
|
76
|
-
|
|
77
|
-
const state = engine.getState(modelIdentifier)
|
|
67
|
+
const modelState: ModelDownloadState = engine.getState(localModelId)
|
|
78
68
|
```
|
|
79
69
|
|
|
80
|
-
Possible model state values:
|
|
81
|
-
|
|
82
|
-
- `.notDownloaded`
|
|
83
|
-
- `.downloading(progress: Double)`
|
|
84
|
-
- `.paused(progress: Double)`
|
|
85
|
-
- `.downloaded`
|
|
86
|
-
- `.error(message: String)`
|
|
87
|
-
|
|
88
70
|
### Session
|
|
89
71
|
|
|
90
72
|
`Session` is the core entity used to communicate with the model:
|
|
91
73
|
|
|
92
74
|
```ts
|
|
93
|
-
import { type ModelID } from './uzu'
|
|
94
|
-
|
|
95
|
-
const modelIdentifier = 'Meta-Llama-3.2-1B-Instruct-bfloat16'
|
|
96
|
-
|
|
97
75
|
// Choose one of the two options below by commenting/uncommenting:
|
|
98
|
-
//
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
// 2) Cloud model (uncomment and set your repoId; you can list with engine.getCloudModels())
|
|
102
|
-
const cloudRepoId = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct'
|
|
103
|
-
const modelId: ModelID = { type: 'Cloud', id: cloudRepoId }
|
|
104
|
-
|
|
105
|
-
const session = engine.createSession(modelId)
|
|
76
|
+
// const modelId: ModelID = { type: 'Cloud', id: cloudRepoId }
|
|
77
|
+
const modelId: ModelID = { type: 'Local', id: localModelId }
|
|
78
|
+
const session: Session = engine.createSession(modelId)
|
|
106
79
|
```
|
|
107
80
|
|
|
108
|
-
|
|
81
|
+
### Chat
|
|
109
82
|
|
|
110
|
-
|
|
111
|
-
import { type SessionConfig } from './uzu'
|
|
83
|
+
Load the `Session` with a chat configuration and run it with a specific prompt or a list of messages:
|
|
112
84
|
|
|
85
|
+
```ts
|
|
113
86
|
const config: SessionConfig = {
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
87
|
+
preset: { type: 'General' },
|
|
88
|
+
samplingSeed: { type: 'Custom', seed: 12345 },
|
|
89
|
+
contextLength: { type: 'Default' },
|
|
117
90
|
}
|
|
118
91
|
session.load(config)
|
|
119
92
|
```
|
|
120
93
|
|
|
121
|
-
Once loaded, the same `Session` can be reused for multiple requests until you drop it. Each model may consume a significant amount of RAM, so it's important to keep only one session loaded at a time.
|
|
122
|
-
|
|
123
|
-
### Inference
|
|
124
|
-
|
|
125
|
-
After loading, you can run the `Session` with a specific prompt or a list of messages:
|
|
126
|
-
|
|
127
94
|
```ts
|
|
128
|
-
import { SessionMessageRole, type SessionInput } from './uzu'
|
|
129
|
-
|
|
130
95
|
const input: SessionInput = {
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
96
|
+
type: 'Messages',
|
|
97
|
+
messages: [
|
|
98
|
+
{
|
|
99
|
+
role: SessionMessageRole.System,
|
|
100
|
+
content: 'You are a helpful assistant.'
|
|
101
|
+
},
|
|
102
|
+
{
|
|
103
|
+
role: SessionMessageRole.User,
|
|
104
|
+
content: 'Tell me a short, funny story about a robot.'
|
|
105
|
+
},
|
|
106
|
+
],
|
|
136
107
|
}
|
|
137
|
-
|
|
138
|
-
const output = session.run(
|
|
139
|
-
input,
|
|
140
|
-
{ tokensLimit: 128, samplingConfig: { type: 'Argmax' } },
|
|
141
|
-
(partialOutput) => {
|
|
142
|
-
// Access the current text using partialOutput.text
|
|
143
|
-
return true // Return true to continue generation
|
|
144
|
-
},
|
|
145
|
-
)
|
|
146
108
|
```
|
|
147
109
|
|
|
148
|
-
|
|
110
|
+
```ts
|
|
111
|
+
const runConfig: SessionRunConfig = {
|
|
112
|
+
tokensLimit: 128,
|
|
113
|
+
samplingConfig: { type: 'Argmax' },
|
|
114
|
+
}
|
|
115
|
+
```
|
|
149
116
|
|
|
150
|
-
|
|
117
|
+
```ts
|
|
118
|
+
const output = session.run(input, runConfig, (partialOutput) => {
|
|
119
|
+
return handlePartialOutput(partialOutput)
|
|
120
|
+
})
|
|
121
|
+
```
|
|
151
122
|
|
|
152
|
-
|
|
123
|
+
### Summarization
|
|
153
124
|
|
|
154
125
|
In this example, we will extract a summary of the input text:
|
|
155
126
|
|
|
156
127
|
```ts
|
|
157
|
-
import { type SessionConfig, type SessionInput } from './uzu'
|
|
158
|
-
|
|
159
|
-
const textToSummarize =
|
|
160
|
-
'A Large Language Model (LLM) is a type of artificial intelligence that processes and generates human-like text. It is trained on vast datasets containing books, articles, and web content, allowing it to understand and predict language patterns. LLMs use deep learning, particularly transformer-based architectures, to analyze text, recognize context, and generate coherent responses. These models have a wide range of applications, including chatbots, content creation, translation, and code generation. One of the key strengths of LLMs is their ability to generate contextually relevant text based on prompts. They utilize self-attention mechanisms to weigh the importance of words within a sentence, improving accuracy and fluency. Examples of popular LLMs include OpenAI's GPT series, Google's BERT, and Meta's LLaMA. As these models grow in size and sophistication, they continue to enhance human-computer interactions, making AI-powered communication more natural and effective.'
|
|
161
|
-
const text = `Text is: "${textToSummarize}". Write only summary itself.`
|
|
162
|
-
|
|
163
128
|
const config: SessionConfig = {
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
129
|
+
preset: { type: 'Summarization' },
|
|
130
|
+
samplingSeed: { type: 'Default' },
|
|
131
|
+
contextLength: { type: 'Default' },
|
|
167
132
|
}
|
|
168
133
|
session.load(config)
|
|
134
|
+
```
|
|
169
135
|
|
|
170
|
-
|
|
136
|
+
```ts
|
|
137
|
+
const input: SessionInput = {
|
|
138
|
+
type: 'Text',
|
|
139
|
+
text: `Text is: "${textToSummarize}". Write only summary itself.`,
|
|
140
|
+
}
|
|
141
|
+
```
|
|
171
142
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
143
|
+
```ts
|
|
144
|
+
const runConfig: SessionRunConfig = {
|
|
145
|
+
tokensLimit: 256,
|
|
146
|
+
samplingConfig: { type: 'Argmax' },
|
|
147
|
+
}
|
|
177
148
|
```
|
|
178
149
|
|
|
179
|
-
|
|
150
|
+
```ts
|
|
151
|
+
const output = session.run(input, runConfig, (partialOutput) => {
|
|
152
|
+
return handlePartialOutput(partialOutput)
|
|
153
|
+
})
|
|
154
|
+
```
|
|
180
155
|
|
|
181
|
-
|
|
156
|
+
### Classification
|
|
182
157
|
|
|
183
158
|
Let’s look at a case where you need to classify input text based on a specific feature, such as `sentiment`:
|
|
184
159
|
|
|
185
160
|
```ts
|
|
186
|
-
import { type SessionClassificationFeature, type SessionConfig, type SessionInput } from './uzu'
|
|
187
|
-
|
|
188
161
|
const feature: SessionClassificationFeature = {
|
|
189
|
-
|
|
190
|
-
|
|
162
|
+
name: 'sentiment',
|
|
163
|
+
values: ['Happy', 'Sad', 'Angry', 'Fearful', 'Surprised', 'Disgusted'],
|
|
191
164
|
}
|
|
165
|
+
```
|
|
192
166
|
|
|
193
|
-
|
|
194
|
-
const text = `Text is: "${textToDetectFeature}". Choose ${feature.name} from the list: ${feature.values.join(
|
|
195
|
-
', ',
|
|
196
|
-
)}. Answer with one word. Don't add a dot at the end.`
|
|
197
|
-
|
|
167
|
+
```ts
|
|
198
168
|
const config: SessionConfig = {
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
169
|
+
preset: { type: 'Classification', feature },
|
|
170
|
+
samplingSeed: { type: 'Default' },
|
|
171
|
+
contextLength: { type: 'Default' },
|
|
202
172
|
}
|
|
203
173
|
session.load(config)
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
```ts
|
|
177
|
+
const textToDetectFeature =
|
|
178
|
+
"Today's been awesome! Everything just feels right, and I can't stop smiling."
|
|
179
|
+
const classificationPrompt =
|
|
180
|
+
`Text is: "${textToDetectFeature}". Choose ${feature.name} from the list: ${feature.values.join(', ')}. ` +
|
|
181
|
+
"Answer with one word. Don't add a dot at the end."
|
|
182
|
+
const input: SessionInput = { type: 'Text', text: classificationPrompt }
|
|
183
|
+
```
|
|
204
184
|
|
|
205
|
-
|
|
185
|
+
```ts
|
|
186
|
+
const runConfig: SessionRunConfig = {
|
|
187
|
+
tokensLimit: 32,
|
|
188
|
+
samplingConfig: { type: 'Argmax' },
|
|
189
|
+
}
|
|
190
|
+
```
|
|
206
191
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
)
|
|
192
|
+
```ts
|
|
193
|
+
const output = session.run(input, runConfig, (partialOutput) => {
|
|
194
|
+
return handlePartialOutput(partialOutput)
|
|
195
|
+
})
|
|
212
196
|
```
|
|
213
197
|
|
|
214
198
|
In this example, you will get the answer `Happy` immediately after the prefill step, and the actual generation won't even start.
|
|
215
199
|
|
|
216
200
|
## License
|
|
217
201
|
|
|
218
|
-
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
|
|
202
|
+
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
|
|
203
|
+
|
|
204
|
+
|
package/README.orig.md
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<picture>
|
|
3
|
+
<img alt="Mirai" src="https://artifacts.trymirai.com/social/github/uzu-typescript.jpg" style="max-width: 100%;">
|
|
4
|
+
</picture>
|
|
5
|
+
</p>
|
|
6
|
+
|
|
7
|
+
<a href="https://artifacts.trymirai.com/social/about_us.mp3"><img src="https://img.shields.io/badge/Listen-Podcast-red" alt="Listen to our podcast"></a>
|
|
8
|
+
<a href="https://docsend.com/v/76bpr/mirai2025"><img src="https://img.shields.io/badge/View-Deck-red" alt="View our deck"></a>
|
|
9
|
+
<a href="mailto:alexey@getmirai.co,dima@getmirai.co,aleksei@getmirai.co?subject=Interested%20in%20Mirai"><img src="https://img.shields.io/badge/Send-Email-green" alt="Contact us"></a>
|
|
10
|
+
<a href="https://docs.trymirai.com/components/inference-engine"><img src="https://img.shields.io/badge/Read-Docs-blue" alt="Read docs"></a>
|
|
11
|
+
[](https://www.npmjs.com/package/@trymirai/uzu)
|
|
12
|
+
[](LICENSE)
|
|
13
|
+
|
|
14
|
+
# uzu-ts
|
|
15
|
+
|
|
16
|
+
Node package for [uzu](https://github.com/trymirai/uzu), a **high-performance** inference engine for AI models on Apple Silicon. It allows you to deploy AI directly in your app with **zero latency**, **full data privacy**, and **no inference costs**. You don’t need an ML team or weeks of setup - one developer can handle everything in minutes. Key features:
|
|
17
|
+
|
|
18
|
+
- Simple, high-level API
|
|
19
|
+
- Specialized configurations with significant performance boosts for common use cases like classification and summarization
|
|
20
|
+
- [Broad model support](https://trymirai.com/models)
|
|
21
|
+
- Observable model manager
|
|
22
|
+
|
|
23
|
+
## Quick Start
|
|
24
|
+
|
|
25
|
+
Set up your project via [Platform](https://platform.trymirai.com) and obtain a `MIRAI_API_KEY`.
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
# Set your API key in `examples/api_key.ts`, then run the examples
|
|
29
|
+
pnpm run chat
|
|
30
|
+
pnpm run summarisation
|
|
31
|
+
pnpm run classification
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Setup
|
|
35
|
+
|
|
36
|
+
Add the `uzu` dependency to your project's `package.json`:
|
|
37
|
+
|
|
38
|
+
```json
|
|
39
|
+
"dependencies": {
|
|
40
|
+
"@trymirai/uzu": "0.1.5"
|
|
41
|
+
}
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Create and activate engine:
|
|
45
|
+
|
|
46
|
+
```ts
|
|
47
|
+
const engine = new Engine()
|
|
48
|
+
const licenseStatus: LicenseStatus = await engine.activate(apiKey)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Refresh models registry / list cloud models:
|
|
52
|
+
|
|
53
|
+
```ts
|
|
54
|
+
const localModels: LocalModel[] = await engine.updateRegistry()
|
|
55
|
+
const localModelId = 'Meta-Llama-3.2-1B-Instruct-bfloat16'
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Download with progress handle
|
|
59
|
+
|
|
60
|
+
```ts
|
|
61
|
+
const downloadHandle = engine.downloadHandle(localModelId)
|
|
62
|
+
downloadHandle.start()
|
|
63
|
+
|
|
64
|
+
for await (const downloadProgress of downloadHandle.progress()) {
|
|
65
|
+
handleProgress(downloadProgress)
|
|
66
|
+
}
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Alternatively, you may use the engine to control and observe the model download:
|
|
70
|
+
|
|
71
|
+
```ts
|
|
72
|
+
// engine.download(localModelId)
|
|
73
|
+
// engine.pause(localModelId)
|
|
74
|
+
// engine.resume(localModelId)
|
|
75
|
+
// engine.stop(localModelId)
|
|
76
|
+
// engine.delete(localModelId)
|
|
77
|
+
|
|
78
|
+
const modelState: ModelDownloadState = engine.getState(localModelId)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Session
|
|
82
|
+
|
|
83
|
+
`Session` is the core entity used to communicate with the model:
|
|
84
|
+
|
|
85
|
+
```ts
|
|
86
|
+
// Choose one of the two options below by commenting/uncommenting:
|
|
87
|
+
// const modelId: ModelID = { type: 'Cloud', id: cloudRepoId }
|
|
88
|
+
const modelId: ModelID = { type: 'Local', id: localModelId }
|
|
89
|
+
const session: Session = engine.createSession(modelId)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Chat
|
|
93
|
+
|
|
94
|
+
Load the `Session` with a chat configuration and run it with a specific prompt or a list of messages:
|
|
95
|
+
|
|
96
|
+
```ts
|
|
97
|
+
const config: SessionConfig = {
|
|
98
|
+
preset: { type: 'General' },
|
|
99
|
+
samplingSeed: { type: 'Custom', seed: 12345 },
|
|
100
|
+
contextLength: { type: 'Default' },
|
|
101
|
+
}
|
|
102
|
+
session.load(config)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
```ts
|
|
106
|
+
const input: SessionInput = {
|
|
107
|
+
type: 'Messages',
|
|
108
|
+
messages: [
|
|
109
|
+
{
|
|
110
|
+
role: SessionMessageRole.System,
|
|
111
|
+
content: 'You are a helpful assistant.'
|
|
112
|
+
},
|
|
113
|
+
{
|
|
114
|
+
role: SessionMessageRole.User,
|
|
115
|
+
content: 'Tell me a short, funny story about a robot.'
|
|
116
|
+
},
|
|
117
|
+
],
|
|
118
|
+
}
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
```ts
|
|
122
|
+
const runConfig: SessionRunConfig = {
|
|
123
|
+
tokensLimit: 128,
|
|
124
|
+
samplingConfig: { type: 'Argmax' },
|
|
125
|
+
}
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
```ts
|
|
129
|
+
const output = session.run(input, runConfig, (partialOutput) => {
|
|
130
|
+
return handlePartialOutput(partialOutput)
|
|
131
|
+
})
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Summarization
|
|
135
|
+
|
|
136
|
+
In this example, we will extract a summary of the input text:
|
|
137
|
+
|
|
138
|
+
```ts
|
|
139
|
+
const config: SessionConfig = {
|
|
140
|
+
preset: { type: 'Summarization' },
|
|
141
|
+
samplingSeed: { type: 'Default' },
|
|
142
|
+
contextLength: { type: 'Default' },
|
|
143
|
+
}
|
|
144
|
+
session.load(config)
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
```ts
|
|
148
|
+
const input: SessionInput = {
|
|
149
|
+
type: 'Text',
|
|
150
|
+
text: `Text is: "${textToSummarize}". Write only summary itself.`,
|
|
151
|
+
}
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
```ts
|
|
155
|
+
const runConfig: SessionRunConfig = {
|
|
156
|
+
tokensLimit: 256,
|
|
157
|
+
samplingConfig: { type: 'Argmax' },
|
|
158
|
+
}
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
```ts
|
|
162
|
+
const output = session.run(input, runConfig, (partialOutput) => {
|
|
163
|
+
return handlePartialOutput(partialOutput)
|
|
164
|
+
})
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### Classification
|
|
168
|
+
|
|
169
|
+
Let’s look at a case where you need to classify input text based on a specific feature, such as `sentiment`:
|
|
170
|
+
|
|
171
|
+
```ts
|
|
172
|
+
const feature: SessionClassificationFeature = {
|
|
173
|
+
name: 'sentiment',
|
|
174
|
+
values: ['Happy', 'Sad', 'Angry', 'Fearful', 'Surprised', 'Disgusted'],
|
|
175
|
+
}
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
```ts
|
|
179
|
+
const config: SessionConfig = {
|
|
180
|
+
preset: { type: 'Classification', feature },
|
|
181
|
+
samplingSeed: { type: 'Default' },
|
|
182
|
+
contextLength: { type: 'Default' },
|
|
183
|
+
}
|
|
184
|
+
session.load(config)
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
```ts
|
|
188
|
+
const textToDetectFeature =
|
|
189
|
+
"Today's been awesome! Everything just feels right, and I can't stop smiling."
|
|
190
|
+
const classificationPrompt =
|
|
191
|
+
`Text is: "${textToDetectFeature}". Choose ${feature.name} from the list: ${feature.values.join(', ')}. ` +
|
|
192
|
+
"Answer with one word. Don't add a dot at the end."
|
|
193
|
+
const input: SessionInput = { type: 'Text', text: classificationPrompt }
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
```ts
|
|
197
|
+
const runConfig: SessionRunConfig = {
|
|
198
|
+
tokensLimit: 32,
|
|
199
|
+
samplingConfig: { type: 'Argmax' },
|
|
200
|
+
}
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
```ts
|
|
204
|
+
const output = session.run(input, runConfig, (partialOutput) => {
|
|
205
|
+
return handlePartialOutput(partialOutput)
|
|
206
|
+
})
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
In this example, you will get the answer `Happy` immediately after the prefill step, and the actual generation won't even start.
|
|
210
|
+
|
|
211
|
+
## License
|
|
212
|
+
|
|
213
|
+
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
|
|
214
|
+
|
|
215
|
+
|
package/README.src.md
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<picture>
|
|
3
|
+
<img alt="Mirai" src="https://artifacts.trymirai.com/social/github/uzu-typescript.jpg" style="max-width: 100%;">
|
|
4
|
+
</picture>
|
|
5
|
+
</p>
|
|
6
|
+
|
|
7
|
+
<a href="https://artifacts.trymirai.com/social/about_us.mp3"><img src="https://img.shields.io/badge/Listen-Podcast-red" alt="Listen to our podcast"></a>
|
|
8
|
+
<a href="https://docsend.com/v/76bpr/mirai2025"><img src="https://img.shields.io/badge/View-Deck-red" alt="View our deck"></a>
|
|
9
|
+
<a href="mailto:alexey@getmirai.co,dima@getmirai.co,aleksei@getmirai.co?subject=Interested%20in%20Mirai"><img src="https://img.shields.io/badge/Send-Email-green" alt="Contact us"></a>
|
|
10
|
+
<a href="https://docs.trymirai.com/components/inference-engine"><img src="https://img.shields.io/badge/Read-Docs-blue" alt="Read docs"></a>
|
|
11
|
+
[](https://www.npmjs.com/package/@trymirai/uzu)
|
|
12
|
+
[](LICENSE)
|
|
13
|
+
|
|
14
|
+
# uzu-ts
|
|
15
|
+
|
|
16
|
+
Node package for [uzu](https://github.com/trymirai/uzu), a **high-performance** inference engine for AI models on Apple Silicon. It allows you to deploy AI directly in your app with **zero latency**, **full data privacy**, and **no inference costs**. You don’t need an ML team or weeks of setup - one developer can handle everything in minutes. Key features:
|
|
17
|
+
|
|
18
|
+
- Simple, high-level API
|
|
19
|
+
- Specialized configurations with significant performance boosts for common use cases like classification and summarization
|
|
20
|
+
- [Broad model support](https://trymirai.com/models)
|
|
21
|
+
- Observable model manager
|
|
22
|
+
|
|
23
|
+
## Quick Start
|
|
24
|
+
|
|
25
|
+
Set up your project via [Platform](https://platform.trymirai.com) and obtain a `MIRAI_API_KEY`.
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
# Set your API key in `examples/api_key.ts`, then run the examples
|
|
29
|
+
pnpm run chat
|
|
30
|
+
pnpm run summarisation
|
|
31
|
+
pnpm run classification
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Setup
|
|
35
|
+
|
|
36
|
+
Add the `uzu` dependency to your project's `package.json`:
|
|
37
|
+
|
|
38
|
+
```json
|
|
39
|
+
"dependencies": {
|
|
40
|
+
"@trymirai/uzu": "0.1.5"
|
|
41
|
+
}
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Create and activate engine:
|
|
45
|
+
|
|
46
|
+
```ts
|
|
47
|
+
// include:examples/chat.ts#activation lang=ts
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Refresh models registry / list cloud models:
|
|
51
|
+
|
|
52
|
+
```ts
|
|
53
|
+
// include:examples/chat.ts#registry lang=ts
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Download with progress handle
|
|
57
|
+
|
|
58
|
+
```ts
|
|
59
|
+
// include:examples/chat.ts#download lang=ts
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Alternatively, you may use the engine to control and observe the model download:
|
|
63
|
+
|
|
64
|
+
```ts
|
|
65
|
+
// include:examples/chat.ts#model-state lang=ts
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Session
|
|
69
|
+
|
|
70
|
+
`Session` is the core entity used to communicate with the model:
|
|
71
|
+
|
|
72
|
+
```ts
|
|
73
|
+
// include:examples/chat.ts#session-create lang=ts
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### Chat
|
|
77
|
+
|
|
78
|
+
Load the `Session` with a chat configuration and run it with a specific prompt or a list of messages:
|
|
79
|
+
|
|
80
|
+
```ts
|
|
81
|
+
// include:examples/chat.ts#session-load lang=ts
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
```ts
|
|
85
|
+
// include:examples/chat.ts#session-input lang=ts
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
```ts
|
|
89
|
+
// include:examples/chat.ts#session-run-config lang=ts
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
```ts
|
|
93
|
+
// include:examples/chat.ts#session-run lang=ts
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Summarization
|
|
97
|
+
|
|
98
|
+
In this example, we will extract a summary of the input text:
|
|
99
|
+
|
|
100
|
+
```ts
|
|
101
|
+
// include:examples/summarisation.ts#session-load lang=ts
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
```ts
|
|
105
|
+
// include:examples/summarisation.ts#session-input lang=ts
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
```ts
|
|
109
|
+
// include:examples/summarisation.ts#session-run-config lang=ts
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
```ts
|
|
113
|
+
// include:examples/summarisation.ts#session-run lang=ts
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Classification
|
|
117
|
+
|
|
118
|
+
Let’s look at a case where you need to classify input text based on a specific feature, such as `sentiment`:
|
|
119
|
+
|
|
120
|
+
```ts
|
|
121
|
+
// include:examples/classification.ts#classification-feature lang=ts
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
```ts
|
|
125
|
+
// include:examples/classification.ts#session-load lang=ts
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
```ts
|
|
129
|
+
// include:examples/classification.ts#session-input lang=ts
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
```ts
|
|
133
|
+
// include:examples/classification.ts#session-run-config lang=ts
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
```ts
|
|
137
|
+
// include:examples/classification.ts#session-run lang=ts
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
In this example, you will get the answer `Happy` immediately after the prefill step, and the actual generation won't even start.
|
|
141
|
+
|
|
142
|
+
## License
|
|
143
|
+
|
|
144
|
+
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
|
|
145
|
+
|
|
146
|
+
|
package/package.json
CHANGED
|
@@ -1,22 +1,25 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@trymirai/uzu",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.5",
|
|
4
4
|
"private": false,
|
|
5
5
|
"main": "uzu.node",
|
|
6
6
|
"types": "uzu.d.ts",
|
|
7
7
|
"packageManager": "pnpm@10.14.0",
|
|
8
8
|
"scripts": {
|
|
9
|
-
"
|
|
10
|
-
"
|
|
11
|
-
"
|
|
12
|
-
"
|
|
9
|
+
"lint": "eslint .",
|
|
10
|
+
"generate:readme": "cargo run -q --manifest-path ../../Cargo.toml --bin uzu_docgen -- --root $PWD --src $PWD/README.src.md --dest $PWD/README.md",
|
|
11
|
+
"chat": "ts-node examples/chat.ts",
|
|
12
|
+
"summarisation": "ts-node examples/summarisation.ts",
|
|
13
|
+
"classification": "ts-node examples/classification.ts"
|
|
13
14
|
},
|
|
14
15
|
"devDependencies": {
|
|
15
16
|
"@napi-rs/cli": "3.1.2",
|
|
16
17
|
"@types/node": "24.2.0",
|
|
18
|
+
"@types/progress": "2.0.7",
|
|
19
|
+
"eslint": "9.33.0",
|
|
17
20
|
"ts-node": "10.9.2",
|
|
18
21
|
"typescript": "5.9.2",
|
|
19
|
-
"
|
|
22
|
+
"typescript-eslint": "8.40.0"
|
|
20
23
|
},
|
|
21
24
|
"dependencies": {
|
|
22
25
|
"progress": "2.0.3"
|
package/uzu.node
CHANGED
|
Binary file
|