@trymirai/uzu 0.1.19 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -26,7 +26,7 @@ Add the `uzu` dependency to your project's `package.json`:
26
26
 
27
27
  ```json
28
28
  "dependencies": {
29
- "@trymirai/uzu": "0.1.19"
29
+ "@trymirai/uzu": "0.1.21"
30
30
  }
31
31
  ```
32
32
 
@@ -41,7 +41,7 @@ const licenseStatus: LicenseStatus = await engine.activate(apiKey)
41
41
 
42
42
  ```ts
43
43
  const localModels: LocalModel[] = await engine.updateRegistry()
44
- const localModelId = 'Meta-Llama-3.2-1B-Instruct-bfloat16'
44
+ const localModelId = 'Meta-Llama-3.2-1B-Instruct'
45
45
  ```
46
46
 
47
47
  ### Download with progress handle
@@ -51,6 +51,7 @@ const donwloadHandle = engine.downloadHandle(localModelId)
51
51
  donwloadHandle.start()
52
52
 
53
53
  for await (const donwloadProgress of donwloadHandle.progress()) {
54
+ // Implement a custom download progress handler
54
55
  handleProgress(donwloadProgress)
55
56
  }
56
57
  ```
@@ -58,13 +59,13 @@ for await (const donwloadProgress of donwloadHandle.progress()) {
58
59
  Alternatively, you may use the engine to control and observe the model download:
59
60
 
60
61
  ```ts
62
+ const modelState: ModelDownloadState = engine.getState(localModelId)
63
+
61
64
  // engine.download(localModelId)
62
65
  // engine.pause(localModelId)
63
66
  // engine.resume(localModelId)
64
67
  // engine.stop(localModelId)
65
68
  // engine.delete(localModelId)
66
-
67
- const modelState: ModelDownloadState = engine.getState(localModelId)
68
69
  ```
69
70
 
70
71
  ### Session
@@ -72,35 +73,30 @@ const modelState: ModelDownloadState = engine.getState(localModelId)
72
73
  `Session` is the core entity used to communicate with the model:
73
74
 
74
75
  ```ts
75
- // Choose one of the two options below by commenting/uncommenting:
76
- // const modelId: ModelID = { type: 'Cloud', id: cloudRepoId }
77
76
  const modelId: ModelID = { type: 'Local', id: localModelId }
78
- const session: Session = engine.createSession(modelId)
79
- ```
80
-
81
- ### Chat
82
-
83
- Load `Session` with a chat-configured config and run it with a specific prompt or a list of messages:
84
-
85
- ```ts
86
- const config: SessionConfig = {
77
+ const config: Config = {
87
78
  preset: { type: 'General' },
88
- samplingSeed: { type: 'Custom', seed: 12345 },
79
+ prefillStepSize: { type: 'Default' },
89
80
  contextLength: { type: 'Default' },
81
+ samplingSeed: { type: 'Default' },
90
82
  }
91
- session.load(config)
83
+ const session: Session = engine.createSession(modelId, config)
92
84
  ```
93
85
 
86
+ ### Chat
87
+
88
+ Once the `Session` is created, you can run it with a specific prompt or a list of messages:
89
+
94
90
  ```ts
95
- const input: SessionInput = {
91
+ const input: Input = {
96
92
  type: 'Messages',
97
93
  messages: [
98
94
  {
99
- role: SessionMessageRole.System,
95
+ role: Role.System,
100
96
  content: 'You are a helpful assistant.'
101
97
  },
102
98
  {
103
- role: SessionMessageRole.User,
99
+ role: Role.User,
104
100
  content: 'Tell me a short, funny story about a robot.'
105
101
  },
106
102
  ],
@@ -108,14 +104,14 @@ const input: SessionInput = {
108
104
  ```
109
105
 
110
106
  ```ts
111
- const runConfig: SessionRunConfig = {
107
+ const runConfig: RunConfig = {
112
108
  tokensLimit: 128,
113
- samplingConfig: { type: 'Argmax' },
109
+ enableThinking: true,
110
+ samplingPolicy: { type: 'Default' },
114
111
  }
115
- ```
116
112
 
117
- ```ts
118
113
  const output = session.run(input, runConfig, (partialOutput) => {
114
+ // Implement a custom partial output handler
119
115
  return handlePartialOutput(partialOutput)
120
116
  })
121
117
  ```
@@ -125,30 +121,34 @@ const output = session.run(input, runConfig, (partialOutput) => {
125
121
  In this example, we will extract a summary of the input text:
126
122
 
127
123
  ```ts
128
- const config: SessionConfig = {
124
+ const modelId: ModelID = { type: 'Local', id: localModelId }
125
+ const config: Config = {
129
126
  preset: { type: 'Summarization' },
130
- samplingSeed: { type: 'Default' },
127
+ prefillStepSize: { type: 'Default' },
131
128
  contextLength: { type: 'Default' },
129
+ samplingSeed: { type: 'Default' },
132
130
  }
133
- session.load(config)
131
+ const session = engine.createSession(modelId, config)
134
132
  ```
135
133
 
136
134
  ```ts
137
- const input: SessionInput = {
135
+ const textToSummarize =
136
+ 'A Large Language Model (LLM) is a type of AI that processes and generates text using transformer-based architectures trained on vast datasets. They power chatbots, translation, code assistants, and more.'
137
+ const input: Input = {
138
138
  type: 'Text',
139
139
  text: `Text is: "${textToSummarize}". Write only summary itself.`,
140
140
  }
141
141
  ```
142
142
 
143
143
  ```ts
144
- const runConfig: SessionRunConfig = {
144
+ const runConfig: RunConfig = {
145
145
  tokensLimit: 256,
146
- samplingConfig: { type: 'Argmax' },
146
+ enableThinking: true,
147
+ samplingPolicy: { type: 'Custom', value: { type: 'Greedy' } },
147
148
  }
148
- ```
149
149
 
150
- ```ts
151
150
  const output = session.run(input, runConfig, (partialOutput) => {
151
+ // Implement a custom partial output handler
152
152
  return handlePartialOutput(partialOutput)
153
153
  })
154
154
  ```
@@ -158,19 +158,19 @@ const output = session.run(input, runConfig, (partialOutput) => {
158
158
  Let’s look at a case where you need to classify input text based on a specific feature, such as `sentiment`:
159
159
 
160
160
  ```ts
161
- const feature: SessionClassificationFeature = {
161
+ const feature: ClassificationFeature = {
162
162
  name: 'sentiment',
163
163
  values: ['Happy', 'Sad', 'Angry', 'Fearful', 'Surprised', 'Disgusted'],
164
164
  }
165
- ```
166
-
167
- ```ts
168
- const config: SessionConfig = {
165
+ const config: Config = {
169
166
  preset: { type: 'Classification', feature },
170
- samplingSeed: { type: 'Default' },
167
+ prefillStepSize: { type: 'Default' },
171
168
  contextLength: { type: 'Default' },
169
+ samplingSeed: { type: 'Default' },
172
170
  }
173
- session.load(config)
171
+
172
+ const modelId: ModelID = { type: 'Local', id: localModelId }
173
+ const session = engine.createSession(modelId, config)
174
174
  ```
175
175
 
176
176
  ```ts
@@ -179,18 +179,18 @@ const textToDetectFeature =
179
179
  const classificationPrompt =
180
180
  `Text is: "${textToDetectFeature}". Choose ${feature.name} from the list: ${feature.values.join(', ')}. ` +
181
181
  "Answer with one word. Don't add a dot at the end."
182
- const input: SessionInput = { type: 'Text', text: classificationPrompt }
182
+ const input: Input = { type: 'Text', text: classificationPrompt }
183
183
  ```
184
184
 
185
185
  ```ts
186
- const runConfig: SessionRunConfig = {
186
+ const runConfig: RunConfig = {
187
187
  tokensLimit: 32,
188
- samplingConfig: { type: 'Argmax' },
188
+ enableThinking: true,
189
+ samplingPolicy: { type: 'Custom', value: { type: 'Greedy' } },
189
190
  }
190
- ```
191
191
 
192
- ```ts
193
192
  const output = session.run(input, runConfig, (partialOutput) => {
193
+ // Implement a custom partial output handler
194
194
  return handlePartialOutput(partialOutput)
195
195
  })
196
196
  ```
@@ -199,6 +199,4 @@ In this example, you will get the answer `Happy` immediately after the prefill s
199
199
 
200
200
  ## License
201
201
 
202
- This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
203
-
204
-
202
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
package/README.orig.md CHANGED
@@ -25,7 +25,7 @@ Node package for [uzu](https://github.com/trymirai/uzu), a **high-performance**
25
25
  Set up your project via [Platform](https://platform.trymirai.com) and obtain a `MIRAI_API_KEY`.
26
26
 
27
27
  ```bash
28
- # Set your API key in `examples/api_key.ts`, then run the examples
28
+ # Set your API key in `examples/common.ts`, then run the examples
29
29
  pnpm run chat
30
30
  pnpm run summarisation
31
31
  pnpm run classification
@@ -37,7 +37,7 @@ Add the `uzu` dependency to your project's `package.json`:
37
37
 
38
38
  ```json
39
39
  "dependencies": {
40
- "@trymirai/uzu": "0.1.19"
40
+ "@trymirai/uzu": "0.1.21"
41
41
  }
42
42
  ```
43
43
 
@@ -52,7 +52,7 @@ const licenseStatus: LicenseStatus = await engine.activate(apiKey)
52
52
 
53
53
  ```ts
54
54
  const localModels: LocalModel[] = await engine.updateRegistry()
55
- const localModelId = 'Meta-Llama-3.2-1B-Instruct-bfloat16'
55
+ const localModelId = 'Meta-Llama-3.2-1B-Instruct'
56
56
  ```
57
57
 
58
58
  ### Download with progress handle
@@ -62,6 +62,7 @@ const donwloadHandle = engine.downloadHandle(localModelId)
62
62
  donwloadHandle.start()
63
63
 
64
64
  for await (const donwloadProgress of donwloadHandle.progress()) {
65
+ // Implement a custom download progress handler
65
66
  handleProgress(donwloadProgress)
66
67
  }
67
68
  ```
@@ -69,13 +70,13 @@ for await (const donwloadProgress of donwloadHandle.progress()) {
69
70
  Alternatively, you may use the engine to control and observe the model download:
70
71
 
71
72
  ```ts
73
+ const modelState: ModelDownloadState = engine.getState(localModelId)
74
+
72
75
  // engine.download(localModelId)
73
76
  // engine.pause(localModelId)
74
77
  // engine.resume(localModelId)
75
78
  // engine.stop(localModelId)
76
79
  // engine.delete(localModelId)
77
-
78
- const modelState: ModelDownloadState = engine.getState(localModelId)
79
80
  ```
80
81
 
81
82
  ### Session
@@ -83,35 +84,30 @@ const modelState: ModelDownloadState = engine.getState(localModelId)
83
84
  `Session` is the core entity used to communicate with the model:
84
85
 
85
86
  ```ts
86
- // Choose one of the two options below by commenting/uncommenting:
87
- // const modelId: ModelID = { type: 'Cloud', id: cloudRepoId }
88
87
  const modelId: ModelID = { type: 'Local', id: localModelId }
89
- const session: Session = engine.createSession(modelId)
90
- ```
91
-
92
- ### Chat
93
-
94
- Load `Session` with a chat-configured config and run it with a specific prompt or a list of messages:
95
-
96
- ```ts
97
- const config: SessionConfig = {
88
+ const config: Config = {
98
89
  preset: { type: 'General' },
99
- samplingSeed: { type: 'Custom', seed: 12345 },
90
+ prefillStepSize: { type: 'Default' },
100
91
  contextLength: { type: 'Default' },
92
+ samplingSeed: { type: 'Default' },
101
93
  }
102
- session.load(config)
94
+ const session: Session = engine.createSession(modelId, config)
103
95
  ```
104
96
 
97
+ ### Chat
98
+
99
+ Once the `Session` is created, you can run it with a specific prompt or a list of messages:
100
+
105
101
  ```ts
106
- const input: SessionInput = {
102
+ const input: Input = {
107
103
  type: 'Messages',
108
104
  messages: [
109
105
  {
110
- role: SessionMessageRole.System,
106
+ role: Role.System,
111
107
  content: 'You are a helpful assistant.'
112
108
  },
113
109
  {
114
- role: SessionMessageRole.User,
110
+ role: Role.User,
115
111
  content: 'Tell me a short, funny story about a robot.'
116
112
  },
117
113
  ],
@@ -119,14 +115,14 @@ const input: SessionInput = {
119
115
  ```
120
116
 
121
117
  ```ts
122
- const runConfig: SessionRunConfig = {
118
+ const runConfig: RunConfig = {
123
119
  tokensLimit: 128,
124
- samplingConfig: { type: 'Argmax' },
120
+ enableThinking: true,
121
+ samplingPolicy: { type: 'Default' },
125
122
  }
126
- ```
127
123
 
128
- ```ts
129
124
  const output = session.run(input, runConfig, (partialOutput) => {
125
+ // Implement a custom partial output handler
130
126
  return handlePartialOutput(partialOutput)
131
127
  })
132
128
  ```
@@ -136,30 +132,34 @@ const output = session.run(input, runConfig, (partialOutput) => {
136
132
  In this example, we will extract a summary of the input text:
137
133
 
138
134
  ```ts
139
- const config: SessionConfig = {
135
+ const modelId: ModelID = { type: 'Local', id: localModelId }
136
+ const config: Config = {
140
137
  preset: { type: 'Summarization' },
141
- samplingSeed: { type: 'Default' },
138
+ prefillStepSize: { type: 'Default' },
142
139
  contextLength: { type: 'Default' },
140
+ samplingSeed: { type: 'Default' },
143
141
  }
144
- session.load(config)
142
+ const session = engine.createSession(modelId, config)
145
143
  ```
146
144
 
147
145
  ```ts
148
- const input: SessionInput = {
146
+ const textToSummarize =
147
+ 'A Large Language Model (LLM) is a type of AI that processes and generates text using transformer-based architectures trained on vast datasets. They power chatbots, translation, code assistants, and more.'
148
+ const input: Input = {
149
149
  type: 'Text',
150
150
  text: `Text is: "${textToSummarize}". Write only summary itself.`,
151
151
  }
152
152
  ```
153
153
 
154
154
  ```ts
155
- const runConfig: SessionRunConfig = {
155
+ const runConfig: RunConfig = {
156
156
  tokensLimit: 256,
157
- samplingConfig: { type: 'Argmax' },
157
+ enableThinking: true,
158
+ samplingPolicy: { type: 'Custom', value: { type: 'Greedy' } },
158
159
  }
159
- ```
160
160
 
161
- ```ts
162
161
  const output = session.run(input, runConfig, (partialOutput) => {
162
+ // Implement a custom partial output handler
163
163
  return handlePartialOutput(partialOutput)
164
164
  })
165
165
  ```
@@ -169,19 +169,19 @@ const output = session.run(input, runConfig, (partialOutput) => {
169
169
  Let’s look at a case where you need to classify input text based on a specific feature, such as `sentiment`:
170
170
 
171
171
  ```ts
172
- const feature: SessionClassificationFeature = {
172
+ const feature: ClassificationFeature = {
173
173
  name: 'sentiment',
174
174
  values: ['Happy', 'Sad', 'Angry', 'Fearful', 'Surprised', 'Disgusted'],
175
175
  }
176
- ```
177
-
178
- ```ts
179
- const config: SessionConfig = {
176
+ const config: Config = {
180
177
  preset: { type: 'Classification', feature },
181
- samplingSeed: { type: 'Default' },
178
+ prefillStepSize: { type: 'Default' },
182
179
  contextLength: { type: 'Default' },
180
+ samplingSeed: { type: 'Default' },
183
181
  }
184
- session.load(config)
182
+
183
+ const modelId: ModelID = { type: 'Local', id: localModelId }
184
+ const session = engine.createSession(modelId, config)
185
185
  ```
186
186
 
187
187
  ```ts
@@ -190,18 +190,18 @@ const textToDetectFeature =
190
190
  const classificationPrompt =
191
191
  `Text is: "${textToDetectFeature}". Choose ${feature.name} from the list: ${feature.values.join(', ')}. ` +
192
192
  "Answer with one word. Don't add a dot at the end."
193
- const input: SessionInput = { type: 'Text', text: classificationPrompt }
193
+ const input: Input = { type: 'Text', text: classificationPrompt }
194
194
  ```
195
195
 
196
196
  ```ts
197
- const runConfig: SessionRunConfig = {
197
+ const runConfig: RunConfig = {
198
198
  tokensLimit: 32,
199
- samplingConfig: { type: 'Argmax' },
199
+ enableThinking: true,
200
+ samplingPolicy: { type: 'Custom', value: { type: 'Greedy' } },
200
201
  }
201
- ```
202
202
 
203
- ```ts
204
203
  const output = session.run(input, runConfig, (partialOutput) => {
204
+ // Implement a custom partial output handler
205
205
  return handlePartialOutput(partialOutput)
206
206
  })
207
207
  ```
@@ -210,6 +210,4 @@ In this example, you will get the answer `Happy` immediately after the prefill s
210
210
 
211
211
  ## License
212
212
 
213
- This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
214
-
215
-
213
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
package/README.src.md CHANGED
@@ -25,7 +25,7 @@ Node package for [uzu](https://github.com/trymirai/uzu), a **high-performance**
25
25
  Set up your project via [Platform](https://platform.trymirai.com) and obtain a `MIRAI_API_KEY`.
26
26
 
27
27
  ```bash
28
- # Set your API key in `examples/api_key.ts`, then run the examples
28
+ # Set your API key in `examples/common.ts`, then run the examples
29
29
  pnpm run chat
30
30
  pnpm run summarisation
31
31
  pnpm run classification
@@ -56,13 +56,13 @@ Create and activate engine:
56
56
  ### Download with progress handle
57
57
 
58
58
  ```ts
59
- // include:examples/chat.ts#download lang=ts
59
+ // include:examples/chat.ts#download-handle lang=ts
60
60
  ```
61
61
 
62
62
  Alternatively, you may use the engine to control and observe the model download:
63
63
 
64
64
  ```ts
65
- // include:examples/chat.ts#model-state lang=ts
65
+ // include:examples/chat.ts#storage-methods lang=ts
66
66
  ```
67
67
 
68
68
  ### Session
@@ -70,27 +70,19 @@ Alternatively, you may use engine to control and observe model download:
70
70
  `Session` is the core entity used to communicate with the model:
71
71
 
72
72
  ```ts
73
- // include:examples/chat.ts#session-create lang=ts
73
+ // include:examples/chat.ts#session-create-general lang=ts
74
74
  ```
75
75
 
76
76
  ### Chat
77
77
 
78
- Load `Session` with a chat-configured config and run it with a specific prompt or a list of messages:
78
+ Once the `Session` is created, you can run it with a specific prompt or a list of messages:
79
79
 
80
80
  ```ts
81
- // include:examples/chat.ts#session-load lang=ts
81
+ // include:examples/chat.ts#session-input-general lang=ts
82
82
  ```
83
83
 
84
84
  ```ts
85
- // include:examples/chat.ts#session-input lang=ts
86
- ```
87
-
88
- ```ts
89
- // include:examples/chat.ts#session-run-config lang=ts
90
- ```
91
-
92
- ```ts
93
- // include:examples/chat.ts#session-run lang=ts
85
+ // include:examples/chat.ts#session-run-general lang=ts
94
86
  ```
95
87
 
96
88
  ### Summarization
@@ -98,19 +90,15 @@ Load `Session` with a chat-configured config and run it with a specific prompt o
98
90
  In this example, we will extract a summary of the input text:
99
91
 
100
92
  ```ts
101
- // include:examples/summarisation.ts#session-load lang=ts
102
- ```
103
-
104
- ```ts
105
- // include:examples/summarisation.ts#session-input lang=ts
93
+ // include:examples/summarisation.ts#session-create-summarization lang=ts
106
94
  ```
107
95
 
108
96
  ```ts
109
- // include:examples/summarisation.ts#session-run-config lang=ts
97
+ // include:examples/summarisation.ts#session-input-summarization lang=ts
110
98
  ```
111
99
 
112
100
  ```ts
113
- // include:examples/summarisation.ts#session-run lang=ts
101
+ // include:examples/summarisation.ts#session-run-summarization lang=ts
114
102
  ```
115
103
 
116
104
  ### Classification
@@ -118,29 +106,19 @@ In this example, we will extract a summary of the input text:
118
106
  Let’s look at a case where you need to classify input text based on a specific feature, such as `sentiment`:
119
107
 
120
108
  ```ts
121
- // include:examples/classification.ts#classification-feature lang=ts
109
+ // include:examples/classification.ts#session-create-classification lang=ts
122
110
  ```
123
111
 
124
112
  ```ts
125
- // include:examples/classification.ts#session-load lang=ts
113
+ // include:examples/classification.ts#session-input-classification lang=ts
126
114
  ```
127
115
 
128
116
  ```ts
129
- // include:examples/classification.ts#session-input lang=ts
130
- ```
131
-
132
- ```ts
133
- // include:examples/classification.ts#session-run-config lang=ts
134
- ```
135
-
136
- ```ts
137
- // include:examples/classification.ts#session-run lang=ts
117
+ // include:examples/classification.ts#session-run-classification lang=ts
138
118
  ```
139
119
 
140
120
  In this example, you will get the answer `Happy` immediately after the prefill step, and the actual generation won't even start.
141
121
 
142
122
  ## License
143
123
 
144
- This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
145
-
146
-
124
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
package/package.json CHANGED
@@ -1,11 +1,12 @@
1
1
  {
2
2
  "name": "@trymirai/uzu",
3
- "version": "0.1.19",
3
+ "version": "0.1.21",
4
4
  "private": false,
5
5
  "main": "uzu.node",
6
6
  "types": "uzu.d.ts",
7
7
  "packageManager": "pnpm@10.14.0",
8
8
  "scripts": {
9
+ "quick-start": "ts-node examples/quick-start.ts",
9
10
  "chat": "ts-node examples/chat.ts",
10
11
  "summarisation": "ts-node examples/summarisation.ts",
11
12
  "classification": "ts-node examples/classification.ts"
package/uzu.d.ts CHANGED
@@ -27,7 +27,7 @@ export declare class Engine {
27
27
  updateRegistry(): Promise<Array<LocalModel>>
28
28
  activate(apiKey: string): Promise<LicenseStatus>
29
29
  constructor()
30
- createSession(modelId: ModelID): Session
30
+ createSession(modelId: ModelID, config: Config): Session
31
31
  }
32
32
 
33
33
  export declare class ProgressStream {
@@ -44,8 +44,12 @@ export declare class ProgressUpdate {
44
44
  }
45
45
 
46
46
  export declare class Session {
47
- load(config: SessionConfig): void
48
- run(input: SessionInput, runConfig: SessionRunConfig, progressCallback?: (arg: SessionOutput) => boolean | undefined | null): SessionOutput
47
+ run(input: Input, config: RunConfig, progress?: (arg: Output) => boolean | undefined | null): Output
48
+ }
49
+
50
+ export interface ClassificationFeature {
51
+ name: string
52
+ values: Array<string>
49
53
  }
50
54
 
51
55
  export interface CloudModel {
@@ -54,25 +58,35 @@ export interface CloudModel {
54
58
  readonly vendor: string
55
59
  }
56
60
 
61
+ export interface Config {
62
+ preset: Preset
63
+ prefillStepSize: PrefillStepSize
64
+ contextLength: ContextLength
65
+ samplingSeed: SamplingSeed
66
+ }
67
+
57
68
  export type ContextLength =
58
69
  | { type: 'Default' }
70
+ | { type: 'Maximal' }
59
71
  | { type: 'Custom', length: number }
60
72
 
61
- export interface DecoderTestResult {
62
- placementLog: string
63
- iterations: number
64
- timePerToken: number
65
- tokensPerSecond: number
66
- success: boolean
67
- error?: string
68
- }
69
-
70
73
  export type DownloaderError =
71
74
  | { type: 'Http', message: string }
72
75
  | { type: 'Io', message: string }
73
76
  | { type: 'AlreadyExists', path: string }
74
77
  | { type: 'Generic', message: string }
75
78
 
79
+ export declare const enum FinishReason {
80
+ Stop = 0,
81
+ Length = 1,
82
+ Cancelled = 2,
83
+ ContextLimitReached = 3
84
+ }
85
+
86
+ export type Input =
87
+ | { type: 'Text', text: string }
88
+ | { type: 'Messages', messages: Array<Message> }
89
+
76
90
  export type LicenseStatus =
77
91
  | { type: 'NotActivated' }
78
92
  | { type: 'PaymentRequired' }
@@ -85,14 +99,12 @@ export type LicenseStatus =
85
99
  | { type: 'HttpError', code: number }
86
100
 
87
101
  export interface LocalModel {
88
- /** Unique identifier of the model in the form `<vendor>-<name>-<precision>`. */
102
+ /** Unique identifier of the model in the form `<vendor>-<name>`. */
89
103
  readonly identifier: string
90
104
  /** Vendor/author of the model (e.g. "Llama"). */
91
105
  readonly vendor: string
92
- /** Human-readable model name without vendor/precision (e.g. "3B-Instruct"). */
106
+ /** Human-readable model name without vendor (e.g. "3B-Instruct"). */
93
107
  readonly name: string
94
- /** Numerical precision of the weights (e.g. "float16"). */
95
- readonly precision: string
96
108
  /** Quantization type if the model is quantized (e.g. "uint4"). */
97
109
  readonly quantization?: string
98
110
  /** Optional regex to parse model output provided by the backend. */
@@ -101,6 +113,12 @@ export interface LocalModel {
101
113
  readonly state: ModelDownloadState
102
114
  }
103
115
 
116
+ export interface Message {
117
+ role: Role
118
+ content: string
119
+ reasoningContent?: string
120
+ }
121
+
104
122
  export interface ModelDownloadState {
105
123
  /** Total size of all model files in kilobytes. */
106
124
  readonly totalKbytes: number
@@ -116,6 +134,17 @@ export type ModelID =
116
134
  | { type: 'Local', id: string }
117
135
  | { type: 'Cloud', id: string }
118
136
 
137
+ export interface Output {
138
+ text: Text
139
+ stats: Stats
140
+ finishReason?: FinishReason
141
+ }
142
+
143
+ export interface ParsedText {
144
+ chainOfThought?: string
145
+ response?: string
146
+ }
147
+
119
148
  export declare const enum Phase {
120
149
  NotDownloaded = 0,
121
150
  Downloading = 1,
@@ -124,87 +153,59 @@ export declare const enum Phase {
124
153
  Error = 4
125
154
  }
126
155
 
127
- export type SamplingConfig =
128
- | { type: 'Argmax' }
129
- | { type: 'TopP', topP: number }
130
- | { type: 'Categorical', temperature: number }
131
-
132
- export type SamplingSeed =
156
+ export type PrefillStepSize =
133
157
  | { type: 'Default' }
134
- | { type: 'Custom', seed: number }
135
-
136
- export interface SessionClassificationFeature {
137
- name: string
138
- values: Array<string>
139
- }
140
-
141
- export interface SessionConfig {
142
- preset: SessionPreset
143
- samplingSeed: SamplingSeed
144
- contextLength: ContextLength
145
- }
146
-
147
- export type SessionInput =
148
- | { type: 'Text', text: string }
149
- | { type: 'Messages', messages: Array<SessionMessage> }
158
+ | { type: 'Maximal' }
159
+ | { type: 'Custom', length: number }
150
160
 
151
- export interface SessionMessage {
152
- role: SessionMessageRole
153
- content: string
154
- }
161
+ export type Preset =
162
+ | { type: 'General' }
163
+ | { type: 'Classification', feature: ClassificationFeature }
164
+ | { type: 'Summarization' }
155
165
 
156
- export declare const enum SessionMessageRole {
166
+ export declare const enum Role {
157
167
  System = 0,
158
168
  User = 1,
159
169
  Assistant = 2
160
170
  }
161
171
 
162
- export interface SessionOutput {
163
- text: string
164
- stats: SessionOutputStats
165
- finishReason?: SessionOutputFinishReason
166
- }
167
-
168
- export declare const enum SessionOutputFinishReason {
169
- Stop = 0,
170
- Length = 1,
171
- Cancelled = 2
172
+ export interface RunConfig {
173
+ tokensLimit: number
174
+ enableThinking: boolean
175
+ samplingPolicy: SamplingPolicy
172
176
  }
173
177
 
174
- export interface SessionOutputRunStats {
178
+ export interface RunStats {
175
179
  count: bigint
176
180
  averageDuration: number
177
181
  }
178
182
 
179
- export interface SessionOutputStats {
180
- prefillStats: SessionOutputStepStats
181
- generateStats?: SessionOutputStepStats
182
- totalStats: SessionOutputTotalStats
183
+ export type SamplingMethod =
184
+ | { type: 'Greedy' }
185
+ | { type: 'Temperature', temperature: number }
186
+ | { type: 'TopP', topP: number }
187
+
188
+ export type SamplingPolicy =
189
+ | { type: 'Default' }
190
+ | { type: 'Custom', value: SamplingMethod }
191
+
192
+ export type SamplingSeed =
193
+ | { type: 'Default' }
194
+ | { type: 'Custom', seed: number }
195
+
196
+ export interface Stats {
197
+ prefillStats: StepStats
198
+ generateStats?: StepStats
199
+ totalStats: TotalStats
183
200
  }
184
201
 
185
- export interface SessionOutputStepStats {
202
+ export interface StepStats {
186
203
  duration: number
187
204
  suffixLength: bigint
188
205
  tokensCount: bigint
189
206
  tokensPerSecond: number
190
- modelRun: SessionOutputRunStats
191
- run?: SessionOutputRunStats
192
- }
193
-
194
- export interface SessionOutputTotalStats {
195
- duration: number
196
- tokensCountInput: bigint
197
- tokensCountOutput: bigint
198
- }
199
-
200
- export type SessionPreset =
201
- | { type: 'General' }
202
- | { type: 'Classification', feature: SessionClassificationFeature }
203
- | { type: 'Summarization' }
204
-
205
- export interface SessionRunConfig {
206
- tokensLimit: number
207
- samplingConfig?: SamplingConfig
207
+ modelRun: RunStats
208
+ run?: RunStats
208
209
  }
209
210
 
210
211
  export type StorageError =
@@ -214,4 +215,15 @@ export type StorageError =
214
215
  | { type: 'MutexPoisoned', message: string }
215
216
  | { type: 'LicenseNotActivated' }
216
217
 
218
+ export interface Text {
219
+ original: string
220
+ parsed: ParsedText
221
+ }
222
+
223
+ export interface TotalStats {
224
+ duration: number
225
+ tokensCountInput: bigint
226
+ tokensCountOutput: bigint
227
+ }
228
+
217
229
  export declare function version(): string
package/uzu.node CHANGED
Binary file