@trymirai/uzu 0.2.6 → 0.2.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,8 +4,6 @@
4
4
  </picture>
5
5
  </p>
6
6
 
7
- <a href="https://artifacts.trymirai.com/social/about_us.mp3"><img src="https://img.shields.io/badge/Listen-Podcast-red" alt="Listen to our podcast"></a>
8
- <a href="https://docsend.com/v/76bpr/mirai2025"><img src="https://img.shields.io/badge/View-Deck-red" alt="View our deck"></a>
9
7
  <a href="https://discord.com/invite/trymirai"><img src="https://img.shields.io/discord/1377764166764462120?label=Discord" alt="Discord"></a>
10
8
  <a href="mailto:contact@getmirai.co?subject=Interested%20in%20Mirai"><img src="https://img.shields.io/badge/Send-Email-green" alt="Contact us"></a>
11
9
  <a href="https://docs.trymirai.com/app-integration/overview"><img src="https://img.shields.io/badge/Read-Docs-blue" alt="Read docs"></a>
@@ -26,7 +24,7 @@ Add the `uzu` dependency to your project's `package.json`:
26
24
 
27
25
  ```json
28
26
  "dependencies": {
29
- "@trymirai/uzu": "0.2.6"
27
+ "@trymirai/uzu": "0.2.20"
30
28
  }
31
29
  ```
32
30
 
@@ -47,6 +45,7 @@ Place the `API_KEY` you obtained earlier in the corresponding example file, and
47
45
 
48
46
  ```bash
49
47
  pnpm run tsn examples/chat.ts
48
+ pnpm run tsn examples/chatWithSpeculator.ts
50
49
  pnpm run tsn examples/chatDynamicContext.ts
51
50
  pnpm run tsn examples/chatStaticContext.ts
52
51
  pnpm run tsn examples/summarization.ts
@@ -85,6 +84,51 @@ main().catch((error) => {
85
84
  });
86
85
  ```
87
86
 
87
+ ### Speedup with speculative decoding
88
+
89
+ Speculative decoding allows a significant increase in generation speed. For each model, we train a small n-gram model (under 50 MB) tailored to a specific domain or use case. In general chat scenarios, you can use the `Chat` preset, which will automatically use the corresponding speculator:
90
+
91
+ ```ts
92
+ import Engine, { Message, Preset } from '@trymirai/uzu';
93
+
94
+ async function main() {
95
+ const engine = Engine.create('API_KEY');
96
+ const model = engine
97
+ .chatModel('Qwen/Qwen3-0.6B')
98
+ .download((update) => {
99
+ console.log('Progress:', update.progress);
100
+ });
101
+
102
+ const messages = [
103
+ Message.system('You are a helpful assistant'),
104
+ Message.user('Tell me a short, funny story about a robot')
105
+ ];
106
+
107
+ const outputGeneral = await model
108
+ .replyToMessages(
109
+ messages,
110
+ (partialOutput) => {
111
+ return true;
112
+ },
113
+ );
114
+ console.log('Generation speed t/s (general):', outputGeneral.stats.generateStats?.tokensPerSecond ?? 0);
115
+
116
+ const outputWithSpeculator = await model
117
+ .preset(Preset.chat())
118
+ .replyToMessages(
119
+ messages,
120
+ (partialOutput) => {
121
+ return true;
122
+ },
123
+ );
124
+ console.log('Generation speed t/s (with chat speculator):', outputWithSpeculator.stats.generateStats?.tokensPerSecond ?? 0);
125
+ }
126
+
127
+ main().catch((error) => {
128
+ console.error(error);
129
+ });
130
+ ```
131
+
88
132
  ### Chat with dynamic context
89
133
 
90
134
  In this example, we will use the dynamic `ContextMode`, which automatically maintains a continuous conversation history instead of resetting the context with each new input. Every new message is added to the ongoing chat, allowing the model to remember what has already been said and respond with full context.
@@ -7,6 +7,8 @@ export declare class Preset implements ToNapi<NapiPreset> {
7
7
  static general(): Preset;
8
8
  static classification(feature: ClassificationFeature): Preset;
9
9
  static summarization(): Preset;
10
+ static nGramSpeculator(useCase: string, numberOfSpeculatedTokens: number): Preset;
11
+ static chat(): Preset;
10
12
  toNapi(): NapiPreset;
11
13
  }
12
14
  //# sourceMappingURL=preset.d.mts.map
@@ -1 +1 @@
1
- {"version":3,"file":"preset.d.mts","sourceRoot":"","sources":["../src/bridging/preset.ts"],"names":[],"mappings":"OAAO,EAAE,MAAM,IAAI,UAAU,EAAE;OACxB,EAAE,qBAAqB,EAAE;OACzB,EAAE,MAAM,EAAE;AAEjB,qBAAa,MAAO,YAAW,MAAM,CAAC,UAAU,CAAC;IAC7C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAa;IAExC,OAAO;IAIP,MAAM,CAAC,OAAO,IAAI,MAAM;IAKxB,MAAM,CAAC,cAAc,CAAC,OAAO,EAAE,qBAAqB,GAAG,MAAM;IAS7D,MAAM,CAAC,aAAa,IAAI,MAAM;IAK9B,MAAM,IAAI,UAAU;CAGvB"}
1
+ {"version":3,"file":"preset.d.mts","sourceRoot":"","sources":["../src/bridging/preset.ts"],"names":[],"mappings":"OAAO,EAAE,MAAM,IAAI,UAAU,EAAE;OACxB,EAAE,qBAAqB,EAAE;OACzB,EAAE,MAAM,EAAE;AAEjB,qBAAa,MAAO,YAAW,MAAM,CAAC,UAAU,CAAC;IAC7C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAa;IAExC,OAAO;IAIP,MAAM,CAAC,OAAO,IAAI,MAAM;IAKxB,MAAM,CAAC,cAAc,CAAC,OAAO,EAAE,qBAAqB,GAAG,MAAM;IAS7D,MAAM,CAAC,aAAa,IAAI,MAAM;IAK9B,MAAM,CAAC,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,wBAAwB,EAAE,MAAM,GAAG,MAAM;IASjF,MAAM,CAAC,IAAI,IAAI,MAAM;IAKrB,MAAM,IAAI,UAAU;CAGvB"}
@@ -7,6 +7,8 @@ export declare class Preset implements ToNapi<NapiPreset> {
7
7
  static general(): Preset;
8
8
  static classification(feature: ClassificationFeature): Preset;
9
9
  static summarization(): Preset;
10
+ static nGramSpeculator(useCase: string, numberOfSpeculatedTokens: number): Preset;
11
+ static chat(): Preset;
10
12
  toNapi(): NapiPreset;
11
13
  }
12
14
  //# sourceMappingURL=preset.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"preset.d.ts","sourceRoot":"","sources":["../src/bridging/preset.ts"],"names":[],"mappings":"OAAO,EAAE,MAAM,IAAI,UAAU,EAAE;OACxB,EAAE,qBAAqB,EAAE;OACzB,EAAE,MAAM,EAAE;AAEjB,qBAAa,MAAO,YAAW,MAAM,CAAC,UAAU,CAAC;IAC7C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAa;IAExC,OAAO;IAIP,MAAM,CAAC,OAAO,IAAI,MAAM;IAKxB,MAAM,CAAC,cAAc,CAAC,OAAO,EAAE,qBAAqB,GAAG,MAAM;IAS7D,MAAM,CAAC,aAAa,IAAI,MAAM;IAK9B,MAAM,IAAI,UAAU;CAGvB"}
1
+ {"version":3,"file":"preset.d.ts","sourceRoot":"","sources":["../src/bridging/preset.ts"],"names":[],"mappings":"OAAO,EAAE,MAAM,IAAI,UAAU,EAAE;OACxB,EAAE,qBAAqB,EAAE;OACzB,EAAE,MAAM,EAAE;AAEjB,qBAAa,MAAO,YAAW,MAAM,CAAC,UAAU,CAAC;IAC7C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAa;IAExC,OAAO;IAIP,MAAM,CAAC,OAAO,IAAI,MAAM;IAKxB,MAAM,CAAC,cAAc,CAAC,OAAO,EAAE,qBAAqB,GAAG,MAAM;IAS7D,MAAM,CAAC,aAAa,IAAI,MAAM;IAK9B,MAAM,CAAC,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,wBAAwB,EAAE,MAAM,GAAG,MAAM;IASjF,MAAM,CAAC,IAAI,IAAI,MAAM;IAKrB,MAAM,IAAI,UAAU;CAGvB"}
@@ -21,6 +21,18 @@ class Preset {
21
21
  const napiPreset = { type: 'Summarization' };
22
22
  return new Preset(napiPreset);
23
23
  }
24
+ static nGramSpeculator(useCase, numberOfSpeculatedTokens) {
25
+ const napiPreset = {
26
+ type: 'NGramSpeculator',
27
+ useCase,
28
+ numberOfSpeculatedTokens,
29
+ };
30
+ return new Preset(napiPreset);
31
+ }
32
+ static chat() {
33
+ const napiPreset = { type: 'Chat' };
34
+ return new Preset(napiPreset);
35
+ }
24
36
  toNapi() {
25
37
  return this.napiPreset;
26
38
  }
@@ -1 +1 @@
1
- {"version":3,"file":"preset.js","sourceRoot":"","sources":["../src/bridging/preset.ts"],"names":[],"mappings":";;;AAIA,MAAa,MAAM;IAGf,YAAoB,UAAsB;QACtC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IACjC,CAAC;IAED,MAAM,CAAC,OAAO;QACV,MAAM,UAAU,GAAe,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QACnD,OAAO,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM,CAAC,cAAc,CAAC,OAA8B;QAChD,MAAM,yBAAyB,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;QACnD,MAAM,UAAU,GAAe;YAC3B,IAAI,EAAE,gBAAgB;YACtB,OAAO,EAAE,yBAAyB;SACrC,CAAC;QACF,OAAO,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM,CAAC,aAAa;QAChB,MAAM,UAAU,GAAe,EAAE,IAAI,EAAE,eAAe,EAAE,CAAC;QACzD,OAAO,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM;QACF,OAAO,IAAI,CAAC,UAAU,CAAC;IAC3B,CAAC;CACJ;AA7BD,wBA6BC"}
1
+ {"version":3,"file":"preset.js","sourceRoot":"","sources":["../src/bridging/preset.ts"],"names":[],"mappings":";;;AAIA,MAAa,MAAM;IAGf,YAAoB,UAAsB;QACtC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IACjC,CAAC;IAED,MAAM,CAAC,OAAO;QACV,MAAM,UAAU,GAAe,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QACnD,OAAO,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM,CAAC,cAAc,CAAC,OAA8B;QAChD,MAAM,yBAAyB,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;QACnD,MAAM,UAAU,GAAe;YAC3B,IAAI,EAAE,gBAAgB;YACtB,OAAO,EAAE,yBAAyB;SACrC,CAAC;QACF,OAAO,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM,CAAC,aAAa;QAChB,MAAM,UAAU,GAAe,EAAE,IAAI,EAAE,eAAe,EAAE,CAAC;QACzD,OAAO,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM,CAAC,eAAe,CAAC,OAAe,EAAE,wBAAgC;QACpE,MAAM,UAAU,GAAe;YAC3B,IAAI,EAAE,iBAAiB;YACvB,OAAO;YACP,wBAAwB;SAC3B,CAAC;QACF,OAAO,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM,CAAC,IAAI;QACP,MAAM,UAAU,GAAe,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;QAChD,OAAO,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM;QACF,OAAO,IAAI,CAAC,UAAU,CAAC;IAC3B,CAAC;CACJ;AA3CD,wBA2CC"}
@@ -18,6 +18,18 @@ export class Preset {
18
18
  const napiPreset = { type: 'Summarization' };
19
19
  return new Preset(napiPreset);
20
20
  }
21
+ static nGramSpeculator(useCase, numberOfSpeculatedTokens) {
22
+ const napiPreset = {
23
+ type: 'NGramSpeculator',
24
+ useCase,
25
+ numberOfSpeculatedTokens,
26
+ };
27
+ return new Preset(napiPreset);
28
+ }
29
+ static chat() {
30
+ const napiPreset = { type: 'Chat' };
31
+ return new Preset(napiPreset);
32
+ }
21
33
  toNapi() {
22
34
  return this.napiPreset;
23
35
  }
@@ -1 +1 @@
1
- {"version":3,"file":"preset.mjs","sourceRoot":"","sources":["../src/bridging/preset.ts"],"names":[],"mappings":"AAIA,MAAM,OAAO,MAAM;IAGf,YAAoB,UAAsB;QACtC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IACjC,CAAC;IAED,MAAM,CAAC,OAAO;QACV,MAAM,UAAU,GAAe,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QACnD,OAAO,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM,CAAC,cAAc,CAAC,OAA8B;QAChD,MAAM,yBAAyB,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;QACnD,MAAM,UAAU,GAAe;YAC3B,IAAI,EAAE,gBAAgB;YACtB,OAAO,EAAE,yBAAyB;SACrC,CAAC;QACF,OAAO,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM,CAAC,aAAa;QAChB,MAAM,UAAU,GAAe,EAAE,IAAI,EAAE,eAAe,EAAE,CAAC;QACzD,OAAO,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM;QACF,OAAO,IAAI,CAAC,UAAU,CAAC;IAC3B,CAAC;CACJ"}
1
+ {"version":3,"file":"preset.mjs","sourceRoot":"","sources":["../src/bridging/preset.ts"],"names":[],"mappings":"AAIA,MAAM,OAAO,MAAM;IAGf,YAAoB,UAAsB;QACtC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IACjC,CAAC;IAED,MAAM,CAAC,OAAO;QACV,MAAM,UAAU,GAAe,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QACnD,OAAO,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM,CAAC,cAAc,CAAC,OAA8B;QAChD,MAAM,yBAAyB,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;QACnD,MAAM,UAAU,GAAe;YAC3B,IAAI,EAAE,gBAAgB;YACtB,OAAO,EAAE,yBAAyB;SACrC,CAAC;QACF,OAAO,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM,CAAC,aAAa;QAChB,MAAM,UAAU,GAAe,EAAE,IAAI,EAAE,eAAe,EAAE,CAAC;QACzD,OAAO,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM,CAAC,eAAe,CAAC,OAAe,EAAE,wBAAgC;QACpE,MAAM,UAAU,GAAe;YAC3B,IAAI,EAAE,iBAAiB;YACvB,OAAO;YACP,wBAAwB;SAC3B,CAAC;QACF,OAAO,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM,CAAC,IAAI;QACP,MAAM,UAAU,GAAe,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;QAChD,OAAO,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM;QACF,OAAO,IAAI,CAAC,UAAU,CAAC;IAC3B,CAAC;CACJ"}
package/napi/uzu.d.ts CHANGED
@@ -10,7 +10,7 @@ export declare class Engine {
10
10
  getChatModels(types: Array<ModelType>): Promise<Array<ChatModel>>
11
11
  getModelDownloadState(repoId: string): ModelDownloadState
12
12
  createModelDownloadHandle(repoId: string): ModelDownloadHandle
13
- benchmark(task: BenchmarksTask): Promise<Array<BenchmarksResult>>
13
+ benchmark(task: BenchmarksTask, prefillStepSize?: number | undefined | null): Promise<Array<BenchmarksResult>>
14
14
  constructor()
15
15
  createChatSession(model: ChatModel, config: Config): ChatSession
16
16
  registerLicenseStatusHandler(callback?: ((arg: LicenseStatus) => void) | undefined | null): void
@@ -203,6 +203,8 @@ export type Preset =
203
203
  | { type: 'General' }
204
204
  | { type: 'Classification', feature: ClassificationFeature }
205
205
  | { type: 'Summarization' }
206
+ | { type: 'NGramSpeculator', useCase: string, numberOfSpeculatedTokens: number }
207
+ | { type: 'Chat' }
206
208
 
207
209
  export declare const enum Role {
208
210
  System = 0,
package/napi/uzu.node CHANGED
Binary file
package/package.json CHANGED
@@ -101,5 +101,5 @@
101
101
  },
102
102
  "type": "commonjs",
103
103
  "types": "./index.d.ts",
104
- "version": "0.2.6"
104
+ "version": "0.2.20"
105
105
  }
@@ -28,6 +28,20 @@ export class Preset implements ToNapi<NapiPreset> {
28
28
  return new Preset(napiPreset);
29
29
  }
30
30
 
31
+ static nGramSpeculator(useCase: string, numberOfSpeculatedTokens: number): Preset {
32
+ const napiPreset: NapiPreset = {
33
+ type: 'NGramSpeculator',
34
+ useCase,
35
+ numberOfSpeculatedTokens,
36
+ };
37
+ return new Preset(napiPreset);
38
+ }
39
+
40
+ static chat(): Preset {
41
+ const napiPreset: NapiPreset = { type: 'Chat' };
42
+ return new Preset(napiPreset);
43
+ }
44
+
31
45
  toNapi(): NapiPreset {
32
46
  return this.napiPreset;
33
47
  }
package/src/napi/uzu.d.ts CHANGED
@@ -10,7 +10,7 @@ export declare class Engine {
10
10
  getChatModels(types: Array<ModelType>): Promise<Array<ChatModel>>
11
11
  getModelDownloadState(repoId: string): ModelDownloadState
12
12
  createModelDownloadHandle(repoId: string): ModelDownloadHandle
13
- benchmark(task: BenchmarksTask): Promise<Array<BenchmarksResult>>
13
+ benchmark(task: BenchmarksTask, prefillStepSize?: number | undefined | null): Promise<Array<BenchmarksResult>>
14
14
  constructor()
15
15
  createChatSession(model: ChatModel, config: Config): ChatSession
16
16
  registerLicenseStatusHandler(callback?: ((arg: LicenseStatus) => void) | undefined | null): void
@@ -203,6 +203,8 @@ export type Preset =
203
203
  | { type: 'General' }
204
204
  | { type: 'Classification', feature: ClassificationFeature }
205
205
  | { type: 'Summarization' }
206
+ | { type: 'NGramSpeculator', useCase: string, numberOfSpeculatedTokens: number }
207
+ | { type: 'Chat' }
206
208
 
207
209
  export declare const enum Role {
208
210
  System = 0,
package/src/napi/uzu.node CHANGED
Binary file
package/src/version.ts CHANGED
@@ -1 +1 @@
1
- export const VERSION = '0.2.6';
1
+ export const VERSION = '0.2.20';
package/version.d.mts CHANGED
@@ -1,2 +1,2 @@
1
- export declare const VERSION = "0.2.6";
1
+ export declare const VERSION = "0.2.20";
2
2
  //# sourceMappingURL=version.d.mts.map
package/version.d.mts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"version.d.mts","sourceRoot":"","sources":["src/version.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,OAAO,UAAU,CAAC"}
1
+ {"version":3,"file":"version.d.mts","sourceRoot":"","sources":["src/version.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,OAAO,WAAW,CAAC"}
package/version.d.ts CHANGED
@@ -1,2 +1,2 @@
1
- export declare const VERSION = "0.2.6";
1
+ export declare const VERSION = "0.2.20";
2
2
  //# sourceMappingURL=version.d.ts.map
package/version.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"version.d.ts","sourceRoot":"","sources":["src/version.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,OAAO,UAAU,CAAC"}
1
+ {"version":3,"file":"version.d.ts","sourceRoot":"","sources":["src/version.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,OAAO,WAAW,CAAC"}
package/version.js CHANGED
@@ -1,5 +1,5 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.VERSION = void 0;
4
- exports.VERSION = '0.2.6';
4
+ exports.VERSION = '0.2.20';
5
5
  //# sourceMappingURL=version.js.map
package/version.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"version.js","sourceRoot":"","sources":["src/version.ts"],"names":[],"mappings":";;;AAAa,QAAA,OAAO,GAAG,OAAO,CAAC"}
1
+ {"version":3,"file":"version.js","sourceRoot":"","sources":["src/version.ts"],"names":[],"mappings":";;;AAAa,QAAA,OAAO,GAAG,QAAQ,CAAC"}
package/version.mjs CHANGED
@@ -1,2 +1,2 @@
1
- export const VERSION = '0.2.6';
1
+ export const VERSION = '0.2.20';
2
2
  //# sourceMappingURL=version.mjs.map
package/version.mjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"version.mjs","sourceRoot":"","sources":["src/version.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,OAAO,GAAG,OAAO,CAAC"}
1
+ {"version":3,"file":"version.mjs","sourceRoot":"","sources":["src/version.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,OAAO,GAAG,QAAQ,CAAC"}