@fugood/bricks-project 2.22.0-beta.9 → 2.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/compile/action-name-map.ts +108 -1
- package/compile/index.ts +10 -1
- package/package.json +3 -3
- package/tools/postinstall.ts +16 -9
- package/types/animation.ts +2 -1
- package/types/brick-base.ts +79 -0
- package/types/bricks/3DViewer.ts +200 -0
- package/types/bricks/Camera.ts +195 -0
- package/types/bricks/Chart.ts +362 -0
- package/types/bricks/GenerativeMedia.ts +240 -0
- package/types/bricks/Icon.ts +93 -0
- package/types/bricks/Image.ts +104 -0
- package/types/bricks/Items.ts +461 -0
- package/types/bricks/Lottie.ts +159 -0
- package/types/bricks/QrCode.ts +112 -0
- package/types/bricks/Rect.ts +110 -0
- package/types/bricks/RichText.ts +123 -0
- package/types/bricks/Rive.ts +209 -0
- package/types/bricks/Slideshow.ts +155 -0
- package/types/bricks/Svg.ts +94 -0
- package/types/bricks/Text.ts +143 -0
- package/types/bricks/TextInput.ts +231 -0
- package/types/bricks/Video.ts +170 -0
- package/types/bricks/VideoStreaming.ts +107 -0
- package/types/bricks/WebRtcStream.ts +60 -0
- package/types/bricks/WebView.ts +157 -0
- package/types/bricks/index.ts +20 -0
- package/types/common.ts +8 -3
- package/types/data.ts +6 -0
- package/types/generators/AlarmClock.ts +102 -0
- package/types/generators/Assistant.ts +546 -0
- package/types/generators/BleCentral.ts +225 -0
- package/types/generators/BlePeripheral.ts +202 -0
- package/types/generators/CanvasMap.ts +57 -0
- package/types/generators/CastlesPay.ts +77 -0
- package/types/generators/DataBank.ts +123 -0
- package/types/generators/File.ts +351 -0
- package/types/generators/GraphQl.ts +124 -0
- package/types/generators/Http.ts +117 -0
- package/types/generators/HttpServer.ts +164 -0
- package/types/generators/Information.ts +97 -0
- package/types/generators/Intent.ts +107 -0
- package/types/generators/Iterator.ts +95 -0
- package/types/generators/Keyboard.ts +85 -0
- package/types/generators/LlmAnthropicCompat.ts +188 -0
- package/types/generators/LlmGgml.ts +719 -0
- package/types/generators/LlmOnnx.ts +184 -0
- package/types/generators/LlmOpenAiCompat.ts +206 -0
- package/types/generators/LlmQualcommAiEngine.ts +213 -0
- package/types/generators/Mcp.ts +294 -0
- package/types/generators/McpServer.ts +248 -0
- package/types/generators/MediaFlow.ts +142 -0
- package/types/generators/MqttBroker.ts +121 -0
- package/types/generators/MqttClient.ts +129 -0
- package/types/generators/Question.ts +395 -0
- package/types/generators/RealtimeTranscription.ts +180 -0
- package/types/generators/RerankerGgml.ts +153 -0
- package/types/generators/SerialPort.ts +141 -0
- package/types/generators/SoundPlayer.ts +86 -0
- package/types/generators/SoundRecorder.ts +113 -0
- package/types/generators/SpeechToTextGgml.ts +462 -0
- package/types/generators/SpeechToTextOnnx.ts +227 -0
- package/types/generators/SpeechToTextPlatform.ts +75 -0
- package/types/generators/SqLite.ts +118 -0
- package/types/generators/Step.ts +101 -0
- package/types/generators/TapToPayOnIPhone.ts +175 -0
- package/types/generators/Tcp.ts +120 -0
- package/types/generators/TcpServer.ts +137 -0
- package/types/generators/TextToSpeechGgml.ts +182 -0
- package/types/generators/TextToSpeechOnnx.ts +169 -0
- package/types/generators/TextToSpeechOpenAiLike.ts +113 -0
- package/types/generators/ThermalPrinter.ts +185 -0
- package/types/generators/Tick.ts +75 -0
- package/types/generators/Udp.ts +109 -0
- package/types/generators/VadGgml.ts +211 -0
- package/types/generators/VectorStore.ts +223 -0
- package/types/generators/Watchdog.ts +96 -0
- package/types/generators/WebCrawler.ts +97 -0
- package/types/generators/WebRtc.ts +165 -0
- package/types/generators/WebSocket.ts +142 -0
- package/types/generators/index.ts +51 -0
- package/types/system.ts +64 -0
- package/utils/data.ts +45 -0
- package/utils/event-props.ts +89 -0
- package/types/bricks.ts +0 -3168
- package/types/generators.ts +0 -7633
|
@@ -0,0 +1,462 @@
|
|
|
1
|
+
import type { SwitchCondInnerStateCurrentCanvas, SwitchCondData, SwitchDef } from '../switch'
|
|
2
|
+
import type { Data, DataLink } from '../data'
|
|
3
|
+
import type {
|
|
4
|
+
Generator,
|
|
5
|
+
EventAction,
|
|
6
|
+
ActionWithDataParams,
|
|
7
|
+
ActionWithParams,
|
|
8
|
+
Action,
|
|
9
|
+
EventProperty,
|
|
10
|
+
} from '../common'
|
|
11
|
+
|
|
12
|
+
/* Load the model */
|
|
13
|
+
export type GeneratorSpeechInferenceActionLoadModel = Action & {
|
|
14
|
+
__actionName: 'GENERATOR_SPEECH_INFERENCE_LOAD_MODEL'
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
/* Transcribe an audio file. You can provide the `File URL` property; if it is not provided, the default `File URL` is used. */
|
|
18
|
+
export type GeneratorSpeechInferenceActionTranscribeFile = ActionWithParams & {
|
|
19
|
+
__actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_FILE'
|
|
20
|
+
params?: Array<
|
|
21
|
+
| {
|
|
22
|
+
input: 'fileUrl'
|
|
23
|
+
value?: string | DataLink | EventProperty
|
|
24
|
+
mapping?: string
|
|
25
|
+
}
|
|
26
|
+
| {
|
|
27
|
+
input: 'prompt'
|
|
28
|
+
value?: string | DataLink | EventProperty
|
|
29
|
+
mapping?: string
|
|
30
|
+
}
|
|
31
|
+
| {
|
|
32
|
+
input: 'beamSize'
|
|
33
|
+
value?: number | DataLink | EventProperty
|
|
34
|
+
mapping?: string
|
|
35
|
+
}
|
|
36
|
+
| {
|
|
37
|
+
input: 'language'
|
|
38
|
+
value?: string | DataLink | EventProperty
|
|
39
|
+
mapping?: string
|
|
40
|
+
}
|
|
41
|
+
| {
|
|
42
|
+
input: 'translate'
|
|
43
|
+
value?: boolean | DataLink | EventProperty
|
|
44
|
+
mapping?: string
|
|
45
|
+
}
|
|
46
|
+
>
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/* Transcribe audio data. Currently only supports base64-encoded audio data (16-bit PCM, mono, 16kHz) */
|
|
50
|
+
export type GeneratorSpeechInferenceActionTranscribeData = ActionWithParams & {
|
|
51
|
+
__actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_DATA'
|
|
52
|
+
params?: Array<
|
|
53
|
+
| {
|
|
54
|
+
input: 'data'
|
|
55
|
+
value?: any | EventProperty
|
|
56
|
+
mapping?: string
|
|
57
|
+
}
|
|
58
|
+
| {
|
|
59
|
+
input: 'prompt'
|
|
60
|
+
value?: string | DataLink | EventProperty
|
|
61
|
+
mapping?: string
|
|
62
|
+
}
|
|
63
|
+
| {
|
|
64
|
+
input: 'beamSize'
|
|
65
|
+
value?: number | DataLink | EventProperty
|
|
66
|
+
mapping?: string
|
|
67
|
+
}
|
|
68
|
+
| {
|
|
69
|
+
input: 'language'
|
|
70
|
+
value?: string | DataLink | EventProperty
|
|
71
|
+
mapping?: string
|
|
72
|
+
}
|
|
73
|
+
| {
|
|
74
|
+
input: 'translate'
|
|
75
|
+
value?: boolean | DataLink | EventProperty
|
|
76
|
+
mapping?: string
|
|
77
|
+
}
|
|
78
|
+
>
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
/* [Deprecated] Transcribe microphone audio source */
|
|
82
|
+
export type GeneratorSpeechInferenceActionTranscribeRealtime = ActionWithParams & {
|
|
83
|
+
__actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME'
|
|
84
|
+
params?: Array<
|
|
85
|
+
| {
|
|
86
|
+
input: 'prompt'
|
|
87
|
+
value?: string | DataLink | EventProperty
|
|
88
|
+
mapping?: string
|
|
89
|
+
}
|
|
90
|
+
| {
|
|
91
|
+
input: 'beamSize'
|
|
92
|
+
value?: number | DataLink | EventProperty
|
|
93
|
+
mapping?: string
|
|
94
|
+
}
|
|
95
|
+
| {
|
|
96
|
+
input: 'language'
|
|
97
|
+
value?: string | DataLink | EventProperty
|
|
98
|
+
mapping?: string
|
|
99
|
+
}
|
|
100
|
+
| {
|
|
101
|
+
input: 'translate'
|
|
102
|
+
value?: boolean | DataLink | EventProperty
|
|
103
|
+
mapping?: string
|
|
104
|
+
}
|
|
105
|
+
| {
|
|
106
|
+
input: 'realtimeAudioSeconds'
|
|
107
|
+
value?: number | DataLink | EventProperty
|
|
108
|
+
mapping?: string
|
|
109
|
+
}
|
|
110
|
+
| {
|
|
111
|
+
input: 'realtimeAudioSliceSeconds'
|
|
112
|
+
value?: number | DataLink | EventProperty
|
|
113
|
+
mapping?: string
|
|
114
|
+
}
|
|
115
|
+
| {
|
|
116
|
+
input: 'realtimeAudioMinSeconds'
|
|
117
|
+
value?: number | DataLink | EventProperty
|
|
118
|
+
mapping?: string
|
|
119
|
+
}
|
|
120
|
+
| {
|
|
121
|
+
input: 'realtimeSaveAudio'
|
|
122
|
+
value?: boolean | DataLink | EventProperty
|
|
123
|
+
mapping?: string
|
|
124
|
+
}
|
|
125
|
+
| {
|
|
126
|
+
input: 'realtimeVadEnabled'
|
|
127
|
+
value?: boolean | DataLink | EventProperty
|
|
128
|
+
mapping?: string
|
|
129
|
+
}
|
|
130
|
+
| {
|
|
131
|
+
input: 'realtimeVadMs'
|
|
132
|
+
value?: number | DataLink | EventProperty
|
|
133
|
+
mapping?: string
|
|
134
|
+
}
|
|
135
|
+
| {
|
|
136
|
+
input: 'realtimeVadThold'
|
|
137
|
+
value?: number | DataLink | EventProperty
|
|
138
|
+
mapping?: string
|
|
139
|
+
}
|
|
140
|
+
| {
|
|
141
|
+
input: 'realtimeVadFreqThold'
|
|
142
|
+
value?: number | DataLink | EventProperty
|
|
143
|
+
mapping?: string
|
|
144
|
+
}
|
|
145
|
+
>
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
/* [Deprecated] Stop transcribing microphone audio source */
|
|
149
|
+
export type GeneratorSpeechInferenceActionTranscribeRealtimeStop = Action & {
|
|
150
|
+
__actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME_STOP'
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
/* Clear downloaded files (model, audio) & current jobs */
|
|
154
|
+
export type GeneratorSpeechInferenceActionClearDownload = Action & {
|
|
155
|
+
__actionName: 'GENERATOR_SPEECH_INFERENCE_CLEAR_DOWNLOAD'
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
/* Release context */
|
|
159
|
+
export type GeneratorSpeechInferenceActionReleaseContext = Action & {
|
|
160
|
+
__actionName: 'GENERATOR_SPEECH_INFERENCE_RELEASE_CONTEXT'
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
interface GeneratorSpeechInferenceDef {
|
|
164
|
+
/*
|
|
165
|
+
Default property:
|
|
166
|
+
{
|
|
167
|
+
"init": false,
|
|
168
|
+
"accelVariant": "default",
|
|
169
|
+
"modelName": "base-q8_0",
|
|
170
|
+
"modelUseCoreML": false,
|
|
171
|
+
"modelUseGPU": true,
|
|
172
|
+
"modelUseFlashAttn": false,
|
|
173
|
+
"inferLanguage": "Auto",
|
|
174
|
+
"inferRealtimeAudioSeconds": 30,
|
|
175
|
+
"inferRealtimeAudioSliceSeconds": 30,
|
|
176
|
+
"inferRealtimeSaveAudio": false,
|
|
177
|
+
"inferRealtimeVadEnabled": false,
|
|
178
|
+
"inferRealtimeVadMs": 2000,
|
|
179
|
+
"inferRealtimeVadThold": 0.6,
|
|
180
|
+
"inferRealtimeVadFreqThold": 100
|
|
181
|
+
}
|
|
182
|
+
*/
|
|
183
|
+
property?: {
|
|
184
|
+
/* Initialize the Whisper context on generator initialization
|
|
185
|
+
Please note that it will take some RAM depending on the model size */
|
|
186
|
+
init?: boolean | DataLink
|
|
187
|
+
/* Accelerator variant (Only for desktop)
|
|
188
|
+
`default` - CPU / Metal (macOS)
|
|
189
|
+
`vulkan` - Use Vulkan
|
|
190
|
+
`cuda` - Use CUDA */
|
|
191
|
+
accelVariant?: 'default' | 'vulkan' | 'cuda' | DataLink
|
|
192
|
+
/* Name of the model to use. The model is downloaded during the preload stage or the generator initialization stage.
|
|
193
|
+
We use `ggml` format models; please refer to https://huggingface.co/BricksDisplay/whisper-ggml
|
|
194
|
+
You can also choose the `custom` option and set `Model URL` and the model hash (see `modelHashType` / `modelHash`) to use your own model */
|
|
195
|
+
modelName?:
|
|
196
|
+
| 'custom'
|
|
197
|
+
| 'tiny'
|
|
198
|
+
| 'tiny-q5_1'
|
|
199
|
+
| 'tiny-q8_0'
|
|
200
|
+
| 'tiny.en'
|
|
201
|
+
| 'tiny.en-q5_1'
|
|
202
|
+
| 'tiny.en-q8_0'
|
|
203
|
+
| 'base'
|
|
204
|
+
| 'base-q5_1'
|
|
205
|
+
| 'base-q8_0'
|
|
206
|
+
| 'base.en'
|
|
207
|
+
| 'base.en-q5_1'
|
|
208
|
+
| 'base.en-q8_0'
|
|
209
|
+
| 'small'
|
|
210
|
+
| 'small-q5_1'
|
|
211
|
+
| 'small-q8_0'
|
|
212
|
+
| 'small.en'
|
|
213
|
+
| 'small.en-q5_1'
|
|
214
|
+
| 'small.en-q8_0'
|
|
215
|
+
| 'medium'
|
|
216
|
+
| 'medium-q5_0'
|
|
217
|
+
| 'medium-q8_0'
|
|
218
|
+
| 'medium.en'
|
|
219
|
+
| 'medium.en-q5_1'
|
|
220
|
+
| 'medium.en-q8_0'
|
|
221
|
+
| 'large-v1'
|
|
222
|
+
| 'large-v2'
|
|
223
|
+
| 'large-v2-q5_0'
|
|
224
|
+
| 'large-v2-q8_0'
|
|
225
|
+
| 'large-v3'
|
|
226
|
+
| 'large-v3-q5_0'
|
|
227
|
+
| 'large-v3-q8_0'
|
|
228
|
+
| 'large-v3-turbo'
|
|
229
|
+
| 'large-v3-turbo-q5_0'
|
|
230
|
+
| 'large-v3-turbo-q8_0'
|
|
231
|
+
| 'small.en-tdrz'
|
|
232
|
+
| 'small.en-tdrz-q5_0'
|
|
233
|
+
| 'small.en-tdrz-q8_0'
|
|
234
|
+
| 'distil-small.en'
|
|
235
|
+
| 'distil-small.en-q5_0'
|
|
236
|
+
| 'distil-small.en-q8_0'
|
|
237
|
+
| 'distil-medium.en'
|
|
238
|
+
| 'distil-medium.en-q5_0'
|
|
239
|
+
| 'distil-medium.en-q8_0'
|
|
240
|
+
| 'distil-large-v3'
|
|
241
|
+
| 'distil-large-v3-q5_0'
|
|
242
|
+
| 'distil-large-v3-q8_0'
|
|
243
|
+
| DataLink
|
|
244
|
+
/* The URL or path of model
|
|
245
|
+
We use `ggml` format models; please refer to https://github.com/ggerganov/whisper.cpp/tree/master/models */
|
|
246
|
+
modelUrl?: string | DataLink
|
|
247
|
+
/* Hash type of model */
|
|
248
|
+
modelHashType?: 'md5' | 'sha256' | 'sha1' | DataLink
|
|
249
|
+
/* Hash of model */
|
|
250
|
+
modelHash?: string | DataLink
|
|
251
|
+
/* [Unstable] iOS: Use CoreML model for inference */
|
|
252
|
+
modelUseCoreML?: boolean | DataLink
|
|
253
|
+
/* Use GPU acceleration for inference. Currently iOS only. If it is enabled, the Core ML option is ignored. */
|
|
254
|
+
modelUseGPU?: boolean | DataLink
|
|
255
|
+
/* Use Flash Attention for inference (Recommended with GPU enabled) */
|
|
256
|
+
modelUseFlashAttn?: boolean | DataLink
|
|
257
|
+
/* The language of the file to be inferred */
|
|
258
|
+
inferLanguage?:
|
|
259
|
+
| 'Auto'
|
|
260
|
+
| 'English (en)'
|
|
261
|
+
| 'Chinese (zh)'
|
|
262
|
+
| 'German (de)'
|
|
263
|
+
| 'Spanish (es)'
|
|
264
|
+
| 'Russian (ru)'
|
|
265
|
+
| 'Korean (ko)'
|
|
266
|
+
| 'French (fr)'
|
|
267
|
+
| 'Japanese (ja)'
|
|
268
|
+
| 'Portuguese (pt)'
|
|
269
|
+
| 'Turkish (tr)'
|
|
270
|
+
| 'Polish (pl)'
|
|
271
|
+
| 'Catalan (ca)'
|
|
272
|
+
| 'Dutch (nl)'
|
|
273
|
+
| 'Arabic (ar)'
|
|
274
|
+
| 'Swedish (sv)'
|
|
275
|
+
| 'Italian (it)'
|
|
276
|
+
| 'Indonesian (id)'
|
|
277
|
+
| 'Hindi (hi)'
|
|
278
|
+
| 'Finnish (fi)'
|
|
279
|
+
| 'Vietnamese (vi)'
|
|
280
|
+
| 'Hebrew (he)'
|
|
281
|
+
| 'Ukrainian (uk)'
|
|
282
|
+
| 'Greek (el)'
|
|
283
|
+
| 'Malay (ms)'
|
|
284
|
+
| 'Czech (cs)'
|
|
285
|
+
| 'Romanian (ro)'
|
|
286
|
+
| 'Danish (da)'
|
|
287
|
+
| 'Hungarian (hu)'
|
|
288
|
+
| 'Tamil (ta)'
|
|
289
|
+
| 'Norwegian (no)'
|
|
290
|
+
| 'Thai (th)'
|
|
291
|
+
| 'Urdu (ur)'
|
|
292
|
+
| 'Croatian (hr)'
|
|
293
|
+
| 'Bulgarian (bg)'
|
|
294
|
+
| 'Lithuanian (lt)'
|
|
295
|
+
| 'Latin (la)'
|
|
296
|
+
| 'Maori (mi)'
|
|
297
|
+
| 'Malayalam (ml)'
|
|
298
|
+
| 'Welsh (cy)'
|
|
299
|
+
| 'Slovak (sk)'
|
|
300
|
+
| 'Telugu (te)'
|
|
301
|
+
| 'Persian (fa)'
|
|
302
|
+
| 'Latvian (lv)'
|
|
303
|
+
| 'Bengali (bn)'
|
|
304
|
+
| 'Serbian (sr)'
|
|
305
|
+
| 'Azerbaijani (az)'
|
|
306
|
+
| 'Slovenian (sl)'
|
|
307
|
+
| 'Kannada (kn)'
|
|
308
|
+
| 'Estonian (et)'
|
|
309
|
+
| 'Macedonian (mk)'
|
|
310
|
+
| 'Breton (br)'
|
|
311
|
+
| 'Basque (eu)'
|
|
312
|
+
| 'Icelandic (is)'
|
|
313
|
+
| 'Armenian (hy)'
|
|
314
|
+
| 'Nepali (ne)'
|
|
315
|
+
| 'Mongolian (mn)'
|
|
316
|
+
| 'Bosnian (bs)'
|
|
317
|
+
| 'Kazakh (kk)'
|
|
318
|
+
| 'Albanian (sq)'
|
|
319
|
+
| 'Swahili (sw)'
|
|
320
|
+
| 'Galician (gl)'
|
|
321
|
+
| 'Marathi (mr)'
|
|
322
|
+
| 'Punjabi (pa)'
|
|
323
|
+
| 'Sinhala (si)'
|
|
324
|
+
| 'Khmer (km)'
|
|
325
|
+
| 'Shona (sn)'
|
|
326
|
+
| 'Yoruba (yo)'
|
|
327
|
+
| 'Somali (so)'
|
|
328
|
+
| 'Afrikaans (af)'
|
|
329
|
+
| 'Occitan (oc)'
|
|
330
|
+
| 'Georgian (ka)'
|
|
331
|
+
| 'Belarusian (be)'
|
|
332
|
+
| 'Tajik (tg)'
|
|
333
|
+
| 'Sindhi (sd)'
|
|
334
|
+
| 'Gujarati (gu)'
|
|
335
|
+
| 'Amharic (am)'
|
|
336
|
+
| 'Yiddish (yi)'
|
|
337
|
+
| 'Lao (lo)'
|
|
338
|
+
| 'Uzbek (uz)'
|
|
339
|
+
| 'Faroese (fo)'
|
|
340
|
+
| 'Haitian Creole (ht)'
|
|
341
|
+
| 'Pashto (ps)'
|
|
342
|
+
| 'Turkmen (tk)'
|
|
343
|
+
| 'Nynorsk (nn)'
|
|
344
|
+
| 'Maltese (mt)'
|
|
345
|
+
| 'Sanskrit (sa)'
|
|
346
|
+
| 'Luxembourgish (lb)'
|
|
347
|
+
| 'Myanmar (my)'
|
|
348
|
+
| 'Tibetan (bo)'
|
|
349
|
+
| 'Tagalog (tl)'
|
|
350
|
+
| 'Malagasy (mg)'
|
|
351
|
+
| 'Assamese (as)'
|
|
352
|
+
| 'Tatar (tt)'
|
|
353
|
+
| 'Hawaiian (haw)'
|
|
354
|
+
| 'Lingala (ln)'
|
|
355
|
+
| 'Hausa (ha)'
|
|
356
|
+
| 'Bashkir (ba)'
|
|
357
|
+
| 'Javanese (jw)'
|
|
358
|
+
| 'Sundanese (su)'
|
|
359
|
+
| DataLink
|
|
360
|
+
/* Initial prompt text to guide the model's style or continue a previous audio segment. The prompt should match the audio language. */
|
|
361
|
+
inferPrompt?: string | DataLink
|
|
362
|
+
/* Beam size to use for beam search (enables beam search if set) */
|
|
363
|
+
inferBeamSize?: number | DataLink
|
|
364
|
+
/* Translate the result to English */
|
|
365
|
+
inferTranslate?: boolean | DataLink
|
|
366
|
+
/* Max threads to use for inference */
|
|
367
|
+
inferMaxThreads?: number | DataLink
|
|
368
|
+
/* Output token-level timestamps in details outlet */
|
|
369
|
+
inferTokenTimestamps?: boolean | DataLink
|
|
370
|
+
/* Speaker diarization (Please use small.en-tdrz model) */
|
|
371
|
+
inferTdrz?: boolean | DataLink
|
|
372
|
+
/* Maximum segment length in characters */
|
|
373
|
+
inferMaxLength?: number | DataLink
|
|
374
|
+
/* Audio time offset in milliseconds */
|
|
375
|
+
inferOffset?: number | DataLink
|
|
376
|
+
/* Duration of audio to process in milliseconds */
|
|
377
|
+
inferDuration?: number | DataLink
|
|
378
|
+
/* The file URL or path to be inferred.
|
|
379
|
+
Only the `wav` format with a 16kHz sample rate and a single (mono) channel is supported */
|
|
380
|
+
inferFileUrl?: string | DataLink
|
|
381
|
+
/* MD5 of file to be inferred */
|
|
382
|
+
inferFileMd5?: string | DataLink
|
|
383
|
+
/* Recording duration in seconds. For performance, a value of less than 30 seconds is recommended. */
|
|
384
|
+
inferRealtimeAudioSeconds?: number | DataLink
|
|
385
|
+
/* Optimize audio transcription performance by slicing audio samples when `Realtime Audio Seconds` > 30. */
|
|
386
|
+
inferRealtimeAudioSliceSeconds?: number | DataLink
|
|
387
|
+
/* Minimum duration of audio required before transcription starts. Min: 0.5 seconds, Max: `Realtime Audio Slice Seconds`, Default: 1 second */
|
|
388
|
+
inferRealtimeAudioMinSeconds?: number | DataLink
|
|
389
|
+
/* Save the recorded audio to a file; the file path is output to the outlet. */
|
|
390
|
+
inferRealtimeSaveAudio?: boolean | DataLink
|
|
391
|
+
/* Start transcribing during recording when the audio volume is greater than the threshold, using VAD (Voice Activity Detection).
|
|
392
|
+
The first VAD will be triggered after 2 seconds of recording. */
|
|
393
|
+
inferRealtimeVadEnabled?: boolean | DataLink
|
|
394
|
+
/* Length of the collected audio used for VAD (ms) */
|
|
395
|
+
inferRealtimeVadMs?: number | DataLink
|
|
396
|
+
/* VAD threshold */
|
|
397
|
+
inferRealtimeVadThold?: number | DataLink
|
|
398
|
+
/* Frequency to apply High-pass filter in VAD */
|
|
399
|
+
inferRealtimeVadFreqThold?: number | DataLink
|
|
400
|
+
}
|
|
401
|
+
events?: {
|
|
402
|
+
/* Event triggered when context state changes */
|
|
403
|
+
onContextStateChange?: Array<EventAction>
|
|
404
|
+
/* Event triggered when error occurs */
|
|
405
|
+
onError?: Array<EventAction>
|
|
406
|
+
/* Event triggered when a transcription result is received */
|
|
407
|
+
onTranscribed?: Array<EventAction>
|
|
408
|
+
/* Event triggered when realtime transcription is done */
|
|
409
|
+
onRealtimeStop?: Array<EventAction>
|
|
410
|
+
}
|
|
411
|
+
outlets?: {
|
|
412
|
+
/* Context state */
|
|
413
|
+
contextState?: () => Data
|
|
414
|
+
/* Context details */
|
|
415
|
+
contextDetails?: () => Data
|
|
416
|
+
/* Is transcribing */
|
|
417
|
+
isTranscribing?: () => Data
|
|
418
|
+
/* Progress of transcribe audio (0-100) */
|
|
419
|
+
transcribeProgress?: () => Data
|
|
420
|
+
/* Inference result */
|
|
421
|
+
transcribeResult?: () => Data
|
|
422
|
+
/* Inference result details */
|
|
423
|
+
transcribeDetails?: () => Data
|
|
424
|
+
/* Recorded audio file path of transcribe realtime (if `Save Audio` is enabled) */
|
|
425
|
+
recordedPath?: () => Data
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
/* Local Speech-to-Text (STT) inference based on GGML and [whisper.cpp](https://github.com/ggerganov/whisper.cpp)
|
|
430
|
+
|
|
431
|
+
## Notice
|
|
432
|
+
- iOS: GPU acceleration is supported; an M1+ / A17+ chip device is recommended
|
|
433
|
+
- macOS: GPU acceleration is supported; an M1+ chip device is recommended
|
|
434
|
+
- Android: GPU acceleration is not currently supported (coming soon); Android 13+ is recommended
|
|
435
|
+
- Linux / Windows: GPU acceleration is supported; you can choose the `vulkan` or `cuda` backend in the Accel Variant property */
|
|
436
|
+
export type GeneratorSpeechInference = Generator &
|
|
437
|
+
GeneratorSpeechInferenceDef & {
|
|
438
|
+
templateKey: 'GENERATOR_SPEECH_INFERENCE'
|
|
439
|
+
switches: Array<
|
|
440
|
+
SwitchDef &
|
|
441
|
+
GeneratorSpeechInferenceDef & {
|
|
442
|
+
conds?: Array<{
|
|
443
|
+
method: '==' | '!=' | '>' | '<' | '>=' | '<='
|
|
444
|
+
cond:
|
|
445
|
+
| SwitchCondInnerStateCurrentCanvas
|
|
446
|
+
| SwitchCondData
|
|
447
|
+
| {
|
|
448
|
+
__typename: 'SwitchCondInnerStateOutlet'
|
|
449
|
+
outlet:
|
|
450
|
+
| 'contextState'
|
|
451
|
+
| 'contextDetails'
|
|
452
|
+
| 'isTranscribing'
|
|
453
|
+
| 'transcribeProgress'
|
|
454
|
+
| 'transcribeResult'
|
|
455
|
+
| 'transcribeDetails'
|
|
456
|
+
| 'recordedPath'
|
|
457
|
+
value: any
|
|
458
|
+
}
|
|
459
|
+
}>
|
|
460
|
+
}
|
|
461
|
+
>
|
|
462
|
+
}
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
import type { SwitchCondInnerStateCurrentCanvas, SwitchCondData, SwitchDef } from '../switch'
|
|
2
|
+
import type { Data, DataLink } from '../data'
|
|
3
|
+
import type {
|
|
4
|
+
Generator,
|
|
5
|
+
EventAction,
|
|
6
|
+
ActionWithDataParams,
|
|
7
|
+
ActionWithParams,
|
|
8
|
+
Action,
|
|
9
|
+
EventProperty,
|
|
10
|
+
} from '../common'
|
|
11
|
+
|
|
12
|
+
/* Load the model */
|
|
13
|
+
export type GeneratorOnnxSTTActionLoadModel = Action & {
|
|
14
|
+
__actionName: 'GENERATOR_ONNX_STT_LOAD_MODEL'
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
/* Inference */
|
|
18
|
+
export type GeneratorOnnxSTTActionInfer = ActionWithParams & {
|
|
19
|
+
__actionName: 'GENERATOR_ONNX_STT_INFER'
|
|
20
|
+
params?: Array<{
|
|
21
|
+
input: 'audioUri'
|
|
22
|
+
value?: string | DataLink | EventProperty
|
|
23
|
+
mapping?: string
|
|
24
|
+
}>
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/* Clean cache */
|
|
28
|
+
export type GeneratorOnnxSTTActionCleanCache = Action & {
|
|
29
|
+
__actionName: 'GENERATOR_ONNX_STT_CLEAN_CACHE'
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/* Release context */
|
|
33
|
+
export type GeneratorOnnxSTTActionReleaseContext = Action & {
|
|
34
|
+
__actionName: 'GENERATOR_ONNX_STT_RELEASE_CONTEXT'
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
interface GeneratorOnnxSTTDef {
|
|
38
|
+
/*
|
|
39
|
+
Default property:
|
|
40
|
+
{
|
|
41
|
+
"model": "whisper-base",
|
|
42
|
+
"modelType": "auto",
|
|
43
|
+
"returnTimestamps": "none",
|
|
44
|
+
"executionMode": "sequential"
|
|
45
|
+
}
|
|
46
|
+
*/
|
|
47
|
+
property?: {
|
|
48
|
+
/* Initialize the STT context on generator initialization */
|
|
49
|
+
init?: boolean | DataLink
|
|
50
|
+
/* STT model */
|
|
51
|
+
model?: string | DataLink
|
|
52
|
+
/* Model type */
|
|
53
|
+
modelType?: string | DataLink
|
|
54
|
+
/* Quantize type */
|
|
55
|
+
quantizeType?:
|
|
56
|
+
| 'auto'
|
|
57
|
+
| 'none'
|
|
58
|
+
| 'fp16'
|
|
59
|
+
| 'q8'
|
|
60
|
+
| 'int8'
|
|
61
|
+
| 'uint8'
|
|
62
|
+
| 'q4'
|
|
63
|
+
| 'bnb4'
|
|
64
|
+
| 'q4f16'
|
|
65
|
+
| DataLink
|
|
66
|
+
/* Return timestamps */
|
|
67
|
+
returnTimestamps?: 'none' | 'enable' | 'word' | DataLink
|
|
68
|
+
/* Transcription language
|
|
69
|
+
If the language is not specified, it will be detected automatically. */
|
|
70
|
+
language?:
|
|
71
|
+
| 'English'
|
|
72
|
+
| 'Chinese'
|
|
73
|
+
| 'German'
|
|
74
|
+
| 'Spanish'
|
|
75
|
+
| 'Russian'
|
|
76
|
+
| 'Korean'
|
|
77
|
+
| 'French'
|
|
78
|
+
| 'Japanese'
|
|
79
|
+
| 'Portuguese'
|
|
80
|
+
| 'Turkish'
|
|
81
|
+
| 'Polish'
|
|
82
|
+
| 'Catalan'
|
|
83
|
+
| 'Dutch'
|
|
84
|
+
| 'Arabic'
|
|
85
|
+
| 'Swedish'
|
|
86
|
+
| 'Italian'
|
|
87
|
+
| 'Indonesian'
|
|
88
|
+
| 'Hindi'
|
|
89
|
+
| 'Finnish'
|
|
90
|
+
| 'Vietnamese'
|
|
91
|
+
| 'Hebrew'
|
|
92
|
+
| 'Ukrainian'
|
|
93
|
+
| 'Greek'
|
|
94
|
+
| 'Malay'
|
|
95
|
+
| 'Czech'
|
|
96
|
+
| 'Romanian'
|
|
97
|
+
| 'Danish'
|
|
98
|
+
| 'Hungarian'
|
|
99
|
+
| 'Tamil'
|
|
100
|
+
| 'Norwegian'
|
|
101
|
+
| 'Thai'
|
|
102
|
+
| 'Urdu'
|
|
103
|
+
| 'Croatian'
|
|
104
|
+
| 'Bulgarian'
|
|
105
|
+
| 'Lithuanian'
|
|
106
|
+
| 'Latin'
|
|
107
|
+
| 'Maori'
|
|
108
|
+
| 'Malayalam'
|
|
109
|
+
| 'Welsh'
|
|
110
|
+
| 'Slovak'
|
|
111
|
+
| 'Telugu'
|
|
112
|
+
| 'Persian'
|
|
113
|
+
| 'Latvian'
|
|
114
|
+
| 'Bengali'
|
|
115
|
+
| 'Serbian'
|
|
116
|
+
| 'Azerbaijani'
|
|
117
|
+
| 'Slovenian'
|
|
118
|
+
| 'Kannada'
|
|
119
|
+
| 'Estonian'
|
|
120
|
+
| 'Macedonian'
|
|
121
|
+
| 'Breton'
|
|
122
|
+
| 'Basque'
|
|
123
|
+
| 'Icelandic'
|
|
124
|
+
| 'Armenian'
|
|
125
|
+
| 'Nepali'
|
|
126
|
+
| 'Mongolian'
|
|
127
|
+
| 'Bosnian'
|
|
128
|
+
| 'Kazakh'
|
|
129
|
+
| 'Albanian'
|
|
130
|
+
| 'Swahili'
|
|
131
|
+
| 'Galician'
|
|
132
|
+
| 'Marathi'
|
|
133
|
+
| 'Punjabi'
|
|
134
|
+
| 'Sinhala'
|
|
135
|
+
| 'Khmer'
|
|
136
|
+
| 'Shona'
|
|
137
|
+
| 'Yoruba'
|
|
138
|
+
| 'Somali'
|
|
139
|
+
| 'Afrikaans'
|
|
140
|
+
| 'Occitan'
|
|
141
|
+
| 'Georgian'
|
|
142
|
+
| 'Belarusian'
|
|
143
|
+
| 'Tajik'
|
|
144
|
+
| 'Sindhi'
|
|
145
|
+
| 'Gujarati'
|
|
146
|
+
| 'Amharic'
|
|
147
|
+
| 'Yiddish'
|
|
148
|
+
| 'Lao'
|
|
149
|
+
| 'Uzbek'
|
|
150
|
+
| 'Faroese'
|
|
151
|
+
| 'Haitian Creole'
|
|
152
|
+
| 'Pashto'
|
|
153
|
+
| 'Turkmen'
|
|
154
|
+
| 'Nynorsk'
|
|
155
|
+
| 'Maltese'
|
|
156
|
+
| 'Sanskrit'
|
|
157
|
+
| 'Luxembourgish'
|
|
158
|
+
| 'Myanmar'
|
|
159
|
+
| 'Tibetan'
|
|
160
|
+
| 'Tagalog'
|
|
161
|
+
| 'Malagasy'
|
|
162
|
+
| 'Assamese'
|
|
163
|
+
| 'Tatar'
|
|
164
|
+
| 'Hawaiian '
|
|
165
|
+
| 'Lingala'
|
|
166
|
+
| 'Hausa'
|
|
167
|
+
| 'Bashkir'
|
|
168
|
+
| 'Javanese'
|
|
169
|
+
| 'Sundanese'
|
|
170
|
+
| DataLink
|
|
171
|
+
/* Task */
|
|
172
|
+
task?: 'transcribe' | 'translate' | DataLink
|
|
173
|
+
/* Inferencing chunk length */
|
|
174
|
+
chunkLength?: number | DataLink
|
|
175
|
+
/* Executor candidates, descending order of priority
|
|
176
|
+
Default will be xnnpack, wasm, cpu */
|
|
177
|
+
executors?:
|
|
178
|
+
| Array<'qnn' | 'dml' | 'nnapi' | 'xnnpack' | 'coreml' | 'cpu' | 'wasm' | 'webgpu' | DataLink>
|
|
179
|
+
| DataLink
|
|
180
|
+
/* Execution mode
|
|
181
|
+
Usually when the model has many branches, setting this option to `parallel` will give you better performance. */
|
|
182
|
+
executionMode?: 'sequential' | 'parallel' | DataLink
|
|
183
|
+
/* QNN backend */
|
|
184
|
+
qnnBackend?: 'HTP' | 'HTA' | 'DSP' | 'GPU' | 'CPU' | DataLink
|
|
185
|
+
/* Enable FP16 for QNN HTP */
|
|
186
|
+
qnnHtpEnableFp16?: boolean | DataLink
|
|
187
|
+
/* Enable QNN debug */
|
|
188
|
+
qnnEnableDebug?: boolean | DataLink
|
|
189
|
+
}
|
|
190
|
+
events?: {
|
|
191
|
+
/* Event triggered when context state changes */
|
|
192
|
+
onContextStateChange?: Array<EventAction>
|
|
193
|
+
/* Event triggered when error occurs */
|
|
194
|
+
onError?: Array<EventAction>
|
|
195
|
+
}
|
|
196
|
+
outlets?: {
|
|
197
|
+
/* Context state */
|
|
198
|
+
contextState?: () => Data
|
|
199
|
+
/* Inference result */
|
|
200
|
+
result?: () => Data
|
|
201
|
+
/* Inference result detail */
|
|
202
|
+
resultDetail?: () => Data
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
/* Local STT inference based on [transformers.js](https://huggingface.co/docs/transformers.js)
|
|
207
|
+
You can use any converted model on HuggingFace. */
|
|
208
|
+
export type GeneratorOnnxSTT = Generator &
|
|
209
|
+
GeneratorOnnxSTTDef & {
|
|
210
|
+
templateKey: 'GENERATOR_ONNX_STT'
|
|
211
|
+
switches: Array<
|
|
212
|
+
SwitchDef &
|
|
213
|
+
GeneratorOnnxSTTDef & {
|
|
214
|
+
conds?: Array<{
|
|
215
|
+
method: '==' | '!=' | '>' | '<' | '>=' | '<='
|
|
216
|
+
cond:
|
|
217
|
+
| SwitchCondInnerStateCurrentCanvas
|
|
218
|
+
| SwitchCondData
|
|
219
|
+
| {
|
|
220
|
+
__typename: 'SwitchCondInnerStateOutlet'
|
|
221
|
+
outlet: 'contextState' | 'result' | 'resultDetail'
|
|
222
|
+
value: any
|
|
223
|
+
}
|
|
224
|
+
}>
|
|
225
|
+
}
|
|
226
|
+
>
|
|
227
|
+
}
|