@fugood/bricks-project 2.22.0-beta.9 → 2.22.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/compile/action-name-map.ts +108 -17
  2. package/compile/index.ts +10 -1
  3. package/package.json +3 -4
  4. package/tools/postinstall.ts +16 -9
  5. package/types/animation.ts +2 -1
  6. package/types/brick-base.ts +79 -0
  7. package/types/bricks/3DViewer.ts +200 -0
  8. package/types/bricks/Camera.ts +195 -0
  9. package/types/bricks/Chart.ts +362 -0
  10. package/types/bricks/GenerativeMedia.ts +240 -0
  11. package/types/bricks/Icon.ts +93 -0
  12. package/types/bricks/Image.ts +104 -0
  13. package/types/bricks/Items.ts +461 -0
  14. package/types/bricks/Lottie.ts +159 -0
  15. package/types/bricks/QrCode.ts +112 -0
  16. package/types/bricks/Rect.ts +110 -0
  17. package/types/bricks/RichText.ts +123 -0
  18. package/types/bricks/Rive.ts +209 -0
  19. package/types/bricks/Slideshow.ts +155 -0
  20. package/types/bricks/Svg.ts +94 -0
  21. package/types/bricks/Text.ts +143 -0
  22. package/types/bricks/TextInput.ts +231 -0
  23. package/types/bricks/Video.ts +170 -0
  24. package/types/bricks/VideoStreaming.ts +107 -0
  25. package/types/bricks/WebRtcStream.ts +60 -0
  26. package/types/bricks/WebView.ts +157 -0
  27. package/types/bricks/index.ts +19 -0
  28. package/types/common.ts +8 -3
  29. package/types/data.ts +6 -0
  30. package/types/generators/AlarmClock.ts +102 -0
  31. package/types/generators/Assistant.ts +546 -0
  32. package/types/generators/BleCentral.ts +225 -0
  33. package/types/generators/BlePeripheral.ts +202 -0
  34. package/types/generators/CanvasMap.ts +57 -0
  35. package/types/generators/CastlesPay.ts +77 -0
  36. package/types/generators/DataBank.ts +123 -0
  37. package/types/generators/File.ts +351 -0
  38. package/types/generators/GraphQl.ts +124 -0
  39. package/types/generators/Http.ts +117 -0
  40. package/types/generators/HttpServer.ts +164 -0
  41. package/types/generators/Information.ts +97 -0
  42. package/types/generators/Intent.ts +107 -0
  43. package/types/generators/Iterator.ts +95 -0
  44. package/types/generators/Keyboard.ts +85 -0
  45. package/types/generators/LlmAnthropicCompat.ts +188 -0
  46. package/types/generators/LlmGgml.ts +719 -0
  47. package/types/generators/LlmOnnx.ts +184 -0
  48. package/types/generators/LlmOpenAiCompat.ts +206 -0
  49. package/types/generators/LlmQualcommAiEngine.ts +213 -0
  50. package/types/generators/Mcp.ts +294 -0
  51. package/types/generators/McpServer.ts +248 -0
  52. package/types/generators/MediaFlow.ts +142 -0
  53. package/types/generators/MqttBroker.ts +121 -0
  54. package/types/generators/MqttClient.ts +129 -0
  55. package/types/generators/Question.ts +395 -0
  56. package/types/generators/RealtimeTranscription.ts +180 -0
  57. package/types/generators/RerankerGgml.ts +153 -0
  58. package/types/generators/SerialPort.ts +141 -0
  59. package/types/generators/SoundPlayer.ts +86 -0
  60. package/types/generators/SoundRecorder.ts +113 -0
  61. package/types/generators/SpeechToTextGgml.ts +462 -0
  62. package/types/generators/SpeechToTextOnnx.ts +227 -0
  63. package/types/generators/SpeechToTextPlatform.ts +75 -0
  64. package/types/generators/SqLite.ts +118 -0
  65. package/types/generators/Step.ts +101 -0
  66. package/types/generators/Tcp.ts +120 -0
  67. package/types/generators/TcpServer.ts +137 -0
  68. package/types/generators/TextToSpeechGgml.ts +182 -0
  69. package/types/generators/TextToSpeechOnnx.ts +169 -0
  70. package/types/generators/TextToSpeechOpenAiLike.ts +113 -0
  71. package/types/generators/ThermalPrinter.ts +185 -0
  72. package/types/generators/Tick.ts +75 -0
  73. package/types/generators/Udp.ts +109 -0
  74. package/types/generators/VadGgml.ts +211 -0
  75. package/types/generators/VectorStore.ts +223 -0
  76. package/types/generators/Watchdog.ts +96 -0
  77. package/types/generators/WebCrawler.ts +97 -0
  78. package/types/generators/WebRtc.ts +165 -0
  79. package/types/generators/WebSocket.ts +142 -0
  80. package/types/generators/index.ts +50 -0
  81. package/types/system.ts +64 -0
  82. package/utils/data.ts +45 -0
  83. package/utils/event-props.ts +89 -13
  84. package/types/bricks.ts +0 -3168
  85. package/types/generators.ts +0 -7633
@@ -0,0 +1,462 @@
1
+ import type { SwitchCondInnerStateCurrentCanvas, SwitchCondData, SwitchDef } from '../switch'
2
+ import type { Data, DataLink } from '../data'
3
+ import type {
4
+ Generator,
5
+ EventAction,
6
+ ActionWithDataParams,
7
+ ActionWithParams,
8
+ Action,
9
+ EventProperty,
10
+ } from '../common'
11
+
12
+ /* Load the model */
13
+ export type GeneratorSpeechInferenceActionLoadModel = Action & {
14
+ __actionName: 'GENERATOR_SPEECH_INFERENCE_LOAD_MODEL'
15
+ }
16
+
17
+ /* Transcribe audio file. You can provide `File URL` property, if not provided, it will use the default `File URL` */
18
+ export type GeneratorSpeechInferenceActionTranscribeFile = ActionWithParams & {
19
+ __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_FILE'
20
+ params?: Array<
21
+ | {
22
+ input: 'fileUrl'
23
+ value?: string | DataLink | EventProperty
24
+ mapping?: string
25
+ }
26
+ | {
27
+ input: 'prompt'
28
+ value?: string | DataLink | EventProperty
29
+ mapping?: string
30
+ }
31
+ | {
32
+ input: 'beamSize'
33
+ value?: number | DataLink | EventProperty
34
+ mapping?: string
35
+ }
36
+ | {
37
+ input: 'language'
38
+ value?: string | DataLink | EventProperty
39
+ mapping?: string
40
+ }
41
+ | {
42
+ input: 'translate'
43
+ value?: boolean | DataLink | EventProperty
44
+ mapping?: string
45
+ }
46
+ >
47
+ }
48
+
49
+ /* Transcribe audio data. Currently only support base64 encoded audio data (16-bit PCM, mono, 16kHz) */
50
+ export type GeneratorSpeechInferenceActionTranscribeData = ActionWithParams & {
51
+ __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_DATA'
52
+ params?: Array<
53
+ | {
54
+ input: 'data'
55
+ value?: any | EventProperty
56
+ mapping?: string
57
+ }
58
+ | {
59
+ input: 'prompt'
60
+ value?: string | DataLink | EventProperty
61
+ mapping?: string
62
+ }
63
+ | {
64
+ input: 'beamSize'
65
+ value?: number | DataLink | EventProperty
66
+ mapping?: string
67
+ }
68
+ | {
69
+ input: 'language'
70
+ value?: string | DataLink | EventProperty
71
+ mapping?: string
72
+ }
73
+ | {
74
+ input: 'translate'
75
+ value?: boolean | DataLink | EventProperty
76
+ mapping?: string
77
+ }
78
+ >
79
+ }
80
+
81
+ /* [Deprecated] Transcribe microphone audio source */
82
+ export type GeneratorSpeechInferenceActionTranscribeRealtime = ActionWithParams & {
83
+ __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME'
84
+ params?: Array<
85
+ | {
86
+ input: 'prompt'
87
+ value?: string | DataLink | EventProperty
88
+ mapping?: string
89
+ }
90
+ | {
91
+ input: 'beamSize'
92
+ value?: number | DataLink | EventProperty
93
+ mapping?: string
94
+ }
95
+ | {
96
+ input: 'language'
97
+ value?: string | DataLink | EventProperty
98
+ mapping?: string
99
+ }
100
+ | {
101
+ input: 'translate'
102
+ value?: boolean | DataLink | EventProperty
103
+ mapping?: string
104
+ }
105
+ | {
106
+ input: 'realtimeAudioSeconds'
107
+ value?: number | DataLink | EventProperty
108
+ mapping?: string
109
+ }
110
+ | {
111
+ input: 'realtimeAudioSliceSeconds'
112
+ value?: number | DataLink | EventProperty
113
+ mapping?: string
114
+ }
115
+ | {
116
+ input: 'realtimeAudioMinSeconds'
117
+ value?: number | DataLink | EventProperty
118
+ mapping?: string
119
+ }
120
+ | {
121
+ input: 'realtimeSaveAudio'
122
+ value?: boolean | DataLink | EventProperty
123
+ mapping?: string
124
+ }
125
+ | {
126
+ input: 'realtimeVadEnabled'
127
+ value?: boolean | DataLink | EventProperty
128
+ mapping?: string
129
+ }
130
+ | {
131
+ input: 'realtimeVadMs'
132
+ value?: number | DataLink | EventProperty
133
+ mapping?: string
134
+ }
135
+ | {
136
+ input: 'realtimeVadThold'
137
+ value?: number | DataLink | EventProperty
138
+ mapping?: string
139
+ }
140
+ | {
141
+ input: 'realtimeVadFreqThold'
142
+ value?: number | DataLink | EventProperty
143
+ mapping?: string
144
+ }
145
+ >
146
+ }
147
+
148
+ /* [Deprecated] Stop transcribing microphone audio source */
149
+ export type GeneratorSpeechInferenceActionTranscribeRealtimeStop = Action & {
150
+ __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME_STOP'
151
+ }
152
+
153
+ /* Clear downloaded files (model, audio) & current jobs */
154
+ export type GeneratorSpeechInferenceActionClearDownload = Action & {
155
+ __actionName: 'GENERATOR_SPEECH_INFERENCE_CLEAR_DOWNLOAD'
156
+ }
157
+
158
+ /* Release context */
159
+ export type GeneratorSpeechInferenceActionReleaseContext = Action & {
160
+ __actionName: 'GENERATOR_SPEECH_INFERENCE_RELEASE_CONTEXT'
161
+ }
162
+
163
+ interface GeneratorSpeechInferenceDef {
164
+ /*
165
+ Default property:
166
+ {
167
+ "init": false,
168
+ "accelVariant": "default",
169
+ "modelName": "base-q8_0",
170
+ "modelUseCoreML": false,
171
+ "modelUseGPU": true,
172
+ "modelUseFlashAttn": false,
173
+ "inferLanguage": "Auto",
174
+ "inferRealtimeAudioSeconds": 30,
175
+ "inferRealtimeAudioSliceSeconds": 30,
176
+ "inferRealtimeSaveAudio": false,
177
+ "inferRealtimeVadEnabled": false,
178
+ "inferRealtimeVadMs": 2000,
179
+ "inferRealtimeVadThold": 0.6,
180
+ "inferRealtimeVadFreqThold": 100
181
+ }
182
+ */
183
+ property?: {
184
+ /* Initialize the Whisper context on generator initialization
185
+ Please note that it will take some RAM depending on the model size */
186
+ init?: boolean | DataLink
187
+ /* Accelerator variant (Only for desktop)
188
+ `default` - CPU / Metal (macOS)
189
+ `vulkan` - Use Vulkan
190
+ `cuda` - Use CUDA */
191
+ accelVariant?: 'default' | 'vulkan' | 'cuda' | DataLink
192
+ /* Use model name, the model download progress will be done in preload stage or the generator initialization stage.
193
+ We used `ggml` format model, please refer to https://huggingface.co/BricksDisplay/whisper-ggml
194
+ You can also choose `custom` option and set `Model URL` and `Model MD5` to use your own model */
195
+ modelName?:
196
+ | 'custom'
197
+ | 'tiny'
198
+ | 'tiny-q5_1'
199
+ | 'tiny-q8_0'
200
+ | 'tiny.en'
201
+ | 'tiny.en-q5_1'
202
+ | 'tiny.en-q8_0'
203
+ | 'base'
204
+ | 'base-q5_1'
205
+ | 'base-q8_0'
206
+ | 'base.en'
207
+ | 'base.en-q5_1'
208
+ | 'base.en-q8_0'
209
+ | 'small'
210
+ | 'small-q5_1'
211
+ | 'small-q8_0'
212
+ | 'small.en'
213
+ | 'small.en-q5_1'
214
+ | 'small.en-q8_0'
215
+ | 'medium'
216
+ | 'medium-q5_0'
217
+ | 'medium-q8_0'
218
+ | 'medium.en'
219
+ | 'medium.en-q5_1'
220
+ | 'medium.en-q8_0'
221
+ | 'large-v1'
222
+ | 'large-v2'
223
+ | 'large-v2-q5_0'
224
+ | 'large-v2-q8_0'
225
+ | 'large-v3'
226
+ | 'large-v3-q5_0'
227
+ | 'large-v3-q8_0'
228
+ | 'large-v3-turbo'
229
+ | 'large-v3-turbo-q5_0'
230
+ | 'large-v3-turbo-q8_0'
231
+ | 'small.en-tdrz'
232
+ | 'small.en-tdrz-q5_0'
233
+ | 'small.en-tdrz-q8_0'
234
+ | 'distil-small.en'
235
+ | 'distil-small.en-q5_0'
236
+ | 'distil-small.en-q8_0'
237
+ | 'distil-medium.en'
238
+ | 'distil-medium.en-q5_0'
239
+ | 'distil-medium.en-q8_0'
240
+ | 'distil-large-v3'
241
+ | 'distil-large-v3-q5_0'
242
+ | 'distil-large-v3-q8_0'
243
+ | DataLink
244
+ /* The URL or path of model
245
+ We used `ggml` format model, please refer to https://github.com/ggerganov/whisper.cpp/tree/master/models */
246
+ modelUrl?: string | DataLink
247
+ /* Hash type of model */
248
+ modelHashType?: 'md5' | 'sha256' | 'sha1' | DataLink
249
+ /* Hash of model */
250
+ modelHash?: string | DataLink
251
+ /* [Unstable] iOS: Use CoreML model for inference */
252
+ modelUseCoreML?: boolean | DataLink
253
+ /* Use GPU Acceleration for inference. Currently iOS only, if it's enabled, Core ML option will be ignored. */
254
+ modelUseGPU?: boolean | DataLink
255
+ /* Use Flash Attention for inference (Recommended with GPU enabled) */
256
+ modelUseFlashAttn?: boolean | DataLink
257
+ /* The language of the file to be inferred */
258
+ inferLanguage?:
259
+ | 'Auto'
260
+ | 'English (en)'
261
+ | 'Chinese (zh)'
262
+ | 'German (de)'
263
+ | 'Spanish (es)'
264
+ | 'Russian (ru)'
265
+ | 'Korean (ko)'
266
+ | 'French (fr)'
267
+ | 'Japanese (ja)'
268
+ | 'Portuguese (pt)'
269
+ | 'Turkish (tr)'
270
+ | 'Polish (pl)'
271
+ | 'Catalan (ca)'
272
+ | 'Dutch (nl)'
273
+ | 'Arabic (ar)'
274
+ | 'Swedish (sv)'
275
+ | 'Italian (it)'
276
+ | 'Indonesian (id)'
277
+ | 'Hindi (hi)'
278
+ | 'Finnish (fi)'
279
+ | 'Vietnamese (vi)'
280
+ | 'Hebrew (he)'
281
+ | 'Ukrainian (uk)'
282
+ | 'Greek (el)'
283
+ | 'Malay (ms)'
284
+ | 'Czech (cs)'
285
+ | 'Romanian (ro)'
286
+ | 'Danish (da)'
287
+ | 'Hungarian (hu)'
288
+ | 'Tamil (ta)'
289
+ | 'Norwegian (no)'
290
+ | 'Thai (th)'
291
+ | 'Urdu (ur)'
292
+ | 'Croatian (hr)'
293
+ | 'Bulgarian (bg)'
294
+ | 'Lithuanian (lt)'
295
+ | 'Latin (la)'
296
+ | 'Maori (mi)'
297
+ | 'Malayalam (ml)'
298
+ | 'Welsh (cy)'
299
+ | 'Slovak (sk)'
300
+ | 'Telugu (te)'
301
+ | 'Persian (fa)'
302
+ | 'Latvian (lv)'
303
+ | 'Bengali (bn)'
304
+ | 'Serbian (sr)'
305
+ | 'Azerbaijani (az)'
306
+ | 'Slovenian (sl)'
307
+ | 'Kannada (kn)'
308
+ | 'Estonian (et)'
309
+ | 'Macedonian (mk)'
310
+ | 'Breton (br)'
311
+ | 'Basque (eu)'
312
+ | 'Icelandic (is)'
313
+ | 'Armenian (hy)'
314
+ | 'Nepali (ne)'
315
+ | 'Mongolian (mn)'
316
+ | 'Bosnian (bs)'
317
+ | 'Kazakh (kk)'
318
+ | 'Albanian (sq)'
319
+ | 'Swahili (sw)'
320
+ | 'Galician (gl)'
321
+ | 'Marathi (mr)'
322
+ | 'Punjabi (pa)'
323
+ | 'Sinhala (si)'
324
+ | 'Khmer (km)'
325
+ | 'Shona (sn)'
326
+ | 'Yoruba (yo)'
327
+ | 'Somali (so)'
328
+ | 'Afrikaans (af)'
329
+ | 'Occitan (oc)'
330
+ | 'Georgian (ka)'
331
+ | 'Belarusian (be)'
332
+ | 'Tajik (tg)'
333
+ | 'Sindhi (sd)'
334
+ | 'Gujarati (gu)'
335
+ | 'Amharic (am)'
336
+ | 'Yiddish (yi)'
337
+ | 'Lao (lo)'
338
+ | 'Uzbek (uz)'
339
+ | 'Faroese (fo)'
340
+ | 'Haitian Creole (ht)'
341
+ | 'Pashto (ps)'
342
+ | 'Turkmen (tk)'
343
+ | 'Nynorsk (nn)'
344
+ | 'Maltese (mt)'
345
+ | 'Sanskrit (sa)'
346
+ | 'Luxembourgish (lb)'
347
+ | 'Myanmar (my)'
348
+ | 'Tibetan (bo)'
349
+ | 'Tagalog (tl)'
350
+ | 'Malagasy (mg)'
351
+ | 'Assamese (as)'
352
+ | 'Tatar (tt)'
353
+ | 'Hawaiian (haw)'
354
+ | 'Lingala (ln)'
355
+ | 'Hausa (ha)'
356
+ | 'Bashkir (ba)'
357
+ | 'Javanese (jw)'
358
+ | 'Sundanese (su)'
359
+ | DataLink
360
+ /* Initial prompt text to guide the model's style or continue a previous audio segment. The prompt should match the audio language. */
361
+ inferPrompt?: string | DataLink
362
+ /* Beam size to use for beam search (enables beam search if set) */
363
+ inferBeamSize?: number | DataLink
364
+ /* Translate the result to English */
365
+ inferTranslate?: boolean | DataLink
366
+ /* Max threads to use for inference */
367
+ inferMaxThreads?: number | DataLink
368
+ /* Output token-level timestamps in details outlet */
369
+ inferTokenTimestamps?: boolean | DataLink
370
+ /* Speaker diarization (Please use small.en-tdrz model) */
371
+ inferTdrz?: boolean | DataLink
372
+ /* Maximum segment length in characters */
373
+ inferMaxLength?: number | DataLink
374
+ /* Audio time offset in milliseconds */
375
+ inferOffset?: number | DataLink
376
+ /* Audio duration of audio to process in milliseconds */
377
+ inferDuration?: number | DataLink
378
+ /* The file URL or path to be inferred.
379
+ It only supported `wav` format with 16kHz sample rate & single (mono) channel */
380
+ inferFileUrl?: string | DataLink
381
+ /* MD5 of file to be inferred */
382
+ inferFileMd5?: string | DataLink
383
+ /* Record duration in seconds. For performance, the value less than 30 seconds is recommended. */
384
+ inferRealtimeAudioSeconds?: number | DataLink
385
+ /* Optimize audio transcription performance by slicing audio samples when `Realtime Audio Seconds` > 30. */
386
+ inferRealtimeAudioSliceSeconds?: number | DataLink
387
+ /* Min duration of audio to start transcribe. Min: 0.5 seconds, Max: `Realtime Audio Slice Seconds`, Default: 1 second */
388
+ inferRealtimeAudioMinSeconds?: number | DataLink
389
+ /* Save recorded audio to file, the file path will be output to outlet. */
390
+ inferRealtimeSaveAudio?: boolean | DataLink
391
+ /* Start transcribe on recording when the audio volume is greater than the threshold by using VAD (Voice Activity Detection).
392
+ The first VAD will be triggered after 2 second of recording. */
393
+ inferRealtimeVadEnabled?: boolean | DataLink
394
+ /* The length of the collected audio is used for VAD. (ms) */
395
+ inferRealtimeVadMs?: number | DataLink
396
+ /* VAD threshold */
397
+ inferRealtimeVadThold?: number | DataLink
398
+ /* Frequency to apply High-pass filter in VAD */
399
+ inferRealtimeVadFreqThold?: number | DataLink
400
+ }
401
+ events?: {
402
+ /* Event triggered when context state changes */
403
+ onContextStateChange?: Array<EventAction>
404
+ /* Event triggered when error occurs */
405
+ onError?: Array<EventAction>
406
+ /* Event triggered when got transcribe result */
407
+ onTranscribed?: Array<EventAction>
408
+ /* Event triggered when transcribe realtime done */
409
+ onRealtimeStop?: Array<EventAction>
410
+ }
411
+ outlets?: {
412
+ /* Context state */
413
+ contextState?: () => Data
414
+ /* Context details */
415
+ contextDetails?: () => Data
416
+ /* Is transcribing */
417
+ isTranscribing?: () => Data
418
+ /* Progress of transcribe audio (0-100) */
419
+ transcribeProgress?: () => Data
420
+ /* Inference result */
421
+ transcribeResult?: () => Data
422
+ /* Inference result details */
423
+ transcribeDetails?: () => Data
424
+ /* Recorded audio file path of transcribe realtime (if `Save Audio` is enabled) */
425
+ recordedPath?: () => Data
426
+ }
427
+ }
428
+
429
+ /* Local Speech-to-Text (STT) inference based on GGML and [whisper.cpp](https://github.com/ggerganov/whisper.cpp)
430
+
431
+ ## Notice
432
+ - iOS: Supported GPU acceleration, recommended use M1+ / A17+ chip device
433
+ - macOS: Supported GPU acceleration, recommended use M1+ chip device
434
+ - Android: Currently not supported GPU acceleration (Coming soon), recommended use Android 13+ system
435
+ - Linux / Windows: Supported GPU acceleration, you can choose `vulkan` or `cuda` backend in Accel Variant property */
436
+ export type GeneratorSpeechInference = Generator &
437
+ GeneratorSpeechInferenceDef & {
438
+ templateKey: 'GENERATOR_SPEECH_INFERENCE'
439
+ switches: Array<
440
+ SwitchDef &
441
+ GeneratorSpeechInferenceDef & {
442
+ conds?: Array<{
443
+ method: '==' | '!=' | '>' | '<' | '>=' | '<='
444
+ cond:
445
+ | SwitchCondInnerStateCurrentCanvas
446
+ | SwitchCondData
447
+ | {
448
+ __typename: 'SwitchCondInnerStateOutlet'
449
+ outlet:
450
+ | 'contextState'
451
+ | 'contextDetails'
452
+ | 'isTranscribing'
453
+ | 'transcribeProgress'
454
+ | 'transcribeResult'
455
+ | 'transcribeDetails'
456
+ | 'recordedPath'
457
+ value: any
458
+ }
459
+ }>
460
+ }
461
+ >
462
+ }
@@ -0,0 +1,227 @@
1
+ import type { SwitchCondInnerStateCurrentCanvas, SwitchCondData, SwitchDef } from '../switch'
2
+ import type { Data, DataLink } from '../data'
3
+ import type {
4
+ Generator,
5
+ EventAction,
6
+ ActionWithDataParams,
7
+ ActionWithParams,
8
+ Action,
9
+ EventProperty,
10
+ } from '../common'
11
+
12
+ /* Load the model */
13
+ export type GeneratorOnnxSTTActionLoadModel = Action & {
14
+ __actionName: 'GENERATOR_ONNX_STT_LOAD_MODEL'
15
+ }
16
+
17
+ /* Inference */
18
+ export type GeneratorOnnxSTTActionInfer = ActionWithParams & {
19
+ __actionName: 'GENERATOR_ONNX_STT_INFER'
20
+ params?: Array<{
21
+ input: 'audioUri'
22
+ value?: string | DataLink | EventProperty
23
+ mapping?: string
24
+ }>
25
+ }
26
+
27
+ /* Clean cache */
28
+ export type GeneratorOnnxSTTActionCleanCache = Action & {
29
+ __actionName: 'GENERATOR_ONNX_STT_CLEAN_CACHE'
30
+ }
31
+
32
+ /* Release context */
33
+ export type GeneratorOnnxSTTActionReleaseContext = Action & {
34
+ __actionName: 'GENERATOR_ONNX_STT_RELEASE_CONTEXT'
35
+ }
36
+
37
+ interface GeneratorOnnxSTTDef {
38
+ /*
39
+ Default property:
40
+ {
41
+ "model": "whisper-base",
42
+ "modelType": "auto",
43
+ "returnTimestamps": "none",
44
+ "executionMode": "sequential"
45
+ }
46
+ */
47
+ property?: {
48
+ /* Initialize the TTS context on generator initialization */
49
+ init?: boolean | DataLink
50
+ /* STT model */
51
+ model?: string | DataLink
52
+ /* Model type */
53
+ modelType?: string | DataLink
54
+ /* Quantize type */
55
+ quantizeType?:
56
+ | 'auto'
57
+ | 'none'
58
+ | 'fp16'
59
+ | 'q8'
60
+ | 'int8'
61
+ | 'uint8'
62
+ | 'q4'
63
+ | 'bnb4'
64
+ | 'q4f16'
65
+ | DataLink
66
+ /* Return timestamps */
67
+ returnTimestamps?: 'none' | 'enable' | 'word' | DataLink
68
+ /* Transcription language
69
+ Not specifying the language will auto detect the language. */
70
+ language?:
71
+ | 'English'
72
+ | 'Chinese'
73
+ | 'German'
74
+ | 'Spanish'
75
+ | 'Russian'
76
+ | 'Korean'
77
+ | 'French'
78
+ | 'Japanese'
79
+ | 'Portuguese'
80
+ | 'Turkish'
81
+ | 'Polish'
82
+ | 'Catalan'
83
+ | 'Dutch'
84
+ | 'Arabic'
85
+ | 'Swedish'
86
+ | 'Italian'
87
+ | 'Indonesian'
88
+ | 'Hindi'
89
+ | 'Finnish'
90
+ | 'Vietnamese'
91
+ | 'Hebrew'
92
+ | 'Ukrainian'
93
+ | 'Greek'
94
+ | 'Malay'
95
+ | 'Czech'
96
+ | 'Romanian'
97
+ | 'Danish'
98
+ | 'Hungarian'
99
+ | 'Tamil'
100
+ | 'Norwegian'
101
+ | 'Thai'
102
+ | 'Urdu'
103
+ | 'Croatian'
104
+ | 'Bulgarian'
105
+ | 'Lithuanian'
106
+ | 'Latin'
107
+ | 'Maori'
108
+ | 'Malayalam'
109
+ | 'Welsh'
110
+ | 'Slovak'
111
+ | 'Telugu'
112
+ | 'Persian'
113
+ | 'Latvian'
114
+ | 'Bengali'
115
+ | 'Serbian'
116
+ | 'Azerbaijani'
117
+ | 'Slovenian'
118
+ | 'Kannada'
119
+ | 'Estonian'
120
+ | 'Macedonian'
121
+ | 'Breton'
122
+ | 'Basque'
123
+ | 'Icelandic'
124
+ | 'Armenian'
125
+ | 'Nepali'
126
+ | 'Mongolian'
127
+ | 'Bosnian'
128
+ | 'Kazakh'
129
+ | 'Albanian'
130
+ | 'Swahili'
131
+ | 'Galician'
132
+ | 'Marathi'
133
+ | 'Punjabi'
134
+ | 'Sinhala'
135
+ | 'Khmer'
136
+ | 'Shona'
137
+ | 'Yoruba'
138
+ | 'Somali'
139
+ | 'Afrikaans'
140
+ | 'Occitan'
141
+ | 'Georgian'
142
+ | 'Belarusian'
143
+ | 'Tajik'
144
+ | 'Sindhi'
145
+ | 'Gujarati'
146
+ | 'Amharic'
147
+ | 'Yiddish'
148
+ | 'Lao'
149
+ | 'Uzbek'
150
+ | 'Faroese'
151
+ | 'Haitian Creole'
152
+ | 'Pashto'
153
+ | 'Turkmen'
154
+ | 'Nynorsk'
155
+ | 'Maltese'
156
+ | 'Sanskrit'
157
+ | 'Luxembourgish'
158
+ | 'Myanmar'
159
+ | 'Tibetan'
160
+ | 'Tagalog'
161
+ | 'Malagasy'
162
+ | 'Assamese'
163
+ | 'Tatar'
164
+ | 'Hawaiian '
165
+ | 'Lingala'
166
+ | 'Hausa'
167
+ | 'Bashkir'
168
+ | 'Javanese'
169
+ | 'Sundanese'
170
+ | DataLink
171
+ /* Task */
172
+ task?: 'transcribe' | 'translate' | DataLink
173
+ /* Inferencing chunk length */
174
+ chunkLength?: number | DataLink
175
+ /* Executor candidates, descending order of priority
176
+ Default will be xnnpack, wasm, cpu */
177
+ executors?:
178
+ | Array<'qnn' | 'dml' | 'nnapi' | 'xnnpack' | 'coreml' | 'cpu' | 'wasm' | 'webgpu' | DataLink>
179
+ | DataLink
180
+ /* Execution mode
181
+ Usually when the model has many branches, setting this option to `parallel` will give you better performance. */
182
+ executionMode?: 'sequential' | 'parallel' | DataLink
183
+ /* QNN backend */
184
+ qnnBackend?: 'HTP' | 'HTA' | 'DSP' | 'GPU' | 'CPU' | DataLink
185
+ /* Enable FP16 for QNN HTP */
186
+ qnnHtpEnableFp16?: boolean | DataLink
187
+ /* Enable QNN debug */
188
+ qnnEnableDebug?: boolean | DataLink
189
+ }
190
+ events?: {
191
+ /* Event triggered when state change */
192
+ onContextStateChange?: Array<EventAction>
193
+ /* Event triggered when error occurs */
194
+ onError?: Array<EventAction>
195
+ }
196
+ outlets?: {
197
+ /* Context state */
198
+ contextState?: () => Data
199
+ /* Inference result */
200
+ result?: () => Data
201
+ /* Inference result detail */
202
+ resultDetail?: () => Data
203
+ }
204
+ }
205
+
206
+ /* Local STT inference based on [transformers.js](https://huggingface.co/docs/transformers.js)
207
+ You can use any converted model on HuggingFace. */
208
+ export type GeneratorOnnxSTT = Generator &
209
+ GeneratorOnnxSTTDef & {
210
+ templateKey: 'GENERATOR_ONNX_STT'
211
+ switches: Array<
212
+ SwitchDef &
213
+ GeneratorOnnxSTTDef & {
214
+ conds?: Array<{
215
+ method: '==' | '!=' | '>' | '<' | '>=' | '<='
216
+ cond:
217
+ | SwitchCondInnerStateCurrentCanvas
218
+ | SwitchCondData
219
+ | {
220
+ __typename: 'SwitchCondInnerStateOutlet'
221
+ outlet: 'contextState' | 'result' | 'resultDetail'
222
+ value: any
223
+ }
224
+ }>
225
+ }
226
+ >
227
+ }