@fugood/bricks-project 2.21.8 → 2.21.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -100,6 +100,9 @@ export const templateActionNameMap = {
     payloadType: 'CHANNEL_PUBLISH_PAYLOAD_TYPE',
     payload: 'CHANNEL_PUBLISH_PAYLOAD',
   },
+  DELAY: {
+    time: 'DELAY_TIME',
+  },
   USE_SHARE_APPLICATION: {
     applicationId: 'APPLICATION_ID',
     releaseVersion: 'RELEASE_VERSION',
@@ -142,6 +145,13 @@ export const templateActionNameMap = {
     },
   },
 
+  BRICK_VIDEO: {
+    BRICK_VIDEO_SEEK: {
+      seekTime: 'BRICK_VIDEO_SEEK_TIME',
+      play: 'BRICK_VIDEO_PLAY',
+    },
+  },
+
   BRICK_SLIDESHOW: {
     BRICK_SLIDESHOW_JUMP_TO_INDEX: {
       index: 'BRICK_SLIDESHOW_INDEX',
@@ -231,7 +241,12 @@ export const templateActionNameMap = {
       index: 'BRICK_ITEMS_INDEX',
     },
   },
-
+  BRICK_LOTTIE: {
+    BRICK_LOTTIE_PLAY: {
+      startFrame: 'BRICK_LOTTIE_START_FRAME',
+      endFrame: 'BRICK_LOTTIE_END_FRAME',
+    },
+  },
   BRICK_RIVE: {
     BRICK_RIVE_PLAY: {
       animationName: 'BRICK_RIVE_ANIMATION_NAME',
@@ -595,6 +610,27 @@ export const templateActionNameMap = {
       realtimeVadFreqThold: 'GENERATOR_SPEECH_INFERENCE_REALTIME_VAD_FREQ_THOLD',
     },
   },
+  GENERATOR_VAD_INFERENCE: {
+    GENERATOR_VAD_INFERENCE_DETECT_FILE: {
+      fileUrl: 'GENERATOR_VAD_INFERENCE_FILE_URL',
+      threshold: 'GENERATOR_VAD_INFERENCE_THRESHOLD',
+      minSpeechDurationMs: 'GENERATOR_VAD_INFERENCE_MIN_SPEECH_DURATION_MS',
+      minSilenceDurationMs: 'GENERATOR_VAD_INFERENCE_MIN_SILENCE_DURATION_MS',
+      maxSpeechDurationS: 'GENERATOR_VAD_INFERENCE_MAX_SPEECH_DURATION_S',
+      speechPadMs: 'GENERATOR_VAD_INFERENCE_SPEECH_PAD_MS',
+      samplesOverlap: 'GENERATOR_VAD_INFERENCE_SAMPLES_OVERLAP',
+    },
+    GENERATOR_VAD_INFERENCE_DETECT_DATA: {
+      data: 'GENERATOR_VAD_INFERENCE_DATA',
+      threshold: 'GENERATOR_VAD_INFERENCE_THRESHOLD',
+      minSpeechDurationMs: 'GENERATOR_VAD_INFERENCE_MIN_SPEECH_DURATION_MS',
+      minSilenceDurationMs: 'GENERATOR_VAD_INFERENCE_MIN_SILENCE_DURATION_MS',
+      maxSpeechDurationS: 'GENERATOR_VAD_INFERENCE_MAX_SPEECH_DURATION_S',
+      speechPadMs: 'GENERATOR_VAD_INFERENCE_SPEECH_PAD_MS',
+      samplesOverlap: 'GENERATOR_VAD_INFERENCE_SAMPLES_OVERLAP',
+    },
+  },
+
   GENERATOR_LLM: {
     GENERATOR_LLM_TOKENIZE: {
       mode: 'GENERATOR_LLM_MODE',
@@ -612,6 +648,7 @@ export const templateActionNameMap = {
       tools: 'GENERATOR_LLM_TOOLS',
      parallelToolCalls: 'GENERATOR_LLM_PARALLEL_TOOL_CALLS',
      toolChoice: 'GENERATOR_LLM_TOOL_CHOICE',
+      enableThinking: 'GENERATOR_LLM_ENABLE_THINKING',
      prompt: 'GENERATOR_LLM_PROMPT',
      promptMediaPaths: 'GENERATOR_LLM_PROMPT_MEDIA_PATHS',
      promptTemplateData: 'GENERATOR_LLM_PROMPT_TEMPLATE_DATA',
@@ -625,6 +662,7 @@ export const templateActionNameMap = {
      tools: 'GENERATOR_LLM_TOOLS',
      parallelToolCalls: 'GENERATOR_LLM_PARALLEL_TOOL_CALLS',
      toolChoice: 'GENERATOR_LLM_TOOL_CHOICE',
+      enableThinking: 'GENERATOR_LLM_ENABLE_THINKING',
      prompt: 'GENERATOR_LLM_PROMPT',
      promptMediaPaths: 'GENERATOR_LLM_PROMPT_MEDIA_PATHS',
      promptTemplateData: 'GENERATOR_LLM_PROMPT_TEMPLATE_DATA',
@@ -663,6 +701,17 @@ export const templateActionNameMap = {
      sessionCustomKey: 'GENERATOR_LLM_SESSION_CUSTOM_KEY',
    },
  },
+  GENERATOR_GGML_TTS: {
+    GENERATOR_GGML_TTS_GENERATE: {
+      text: 'GENERATOR_GGML_TTS_TEXT',
+    },
+  },
+  GENERATOR_RERANKER: {
+    GENERATOR_RERANKER_RERANK: {
+      query: 'GENERATOR_RERANKER_QUERY',
+      documents: 'GENERATOR_RERANKER_DOCUMENTS',
+    },
+  },
  GENERATOR_QNN_LLM: {
    GENERATOR_QNN_LLM_GENERATE: {
      prompt: 'GENERATOR_QNN_LLM_PROMPT',
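
The map above resolves each action's param `input` key to its event-property constant. A minimal sketch of that lookup, using a trimmed, hypothetical excerpt of the map (the real object ships inside the package source):

```ts
// Trimmed, hypothetical excerpt of templateActionNameMap: system actions map
// param inputs directly; brick/generator entries nest under the action name.
const templateActionNameMap = {
  DELAY: { time: 'DELAY_TIME' },
  BRICK_VIDEO: {
    BRICK_VIDEO_SEEK: {
      seekTime: 'BRICK_VIDEO_SEEK_TIME',
      play: 'BRICK_VIDEO_PLAY',
    },
  },
} as const

// Resolve the constant behind the DELAY action's `time` param:
const delayTimeKey = templateActionNameMap.DELAY.time // 'DELAY_TIME'
```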
package/package.json CHANGED
@@ -1,12 +1,12 @@
 {
   "name": "@fugood/bricks-project",
-  "version": "2.21.8",
+  "version": "2.21.10",
   "main": "index.ts",
   "scripts": {
     "build": "node scripts/build.js"
   },
   "dependencies": {
-    "@modelcontextprotocol/sdk": "^1.11.1",
+    "@modelcontextprotocol/sdk": "^1.15.0",
     "@types/escodegen": "^0.0.10",
     "@types/lodash": "^4.17.12",
     "acorn": "^8.13.0",
@@ -1,15 +1,7 @@
 import { $ } from 'bun'
 import { stat, readFile, writeFile } from 'fs/promises'
-const cwd = process.cwd()
-
-const libFiles = ['types', 'utils', 'index.ts']
 
-await $`mkdir -p ${cwd}/project`
-for (const file of libFiles) {
-  await $`cp -r ${__dirname}/../${file} ${cwd}/project`
-}
-
-console.log('Copied files to project/')
+const cwd = process.cwd()
 
 async function exists(f: string) {
   try {
@@ -20,6 +12,21 @@ async function exists(f: string) {
   }
 }
 
+// handle flag --skip-copy
+const skipCopyProject = process.argv.includes('--skip-copy-project')
+if (skipCopyProject) {
+  console.log('Skipping copy of files to project/')
+} else {
+
+  const libFiles = ['types', 'utils', 'index.ts']
+
+  await $`mkdir -p ${cwd}/project`
+  for (const file of libFiles) {
+    await $`cp -r ${__dirname}/../${file} ${cwd}/project`
+  }
+  console.log('Copied files to project/')
+}
+
 const projectMcpServer = {
   command: 'bun',
   args: [`${cwd}/node_modules/@fugood/bricks-project/tools/mcp-server.ts`],
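
In other words, the copy of `types`, `utils`, and `index.ts` into `project/` now only happens by default: passing `--skip-copy-project` in the script's arguments (it runs under Bun, since it shells out via `$` from 'bun') skips the copy step and just prints a notice.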
package/types/bricks.ts CHANGED
@@ -865,6 +865,43 @@ export type BrickIcon = Brick &
   >
 }
 
+/* Play the video */
+export type BrickVideoActionPlay = Action & {
+  __actionName: 'BRICK_VIDEO_PLAY'
+}
+
+/* Seek the video */
+export type BrickVideoActionSeek = ActionWithParams & {
+  __actionName: 'BRICK_VIDEO_SEEK'
+  params?: Array<
+    | {
+        input: 'seekTime'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'play'
+        value?: boolean | DataLink | EventProperty
+        mapping?: string
+      }
+  >
+}
+
+/* Pause the video */
+export type BrickVideoActionPause = Action & {
+  __actionName: 'BRICK_VIDEO_PAUSE'
+}
+
+/* Replay the video */
+export type BrickVideoActionReplay = Action & {
+  __actionName: 'BRICK_VIDEO_REPLAY'
+}
+
+/* Stop the video */
+export type BrickVideoActionStop = Action & {
+  __actionName: 'BRICK_VIDEO_STOP'
+}
+
 interface BrickVideoDef {
   /*
   Default property:
@@ -2108,6 +2145,43 @@ export type BrickItems = Brick &
   >
 }
 
+/* Play animation */
+export type BrickLottieActionPlay = ActionWithParams & {
+  __actionName: 'BRICK_LOTTIE_PLAY'
+  params?: Array<
+    | {
+        input: 'startFrame'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'endFrame'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+  >
+}
+
+/* Pause animation */
+export type BrickLottieActionPause = Action & {
+  __actionName: 'BRICK_LOTTIE_PAUSE'
+}
+
+/* Resume animation */
+export type BrickLottieActionResume = Action & {
+  __actionName: 'BRICK_LOTTIE_RESUME'
+}
+
+/* Stop animation */
+export type BrickLottieActionStop = Action & {
+  __actionName: 'BRICK_LOTTIE_STOP'
+}
+
+/* Reset animation */
+export type BrickLottieActionReset = Action & {
+  __actionName: 'BRICK_LOTTIE_RESET'
+}
+
 interface BrickLottieDef {
   /*
   Default property:
@@ -2990,6 +3064,10 @@ Default property:
     type?: 'image' | 'video' | DataLink
     /* Default image to display when no generated image is available */
     defaultImage?: string | DataLink
+    /* The hash of the default image */
+    defaultImageHash?: string | DataLink
+    /* The type of the default image hash */
+    defaultImageHashType?: 'md5' | 'sha1' | 'sha256' | DataLink
     /* The Lottie animation to show while generating */
     loadingAnimation?: string | DataLink
     /* The Lottie animation to show when an error occurs */
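
As a rough usage sketch of the new video seek action (the object shape follows the `BrickVideoActionSeek` union above; `parent` comes from the base `Action` type in types/common.ts, and treating `seekTime` as seconds is an assumption):

```ts
// Hypothetical BRICK_VIDEO_SEEK action: jump to a position and start playback.
const seekAndPlay = {
  __actionName: 'BRICK_VIDEO_SEEK',
  parent: 'Brick',
  params: [
    { input: 'seekTime', value: 12.5 }, // assumed to be seconds
    { input: 'play', value: true }, // start playing after the seek
  ],
} as const
```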
package/types/common.ts CHANGED
@@ -39,7 +39,7 @@ export type SubpsaceAction = string
 export type Action = {
   __actionName: string
   parent: 'Brick' | 'Generator' | 'Subspace' | 'System'
-  name: string
+  name?: string
 }
 
 // Find correct key in bricks-project/utils/event-props for EventAction
@@ -65,7 +65,7 @@ export type ItemBrickID = string
 
 export type EventAction = {
   handler: 'system' | (() => Brick | Generator) | SubspaceID | ItemBrickID
-  action: Action
+  action: ActionWithParams | ActionWithDataParams
   waitAsync?: boolean
 }
 
@@ -182,8 +182,8 @@ export type GeneratorFileActionReadContent = ActionWithParams & {
 }
 
 /* Delete */
-export type GeneratorFileActionGeneratorDeleteFile = Action & {
-  __actionName: 'GENERATOR_DELETE_FILE'
+export type GeneratorFileActionDelete = Action & {
+  __actionName: 'GENERATOR_FILE_DELETE'
 }
 
 /* Append (Currently only support text file) */
@@ -4848,8 +4848,13 @@ Default property:
   "model": "BricksDisplay/vits-eng",
   "modelType": "auto",
   "vocoderModel": "speecht5_hifigan",
+  "maxLength": 4096,
+  "temperature": 0.1,
+  "repetitionPenalty": 1.1,
+  "doSample": true,
   "outputType": "play",
   "cacheGenerated": true,
+  "speed": 1,
   "autoInferEnable": false,
   "softBreakRegex": "^[^\\r\\n\\t\\f\\v]*([\\r\\n]+|[。!?!?.]\\B)",
   "hardBreakTime": 500,
@@ -4861,29 +4866,9 @@
     init?: boolean | DataLink
     /* TTS model
    The mms-tts models are licensed under CC-BY-NC-4.0 */
-    model?:
-      | 'Custom'
-      | 'BricksDisplay/vits-eng'
-      | 'BricksDisplay/vits-cmn'
-      | 'BricksDisplay/ellie-Bert-VITS2'
-      | 'mms-tts-ara (NC)'
-      | 'mms-tts-deu (NC)'
-      | 'mms-tts-eng (NC)'
-      | 'mms-tts-fra (NC)'
-      | 'mms-tts-hin (NC)'
-      | 'mms-tts-kor (NC)'
-      | 'mms-tts-por (NC)'
-      | 'mms-tts-ron (NC)'
-      | 'mms-tts-rus (NC)'
-      | 'mms-tts-spa (NC)'
-      | 'mms-tts-vie (NC)'
-      | 'mms-tts-yor (NC)'
-      | 'speecht5_tts'
-      | DataLink
+    model?: string | DataLink
     /* Model type */
-    modelType?: 'auto' | 'vits' | 'bert_vits2' | 'speecht5' | DataLink
-    /* Load quantized model (deprecated, use `quantizeType` instead) */
-    quantized?: boolean | DataLink
+    modelType?: string | DataLink
     /* Quantize type */
     quantizeType?:
       | 'auto'
@@ -4896,22 +4881,33 @@
       | 'bnb4'
       | 'q4f16'
       | DataLink
-    /* Custom model name
-    Choose model from https://huggingface.co/models?pipeline_tag=text-to-audio&library=transformers.js */
-    customModel?: string | DataLink
     /* Vocoder model for SpeechT5 */
     vocoderModel?: 'Custom' | 'speecht5_hifigan' | DataLink
     /* Custom vocoder model
     Choose model from https://huggingface.co/models?library=transformers.js&other=hifigan */
     customVocoderModel?: string | DataLink
-    /* XVector speaker embedding for HiFi-GAN */
+    /* Speaker embedding, for SpeechT5 or StyleTTS (Kokoro) */
     speakerEmbedUrl?: string | DataLink
-    /* MD5 checksum of `speakerEmbedUrl` */
-    speakerEmbedMd5?: string | DataLink
+    /* Hash of `speakerEmbedUrl` */
+    speakerEmbedHash?: string | DataLink
+    /* Hash type of `speakerEmbedUrl` */
+    speakerEmbedHashType?: 'md5' | 'sha256' | 'sha1' | DataLink
+    /* Speaker config, for OuteTTS model */
+    speakerConfig?: {} | DataLink
+    /* Audio token generation max length */
+    maxLength?: number | DataLink
+    /* Audio token generation temperature */
+    temperature?: number | DataLink
+    /* Audio token generation repetition penalty */
+    repetitionPenalty?: number | DataLink
+    /* Use greedy sampling for audio token generation */
+    doSample?: boolean | DataLink
     /* Output mode */
     outputType?: 'play' | 'file' | DataLink
     /* Enable cache for generated audio */
     cacheGenerated?: boolean | DataLink
+    /* Speed of the generated audio, for StyleTTS (Kokoro) */
+    speed?: number | DataLink
     /* Text to generate */
     prompt?: string | DataLink
     /* Auto inference when prompt changes */
@@ -5025,7 +5021,6 @@ interface GeneratorOnnxLLMDef {
   /*
   Default property:
   {
-    "model": "BricksDisplay/phi-1_5-q4",
     "modelType": "auto",
     "toolCallParser": "llama3_json",
     "toolChoice": "auto",
@@ -5044,70 +5039,9 @@ Default property:
     /* Initialize the TTS context on generator initialization */
     init?: boolean | DataLink
     /* LLM model */
-    model?:
-      | 'Custom'
-      | 'onnx-community/gemma-3-1b-it-ONNX'
-      | 'BricksDisplay/phi-1_5'
-      | 'BricksDisplay/phi-1_5-q4'
-      | 'onnx-community/Phi-3.5-vision-instruct'
-      | 'onnx-community/Phi-3-vision-128k-instruct'
-      | 'onnx-community/Phi-4-mini-instruct-ONNX-MHA'
-      | 'onnx-community/Qwen2.5-0.5B'
-      | 'onnx-community/Qwen2.5-0.5B-Instruct'
-      | 'onnx-community/Qwen2.5-1.5B'
-      | 'onnx-community/Qwen2.5-1.5B-Instruct'
-      | 'onnx-community/Qwen2-VL-2B-Instruct'
-      | 'stablelm-2-1_6b'
-      | 'BricksDisplay/stablelm-2-1_6b-q4'
-      | 'stablelm-2-zephyr-1_6b'
-      | 'BricksDisplay/stablelm-2-zephyr-1_6b-q4'
-      | 'BricksDisplay/Llama-2-7b-chat-q4'
-      | 'TinyLLama-v0'
-      | 'TinyLlama-1.1B-Chat-v1.0'
-      | 'BricksDisplay/TinyLlama-1.1B-Chat-v1.0-q4'
-      | 'llama-160m'
-      | 'llama-68m'
-      | 'BricksDisplay/Yi-6B-q4'
-      | 'BricksDisplay/Yi-6B-Chat-q4'
-      | 'BricksDisplay/Mistral-7B-v0.1-q4'
-      | 'BricksDisplay/Mistral-7B-Instruct-v0.2-q4'
-      | 'BricksDisplay/Breeze-7B-Base-v1_0-q4'
-      | 'BricksDisplay/Breeze-7B-Instruct-v1_0-q4'
-      | 'gpt2'
-      | 'distilgpt2'
-      | 'gpt-neo-125M'
-      | 'opt-125m'
-      | 'opt-350m'
-      | 'bloom-560m'
-      | 'bloomz-560m'
-      | 't5-small'
-      | 't5-base'
-      | 'flan-t5-small'
-      | 'flan-t5-base'
-      | 'mt5-small'
-      | 'mt5-base'
-      | 'long-t5-lobal-base'
-      | 'long-t5-tglobal-base'
-      | DataLink
+    model?: string | DataLink
     /* Model type */
-    modelType?:
-      | 'auto'
-      | 'text-generation'
-      | 'qwen2-vl'
-      | 'paligemma'
-      | 'llava'
-      | 'llava_onevision'
-      | 'moondream1'
-      | 'florence2'
-      | 'idefics3'
-      | 'smolvlm'
-      | 'phi3_v'
-      | 't5'
-      | 'mt5'
-      | 'longt5'
-      | DataLink
-    /* Load quantized model (deprecated, use `quantizeType` instead) */
-    quantized?: boolean | DataLink
+    modelType?: string | DataLink
     /* Quantize type */
     quantizeType?:
      | 'auto'
@@ -5120,10 +5054,6 @@
      | 'bnb4'
      | 'q4f16'
      | DataLink
-    /* Custom model name
-    Choose model from https://huggingface.co/models?pipeline_tag=text2text-generation&library=transformers.js
-    or https://huggingface.co/models?pipeline_tag=text-generation&library=transformers.js&sort=trending */
-    customModel?: string | DataLink
     /* Prompt to inference */
     prompt?: string | DataLink
     /* Messages to inference */
@@ -5253,27 +5183,9 @@
     /* Initialize the TTS context on generator initialization */
     init?: boolean | DataLink
     /* STT model */
-    model?:
-      | 'Custom'
-      | 'whisper-tiny'
-      | 'whisper-tiny.en'
-      | 'whisper-small'
-      | 'whisper-small.en'
-      | 'whisper-base'
-      | 'whisper-base.en'
-      | 'whisper-medium'
-      | 'whisper-medium.en'
-      | 'whisper-large'
-      | 'whisper-large-v2'
-      | 'whisper-large-v3'
-      | 'mms-1b-all'
-      | 'mms-1b-fl102'
-      | 'mms-1b-l1107'
-      | DataLink
+    model?: string | DataLink
     /* Model type */
-    modelType?: 'auto' | 'whisper' | 'hubert' | 'wav2vec2' | 'wav2vec2-bert' | DataLink
-    /* Load quantized model (deprecated, use `quantizeType` instead) */
-    quantized?: boolean | DataLink
+    modelType?: string | DataLink
     /* Quantize type */
     quantizeType?:
      | 'auto'
@@ -5286,9 +5198,6 @@
      | 'bnb4'
      | 'q4f16'
      | DataLink
-    /* Custom model name
-    Choose model from https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=transformers.js */
-    customModel?: string | DataLink
     /* Return timestamps */
     returnTimestamps?: 'none' | 'enable' | 'word' | DataLink
     /* Transcription language
@@ -5521,7 +5430,7 @@ export type GeneratorSpeechInferenceActionTranscribeData = ActionWithParams & {
   >
 }
 
-/* Transcribe microphone audio source */
+/* [Deprecated] Transcribe microphone audio source */
 export type GeneratorSpeechInferenceActionTranscribeRealtime = ActionWithParams & {
   __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME'
   params?: Array<
@@ -5588,7 +5497,7 @@ export type GeneratorSpeechInferenceActionTranscribeRealtime = ActionWithParams
   >
 }
 
-/* Stop transcribing microphone audio source */
+/* [Deprecated] Stop transcribing microphone audio source */
 export type GeneratorSpeechInferenceActionTranscribeRealtimeStop = Action & {
   __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME_STOP'
 }
@@ -5836,7 +5745,7 @@ Default property:
     inferRealtimeVadFreqThold?: number | DataLink
   }
   events?: {
-    /* Event triggered when load is done */
+    /* Event triggered when context state changes */
     onContextStateChange?: Array<EventAction>
     /* Event triggered when error occurs */
     onError?: Array<EventAction>
@@ -5892,6 +5801,369 @@ export type GeneratorSpeechInference = Generator &
   >
 }
 
+/* Load the model */
+export type GeneratorVadInferenceActionLoadModel = Action & {
+  __actionName: 'GENERATOR_VAD_INFERENCE_LOAD_MODEL'
+}
+
+/* Detect speech in audio file. You can provide `File URL` property, if not provided, it will use the default `File URL` */
+export type GeneratorVadInferenceActionDetectFile = ActionWithParams & {
+  __actionName: 'GENERATOR_VAD_INFERENCE_DETECT_FILE'
+  params?: Array<
+    | {
+        input: 'fileUrl'
+        value?: string | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'threshold'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'minSpeechDurationMs'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'minSilenceDurationMs'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'maxSpeechDurationS'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'speechPadMs'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'samplesOverlap'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+  >
+}
+
+/* Detect speech in audio data. Currently only support base64 encoded audio data (16-bit PCM, mono, 16kHz) */
+export type GeneratorVadInferenceActionDetectData = ActionWithParams & {
+  __actionName: 'GENERATOR_VAD_INFERENCE_DETECT_DATA'
+  params?: Array<
+    | {
+        input: 'data'
+        value?: any | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'threshold'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'minSpeechDurationMs'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'minSilenceDurationMs'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'maxSpeechDurationS'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'speechPadMs'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'samplesOverlap'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+  >
+}
+
+/* Clear downloaded files (model, audio) & current jobs */
+export type GeneratorVadInferenceActionClearDownload = Action & {
+  __actionName: 'GENERATOR_VAD_INFERENCE_CLEAR_DOWNLOAD'
+}
+
+/* Release context */
+export type GeneratorVadInferenceActionReleaseContext = Action & {
+  __actionName: 'GENERATOR_VAD_INFERENCE_RELEASE_CONTEXT'
+}
+
+interface GeneratorVadInferenceDef {
+  /*
+  Default property:
+  {
+    "init": false,
+    "modelName": "silero-v5.1.2",
+    "modelUseGPU": true,
+    "modelThreads": 4,
+    "detectThreshold": 0.5,
+    "detectMinSpeechDurationMs": 250,
+    "detectMinSilenceDurationMs": 100,
+    "detectMaxSpeechDurationS": 30,
+    "detectSpeechPadMs": 30,
+    "detectSamplesOverlap": 0.1
+  }
+  */
+  property?: {
+    /* Initialize the VAD context on generator initialization
+    Please note that it will take some RAM depending on the model size */
+    init?: boolean | DataLink
+    /* Use model name, currently only supports the Silero VAD model.
+    The model download progress will be done in preload stage or the generator initialization stage.
+    You can also choose `custom` option and set `Model URL` and `Model SHA1` to use your own model */
+    modelName?: 'custom' | 'silero-v5.1.2' | DataLink
+    /* The URL or path of model
+    We used `ggml` format model, please refer to https://huggingface.co/ggml-org/whisper-vad */
+    modelUrl?: string | DataLink
+    /* Hash type of model */
+    modelHashType?: 'md5' | 'sha256' | 'sha1' | DataLink
+    /* Hash of model */
+    modelHash?: string | DataLink
+    /* Use GPU Acceleration for inference. Currently iOS only. */
+    modelUseGPU?: boolean | DataLink
+    /* Number of threads to use for processing */
+    modelThreads?: number | DataLink
+    /* Speech probability threshold (0.0-1.0) */
+    detectThreshold?: number | DataLink
+    /* Minimum speech duration in milliseconds */
+    detectMinSpeechDurationMs?: number | DataLink
+    /* Minimum silence duration in milliseconds */
+    detectMinSilenceDurationMs?: number | DataLink
+    /* Maximum speech duration in seconds */
+    detectMaxSpeechDurationS?: number | DataLink
+    /* Padding around speech segments in milliseconds */
+    detectSpeechPadMs?: number | DataLink
+    /* Overlap between analysis windows (0.0-1.0) */
+    detectSamplesOverlap?: number | DataLink
+    /* The file URL or path to be analyzed.
+    It only supported `wav` format with 16kHz sample rate & single (mono) channel */
+    detectFileUrl?: string | DataLink
+    /* MD5 of file to be analyzed */
+    detectFileMd5?: string | DataLink
+  }
+  events?: {
+    /* Event triggered when context state changes */
+    onContextStateChange?: Array<EventAction>
+    /* Event triggered when error occurs */
+    onError?: Array<EventAction>
+    /* Event triggered when got detection result */
+    onDetected?: Array<EventAction>
+  }
+  outlets?: {
+    /* Context state */
+    contextState?: () => Data
+    /* Context details */
+    contextDetails?: () => Data
+    /* Is detecting */
+    isDetecting?: () => Data
+    /* Detection segments result */
+    detectionSegments?: () => Data
+    /* Detection details */
+    detectionDetails?: () => Data
+  }
+}
+
+/* Local Voice Activity Detection (VAD) inference based on GGML and [whisper.rn](https://github.com/mybigday/whisper.rn) */
+export type GeneratorVadInference = Generator &
+  GeneratorVadInferenceDef & {
+    templateKey: 'GENERATOR_VAD_INFERENCE'
+    switches: Array<
+      SwitchDef &
+        GeneratorVadInferenceDef & {
+          conds?: Array<{
+            method: '==' | '!=' | '>' | '<' | '>=' | '<='
+            cond:
+              | SwitchCondInnerStateCurrentCanvas
+              | SwitchCondData
+              | {
+                  __typename: 'SwitchCondInnerStateOutlet'
+                  outlet:
+                    | 'contextState'
+                    | 'contextDetails'
+                    | 'isDetecting'
+                    | 'detectionSegments'
+                    | 'detectionDetails'
+                  value: any
+                }
+          }>
+        }
+    >
+  }
+
+/* Start realtime transcription */
+export type GeneratorRealtimeTranscriptionActionStart = Action & {
+  __actionName: 'GENERATOR_REALTIME_TRANSCRIPTION_START'
+}
+
+/* Stop realtime transcription */
+export type GeneratorRealtimeTranscriptionActionStop = Action & {
+  __actionName: 'GENERATOR_REALTIME_TRANSCRIPTION_STOP'
+}
+
+/* Force move to next slice */
+export type GeneratorRealtimeTranscriptionActionNextSlice = Action & {
+  __actionName: 'GENERATOR_REALTIME_TRANSCRIPTION_NEXT_SLICE'
+}
+
+/* Reset transcriber state */
+export type GeneratorRealtimeTranscriptionActionReset = Action & {
+  __actionName: 'GENERATOR_REALTIME_TRANSCRIPTION_RESET'
+}
+
+interface GeneratorRealtimeTranscriptionDef {
+  /*
+  Default property:
+  {
+    "sttLivePolicy": "only-in-use",
+    "vadInferenceLivePolicy": "only-in-use",
+    "vadEnabled": true,
+    "audioSliceSec": 30,
+    "audioMinSec": 1,
+    "maxSlicesInMemory": 5,
+    "vadStrategy": "use-preset",
+    "vadPreset": "default",
+    "autoSliceOnSpeechEnd": true,
+    "autoSliceThreshold": 2,
+    "initialPrompt": "",
+    "promptPreviousSlices": false,
+    "saveAudio": true,
+    "testMode": false,
+    "testPlaybackSpeed": 1,
+    "testChunkDurationMs": 100,
+    "testLoop": false
+  }
+  */
+  property?: {
+    /* STT Generator for Whisper context */
+    sttGeneratorId?: string | DataLink
+    /* STT Live Policy. If the policy is `only-in-use`, the STT context will be released when not in use. */
+    sttLivePolicy?: 'only-in-use' | 'manual' | DataLink
+    /* VAD Inference Generator for voice activity detection */
+    vadInferenceGeneratorId?: string | DataLink
+    /* VAD Inference Live Policy. If the policy is `only-in-use`, the VAD Inference context will be released when not in use. */
+    vadInferenceLivePolicy?: 'only-in-use' | 'manual' | DataLink
+    /* Enable VAD (Voice Activity Detection) */
+    vadEnabled?: boolean | DataLink
+    /* Audio slice duration in seconds */
+    audioSliceSec?: number | DataLink
+    /* Minimum audio duration to start transcription in seconds */
+    audioMinSec?: number | DataLink
+    /* Maximum number of slices to keep in memory */
+    maxSlicesInMemory?: number | DataLink
+    /* VAD Strategy */
+    vadStrategy?: 'use-preset' | 'use-generator-options' | DataLink
+    /* VAD preset configuration */
+    vadPreset?:
+      | 'default'
+      | 'sensitive'
+      | 'very-sensitive'
+      | 'conservative'
+      | 'very-conservative'
+      | 'continuous-speech'
+      | 'meeting'
+      | 'noisy-environment'
+      | DataLink
+    /* Auto slice on speech end */
+    autoSliceOnSpeechEnd?: boolean | DataLink
+    /* Auto slice threshold in seconds */
+    autoSliceThreshold?: number | DataLink
+    /* Initial prompt for transcription */
+    initialPrompt?: string | DataLink
+    /* Include previous slices in prompt */
+    promptPreviousSlices?: boolean | DataLink
+    /* Enable audio output saving (auto-generates file path) */
+    saveAudio?: boolean | DataLink
+    /* Use test mode with file simulation */
+    testMode?: boolean | DataLink
+    /* Test audio file path for simulation */
+    testFilePath?: string | DataLink
+    /* Test audio file hash */
+    testFileHash?: string | DataLink
+    /* Test audio file hash type */
+    testFileHashType?: string | DataLink
+    /* Test playback speed */
+    testPlaybackSpeed?: number | DataLink
+    /* Test chunk duration in milliseconds */
+    testChunkDurationMs?: number | DataLink
+    /* Loop test audio file */
+    testLoop?: boolean | DataLink
+  }
+  events?: {
+    /* Event triggered when transcription starts, processes, or ends */
+    onTranscribe?: Array<EventAction>
+    /* Event triggered on VAD (Voice Activity Detection) events */
+    onVad?: Array<EventAction>
+    /* Event triggered when error occurs */
+    onError?: Array<EventAction>
+    /* Event triggered when status changes */
+    onStatusChange?: Array<EventAction>
+    /* Event triggered when statistics update */
+    onStatsUpdate?: Array<EventAction>
+    /* Event triggered when transcription ends */
+    onEnd?: Array<EventAction>
+  }
+  outlets?: {
+    /* Is realtime transcription currently active */
+    isActive?: () => Data
+    /* Is currently transcribing audio */
+    isTranscribing?: () => Data
+    /* Current transcription results */
+    results?: () => Data
+    /* Current transcription result text */
+    resultText?: () => Data
+    /* Current statistics */
+    statistics?: () => Data
+    /* Latest transcribe event */
+    lastTranscribeEvent?: () => Data
+    /* Latest VAD event */
+    lastVadEvent?: () => Data
+    /* Audio output file path (auto-generated when saving audio) */
+    audioOutputPath?: () => Data
+  }
+}
+
+/* Realtime speech-to-text transcription using Whisper and VAD with live audio streaming */
+export type GeneratorRealtimeTranscription = Generator &
+  GeneratorRealtimeTranscriptionDef & {
+    templateKey: 'GENERATOR_REALTIME_TRANSCRIPTION'
+    switches: Array<
+      SwitchDef &
+        GeneratorRealtimeTranscriptionDef & {
+          conds?: Array<{
+            method: '==' | '!=' | '>' | '<' | '>=' | '<='
+            cond:
+              | SwitchCondInnerStateCurrentCanvas
+              | SwitchCondData
+              | {
+                  __typename: 'SwitchCondInnerStateOutlet'
+                  outlet:
+                    | 'isActive'
+                    | 'isTranscribing'
+                    | 'results'
+                    | 'resultText'
+                    | 'statistics'
+                    | 'lastTranscribeEvent'
+                    | 'lastVadEvent'
+                    | 'audioOutputPath'
+                  value: any
+                }
+          }>
+        }
+    >
+  }
+
 /* Load the model */
 export type GeneratorLLMActionLoadModel = Action & {
   __actionName: 'GENERATOR_LLM_LOAD_MODEL'
@@ -5973,6 +6245,11 @@ export type GeneratorLLMActionProcessPrompt = ActionWithParams & {
         value?: string | DataLink | EventProperty
         mapping?: string
       }
+    | {
+        input: 'enableThinking'
+        value?: boolean | DataLink | EventProperty
+        mapping?: string
+      }
     | {
         input: 'prompt'
         value?: string | DataLink | EventProperty
@@ -6035,6 +6312,11 @@ export type GeneratorLLMActionCompletion = ActionWithParams & {
         value?: string | DataLink | EventProperty
         mapping?: string
      }
+    | {
+        input: 'enableThinking'
+        value?: boolean | DataLink | EventProperty
+        mapping?: string
+      }
    | {
        input: 'prompt'
        value?: string | DataLink | EventProperty
@@ -6385,7 +6667,7 @@ Default property:
     /* Prompt (text mode) */
     completionPrompt?: string | DataLink
     /* Media paths to be used in the prompt template (PREVIEW FEATURE)
-    In prompt, use `<__image__>` for position of media content */
+    In prompt, use `<__media__>` for position of media content */
     completionPromptMediaPaths?: Array<string | DataLink> | DataLink
     /* Data to be used in the prompt template (e.g. `Hello ${name}`). Supports nested data, such as `Hello ${user.name}`. */
     completionPromptTemplateData?: {} | DataLink
@@ -6404,6 +6686,8 @@ Default property:
       }
       schema?: {} | DataLink
     }
+    /* Enable thinking */
+    completionEnableThinking?: boolean | DataLink
     /* Stop words */
     completionStopWords?: Array<string | DataLink> | DataLink
     /* Number of tokens to predict */
@@ -6462,7 +6746,7 @@ Default property:
     completionIgnoreEOS?: boolean | DataLink
   }
   events?: {
-    /* Event triggered when load is done */
+    /* Event triggered when context state changes */
    onContextStateChange?: Array<EventAction>
    /* Event triggered when error occurs */
    onError?: Array<EventAction>
@@ -6508,7 +6792,7 @@ Default property:
 - iOS: Supported GPU acceleration, recommended use M1+ / A17+ chip device
 - macOS: Supported GPU acceleration, recommended use M1+ chip device
 - Android: Currently not supported GPU acceleration (Coming soon), recommended use Android 13+ system
-- Linux / Windows: Supported GPU acceleration, currently only Vulkan backend available */
+- Linux / Windows: Supported GPU acceleration, you can choose `vulkan` or `cuda` backend in Accel Variant property */
 export type GeneratorLLM = Generator &
   GeneratorLLMDef & {
     templateKey: 'GENERATOR_LLM'
@@ -6542,6 +6826,308 @@ export type GeneratorLLM = Generator &
   >
 }
 
+/* Load the model */
+export type GeneratorGGMLTTSActionLoadModel = Action & {
+  __actionName: 'GENERATOR_GGML_TTS_LOAD_MODEL'
+}
+
+/* Generate audio */
+export type GeneratorGGMLTTSActionGenerate = ActionWithParams & {
+  __actionName: 'GENERATOR_GGML_TTS_GENERATE'
+  params?: Array<{
+    input: 'text'
+    value?: string | DataLink | EventProperty
+    mapping?: string
+  }>
+}
+
+/* Clean cache */
+export type GeneratorGGMLTTSActionCleanCache = Action & {
+  __actionName: 'GENERATOR_GGML_TTS_CLEAN_CACHE'
+}
+
+/* Release context */
+export type GeneratorGGMLTTSActionReleaseContext = Action & {
+  __actionName: 'GENERATOR_GGML_TTS_RELEASE_CONTEXT'
+}
+
+interface GeneratorGGMLTTSDef {
+  /*
+  Default property:
+  {
+    "vocoderUrl": "https://huggingface.co/ggml-org/WavTokenizer/resolve/main/WavTokenizer-Large-75-F16.gguf",
+    "vocoderHashType": "sha256",
+    "vocoderHash": "2356baa8631cc2995ea3465196a017a2733600d849a91180c0f97fa7fb375bbe",
+    "outputType": "play",
+    "cacheGenerated": true,
+    "autoInferEnable": false,
+    "softBreakRegex": "^[^\\r\\n\\t\\f\\v]*([\\r\\n]+|[。!?!?.]\\B)",
+    "hardBreakTime": 500,
+    "completionTemperature": 0.1,
+    "completionRepetitionPenalty": 1.1,
+    "completionTopK": 40,
+    "completionTopP": 0.9,
+    "completionMinP": 0.05,
+    "useGuideToken": false,
+    "contextSize": 4096,
+    "batchSize": 512,
+    "maxThreads": 1,
+    "accelVariant": "default",
+    "mainGpu": 0,
+    "gpuLayers": 0,
+    "useMlock": true,
+    "useMmap": true,
+    "useFlashAttn": false
+  }
+  */
+  property?: {
+    /* Initialize the TTS context on generator initialization */
+    init?: boolean | DataLink
+    /* The URL or path of model
+    We used GGUF format model, please refer to https://github.com/ggerganov/llama.cpp/tree/master#description */
+    modelUrl?: string | DataLink
+    /* Hash type of model */
+    modelHashType?: 'md5' | 'sha256' | 'sha1' | DataLink
+    /* Hash of model */
+    modelHash?: string | DataLink
+    /* The URL or path of vocoder model */
+    vocoderUrl?: string | DataLink
+    /* Hash type of vocoder model */
+    vocoderHashType?: 'md5' | 'sha256' | 'sha1' | DataLink
+    /* Hash of vocoder model */
+    vocoderHash?: string | DataLink
+    /* Output mode */
+    outputType?: 'play' | 'file' | DataLink
+    /* Enable cache for generated audio */
+    cacheGenerated?: boolean | DataLink
+    /* Text to generate */
+    prompt?: string | DataLink
+    /* Speaker JSON */
+    speaker?: {} | DataLink
+    /* Auto inference when prompt changes */
+    autoInferEnable?: boolean | DataLink
+    /* Segmentation rule for auto inference */
+    softBreakRegex?: string | DataLink
+    /* Time to force inference when softBreakRegex is not satisfied */
+    hardBreakTime?: number | DataLink
+    /* Temperature */
+    completionTemperature?: number | DataLink
+    /* Repetition Penalty */
+    completionRepetitionPenalty?: number | DataLink
+    /* Top K sampling */
+    completionTopK?: number | DataLink
+    /* Top P sampling */
+    completionTopP?: number | DataLink
+    /* Min P sampling */
+    completionMinP?: number | DataLink
+    /* Set the random number generator (RNG) seed (default: -1, -1 = random seed) */
+    completionSeed?: number | DataLink
+    /* Enable guide token to help prevent hallucinations by forcing the TTS to use the correct words. */
+    useGuideToken?: boolean | DataLink
+    /* Context size, for OutTTS recommended 4096 ~ 8192 (Default to 4096) */
+    contextSize?: number | DataLink
+    /* Logical batch size for prompt processing */
+    batchSize?: number | DataLink
+    /* Number of threads */
+    maxThreads?: number | DataLink
+    /* Accelerator variant (Only for desktop)
+    `default` - CPU / Metal (macOS)
+    `vulkan` - Use Vulkan
+    `cuda` - Use CUDA */
+    accelVariant?: 'default' | 'vulkan' | 'cuda' | DataLink
+    /* Main GPU index */
+    mainGpu?: number | DataLink
+    /* Number of GPU layers (NOTE: Currently not supported for Android) */
+    gpuLayers?: number | DataLink
+    /* Use memory lock */
+    useMlock?: boolean | DataLink
+    /* Use mmap */
+    useMmap?: boolean | DataLink
+    /* Use Flash Attention for inference (Recommended with GPU enabled) */
+    useFlashAttn?: boolean | DataLink
+  }
+  events?: {
+    /* Event triggered when state change */
+    onContextStateChange?: Array<EventAction>
+    /* Event triggered when error occurs */
+    onError?: Array<EventAction>
+  }
+  outlets?: {
+    /* Context state */
+    contextState?: () => Data
+    /* Generated audio file */
+    generatedAudio?: () => Data
+    /* Generated audio file is playing */
+    generatedAudioPlaying?: () => Data
+  }
+}
+
+/* Local Text-to-Speech (TTS) inference based on GGML and [llama.cpp](https://github.com/ggerganov/llama.cpp)
+You can use any converted model on HuggingFace. */
+export type GeneratorGGMLTTS = Generator &
+  GeneratorGGMLTTSDef & {
+    templateKey: 'GENERATOR_GGML_TTS'
+    switches: Array<
+      SwitchDef &
+        GeneratorGGMLTTSDef & {
+          conds?: Array<{
+            method: '==' | '!=' | '>' | '<' | '>=' | '<='
+            cond:
+              | SwitchCondInnerStateCurrentCanvas
+              | SwitchCondData
+              | {
+                  __typename: 'SwitchCondInnerStateOutlet'
+                  outlet: 'contextState' | 'generatedAudio' | 'generatedAudioPlaying'
+                  value: any
+                }
+          }>
+        }
+    >
+  }
+
+/* Load the model */
+export type GeneratorRerankerActionLoadModel = Action & {
+  __actionName: 'GENERATOR_RERANKER_LOAD_MODEL'
+}
+
+/* Rerank documents based on query relevance */
+export type GeneratorRerankerActionRerank = ActionWithParams & {
+  __actionName: 'GENERATOR_RERANKER_RERANK'
+  params?: Array<
+    | {
+        input: 'query'
+        value?: string | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'documents'
+        value?: Array<any> | DataLink | EventProperty
+        mapping?: string
+      }
+  >
+}
+
+/* Release context */
+export type GeneratorRerankerActionReleaseContext = Action & {
+  __actionName: 'GENERATOR_RERANKER_RELEASE_CONTEXT'
+}
+
+interface GeneratorRerankerDef {
+  /*
+  Default property:
+  {
+    "init": false,
+    "contextSize": 512,
+    "batchSize": 512,
+    "uBatchSize": 512,
+    "accelVariant": "default",
+    "mainGpu": 0,
+    "gpuLayers": 0,
+    "useMlock": true,
+    "useMmap": true,
+    "normalize": 1
+  }
+  */
+  property?: {
+    /* Initialize the Reranker context on generator initialization */
+    init?: boolean | DataLink
+    /* The URL or path of reranker model (GGUF format) */
+    modelUrl?: string | DataLink
+    /* Hash of reranker model */
+    modelHash?: string | DataLink
+    /* Hash type of reranker model */
+    modelHashType?: 'md5' | 'sha256' | 'sha1' | DataLink
+    /* Context size (0 ~ 4096) (Default to 512) */
+    contextSize?: number | DataLink
+    /* Logical batch size for processing (default: 512) */
+    batchSize?: number | DataLink
+    /* Physical maximum batch size (default: 512) */
+    uBatchSize?: number | DataLink
+    /* Accelerator variant (default: default) */
+    accelVariant?:
+      | 'default'
+      | 'avx'
+      | 'avx2'
+      | 'avx512'
+      | 'metal'
+      | 'opencl'
+      | 'vulkan'
+      | 'cuda'
+      | 'rocm'
+      | DataLink
+    /* Main GPU index (default: 0) */
+    mainGpu?: number | DataLink
+    /* Number of layers to store in VRAM (default: 0) */
+    gpuLayers?: number | DataLink
+    /* Maximum number of threads to use (default: auto) */
+    maxThreads?: number | DataLink
+    /* Use mlock to keep model in memory (default: true) */
+    useMlock?: boolean | DataLink
+    /* Use mmap for model loading (default: true) */
+    useMmap?: boolean | DataLink
+    /* Query text for reranking */
+    query?: string | DataLink
+    /* Array of documents to rerank */
+    documents?: Array<string | DataLink> | DataLink
+    /* Normalize reranking scores (default: from model config) */
+    normalize?: number | DataLink | boolean | DataLink | DataLink
+    /* Maximum number of documents to return (default: unlimited) */
+    topK?: number | DataLink
+  }
+  events?: {
+    /* Event triggered when the reranker context state changes (loading, ready, error, released) */
+    onContextStateChange?: Array<EventAction>
+    /* Event triggered when an error occurs during reranker operations */
+    onError?: Array<EventAction>
+  }
+  outlets?: {
+    /* Current state of the reranker context (loading, ready, error, released) */
+    contextState?: () => Data
+    /* Loading progress of the reranker model (0-100) */
+    contextLoadProgress?: () => Data
+    /* Detailed information about the reranker context including instance ID and processing status */
+    contextDetails?: () => Data
+    /* Result of the reranking operation containing scored and ranked documents */
+    rerankResult?: () => Data
+    /* Boolean indicating whether the reranker is currently processing a request */
+    isProcessing?: () => Data
+  }
+}
+
+/* Local rerank based on GGML and [llama.cpp](https://github.com/ggerganov/llama.cpp)
+
+## Notice
+- The device RAM must be larger than 8GB
+- iOS: Supported GPU acceleration, recommended use M1+ / A17+ chip device
+- macOS: Supported GPU acceleration, recommended use M1+ chip device
+- Android: Currently not supported GPU acceleration (Coming soon), recommended use Android 13+ system
+- Linux / Windows: Supported GPU acceleration, currently only Vulkan backend available */
+export type GeneratorReranker = Generator &
+  GeneratorRerankerDef & {
+    templateKey: 'GENERATOR_RERANKER'
+    switches: Array<
+      SwitchDef &
+        GeneratorRerankerDef & {
+          conds?: Array<{
+            method: '==' | '!=' | '>' | '<' | '>=' | '<='
+            cond:
+              | SwitchCondInnerStateCurrentCanvas
+              | SwitchCondData
+              | {
+                  __typename: 'SwitchCondInnerStateOutlet'
+                  outlet:
+                    | 'contextState'
+                    | 'contextLoadProgress'
+                    | 'contextDetails'
+                    | 'rerankResult'
+                    | 'isProcessing'
+                  value: any
+                }
+          }>
+        }
+    >
+  }
+
 /* Load the model */
 export type GeneratorQnnLlmActionLoadModel = Action & {
   __actionName: 'GENERATOR_QNN_LLM_LOAD_MODEL'
@@ -6669,7 +7255,7 @@ Default property:
     greedy?: boolean | DataLink
   }
   events?: {
-    /* Event triggered when load is done */
+    /* Event triggered when context state changes */
     onContextStateChange?: Array<EventAction>
     /* Event triggered when generate is done */
     onGenerate?: Array<EventAction>
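
As a rough usage sketch of the new VAD action shape (`parent` comes from the base `Action` type above; the URL and parameter values are placeholders):

```ts
// Hypothetical GENERATOR_VAD_INFERENCE_DETECT_FILE action; per the property
// docs above, the file should be 16kHz mono wav.
const detectSpeech = {
  __actionName: 'GENERATOR_VAD_INFERENCE_DETECT_FILE',
  parent: 'Generator',
  params: [
    { input: 'fileUrl', value: 'https://example.com/sample.wav' }, // placeholder URL
    { input: 'threshold', value: 0.5 }, // default speech probability threshold
    { input: 'minSilenceDurationMs', value: 100 }, // default minimum silence gap
  ],
} as const
```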
package/types/system.ts CHANGED
@@ -467,6 +467,16 @@ export type SystemActionChannelPublish = ActionWithParams & {
   >
 }
 
+/* Delay for a certain time between actions */
+export type SystemActionDelay = ActionWithParams & {
+  __actionName: 'DELAY'
+  params?: Array<{
+    input: 'time'
+    value?: number | DataLink | EventProperty
+    mapping?: string
+  }>
+}
+
 /* [Internal] Use a shared application */
 export type SystemActionUseShareApplication = ActionWithParams & {
   __actionName: 'USE_SHARE_APPLICATION'
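
A minimal sketch of wiring the new DELAY action into an event list, assuming `handler: 'system'` targets system actions and that `time` is in milliseconds (both assumptions):

```ts
// Hypothetical EventAction entry that pauses ~500ms between two actions.
const delayBetweenActions = {
  handler: 'system',
  action: {
    __actionName: 'DELAY',
    parent: 'System',
    params: [{ input: 'time', value: 500 }], // assumed milliseconds
  },
  waitAsync: true, // assumption: wait for the delay before running the next action
} as const
```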
package/utils/data.ts CHANGED
@@ -64,6 +64,7 @@ type SystemDataName =
   | 'workspaceName'
   | 'applicationInfo'
   | 'applicationName'
+  | 'deviceID'
   | 'macAddress'
   | 'bindDeviceCode'
   | 'bindDeviceCodeExpire'
@@ -343,6 +344,14 @@ export const systemDataList: Array<SystemDataInfo> = [
     type: 'string',
     value: '',
   },
+  {
+    name: 'deviceID',
+    id: 'PROPERTY_BANK_DATA_NODE_8b5c2d9e-f0a1-4b2c-8d3e-4f5a6b7c8d9e',
+    title: 'SYSTEM: Device ID',
+    description: 'Device ID of current device',
+    type: 'string',
+    value: 'unknown',
+  },
   {
     name: 'macAddress',
     id: 'PROPERTY_BANK_DATA_NODE_f01fcc78-0723-11ed-ac00-877339de1030',
@@ -776,6 +776,46 @@ export const templateEventPropsMap = {
       'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_TIME', // type: number
     ],
   },
+  GENERATOR_VAD_INFERENCE: {
+    onContextStateChange: [
+      'GENERATOR_VAD_INFERENCE_CONTEXT_STATE', // type: string
+      'GENERATOR_VAD_INFERENCE_CONTEXT_DETAILS', // type: object
+    ],
+    onError: [
+      'GENERATOR_VAD_INFERENCE_ERROR', // type: string
+    ],
+    onDetected: [
+      'GENERATOR_VAD_INFERENCE_DETECTION_SEGMENTS', // type: array
+      'GENERATOR_VAD_INFERENCE_DETECTION_TIME', // type: number
+    ],
+  },
+  GENERATOR_REALTIME_TRANSCRIPTION: {
+    onTranscribe: [
+      'GENERATOR_REALTIME_TRANSCRIPTION_TRANSCRIBE_EVENT', // type: object
+    ],
+    onVad: [
+      'GENERATOR_REALTIME_TRANSCRIPTION_VAD_EVENT', // type: object
+      'GENERATOR_REALTIME_TRANSCRIPTION_VAD_EVENT_TYPE', // type: string
+      'GENERATOR_REALTIME_TRANSCRIPTION_VAD_EVENT_CONFIDENCE', // type: number
+      'GENERATOR_REALTIME_TRANSCRIPTION_VAD_EVENT_DURATION', // type: number
+      'GENERATOR_REALTIME_TRANSCRIPTION_VAD_EVENT_SLICE_INDEX', // type: number
+    ],
+    onError: [
+      'GENERATOR_REALTIME_TRANSCRIPTION_ERROR', // type: string
+    ],
+    onStatusChange: [
+      'GENERATOR_REALTIME_TRANSCRIPTION_IS_ACTIVE', // type: bool
+    ],
+    onStatsUpdate: [
+      'GENERATOR_REALTIME_TRANSCRIPTION_STATS_TYPE', // type: string
+      'GENERATOR_REALTIME_TRANSCRIPTION_STATS_TIMESTAMP', // type: number
+      'GENERATOR_REALTIME_TRANSCRIPTION_STATS', // type: object
+    ],
+    onEnd: [
+      'GENERATOR_REALTIME_TRANSCRIPTION_END_RESULTS', // type: array
+      'GENERATOR_REALTIME_TRANSCRIPTION_END_AUDIO_OUTPUT_PATH', // type: string
+    ],
+  },
   GENERATOR_LLM: {
     onContextStateChange: [
       'GENERATOR_LLM_CONTEXT_STATE', // type: string
@@ -806,6 +846,15 @@ export const templateEventPropsMap = {
       'GENERATOR_LLM_COMPLETION_FUNCTION_DETAILS', // type: object
     ],
   },
+  GENERATOR_GGML_TTS: {
+    onContextStateChange: [
+      'GENERATOR_GGML_TTS_CONTEXT_STATE', // type: string
+    ],
+    onError: [
+      'GENERATOR_GGML_TTS_ERROR', // type: string
+    ],
+  },
+  GENERATOR_RERANKER: {},
   GENERATOR_QNN_LLM: {
     onContextStateChange: [
      'GENERATOR_QNN_LLM_CONTEXT_STATE', // type: string
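
A small sketch of reading the new system data entry (the deep import path is an assumption; `systemDataList` is the export shown above in package/utils/data.ts):

```ts
// Assumed import path into the package source.
import { systemDataList } from '@fugood/bricks-project/utils/data'

// Look up the new Device ID system data node; its `value` stays 'unknown'
// until the runtime populates it.
const deviceIdInfo = systemDataList.find((entry) => entry.name === 'deviceID')
console.log(deviceIdInfo?.title) // 'SYSTEM: Device ID'
```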