@fugood/bricks-project 2.22.0-beta.15 → 2.22.0-beta.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -610,6 +610,27 @@ export const templateActionNameMap = {
  realtimeVadFreqThold: 'GENERATOR_SPEECH_INFERENCE_REALTIME_VAD_FREQ_THOLD',
  },
  },
+ GENERATOR_VAD_INFERENCE: {
+ GENERATOR_VAD_INFERENCE_DETECT_FILE: {
+ fileUrl: 'GENERATOR_VAD_INFERENCE_FILE_URL',
+ threshold: 'GENERATOR_VAD_INFERENCE_THRESHOLD',
+ minSpeechDurationMs: 'GENERATOR_VAD_INFERENCE_MIN_SPEECH_DURATION_MS',
+ minSilenceDurationMs: 'GENERATOR_VAD_INFERENCE_MIN_SILENCE_DURATION_MS',
+ maxSpeechDurationS: 'GENERATOR_VAD_INFERENCE_MAX_SPEECH_DURATION_S',
+ speechPadMs: 'GENERATOR_VAD_INFERENCE_SPEECH_PAD_MS',
+ samplesOverlap: 'GENERATOR_VAD_INFERENCE_SAMPLES_OVERLAP',
+ },
+ GENERATOR_VAD_INFERENCE_DETECT_DATA: {
+ data: 'GENERATOR_VAD_INFERENCE_DATA',
+ threshold: 'GENERATOR_VAD_INFERENCE_THRESHOLD',
+ minSpeechDurationMs: 'GENERATOR_VAD_INFERENCE_MIN_SPEECH_DURATION_MS',
+ minSilenceDurationMs: 'GENERATOR_VAD_INFERENCE_MIN_SILENCE_DURATION_MS',
+ maxSpeechDurationS: 'GENERATOR_VAD_INFERENCE_MAX_SPEECH_DURATION_S',
+ speechPadMs: 'GENERATOR_VAD_INFERENCE_SPEECH_PAD_MS',
+ samplesOverlap: 'GENERATOR_VAD_INFERENCE_SAMPLES_OVERLAP',
+ },
+ },
+
  GENERATOR_LLM: {
  GENERATOR_LLM_TOKENIZE: {
  mode: 'GENERATOR_LLM_MODE',
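
Note: the entries above map each action input to the event-prop key it publishes. A minimal lookup sketch in TypeScript (the import path is assumed; this diff does not show the file name):

```ts
// Assumed import path; the diff only shows the exported map's shape.
import { templateActionNameMap } from '@fugood/bricks-project/utils/action-names'

// Resolve the prop key for the `fileUrl` input of the detect-file action.
const key =
  templateActionNameMap.GENERATOR_VAD_INFERENCE
    .GENERATOR_VAD_INFERENCE_DETECT_FILE.fileUrl
console.log(key) // 'GENERATOR_VAD_INFERENCE_FILE_URL'
```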
@@ -685,6 +706,12 @@ export const templateActionNameMap = {
  text: 'GENERATOR_GGML_TTS_TEXT',
  },
  },
+ GENERATOR_RERANKER: {
+ GENERATOR_RERANKER_RERANK: {
+ query: 'GENERATOR_RERANKER_QUERY',
+ documents: 'GENERATOR_RERANKER_DOCUMENTS',
+ },
+ },
  GENERATOR_QNN_LLM: {
  GENERATOR_QNN_LLM_GENERATE: {
  prompt: 'GENERATOR_QNN_LLM_PROMPT',
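
Note: the reranker map follows the same pattern with two inputs. A hedged sketch of a matching params array (literal values stand in for DataLink/EventProperty bindings):

```ts
// Sketch only: the shape matches GeneratorRerankerActionRerank defined
// later in this diff; values here are illustrative literals.
const rerankAction = {
  __actionName: 'GENERATOR_RERANKER_RERANK' as const,
  params: [
    { input: 'query' as const, value: 'how to reset the device' },
    { input: 'documents' as const, value: ['manual page', 'faq entry'] },
  ],
}
```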
package/package.json CHANGED
@@ -1,12 +1,12 @@
  {
  "name": "@fugood/bricks-project",
- "version": "2.22.0-beta.15",
+ "version": "2.22.0-beta.17",
  "main": "index.ts",
  "scripts": {
  "build": "node scripts/build.js"
  },
  "dependencies": {
- "@modelcontextprotocol/sdk": "^1.11.1",
+ "@modelcontextprotocol/sdk": "^1.15.0",
  "@types/escodegen": "^0.0.10",
  "@types/lodash": "^4.17.12",
  "acorn": "^8.13.0",
@@ -14,5 +14,5 @@
  "lodash": "^4.17.4",
  "uuid": "^8.3.1"
  },
- "gitHead": "4995b78120bd6e75df65a302398a9ba3070da3e7"
+ "gitHead": "5e8893af505fa5a7b1a902e11764c1f9ebb36a4f"
  }
@@ -1,15 +1,7 @@
  import { $ } from 'bun'
  import { stat, readFile, writeFile } from 'fs/promises'
- const cwd = process.cwd()
-
- const libFiles = ['types', 'utils', 'index.ts']

- await $`mkdir -p ${cwd}/project`
- for (const file of libFiles) {
- await $`cp -r ${__dirname}/../${file} ${cwd}/project`
- }
-
- console.log('Copied files to project/')
+ const cwd = process.cwd()

  async function exists(f: string) {
  try {
@@ -20,6 +12,21 @@ async function exists(f: string) {
  }
  }

+ // handle flag --skip-copy-project
+ const skipCopyProject = process.argv.includes('--skip-copy-project')
+ if (skipCopyProject) {
+ console.log('Skipping copy of files to project/')
+ } else {
+
+ const libFiles = ['types', 'utils', 'index.ts']
+
+ await $`mkdir -p ${cwd}/project`
+ for (const file of libFiles) {
+ await $`cp -r ${__dirname}/../${file} ${cwd}/project`
+ }
+ console.log('Copied files to project/')
+ }
+
  const projectMcpServer = {
  command: 'bun',
  args: [`${cwd}/node_modules/@fugood/bricks-project/tools/mcp-server.ts`],
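
Note: the copy step is now opt-out. A usage sketch (assuming the script is run with Bun, as its `bun` import suggests; the path is illustrative):

```ts
import { $ } from 'bun'

// Re-run the build without overwriting a locally edited project/ copy.
// Script location is assumed from the package layout shown above.
await $`bun node_modules/@fugood/bricks-project/scripts/build.js --skip-copy-project`
```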
package/types/common.ts CHANGED
@@ -39,7 +39,7 @@ export type SubpsaceAction = string
  export type Action = {
  __actionName: string
  parent: 'Brick' | 'Generator' | 'Subspace' | 'System'
- name: string
+ name?: string
  }

  // Find correct key in bricks-project/utils/event-props for EventAction
@@ -65,7 +65,7 @@ export type ItemBrickID = string

  export type EventAction = {
  handler: 'system' | (() => Brick | Generator) | SubspaceID | ItemBrickID
- action: Action
+ action: ActionWithParams | ActionWithDataParams
  waitAsync?: boolean
  }

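Note: with `name` optional and `action` widened, event bindings can carry parameterized actions directly. A minimal sketch against the types above (handler and values are placeholders; `parent` is assumed to still be required via `Action`):

```ts
// Sketch: an EventAction whose action carries params, as the widened
// `action: ActionWithParams | ActionWithDataParams` type now allows.
const binding = {
  handler: 'system' as const,
  action: {
    __actionName: 'GENERATOR_VAD_INFERENCE_DETECT_FILE',
    parent: 'Generator' as const, // assumption: ActionWithParams extends Action
    params: [{ input: 'fileUrl', value: 'https://example.com/a.wav' }],
  },
  waitAsync: true,
}
```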
@@ -4856,6 +4856,7 @@ Default property:
  "doSample": true,
  "outputType": "play",
  "cacheGenerated": true,
+ "speed": 1,
  "autoInferEnable": false,
  "softBreakRegex": "^[^\\r\\n\\t\\f\\v]*([\\r\\n]+|[。!?!?.]\\B)",
  "hardBreakTime": 500,
@@ -4867,30 +4868,9 @@ Default property:
  init?: boolean | DataLink
  /* TTS model
  The mms-tts models are licensed under CC-BY-NC-4.0 */
- model?:
- | 'Custom'
- | 'BricksDisplay/vits-eng'
- | 'BricksDisplay/vits-cmn'
- | 'BricksDisplay/ellie-Bert-VITS2'
- | 'onnx-community/OuteTTS-1.0-0.6B-ONNX'
- | 'mms-tts-ara (NC)'
- | 'mms-tts-deu (NC)'
- | 'mms-tts-eng (NC)'
- | 'mms-tts-fra (NC)'
- | 'mms-tts-hin (NC)'
- | 'mms-tts-kor (NC)'
- | 'mms-tts-por (NC)'
- | 'mms-tts-ron (NC)'
- | 'mms-tts-rus (NC)'
- | 'mms-tts-spa (NC)'
- | 'mms-tts-vie (NC)'
- | 'mms-tts-yor (NC)'
- | 'speecht5_tts'
- | DataLink
+ model?: string | DataLink
  /* Model type */
- modelType?: 'auto' | 'vits' | 'bert_vits2' | 'speecht5' | 'outetts-1.0' | DataLink
- /* Load quantized model (deprecated, use `quantizeType` instead) */
- quantized?: boolean | DataLink
+ modelType?: string | DataLink
  /* Quantize type */
  quantizeType?:
  | 'auto'
@@ -4903,18 +4883,17 @@ Default property:
  | 'bnb4'
  | 'q4f16'
  | DataLink
- /* Custom model name
- Choose model from https://huggingface.co/models?pipeline_tag=text-to-audio&library=transformers.js */
- customModel?: string | DataLink
  /* Vocoder model for SpeechT5 */
  vocoderModel?: 'Custom' | 'speecht5_hifigan' | DataLink
  /* Custom vocoder model
  Choose model from https://huggingface.co/models?library=transformers.js&other=hifigan */
  customVocoderModel?: string | DataLink
- /* XVector speaker embedding for HiFi-GAN */
+ /* Speaker embedding, for SpeechT5 or StyleTTS (Kokoro) */
  speakerEmbedUrl?: string | DataLink
- /* MD5 checksum of `speakerEmbedUrl` */
- speakerEmbedMd5?: string | DataLink
+ /* Hash of `speakerEmbedUrl` */
+ speakerEmbedHash?: string | DataLink
+ /* Hash type of `speakerEmbedUrl` */
+ speakerEmbedHashType?: 'md5' | 'sha256' | 'sha1' | DataLink
  /* Speaker config, for OuteTTS model */
  speakerConfig?: {} | DataLink
  /* Audio token generation max length */
@@ -4929,6 +4908,8 @@ Default property:
  outputType?: 'play' | 'file' | DataLink
  /* Enable cache for generated audio */
  cacheGenerated?: boolean | DataLink
+ /* Speed of the generated audio, for StyleTTS (Kokoro) */
+ speed?: number | DataLink
  /* Text to generate */
  prompt?: string | DataLink
  /* Auto inference when prompt changes */
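
Note: the TTS model enum collapses to a free-form string, `customModel` and `speakerEmbedMd5` are removed, and `speed` plus generic hash fields arrive. A migration sketch (model id and hash are illustrative):

```ts
// Before: model enum + customModel + speakerEmbedMd5
// After: any model id string + speakerEmbedHash/HashType + speed
const ttsProperty = {
  model: 'onnx-community/Kokoro-82M-ONNX', // assumption: any transformers.js TTS id
  modelType: 'auto',
  speakerEmbedUrl: 'https://example.com/voice.bin', // placeholder
  speakerEmbedHashType: 'sha256' as const,
  speakerEmbedHash: '<sha256 of voice.bin>', // placeholder
  speed: 1,
}
```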
@@ -5042,7 +5023,6 @@ interface GeneratorOnnxLLMDef {
  /*
  Default property:
  {
- "model": "BricksDisplay/phi-1_5-q4",
  "modelType": "auto",
  "toolCallParser": "llama3_json",
  "toolChoice": "auto",
@@ -5061,70 +5041,9 @@ Default property:
  /* Initialize the TTS context on generator initialization */
  init?: boolean | DataLink
  /* LLM model */
- model?:
- | 'Custom'
- | 'onnx-community/gemma-3-1b-it-ONNX'
- | 'BricksDisplay/phi-1_5'
- | 'BricksDisplay/phi-1_5-q4'
- | 'onnx-community/Phi-3.5-vision-instruct'
- | 'onnx-community/Phi-3-vision-128k-instruct'
- | 'onnx-community/Phi-4-mini-instruct-ONNX-MHA'
- | 'onnx-community/Qwen2.5-0.5B'
- | 'onnx-community/Qwen2.5-0.5B-Instruct'
- | 'onnx-community/Qwen2.5-1.5B'
- | 'onnx-community/Qwen2.5-1.5B-Instruct'
- | 'onnx-community/Qwen2-VL-2B-Instruct'
- | 'stablelm-2-1_6b'
- | 'BricksDisplay/stablelm-2-1_6b-q4'
- | 'stablelm-2-zephyr-1_6b'
- | 'BricksDisplay/stablelm-2-zephyr-1_6b-q4'
- | 'BricksDisplay/Llama-2-7b-chat-q4'
- | 'TinyLLama-v0'
- | 'TinyLlama-1.1B-Chat-v1.0'
- | 'BricksDisplay/TinyLlama-1.1B-Chat-v1.0-q4'
- | 'llama-160m'
- | 'llama-68m'
- | 'BricksDisplay/Yi-6B-q4'
- | 'BricksDisplay/Yi-6B-Chat-q4'
- | 'BricksDisplay/Mistral-7B-v0.1-q4'
- | 'BricksDisplay/Mistral-7B-Instruct-v0.2-q4'
- | 'BricksDisplay/Breeze-7B-Base-v1_0-q4'
- | 'BricksDisplay/Breeze-7B-Instruct-v1_0-q4'
- | 'gpt2'
- | 'distilgpt2'
- | 'gpt-neo-125M'
- | 'opt-125m'
- | 'opt-350m'
- | 'bloom-560m'
- | 'bloomz-560m'
- | 't5-small'
- | 't5-base'
- | 'flan-t5-small'
- | 'flan-t5-base'
- | 'mt5-small'
- | 'mt5-base'
- | 'long-t5-lobal-base'
- | 'long-t5-tglobal-base'
- | DataLink
+ model?: string | DataLink
  /* Model type */
- modelType?:
- | 'auto'
- | 'text-generation'
- | 'qwen2-vl'
- | 'paligemma'
- | 'llava'
- | 'llava_onevision'
- | 'moondream1'
- | 'florence2'
- | 'idefics3'
- | 'smolvlm'
- | 'phi3_v'
- | 't5'
- | 'mt5'
- | 'longt5'
- | DataLink
- /* Load quantized model (deprecated, use `quantizeType` instead) */
- quantized?: boolean | DataLink
+ modelType?: string | DataLink
  /* Quantize type */
  quantizeType?:
  | 'auto'
@@ -5137,10 +5056,6 @@ Default property:
  | 'bnb4'
  | 'q4f16'
  | DataLink
- /* Custom model name
- Choose model from https://huggingface.co/models?pipeline_tag=text2text-generation&library=transformers.js
- or https://huggingface.co/models?pipeline_tag=text-generation&library=transformers.js&sort=trending */
- customModel?: string | DataLink
  /* Prompt to inference */
  prompt?: string | DataLink
  /* Messages to inference */
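
Note: the same enum-to-string widening repeats here and in the STT generator below; previous enum members remain valid as plain strings. A sketch:

```ts
// Sketch: an existing enum value keeps working as a plain string,
// and new repo ids no longer require a package update.
const llmProperty = {
  model: 'onnx-community/Qwen2.5-0.5B-Instruct',
  modelType: 'auto',
  quantizeType: 'q4f16' as const,
}
```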
@@ -5270,27 +5185,9 @@ Default property:
  /* Initialize the TTS context on generator initialization */
  init?: boolean | DataLink
  /* STT model */
- model?:
- | 'Custom'
- | 'whisper-tiny'
- | 'whisper-tiny.en'
- | 'whisper-small'
- | 'whisper-small.en'
- | 'whisper-base'
- | 'whisper-base.en'
- | 'whisper-medium'
- | 'whisper-medium.en'
- | 'whisper-large'
- | 'whisper-large-v2'
- | 'whisper-large-v3'
- | 'mms-1b-all'
- | 'mms-1b-fl102'
- | 'mms-1b-l1107'
- | DataLink
+ model?: string | DataLink
  /* Model type */
- modelType?: 'auto' | 'whisper' | 'hubert' | 'wav2vec2' | 'wav2vec2-bert' | DataLink
- /* Load quantized model (deprecated, use `quantizeType` instead) */
- quantized?: boolean | DataLink
+ modelType?: string | DataLink
  /* Quantize type */
  quantizeType?:
  | 'auto'
@@ -5303,9 +5200,6 @@ Default property:
  | 'bnb4'
  | 'q4f16'
  | DataLink
- /* Custom model name
- Choose model from https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=transformers.js */
- customModel?: string | DataLink
  /* Return timestamps */
  returnTimestamps?: 'none' | 'enable' | 'word' | DataLink
  /* Transcription language
@@ -5538,7 +5432,7 @@ export type GeneratorSpeechInferenceActionTranscribeData = ActionWithParams & {
  >
  }

- /* Transcribe microphone audio source */
+ /* [Deprecated] Transcribe microphone audio source */
  export type GeneratorSpeechInferenceActionTranscribeRealtime = ActionWithParams & {
  __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME'
  params?: Array<
@@ -5605,7 +5499,7 @@ export type GeneratorSpeechInferenceActionTranscribeRealtime = ActionWithParams
  >
  }

- /* Stop transcribing microphone audio source */
+ /* [Deprecated] Stop transcribing microphone audio source */
  export type GeneratorSpeechInferenceActionTranscribeRealtimeStop = Action & {
  __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME_STOP'
  }
@@ -5853,7 +5747,7 @@ Default property:
  inferRealtimeVadFreqThold?: number | DataLink
  }
  events?: {
- /* Event triggered when load is done */
+ /* Event triggered when context state changes */
  onContextStateChange?: Array<EventAction>
  /* Event triggered when error occurs */
  onError?: Array<EventAction>
@@ -5909,6 +5803,369 @@ export type GeneratorSpeechInference = Generator &
  >
  }

+ /* Load the model */
+ export type GeneratorVadInferenceActionLoadModel = Action & {
+ __actionName: 'GENERATOR_VAD_INFERENCE_LOAD_MODEL'
+ }
+
+ /* Detect speech in an audio file. You can provide the `File URL` param; if not provided, the default `File URL` property is used */
+ export type GeneratorVadInferenceActionDetectFile = ActionWithParams & {
+ __actionName: 'GENERATOR_VAD_INFERENCE_DETECT_FILE'
+ params?: Array<
+ | {
+ input: 'fileUrl'
+ value?: string | DataLink | EventProperty
+ mapping?: string
+ }
+ | {
+ input: 'threshold'
+ value?: number | DataLink | EventProperty
+ mapping?: string
+ }
+ | {
+ input: 'minSpeechDurationMs'
+ value?: number | DataLink | EventProperty
+ mapping?: string
+ }
+ | {
+ input: 'minSilenceDurationMs'
+ value?: number | DataLink | EventProperty
+ mapping?: string
+ }
+ | {
+ input: 'maxSpeechDurationS'
+ value?: number | DataLink | EventProperty
+ mapping?: string
+ }
+ | {
+ input: 'speechPadMs'
+ value?: number | DataLink | EventProperty
+ mapping?: string
+ }
+ | {
+ input: 'samplesOverlap'
+ value?: number | DataLink | EventProperty
+ mapping?: string
+ }
+ >
+ }
+
+ /* Detect speech in audio data. Currently only supports base64-encoded audio data (16-bit PCM, mono, 16kHz) */
+ export type GeneratorVadInferenceActionDetectData = ActionWithParams & {
+ __actionName: 'GENERATOR_VAD_INFERENCE_DETECT_DATA'
+ params?: Array<
+ | {
+ input: 'data'
+ value?: any | EventProperty
+ mapping?: string
+ }
+ | {
+ input: 'threshold'
+ value?: number | DataLink | EventProperty
+ mapping?: string
+ }
+ | {
+ input: 'minSpeechDurationMs'
+ value?: number | DataLink | EventProperty
+ mapping?: string
+ }
+ | {
+ input: 'minSilenceDurationMs'
+ value?: number | DataLink | EventProperty
+ mapping?: string
+ }
+ | {
+ input: 'maxSpeechDurationS'
+ value?: number | DataLink | EventProperty
+ mapping?: string
+ }
+ | {
+ input: 'speechPadMs'
+ value?: number | DataLink | EventProperty
+ mapping?: string
+ }
+ | {
+ input: 'samplesOverlap'
+ value?: number | DataLink | EventProperty
+ mapping?: string
+ }
+ >
+ }
+
+ /* Clear downloaded files (model, audio) & current jobs */
+ export type GeneratorVadInferenceActionClearDownload = Action & {
+ __actionName: 'GENERATOR_VAD_INFERENCE_CLEAR_DOWNLOAD'
+ }
+
+ /* Release context */
+ export type GeneratorVadInferenceActionReleaseContext = Action & {
+ __actionName: 'GENERATOR_VAD_INFERENCE_RELEASE_CONTEXT'
+ }
+
+ interface GeneratorVadInferenceDef {
+ /*
+ Default property:
+ {
+ "init": false,
+ "modelName": "silero-v5.1.2",
+ "modelUseGPU": true,
+ "modelThreads": 4,
+ "detectThreshold": 0.5,
+ "detectMinSpeechDurationMs": 250,
+ "detectMinSilenceDurationMs": 100,
+ "detectMaxSpeechDurationS": 30,
+ "detectSpeechPadMs": 30,
+ "detectSamplesOverlap": 0.1
+ }
+ */
+ property?: {
+ /* Initialize the VAD context on generator initialization.
+ Please note that it will take some RAM depending on the model size */
+ init?: boolean | DataLink
+ /* Model name; currently only the Silero VAD model is supported.
+ The model download happens in the preload stage or the generator initialization stage.
+ You can also choose the `custom` option and set `Model URL` and `Model Hash` to use your own model */
+ modelName?: 'custom' | 'silero-v5.1.2' | DataLink
+ /* The URL or path of the model.
+ We use the `ggml` model format; please refer to https://huggingface.co/ggml-org/whisper-vad */
+ modelUrl?: string | DataLink
+ /* Hash type of model */
+ modelHashType?: 'md5' | 'sha256' | 'sha1' | DataLink
+ /* Hash of model */
+ modelHash?: string | DataLink
+ /* Use GPU acceleration for inference. Currently iOS only. */
+ modelUseGPU?: boolean | DataLink
+ /* Number of threads to use for processing */
+ modelThreads?: number | DataLink
+ /* Speech probability threshold (0.0-1.0) */
+ detectThreshold?: number | DataLink
+ /* Minimum speech duration in milliseconds */
+ detectMinSpeechDurationMs?: number | DataLink
+ /* Minimum silence duration in milliseconds */
+ detectMinSilenceDurationMs?: number | DataLink
+ /* Maximum speech duration in seconds */
+ detectMaxSpeechDurationS?: number | DataLink
+ /* Padding around speech segments in milliseconds */
+ detectSpeechPadMs?: number | DataLink
+ /* Overlap between analysis windows (0.0-1.0) */
+ detectSamplesOverlap?: number | DataLink
+ /* The file URL or path to be analyzed.
+ Only `wav` format with a 16kHz sample rate and a single (mono) channel is supported */
+ detectFileUrl?: string | DataLink
+ /* MD5 of the file to be analyzed */
+ detectFileMd5?: string | DataLink
+ }
+ events?: {
+ /* Event triggered when context state changes */
+ onContextStateChange?: Array<EventAction>
+ /* Event triggered when error occurs */
+ onError?: Array<EventAction>
+ /* Event triggered when a detection result is received */
+ onDetected?: Array<EventAction>
+ }
+ outlets?: {
+ /* Context state */
+ contextState?: () => Data
+ /* Context details */
+ contextDetails?: () => Data
+ /* Is detecting */
+ isDetecting?: () => Data
+ /* Detection segments result */
+ detectionSegments?: () => Data
+ /* Detection details */
+ detectionDetails?: () => Data
+ }
+ }
+
+ /* Local Voice Activity Detection (VAD) inference based on GGML and [whisper.rn](https://github.com/mybigday/whisper.rn) */
+ export type GeneratorVadInference = Generator &
+ GeneratorVadInferenceDef & {
+ templateKey: 'GENERATOR_VAD_INFERENCE'
+ switches: Array<
+ SwitchDef &
+ GeneratorVadInferenceDef & {
+ conds?: Array<{
+ method: '==' | '!=' | '>' | '<' | '>=' | '<='
+ cond:
+ | SwitchCondInnerStateCurrentCanvas
+ | SwitchCondData
+ | {
+ __typename: 'SwitchCondInnerStateOutlet'
+ outlet:
+ | 'contextState'
+ | 'contextDetails'
+ | 'isDetecting'
+ | 'detectionSegments'
+ | 'detectionDetails'
+ value: any
+ }
+ }>
+ }
+ >
+ }
+
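Note: putting the pieces together, a VAD generator instance would look roughly like this (a sketch against the new def; ids and URLs are placeholders):

```ts
// Sketch: minimal GeneratorVadInference config relying on the defaults above.
const vadGenerator = {
  templateKey: 'GENERATOR_VAD_INFERENCE' as const,
  property: {
    init: true,
    modelName: 'silero-v5.1.2' as const,
    detectThreshold: 0.5,
    detectFileUrl: 'https://example.com/speech.wav', // placeholder: 16kHz mono wav
  },
  events: {
    // segments arrive via GENERATOR_VAD_INFERENCE_DETECTION_SEGMENTS
    // (see templateEventPropsMap at the end of this diff)
    onDetected: [],
  },
}
```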
+ /* Start realtime transcription */
+ export type GeneratorRealtimeTranscriptionActionStart = Action & {
+ __actionName: 'GENERATOR_REALTIME_TRANSCRIPTION_START'
+ }
+
+ /* Stop realtime transcription */
+ export type GeneratorRealtimeTranscriptionActionStop = Action & {
+ __actionName: 'GENERATOR_REALTIME_TRANSCRIPTION_STOP'
+ }
+
+ /* Force move to next slice */
+ export type GeneratorRealtimeTranscriptionActionNextSlice = Action & {
+ __actionName: 'GENERATOR_REALTIME_TRANSCRIPTION_NEXT_SLICE'
+ }
+
+ /* Reset transcriber state */
+ export type GeneratorRealtimeTranscriptionActionReset = Action & {
+ __actionName: 'GENERATOR_REALTIME_TRANSCRIPTION_RESET'
+ }
+
+ interface GeneratorRealtimeTranscriptionDef {
+ /*
+ Default property:
+ {
+ "sttLivePolicy": "only-in-use",
+ "vadInferenceLivePolicy": "only-in-use",
+ "vadEnabled": true,
+ "audioSliceSec": 30,
+ "audioMinSec": 1,
+ "maxSlicesInMemory": 5,
+ "vadStrategy": "use-preset",
+ "vadPreset": "default",
+ "autoSliceOnSpeechEnd": true,
+ "autoSliceThreshold": 2,
+ "initialPrompt": "",
+ "promptPreviousSlices": false,
+ "saveAudio": true,
+ "testMode": false,
+ "testPlaybackSpeed": 1,
+ "testChunkDurationMs": 100,
+ "testLoop": false
+ }
+ */
+ property?: {
+ /* STT Generator for Whisper context */
+ sttGeneratorId?: string | DataLink
+ /* STT Live Policy. If the policy is `only-in-use`, the STT context will be released when not in use. */
+ sttLivePolicy?: 'only-in-use' | 'manual' | DataLink
+ /* VAD Inference Generator for voice activity detection */
+ vadInferenceGeneratorId?: string | DataLink
+ /* VAD Inference Live Policy. If the policy is `only-in-use`, the VAD Inference context will be released when not in use. */
+ vadInferenceLivePolicy?: 'only-in-use' | 'manual' | DataLink
+ /* Enable VAD (Voice Activity Detection) */
+ vadEnabled?: boolean | DataLink
+ /* Audio slice duration in seconds */
+ audioSliceSec?: number | DataLink
+ /* Minimum audio duration to start transcription in seconds */
+ audioMinSec?: number | DataLink
+ /* Maximum number of slices to keep in memory */
+ maxSlicesInMemory?: number | DataLink
+ /* VAD Strategy */
+ vadStrategy?: 'use-preset' | 'use-generator-options' | DataLink
+ /* VAD preset configuration */
+ vadPreset?:
+ | 'default'
+ | 'sensitive'
+ | 'very-sensitive'
+ | 'conservative'
+ | 'very-conservative'
+ | 'continuous-speech'
+ | 'meeting'
+ | 'noisy-environment'
+ | DataLink
+ /* Auto slice on speech end */
+ autoSliceOnSpeechEnd?: boolean | DataLink
+ /* Auto slice threshold in seconds */
+ autoSliceThreshold?: number | DataLink
+ /* Initial prompt for transcription */
+ initialPrompt?: string | DataLink
+ /* Include previous slices in prompt */
+ promptPreviousSlices?: boolean | DataLink
+ /* Enable audio output saving (auto-generates file path) */
+ saveAudio?: boolean | DataLink
+ /* Use test mode with file simulation */
+ testMode?: boolean | DataLink
+ /* Test audio file path for simulation */
+ testFilePath?: string | DataLink
+ /* Test audio file hash */
+ testFileHash?: string | DataLink
+ /* Test audio file hash type */
+ testFileHashType?: string | DataLink
+ /* Test playback speed */
+ testPlaybackSpeed?: number | DataLink
+ /* Test chunk duration in milliseconds */
+ testChunkDurationMs?: number | DataLink
+ /* Loop test audio file */
+ testLoop?: boolean | DataLink
+ }
+ events?: {
+ /* Event triggered when transcription starts, processes, or ends */
+ onTranscribe?: Array<EventAction>
+ /* Event triggered on VAD (Voice Activity Detection) events */
+ onVad?: Array<EventAction>
+ /* Event triggered when error occurs */
+ onError?: Array<EventAction>
+ /* Event triggered when status changes */
+ onStatusChange?: Array<EventAction>
+ /* Event triggered when statistics update */
+ onStatsUpdate?: Array<EventAction>
+ /* Event triggered when transcription ends */
+ onEnd?: Array<EventAction>
+ }
+ outlets?: {
+ /* Is realtime transcription currently active */
+ isActive?: () => Data
+ /* Is currently transcribing audio */
+ isTranscribing?: () => Data
+ /* Current transcription results */
+ results?: () => Data
+ /* Current transcription result text */
+ resultText?: () => Data
+ /* Current statistics */
+ statistics?: () => Data
+ /* Latest transcribe event */
+ lastTranscribeEvent?: () => Data
+ /* Latest VAD event */
+ lastVadEvent?: () => Data
+ /* Audio output file path (auto-generated when saving audio) */
+ audioOutputPath?: () => Data
+ }
+ }
+
+ /* Realtime speech-to-text transcription using Whisper and VAD with live audio streaming */
+ export type GeneratorRealtimeTranscription = Generator &
+ GeneratorRealtimeTranscriptionDef & {
+ templateKey: 'GENERATOR_REALTIME_TRANSCRIPTION'
+ switches: Array<
+ SwitchDef &
+ GeneratorRealtimeTranscriptionDef & {
+ conds?: Array<{
+ method: '==' | '!=' | '>' | '<' | '>=' | '<='
+ cond:
+ | SwitchCondInnerStateCurrentCanvas
+ | SwitchCondData
+ | {
+ __typename: 'SwitchCondInnerStateOutlet'
+ outlet:
+ | 'isActive'
+ | 'isTranscribing'
+ | 'results'
+ | 'resultText'
+ | 'statistics'
+ | 'lastTranscribeEvent'
+ | 'lastVadEvent'
+ | 'audioOutputPath'
+ value: any
+ }
+ }>
+ }
+ >
+ }
+
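Note: the realtime generator composes the STT and VAD generators by id. A sketch (ids are placeholders):

```ts
// Sketch: realtime transcription wired to separate STT and VAD generators.
const realtimeGenerator = {
  templateKey: 'GENERATOR_REALTIME_TRANSCRIPTION' as const,
  property: {
    sttGeneratorId: 'stt-1', // placeholder id of a speech-inference generator
    vadInferenceGeneratorId: 'vad-1', // placeholder id of the VAD generator
    vadEnabled: true,
    audioSliceSec: 30,
    vadPreset: 'meeting' as const,
    saveAudio: true, // the audioOutputPath outlet then reports the generated file
  },
}
```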
  /* Load the model */
  export type GeneratorLLMActionLoadModel = Action & {
  __actionName: 'GENERATOR_LLM_LOAD_MODEL'
@@ -6492,7 +6749,7 @@ Default property:
  completionIgnoreEOS?: boolean | DataLink
  }
  events?: {
- /* Event triggered when load is done */
+ /* Event triggered when context state changes */
  onContextStateChange?: Array<EventAction>
  /* Event triggered when error occurs */
  onError?: Array<EventAction>
@@ -6731,6 +6988,149 @@ export type GeneratorGGMLTTS = Generator &
  >
  }

+ /* Load the model */
+ export type GeneratorRerankerActionLoadModel = Action & {
+ __actionName: 'GENERATOR_RERANKER_LOAD_MODEL'
+ }
+
+ /* Rerank documents based on query relevance */
+ export type GeneratorRerankerActionRerank = ActionWithParams & {
+ __actionName: 'GENERATOR_RERANKER_RERANK'
+ params?: Array<
+ | {
+ input: 'query'
+ value?: string | DataLink | EventProperty
+ mapping?: string
+ }
+ | {
+ input: 'documents'
+ value?: Array<any> | DataLink | EventProperty
+ mapping?: string
+ }
+ >
+ }
+
+ /* Release context */
+ export type GeneratorRerankerActionReleaseContext = Action & {
+ __actionName: 'GENERATOR_RERANKER_RELEASE_CONTEXT'
+ }
+
+ interface GeneratorRerankerDef {
+ /*
+ Default property:
+ {
+ "init": false,
+ "contextSize": 512,
+ "batchSize": 512,
+ "uBatchSize": 512,
+ "accelVariant": "default",
+ "mainGpu": 0,
+ "gpuLayers": 0,
+ "useMlock": true,
+ "useMmap": true,
+ "normalize": 1
+ }
+ */
+ property?: {
+ /* Initialize the Reranker context on generator initialization */
+ init?: boolean | DataLink
+ /* The URL or path of the reranker model (GGUF format) */
+ modelUrl?: string | DataLink
+ /* Hash of reranker model */
+ modelHash?: string | DataLink
+ /* Hash type of reranker model */
+ modelHashType?: 'md5' | 'sha256' | 'sha1' | DataLink
+ /* Context size (0 ~ 4096) (default: 512) */
+ contextSize?: number | DataLink
+ /* Logical batch size for processing (default: 512) */
+ batchSize?: number | DataLink
+ /* Physical maximum batch size (default: 512) */
+ uBatchSize?: number | DataLink
+ /* Accelerator variant (default: default) */
+ accelVariant?:
+ | 'default'
+ | 'avx'
+ | 'avx2'
+ | 'avx512'
+ | 'metal'
+ | 'opencl'
+ | 'vulkan'
+ | 'cuda'
+ | 'rocm'
+ | DataLink
+ /* Main GPU index (default: 0) */
+ mainGpu?: number | DataLink
+ /* Number of layers to store in VRAM (default: 0) */
+ gpuLayers?: number | DataLink
+ /* Maximum number of threads to use (default: auto) */
+ maxThreads?: number | DataLink
+ /* Use mlock to keep model in memory (default: true) */
+ useMlock?: boolean | DataLink
+ /* Use mmap for model loading (default: true) */
+ useMmap?: boolean | DataLink
+ /* Query text for reranking */
+ query?: string | DataLink
+ /* Array of documents to rerank */
+ documents?: Array<string | DataLink> | DataLink
+ /* Normalize reranking scores (default: from model config) */
+ normalize?: number | boolean | DataLink
+ /* Maximum number of documents to return (default: unlimited) */
+ topK?: number | DataLink
+ }
+ events?: {
+ /* Event triggered when the reranker context state changes (loading, ready, error, released) */
+ onContextStateChange?: Array<EventAction>
+ /* Event triggered when an error occurs during reranker operations */
+ onError?: Array<EventAction>
+ }
+ outlets?: {
+ /* Current state of the reranker context (loading, ready, error, released) */
+ contextState?: () => Data
+ /* Loading progress of the reranker model (0-100) */
+ contextLoadProgress?: () => Data
+ /* Detailed information about the reranker context including instance ID and processing status */
+ contextDetails?: () => Data
+ /* Result of the reranking operation containing scored and ranked documents */
+ rerankResult?: () => Data
+ /* Boolean indicating whether the reranker is currently processing a request */
+ isProcessing?: () => Data
+ }
+ }
+
+ /* Local reranking based on GGML and [llama.cpp](https://github.com/ggerganov/llama.cpp)
+
+ ## Notice
+ - The device RAM must be larger than 8GB
+ - iOS: GPU acceleration supported; an M1+ / A17+ chip device is recommended
+ - macOS: GPU acceleration supported; an M1+ chip device is recommended
+ - Android: GPU acceleration not yet supported (coming soon); Android 13+ is recommended
+ - Linux / Windows: GPU acceleration supported; currently only the Vulkan backend is available */
+ export type GeneratorReranker = Generator &
+ GeneratorRerankerDef & {
+ templateKey: 'GENERATOR_RERANKER'
+ switches: Array<
+ SwitchDef &
+ GeneratorRerankerDef & {
+ conds?: Array<{
+ method: '==' | '!=' | '>' | '<' | '>=' | '<='
+ cond:
+ | SwitchCondInnerStateCurrentCanvas
+ | SwitchCondData
+ | {
+ __typename: 'SwitchCondInnerStateOutlet'
+ outlet:
+ | 'contextState'
+ | 'contextLoadProgress'
+ | 'contextDetails'
+ | 'rerankResult'
+ | 'isProcessing'
+ value: any
+ }
+ }>
+ }
+ >
+ }
+
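Note: a hedged configuration sketch for the reranker (URL and hash are placeholders; any GGUF reranker conversion should fit the shape):

```ts
// Sketch: minimal GeneratorReranker property using the defaults above.
const rerankerGenerator = {
  templateKey: 'GENERATOR_RERANKER' as const,
  property: {
    init: true,
    modelUrl: 'https://example.com/reranker-q4_k_m.gguf', // placeholder
    modelHashType: 'sha256' as const,
    modelHash: '<sha256 of the gguf file>', // placeholder
    query: 'how do I reset the device?',
    documents: ['manual page 1', 'faq entry', 'release notes'],
    topK: 2,
  },
}
```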
  /* Load the model */
  export type GeneratorQnnLlmActionLoadModel = Action & {
  __actionName: 'GENERATOR_QNN_LLM_LOAD_MODEL'
@@ -6858,7 +7258,7 @@ Default property:
  greedy?: boolean | DataLink
  }
  events?: {
- /* Event triggered when load is done */
+ /* Event triggered when context state changes */
  onContextStateChange?: Array<EventAction>
  /* Event triggered when generate is done */
  onGenerate?: Array<EventAction>
@@ -761,6 +761,46 @@ export const templateEventPropsMap = {
  'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_TIME', // type: number
  ],
  },
+ GENERATOR_VAD_INFERENCE: {
+ onContextStateChange: [
+ 'GENERATOR_VAD_INFERENCE_CONTEXT_STATE', // type: string
+ 'GENERATOR_VAD_INFERENCE_CONTEXT_DETAILS', // type: object
+ ],
+ onError: [
+ 'GENERATOR_VAD_INFERENCE_ERROR', // type: string
+ ],
+ onDetected: [
+ 'GENERATOR_VAD_INFERENCE_DETECTION_SEGMENTS', // type: array
+ 'GENERATOR_VAD_INFERENCE_DETECTION_TIME', // type: number
+ ],
+ },
+ GENERATOR_REALTIME_TRANSCRIPTION: {
+ onTranscribe: [
+ 'GENERATOR_REALTIME_TRANSCRIPTION_TRANSCRIBE_EVENT', // type: object
+ ],
+ onVad: [
+ 'GENERATOR_REALTIME_TRANSCRIPTION_VAD_EVENT', // type: object
+ 'GENERATOR_REALTIME_TRANSCRIPTION_VAD_EVENT_TYPE', // type: string
+ 'GENERATOR_REALTIME_TRANSCRIPTION_VAD_EVENT_CONFIDENCE', // type: number
+ 'GENERATOR_REALTIME_TRANSCRIPTION_VAD_EVENT_DURATION', // type: number
+ 'GENERATOR_REALTIME_TRANSCRIPTION_VAD_EVENT_SLICE_INDEX', // type: number
+ ],
+ onError: [
+ 'GENERATOR_REALTIME_TRANSCRIPTION_ERROR', // type: string
+ ],
+ onStatusChange: [
+ 'GENERATOR_REALTIME_TRANSCRIPTION_IS_ACTIVE', // type: bool
+ ],
+ onStatsUpdate: [
+ 'GENERATOR_REALTIME_TRANSCRIPTION_STATS_TYPE', // type: string
+ 'GENERATOR_REALTIME_TRANSCRIPTION_STATS_TIMESTAMP', // type: number
+ 'GENERATOR_REALTIME_TRANSCRIPTION_STATS', // type: object
+ ],
+ onEnd: [
+ 'GENERATOR_REALTIME_TRANSCRIPTION_END_RESULTS', // type: array
+ 'GENERATOR_REALTIME_TRANSCRIPTION_END_AUDIO_OUTPUT_PATH', // type: string
+ ],
+ },
  GENERATOR_LLM: {
  onContextStateChange: [
  'GENERATOR_LLM_CONTEXT_STATE', // type: string
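
Note: these keys are what `EventProperty` bindings reference. A lookup sketch (the import path follows the `bricks-project/utils/event-props` hint in `common.ts`, but is still an assumption):

```ts
import { templateEventPropsMap } from '@fugood/bricks-project/utils/event-props'

// The map shape is template -> event -> ordered prop keys.
const [segmentsKey, timeKey] =
  templateEventPropsMap.GENERATOR_VAD_INFERENCE.onDetected
console.log(segmentsKey) // 'GENERATOR_VAD_INFERENCE_DETECTION_SEGMENTS'
console.log(timeKey) // 'GENERATOR_VAD_INFERENCE_DETECTION_TIME'
```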
@@ -799,6 +839,7 @@ export const templateEventPropsMap = {
  'GENERATOR_GGML_TTS_ERROR', // type: string
  ],
  },
+ GENERATOR_RERANKER: {},
  GENERATOR_QNN_LLM: {
  onContextStateChange: [
  'GENERATOR_QNN_LLM_CONTEXT_STATE', // type: string