@fugood/bricks-project 2.22.0-beta.16 → 2.22.0-beta.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -630,6 +630,7 @@ export const templateActionNameMap = {
  samplesOverlap: 'GENERATOR_VAD_INFERENCE_SAMPLES_OVERLAP',
  },
  },
+
  GENERATOR_LLM: {
  GENERATOR_LLM_TOKENIZE: {
  mode: 'GENERATOR_LLM_MODE',
package/package.json CHANGED
@@ -1,12 +1,12 @@
  {
  "name": "@fugood/bricks-project",
- "version": "2.22.0-beta.16",
+ "version": "2.22.0-beta.17",
  "main": "index.ts",
  "scripts": {
  "build": "node scripts/build.js"
  },
  "dependencies": {
- "@modelcontextprotocol/sdk": "^1.11.1",
+ "@modelcontextprotocol/sdk": "^1.15.0",
  "@types/escodegen": "^0.0.10",
  "@types/lodash": "^4.17.12",
  "acorn": "^8.13.0",
@@ -14,5 +14,5 @@
  "lodash": "^4.17.4",
  "uuid": "^8.3.1"
  },
- "gitHead": "d8231fa6787b8d95bc2bec94a86ceba596cf57f0"
+ "gitHead": "5e8893af505fa5a7b1a902e11764c1f9ebb36a4f"
  }
package/types/common.ts CHANGED
@@ -39,7 +39,7 @@ export type SubpsaceAction = string
  export type Action = {
  __actionName: string
  parent: 'Brick' | 'Generator' | 'Subspace' | 'System'
- name: string
+ name?: string
  }

  // Find correct key in bricks-project/utils/event-props for EventAction
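
With `name` now optional on `Action`, an action object only needs `__actionName` and `parent`. A minimal sketch (the import path and literal values are illustrative assumptions, not taken from the diff):

    // Assumes Action is importable from the package's types/common module.
    import type { Action } from '@fugood/bricks-project/types/common'

    // Omitting `name` now type-checks against the updated Action type.
    const stopAction: Action = {
      __actionName: 'GENERATOR_REALTIME_TRANSCRIPTION_STOP',
      parent: 'Generator',
    }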
@@ -4868,39 +4868,9 @@ Default property:
  init?: boolean | DataLink
  /* TTS model
  The mms-tts models are licensed under CC-BY-NC-4.0 */
- model?:
- | 'Custom'
- | 'BricksDisplay/vits-eng'
- | 'BricksDisplay/vits-cmn'
- | 'BricksDisplay/ellie-Bert-VITS2'
- | 'onnx-community/Kokoro-82M-v1.0-ONNX'
- | 'onnx-community/Kokoro-82M-v1.1-zh-ONNX'
- | 'onnx-community/OuteTTS-1.0-0.6B-ONNX'
- | 'mms-tts-ara (NC)'
- | 'mms-tts-deu (NC)'
- | 'mms-tts-eng (NC)'
- | 'mms-tts-fra (NC)'
- | 'mms-tts-hin (NC)'
- | 'mms-tts-kor (NC)'
- | 'mms-tts-por (NC)'
- | 'mms-tts-ron (NC)'
- | 'mms-tts-rus (NC)'
- | 'mms-tts-spa (NC)'
- | 'mms-tts-vie (NC)'
- | 'mms-tts-yor (NC)'
- | 'speecht5_tts'
- | DataLink
+ model?: string | DataLink
  /* Model type */
- modelType?:
- | 'auto'
- | 'vits'
- | 'bert_vits2'
- | 'style_text_to_speech_2'
- | 'speecht5'
- | 'outetts-1.0'
- | DataLink
- /* Load quantized model (deprecated, use `quantizeType` instead) */
- quantized?: boolean | DataLink
+ modelType?: string | DataLink
  /* Quantize type */
  quantizeType?:
  | 'auto'
@@ -4913,9 +4883,6 @@ Default property:
  | 'bnb4'
  | 'q4f16'
  | DataLink
- /* Custom model name
- Choose model from https://huggingface.co/models?pipeline_tag=text-to-audio&library=transformers.js */
- customModel?: string | DataLink
  /* Vocoder model for SpeechT5 */
  vocoderModel?: 'Custom' | 'speecht5_hifigan' | DataLink
  /* Custom vocoder model
@@ -4923,8 +4890,10 @@ Default property:
  customVocoderModel?: string | DataLink
  /* Speaker embedding, for SpeechT5 or StyleTTS (Kokoro) */
  speakerEmbedUrl?: string | DataLink
- /* MD5 checksum of `speakerEmbedUrl` */
- speakerEmbedMd5?: string | DataLink
+ /* Hash of `speakerEmbedUrl` */
+ speakerEmbedHash?: string | DataLink
+ /* Hash type of `speakerEmbedUrl` */
+ speakerEmbedHashType?: 'md5' | 'sha256' | 'sha1' | DataLink
  /* Speaker config, for OuteTTS model */
  speakerConfig?: {} | DataLink
  /* Audio token generation max length */
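
Net effect for the TTS generator def: `model` and `modelType` accept any string (the `Custom`/`customModel` pair and the deprecated `quantized` flag are gone), and the MD5-only checksum field is generalized to `speakerEmbedHash` plus `speakerEmbedHashType`. A hedged sketch of a property block under the new shape (field names come from the diff; the values are illustrative):

    // Hypothetical TTS property block after 2.22.0-beta.17.
    const ttsProperty = {
      model: 'onnx-community/Kokoro-82M-v1.0-ONNX', // any model id string, no 'Custom' + customModel split
      modelType: 'style_text_to_speech_2',          // free-form string instead of a literal union
      quantizeType: 'q4f16',
      speakerEmbedUrl: 'https://example.com/voices/af.bin',   // illustrative URL
      speakerEmbedHash: 'd41d8cd98f00b204e9800998ecf8427e',   // placeholder; replaces speakerEmbedMd5
      speakerEmbedHashType: 'md5',                            // or 'sha256' / 'sha1'
    }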
@@ -5054,7 +5023,6 @@ interface GeneratorOnnxLLMDef {
  /*
  Default property:
  {
- "model": "BricksDisplay/phi-1_5-q4",
  "modelType": "auto",
  "toolCallParser": "llama3_json",
  "toolChoice": "auto",
@@ -5073,70 +5041,9 @@ Default property:
  /* Initialize the TTS context on generator initialization */
  init?: boolean | DataLink
  /* LLM model */
- model?:
- | 'Custom'
- | 'onnx-community/gemma-3-1b-it-ONNX'
- | 'BricksDisplay/phi-1_5'
- | 'BricksDisplay/phi-1_5-q4'
- | 'onnx-community/Phi-3.5-vision-instruct'
- | 'onnx-community/Phi-3-vision-128k-instruct'
- | 'onnx-community/Phi-4-mini-instruct-ONNX-MHA'
- | 'onnx-community/Qwen2.5-0.5B'
- | 'onnx-community/Qwen2.5-0.5B-Instruct'
- | 'onnx-community/Qwen2.5-1.5B'
- | 'onnx-community/Qwen2.5-1.5B-Instruct'
- | 'onnx-community/Qwen2-VL-2B-Instruct'
- | 'stablelm-2-1_6b'
- | 'BricksDisplay/stablelm-2-1_6b-q4'
- | 'stablelm-2-zephyr-1_6b'
- | 'BricksDisplay/stablelm-2-zephyr-1_6b-q4'
- | 'BricksDisplay/Llama-2-7b-chat-q4'
- | 'TinyLLama-v0'
- | 'TinyLlama-1.1B-Chat-v1.0'
- | 'BricksDisplay/TinyLlama-1.1B-Chat-v1.0-q4'
- | 'llama-160m'
- | 'llama-68m'
- | 'BricksDisplay/Yi-6B-q4'
- | 'BricksDisplay/Yi-6B-Chat-q4'
- | 'BricksDisplay/Mistral-7B-v0.1-q4'
- | 'BricksDisplay/Mistral-7B-Instruct-v0.2-q4'
- | 'BricksDisplay/Breeze-7B-Base-v1_0-q4'
- | 'BricksDisplay/Breeze-7B-Instruct-v1_0-q4'
- | 'gpt2'
- | 'distilgpt2'
- | 'gpt-neo-125M'
- | 'opt-125m'
- | 'opt-350m'
- | 'bloom-560m'
- | 'bloomz-560m'
- | 't5-small'
- | 't5-base'
- | 'flan-t5-small'
- | 'flan-t5-base'
- | 'mt5-small'
- | 'mt5-base'
- | 'long-t5-lobal-base'
- | 'long-t5-tglobal-base'
- | DataLink
+ model?: string | DataLink
  /* Model type */
- modelType?:
- | 'auto'
- | 'text-generation'
- | 'qwen2-vl'
- | 'paligemma'
- | 'llava'
- | 'llava_onevision'
- | 'moondream1'
- | 'florence2'
- | 'idefics3'
- | 'smolvlm'
- | 'phi3_v'
- | 't5'
- | 'mt5'
- | 'longt5'
- | DataLink
- /* Load quantized model (deprecated, use `quantizeType` instead) */
- quantized?: boolean | DataLink
+ modelType?: string | DataLink
  /* Quantize type */
  quantizeType?:
  | 'auto'
@@ -5149,10 +5056,6 @@ Default property:
  | 'bnb4'
  | 'q4f16'
  | DataLink
- /* Custom model name
- Choose model from https://huggingface.co/models?pipeline_tag=text2text-generation&library=transformers.js
- or https://huggingface.co/models?pipeline_tag=text-generation&library=transformers.js&sort=trending */
- customModel?: string | DataLink
  /* Prompt to inference */
  prompt?: string | DataLink
  /* Messages to inference */
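
The ONNX LLM def gets the same treatment as the TTS def above: the large `model` and `modelType` literal unions collapse to `string | DataLink`, and the `customModel` field, the deprecated `quantized` flag, and the hard-coded default model are removed. A hedged before/after sketch, assuming custom models previously went through `model: 'Custom'` with `customModel` (values are illustrative):

    // Before (2.22.0-beta.16), presumably:
    // const llmProperty = { model: 'Custom', customModel: 'onnx-community/Qwen2.5-0.5B-Instruct' }

    // After (2.22.0-beta.17): the model id goes directly into `model`.
    const llmProperty = {
      model: 'onnx-community/Qwen2.5-0.5B-Instruct',
      modelType: 'auto',
      quantizeType: 'q4f16',
    }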
@@ -5282,27 +5185,9 @@ Default property:
  /* Initialize the TTS context on generator initialization */
  init?: boolean | DataLink
  /* STT model */
- model?:
- | 'Custom'
- | 'whisper-tiny'
- | 'whisper-tiny.en'
- | 'whisper-small'
- | 'whisper-small.en'
- | 'whisper-base'
- | 'whisper-base.en'
- | 'whisper-medium'
- | 'whisper-medium.en'
- | 'whisper-large'
- | 'whisper-large-v2'
- | 'whisper-large-v3'
- | 'mms-1b-all'
- | 'mms-1b-fl102'
- | 'mms-1b-l1107'
- | DataLink
+ model?: string | DataLink
  /* Model type */
- modelType?: 'auto' | 'whisper' | 'hubert' | 'wav2vec2' | 'wav2vec2-bert' | DataLink
- /* Load quantized model (deprecated, use `quantizeType` instead) */
- quantized?: boolean | DataLink
+ modelType?: string | DataLink
  /* Quantize type */
  quantizeType?:
  | 'auto'
@@ -5315,9 +5200,6 @@ Default property:
  | 'bnb4'
  | 'q4f16'
  | DataLink
- /* Custom model name
- Choose model from https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=transformers.js */
- customModel?: string | DataLink
  /* Return timestamps */
  returnTimestamps?: 'none' | 'enable' | 'word' | DataLink
  /* Transcription language
@@ -5550,7 +5432,7 @@ export type GeneratorSpeechInferenceActionTranscribeData = ActionWithParams & {
  >
  }

- /* Transcribe microphone audio source */
+ /* [Deprecated] Transcribe microphone audio source */
  export type GeneratorSpeechInferenceActionTranscribeRealtime = ActionWithParams & {
  __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME'
  params?: Array<
@@ -5617,7 +5499,7 @@ export type GeneratorSpeechInferenceActionTranscribeRealtime = ActionWithParams
  >
  }

- /* Stop transcribing microphone audio source */
+ /* [Deprecated] Stop transcribing microphone audio source */
  export type GeneratorSpeechInferenceActionTranscribeRealtimeStop = Action & {
  __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME_STOP'
  }
@@ -5865,7 +5747,7 @@ Default property:
  inferRealtimeVadFreqThold?: number | DataLink
  }
  events?: {
- /* Event triggered when load is done */
+ /* Event triggered when context state changes */
  onContextStateChange?: Array<EventAction>
  /* Event triggered when error occurs */
  onError?: Array<EventAction>
@@ -6074,7 +5956,7 @@ Default property:
  detectFileMd5?: string | DataLink
  }
  events?: {
- /* Event triggered when load is done */
+ /* Event triggered when context state changes */
  onContextStateChange?: Array<EventAction>
  /* Event triggered when error occurs */
  onError?: Array<EventAction>
@@ -6122,6 +6004,168 @@ export type GeneratorVadInference = Generator &
  >
  }

+ /* Start realtime transcription */
+ export type GeneratorRealtimeTranscriptionActionStart = Action & {
+ __actionName: 'GENERATOR_REALTIME_TRANSCRIPTION_START'
+ }
+
+ /* Stop realtime transcription */
+ export type GeneratorRealtimeTranscriptionActionStop = Action & {
+ __actionName: 'GENERATOR_REALTIME_TRANSCRIPTION_STOP'
+ }
+
+ /* Force move to next slice */
+ export type GeneratorRealtimeTranscriptionActionNextSlice = Action & {
+ __actionName: 'GENERATOR_REALTIME_TRANSCRIPTION_NEXT_SLICE'
+ }
+
+ /* Reset transcriber state */
+ export type GeneratorRealtimeTranscriptionActionReset = Action & {
+ __actionName: 'GENERATOR_REALTIME_TRANSCRIPTION_RESET'
+ }
+
+ interface GeneratorRealtimeTranscriptionDef {
+ /*
+ Default property:
+ {
+ "sttLivePolicy": "only-in-use",
+ "vadInferenceLivePolicy": "only-in-use",
+ "vadEnabled": true,
+ "audioSliceSec": 30,
+ "audioMinSec": 1,
+ "maxSlicesInMemory": 5,
+ "vadStrategy": "use-preset",
+ "vadPreset": "default",
+ "autoSliceOnSpeechEnd": true,
+ "autoSliceThreshold": 2,
+ "initialPrompt": "",
+ "promptPreviousSlices": false,
+ "saveAudio": true,
+ "testMode": false,
+ "testPlaybackSpeed": 1,
+ "testChunkDurationMs": 100,
+ "testLoop": false
+ }
+ */
+ property?: {
+ /* STT Generator for Whisper context */
+ sttGeneratorId?: string | DataLink
+ /* STT Live Policy. If the policy is `only-in-use`, the STT context will be released when not in use. */
+ sttLivePolicy?: 'only-in-use' | 'manual' | DataLink
+ /* VAD Inference Generator for voice activity detection */
+ vadInferenceGeneratorId?: string | DataLink
+ /* VAD Inference Live Policy. If the policy is `only-in-use`, the VAD Inference context will be released when not in use. */
+ vadInferenceLivePolicy?: 'only-in-use' | 'manual' | DataLink
+ /* Enable VAD (Voice Activity Detection) */
+ vadEnabled?: boolean | DataLink
+ /* Audio slice duration in seconds */
+ audioSliceSec?: number | DataLink
+ /* Minimum audio duration to start transcription in seconds */
+ audioMinSec?: number | DataLink
+ /* Maximum number of slices to keep in memory */
+ maxSlicesInMemory?: number | DataLink
+ /* VAD Strategy */
+ vadStrategy?: 'use-preset' | 'use-generator-options' | DataLink
+ /* VAD preset configuration */
+ vadPreset?:
+ | 'default'
+ | 'sensitive'
+ | 'very-sensitive'
+ | 'conservative'
+ | 'very-conservative'
+ | 'continuous-speech'
+ | 'meeting'
+ | 'noisy-environment'
+ | DataLink
+ /* Auto slice on speech end */
+ autoSliceOnSpeechEnd?: boolean | DataLink
+ /* Auto slice threshold in seconds */
+ autoSliceThreshold?: number | DataLink
+ /* Initial prompt for transcription */
+ initialPrompt?: string | DataLink
+ /* Include previous slices in prompt */
+ promptPreviousSlices?: boolean | DataLink
+ /* Enable audio output saving (auto-generates file path) */
+ saveAudio?: boolean | DataLink
+ /* Use test mode with file simulation */
+ testMode?: boolean | DataLink
+ /* Test audio file path for simulation */
+ testFilePath?: string | DataLink
+ /* Test audio file hash */
+ testFileHash?: string | DataLink
+ /* Test audio file hash type */
+ testFileHashType?: string | DataLink
+ /* Test playback speed */
+ testPlaybackSpeed?: number | DataLink
+ /* Test chunk duration in milliseconds */
+ testChunkDurationMs?: number | DataLink
+ /* Loop test audio file */
+ testLoop?: boolean | DataLink
+ }
+ events?: {
+ /* Event triggered when transcription starts, processes, or ends */
+ onTranscribe?: Array<EventAction>
+ /* Event triggered on VAD (Voice Activity Detection) events */
+ onVad?: Array<EventAction>
+ /* Event triggered when error occurs */
+ onError?: Array<EventAction>
+ /* Event triggered when status changes */
+ onStatusChange?: Array<EventAction>
+ /* Event triggered when statistics update */
+ onStatsUpdate?: Array<EventAction>
+ /* Event triggered when transcription ends */
+ onEnd?: Array<EventAction>
+ }
+ outlets?: {
+ /* Is realtime transcription currently active */
+ isActive?: () => Data
+ /* Is currently transcribing audio */
+ isTranscribing?: () => Data
+ /* Current transcription results */
+ results?: () => Data
+ /* Current transcription result text */
+ resultText?: () => Data
+ /* Current statistics */
+ statistics?: () => Data
+ /* Latest transcribe event */
+ lastTranscribeEvent?: () => Data
+ /* Latest VAD event */
+ lastVadEvent?: () => Data
+ /* Audio output file path (auto-generated when saving audio) */
+ audioOutputPath?: () => Data
+ }
+ }
+
+ /* Realtime speech-to-text transcription using Whisper and VAD with live audio streaming */
+ export type GeneratorRealtimeTranscription = Generator &
+ GeneratorRealtimeTranscriptionDef & {
+ templateKey: 'GENERATOR_REALTIME_TRANSCRIPTION'
+ switches: Array<
+ SwitchDef &
+ GeneratorRealtimeTranscriptionDef & {
+ conds?: Array<{
+ method: '==' | '!=' | '>' | '<' | '>=' | '<='
+ cond:
+ | SwitchCondInnerStateCurrentCanvas
+ | SwitchCondData
+ | {
+ __typename: 'SwitchCondInnerStateOutlet'
+ outlet:
+ | 'isActive'
+ | 'isTranscribing'
+ | 'results'
+ | 'resultText'
+ | 'statistics'
+ | 'lastTranscribeEvent'
+ | 'lastVadEvent'
+ | 'audioOutputPath'
+ value: any
+ }
+ }>
+ }
+ >
+ }
+
  /* Load the model */
  export type GeneratorLLMActionLoadModel = Action & {
  __actionName: 'GENERATOR_LLM_LOAD_MODEL'
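
The new `GENERATOR_REALTIME_TRANSCRIPTION` template composes an existing STT generator and a VAD Inference generator into a slice-based realtime transcriber. A hedged sketch of a node using only fields from the added def (the ids, values, and event-handler placeholders are illustrative assumptions):

    // Hypothetical GeneratorRealtimeTranscription configuration.
    const realtimeTranscription = {
      templateKey: 'GENERATOR_REALTIME_TRANSCRIPTION',
      property: {
        sttGeneratorId: 'whisper-generator',      // id of an existing STT generator (assumed)
        sttLivePolicy: 'only-in-use',             // release the STT context when idle
        vadInferenceGeneratorId: 'vad-generator', // id of an existing VAD Inference generator (assumed)
        vadEnabled: true,
        audioSliceSec: 30,                        // matches the documented default
        autoSliceOnSpeechEnd: true,
        saveAudio: true,                          // exposes audioOutputPath via the outlets
      },
      events: {
        onTranscribe: [ /* EventAction list reading the transcribe event */ ],
        onError: [ /* EventAction list for error handling */ ],
      },
    }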
@@ -6705,7 +6749,7 @@ Default property:
  completionIgnoreEOS?: boolean | DataLink
  }
  events?: {
- /* Event triggered when load is done */
+ /* Event triggered when context state changes */
  onContextStateChange?: Array<EventAction>
  /* Event triggered when error occurs */
  onError?: Array<EventAction>
@@ -7214,7 +7258,7 @@ Default property:
  greedy?: boolean | DataLink
  }
  events?: {
- /* Event triggered when load is done */
+ /* Event triggered when context state changes */
  onContextStateChange?: Array<EventAction>
  /* Event triggered when generate is done */
  onGenerate?: Array<EventAction>
@@ -774,6 +774,33 @@ export const templateEventPropsMap = {
  'GENERATOR_VAD_INFERENCE_DETECTION_TIME', // type: number
  ],
  },
+ GENERATOR_REALTIME_TRANSCRIPTION: {
+ onTranscribe: [
+ 'GENERATOR_REALTIME_TRANSCRIPTION_TRANSCRIBE_EVENT', // type: object
+ ],
+ onVad: [
+ 'GENERATOR_REALTIME_TRANSCRIPTION_VAD_EVENT', // type: object
+ 'GENERATOR_REALTIME_TRANSCRIPTION_VAD_EVENT_TYPE', // type: string
+ 'GENERATOR_REALTIME_TRANSCRIPTION_VAD_EVENT_CONFIDENCE', // type: number
+ 'GENERATOR_REALTIME_TRANSCRIPTION_VAD_EVENT_DURATION', // type: number
+ 'GENERATOR_REALTIME_TRANSCRIPTION_VAD_EVENT_SLICE_INDEX', // type: number
+ ],
+ onError: [
+ 'GENERATOR_REALTIME_TRANSCRIPTION_ERROR', // type: string
+ ],
+ onStatusChange: [
+ 'GENERATOR_REALTIME_TRANSCRIPTION_IS_ACTIVE', // type: bool
+ ],
+ onStatsUpdate: [
+ 'GENERATOR_REALTIME_TRANSCRIPTION_STATS_TYPE', // type: string
+ 'GENERATOR_REALTIME_TRANSCRIPTION_STATS_TIMESTAMP', // type: number
+ 'GENERATOR_REALTIME_TRANSCRIPTION_STATS', // type: object
+ ],
+ onEnd: [
+ 'GENERATOR_REALTIME_TRANSCRIPTION_END_RESULTS', // type: array
+ 'GENERATOR_REALTIME_TRANSCRIPTION_END_AUDIO_OUTPUT_PATH', // type: string
+ ],
+ },
  GENERATOR_LLM: {
  onContextStateChange: [
  'GENERATOR_LLM_CONTEXT_STATE', // type: string
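
The matching `templateEventPropsMap` entry lists the keys an EventAction can read from each of the new generator's events (see the "Find correct key in bricks-project/utils/event-props" note in types/common.ts). A hedged lookup sketch, assuming the map is importable from the package's utils/event-props module:

    import { templateEventPropsMap } from '@fugood/bricks-project/utils/event-props' // assumed path

    // Keys delivered with onTranscribe / onVad for GENERATOR_REALTIME_TRANSCRIPTION,
    // exactly as listed in the diff above.
    const transcribeKeys = templateEventPropsMap.GENERATOR_REALTIME_TRANSCRIPTION.onTranscribe
    const vadKeys = templateEventPropsMap.GENERATOR_REALTIME_TRANSCRIPTION.onVad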