@fugood/bricks-project 2.24.0-beta.0 → 2.24.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -601,6 +601,14 @@ export const templateActionNameMap = {
601
601
  variables: 'GENERATOR_MCP_VARIABLES',
602
602
  },
603
603
  },
604
+ GENERATOR_TRADITIONAL_VAD: {
605
+ GENERATOR_TRADITIONAL_VAD_DETECT_FILE: {
606
+ fileUrl: 'GENERATOR_TRADITIONAL_VAD_FILE_URL',
607
+ },
608
+ GENERATOR_TRADITIONAL_VAD_DETECT_DATA: {
609
+ data: 'GENERATOR_TRADITIONAL_VAD_DATA',
610
+ },
611
+ },
604
612
  GENERATOR_TTS: {
605
613
  GENERATOR_TTS_GENERATE: {
606
614
  text: 'GENERATOR_TTS_TEXT',
@@ -621,6 +629,24 @@ export const templateActionNameMap = {
621
629
  audioUri: 'GENERATOR_ONNX_STT_AUDIO_URI',
622
630
  },
623
631
  },
632
+ GENERATOR_ONNX_VAD: {
633
+ GENERATOR_ONNX_VAD_DETECT_FILE: {
634
+ fileUrl: 'GENERATOR_ONNX_VAD_FILE_URL',
635
+ threshold: 'GENERATOR_ONNX_VAD_THRESHOLD',
636
+ minSpeechDurationMs: 'GENERATOR_ONNX_VAD_MIN_SPEECH_DURATION_MS',
637
+ minSilenceDurationMs: 'GENERATOR_ONNX_VAD_MIN_SILENCE_DURATION_MS',
638
+ maxSpeechDurationS: 'GENERATOR_ONNX_VAD_MAX_SPEECH_DURATION_S',
639
+ speechPadMs: 'GENERATOR_ONNX_VAD_SPEECH_PAD_MS',
640
+ },
641
+ GENERATOR_ONNX_VAD_DETECT_DATA: {
642
+ data: 'GENERATOR_ONNX_VAD_DATA',
643
+ threshold: 'GENERATOR_ONNX_VAD_THRESHOLD',
644
+ minSpeechDurationMs: 'GENERATOR_ONNX_VAD_MIN_SPEECH_DURATION_MS',
645
+ minSilenceDurationMs: 'GENERATOR_ONNX_VAD_MIN_SILENCE_DURATION_MS',
646
+ maxSpeechDurationS: 'GENERATOR_ONNX_VAD_MAX_SPEECH_DURATION_S',
647
+ speechPadMs: 'GENERATOR_ONNX_VAD_SPEECH_PAD_MS',
648
+ },
649
+ },
624
650
  GENERATOR_SPEECH_INFERENCE: {
625
651
  GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_FILE: {
626
652
  fileUrl: 'GENERATOR_SPEECH_INFERENCE_FILE_URL',
@@ -636,20 +662,6 @@ export const templateActionNameMap = {
636
662
  language: 'GENERATOR_SPEECH_INFERENCE_LANGUAGE',
637
663
  translate: 'GENERATOR_SPEECH_INFERENCE_TRANSLATE',
638
664
  },
639
- GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME: {
640
- prompt: 'GENERATOR_SPEECH_INFERENCE_PROMPT',
641
- beamSize: 'GENERATOR_SPEECH_INFERENCE_BEAM_SIZE',
642
- language: 'GENERATOR_SPEECH_INFERENCE_LANGUAGE',
643
- translate: 'GENERATOR_SPEECH_INFERENCE_TRANSLATE',
644
- realtimeAudioSeconds: 'GENERATOR_SPEECH_INFERENCE_REALTIME_AUDIO_SECONDS',
645
- realtimeAudioSliceSeconds: 'GENERATOR_SPEECH_INFERENCE_REALTIME_AUDIO_SLICE_SECONDS',
646
- realtimeAudioMinSeconds: 'GENERATOR_SPEECH_INFERENCE_REALTIME_AUDIO_MIN_SECONDS',
647
- realtimeSaveAudio: 'GENERATOR_SPEECH_INFERENCE_REALTIME_SAVE_AUDIO',
648
- realtimeVadEnabled: 'GENERATOR_SPEECH_INFERENCE_REALTIME_VAD_ENABLED',
649
- realtimeVadMs: 'GENERATOR_SPEECH_INFERENCE_REALTIME_VAD_MS',
650
- realtimeVadThold: 'GENERATOR_SPEECH_INFERENCE_REALTIME_VAD_THOLD',
651
- realtimeVadFreqThold: 'GENERATOR_SPEECH_INFERENCE_REALTIME_VAD_FREQ_THOLD',
652
- },
653
665
  },
654
666
  GENERATOR_VAD_INFERENCE: {
655
667
  GENERATOR_VAD_INFERENCE_DETECT_FILE: {
package/compile/index.ts CHANGED
@@ -421,8 +421,9 @@ const compileAutomation = (automationMap: AutomationMap) =>
421
421
 
422
422
  export const compile = async (app: Application) => {
423
423
  await new Promise((resolve) => setImmediate(resolve, 0))
424
+ const timestamp = Date.now()
424
425
  const config = {
425
- title: app.name,
426
+ title: `${app.name || 'Unknown'}(${timestamp})`,
426
427
  subspace_map: app.subspaces.reduce((subspaceMap, subspace) => {
427
428
  subspaceMap[subspace.id] = {
428
429
  title: subspace.title,
@@ -969,6 +970,7 @@ export const compile = async (app: Application) => {
969
970
  automation_map: app.automationMap
970
971
  ? compileAutomation(app.automationMap)
971
972
  : app.metadata?.TEMP_automation_map || {},
973
+ update_timestamp: timestamp,
972
974
  }
973
975
  return config
974
976
  }
package/package.json CHANGED
@@ -1,13 +1,14 @@
1
1
  {
2
2
  "name": "@fugood/bricks-project",
3
- "version": "2.24.0-beta.0",
3
+ "version": "2.24.0-beta.2",
4
4
  "main": "index.ts",
5
5
  "scripts": {
6
6
  "build": "bun scripts/build.js"
7
7
  },
8
8
  "dependencies": {
9
- "@fugood/bricks-cli": "^2.23.0",
9
+ "@fugood/bricks-cli": "^2.24.0-beta.2",
10
10
  "@huggingface/gguf": "^0.3.2",
11
+ "@iarna/toml": "^3.0.0",
11
12
  "@modelcontextprotocol/sdk": "^1.15.0",
12
13
  "@toon-format/toon": "^2.1.0",
13
14
  "@types/escodegen": "^0.0.10",
@@ -18,5 +19,5 @@
18
19
  "lodash": "^4.17.4",
19
20
  "uuid": "^8.3.1"
20
21
  },
21
- "gitHead": "27bd4a30a4dd412ab73eab13c49265f73fd76e6c"
22
+ "gitHead": "f63649f1cb9ae4c3d0c4ac687152bfc1be6528bd"
22
23
  }
@@ -1,7 +1,7 @@
1
1
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
2
2
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
3
3
  import { z } from 'zod'
4
- import { $ } from 'bun'
4
+ import { $, JSON5 } from 'bun'
5
5
  import * as TOON from '@toon-format/toon'
6
6
  import Fuse from 'fuse.js'
7
7
  import { gguf } from '@huggingface/gguf'
@@ -729,7 +729,7 @@ server.tool(
729
729
 
730
730
  // Return JSON for huggingface_select to allow direct parsing by consumers
731
731
  return {
732
- content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
732
+ content: [{ type: 'text', text: JSON5.stringify(result, null, 2) }],
733
733
  }
734
734
  }
735
735
 
@@ -842,7 +842,7 @@ server.tool(
842
842
 
843
843
  // Return JSON for huggingface_select to allow direct parsing by consumers
844
844
  return {
845
- content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
845
+ content: [{ type: 'text', text: JSON5.stringify(result, null, 2) }],
846
846
  }
847
847
  } else {
848
848
  const result = {
@@ -875,7 +875,12 @@ server.tool(
875
875
 
876
876
  // Return JSON for huggingface_select to allow direct parsing by consumers
877
877
  return {
878
- content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
878
+ content: [
879
+ {
880
+ type: 'text',
881
+ text: `${JSON5.stringify(result, null, 2)} // Please use _hfRepoInfo undocumented field with @ts-ignore`,
882
+ },
883
+ ],
879
884
  }
880
885
  }
881
886
  } catch (err: any) {
@@ -1,5 +1,6 @@
1
1
  import { $ } from 'bun'
2
2
  import { stat, readFile, writeFile, readdir } from 'fs/promises'
3
+ import TOML from '@iarna/toml'
3
4
 
4
5
  const cwd = process.cwd()
5
6
 
@@ -63,9 +64,7 @@ if (hasClaudeCode || hasAgentsMd) {
63
64
  await handleMcpConfigOverride(mcpConfigPath)
64
65
  }
65
66
 
66
- if (hasClaudeCode) {
67
- // Install skills that don't already exist in the project
68
- const skillsDir = `${cwd}/.claude/skills`
67
+ const setupSkills = async (skillsDir) => {
69
68
  const packageSkillsDir = `${__dirname}/../skills`
70
69
 
71
70
  if (await exists(packageSkillsDir)) {
@@ -81,10 +80,53 @@ if (hasClaudeCode) {
81
80
  console.log(`Skill '${skill}' already exists, skipping`)
82
81
  } else {
83
82
  await $`cp -r ${packageSkillsDir}/${skill} ${targetSkillDir}`
84
- console.log(`Installed skill '${skill}' to .claude/skills/`)
83
+ console.log(`Installed skill '${skill}' to ${skillsDir}/`)
85
84
  }
86
85
  }),
87
86
  )
88
87
  }
89
- // TODO: .codex/skills, .cursor/skills if needed
90
88
  }
89
+
90
+ if (hasClaudeCode) {
91
+ // Install skills that don't already exist in the project
92
+ await setupSkills(`${cwd}/.claude/skills`)
93
+ }
94
+
95
+ if (hasAgentsMd) {
96
+ // Handle codex skills
97
+ // Currently no signal file for codex skills, so we just check if AGENTS.md exists
98
+ await setupSkills(`${cwd}/.codex/skills`)
99
+
100
+ const defaultCodexMcpConfig = {
101
+ mcp_servers: {
102
+ 'bricks-project': projectMcpServer,
103
+ },
104
+ }
105
+
106
+ const handleCodexMcpConfigOverride = async (mcpConfigPath: string) => {
107
+ let mcpConfig: { mcp_servers: Record<string, typeof projectMcpServer> } | null = null
108
+ if (await exists(mcpConfigPath)) {
109
+ const configStr = await readFile(mcpConfigPath, 'utf-8')
110
+ try {
111
+ mcpConfig = TOML.parse(configStr)
112
+ if (!mcpConfig?.mcp_servers) throw new Error('mcp_servers is not defined')
113
+ mcpConfig.mcp_servers['bricks-project'] = projectMcpServer
114
+ } catch (e) {
115
+ mcpConfig = defaultCodexMcpConfig
116
+ }
117
+ } else {
118
+ mcpConfig = defaultCodexMcpConfig
119
+ }
120
+
121
+ await writeFile(mcpConfigPath, `${TOML.stringify(mcpConfig, null, 2)}\n`)
122
+
123
+ console.log(`Updated ${mcpConfigPath}`)
124
+ }
125
+
126
+ // Setup MCP config (.codex/config.toml)
127
+ const codexConfigPath = `${cwd}/.codex/config.toml`
128
+ await handleCodexMcpConfigOverride(codexConfigPath)
129
+ }
130
+
131
+ // TODO: .cursor/skills if needed
132
+ // TODO: User setting in application.json to avoid unnecessary skills/config setup
@@ -41,10 +41,10 @@ Default property:
41
41
  "audioSliceSec": 30,
42
42
  "audioMinSec": 1,
43
43
  "maxSlicesInMemory": 5,
44
+ "transcribeProcessingPauseMs": 500,
45
+ "initTranscribeAfterMs": 500,
44
46
  "vadStrategy": "use-preset",
45
47
  "vadPreset": "default",
46
- "autoSliceOnSpeechEnd": true,
47
- "autoSliceThreshold": 2,
48
48
  "initialPrompt": "",
49
49
  "promptPreviousSlices": false,
50
50
  "saveAudio": true,
@@ -73,6 +73,10 @@ Default property:
73
73
  audioMinSec?: number | DataLink
74
74
  /* Maximum number of slices to keep in memory */
75
75
  maxSlicesInMemory?: number | DataLink
76
+ /* Transcribe processing interval in milliseconds */
77
+ transcribeProcessingPauseMs?: number | DataLink
78
+ /* Pause in milliseconds after which transcribe processing is initialized */
79
+ initTranscribeAfterMs?: number | DataLink
76
80
  /* VAD Strategy */
77
81
  vadStrategy?: 'use-preset' | 'use-generator-options' | DataLink
78
82
  /* VAD preset configuration */
@@ -86,10 +90,6 @@ Default property:
86
90
  | 'meeting'
87
91
  | 'noisy-environment'
88
92
  | DataLink
89
- /* Auto slice on speech end */
90
- autoSliceOnSpeechEnd?: boolean | DataLink
91
- /* Auto slice threshold in seconds */
92
- autoSliceThreshold?: number | DataLink
93
93
  /* Initial prompt for transcription */
94
94
  initialPrompt?: string | DataLink
95
95
  /* Include previous slices in prompt */
@@ -128,6 +128,8 @@ Default property:
128
128
  onStatusChange?: Array<EventAction>
129
129
  /* Event triggered when statistics update */
130
130
  onStatsUpdate?: Array<EventAction>
131
+ /* Event triggered when slice transcription is stabilized */
132
+ onStabilized?: Array<EventAction>
131
133
  /* Event triggered when transcription ends */
132
134
  onEnd?: Array<EventAction>
133
135
  }
@@ -140,12 +142,16 @@ Default property:
140
142
  results?: () => Data
141
143
  /* Current transcription result text */
142
144
  resultText?: () => Data
145
+ /* Last stabilized transcription result segment */
146
+ lastStabilizedSegment?: () => Data
143
147
  /* Current statistics */
144
148
  statistics?: () => Data
145
149
  /* Latest transcribe event */
146
150
  lastTranscribeEvent?: () => Data
147
151
  /* Latest VAD event */
148
152
  lastVadEvent?: () => Data
153
+ /* Stabilized transcription text from completed slices */
154
+ stabilizedText?: () => Data
149
155
  /* Audio output file path (auto-generated when saving audio) */
150
156
  audioOutputPath?: () => Data
151
157
  }
@@ -170,9 +176,11 @@ export type GeneratorRealtimeTranscription = Generator &
170
176
  | 'isTranscribing'
171
177
  | 'results'
172
178
  | 'resultText'
179
+ | 'lastStabilizedSegment'
173
180
  | 'statistics'
174
181
  | 'lastTranscribeEvent'
175
182
  | 'lastVadEvent'
183
+ | 'stabilizedText'
176
184
  | 'audioOutputPath'
177
185
  value: any
178
186
  }
@@ -80,76 +80,9 @@ export type GeneratorSpeechInferenceActionTranscribeData = ActionWithParams & {
80
80
  >
81
81
  }
82
82
 
83
- /* [Deprecated] Transcribe microphone audio source */
84
- export type GeneratorSpeechInferenceActionTranscribeRealtime = ActionWithParams & {
85
- __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME'
86
- params?: Array<
87
- | {
88
- input: 'prompt'
89
- value?: string | DataLink | EventProperty
90
- mapping?: string
91
- }
92
- | {
93
- input: 'beamSize'
94
- value?: number | DataLink | EventProperty
95
- mapping?: string
96
- }
97
- | {
98
- input: 'language'
99
- value?: string | DataLink | EventProperty
100
- mapping?: string
101
- }
102
- | {
103
- input: 'translate'
104
- value?: boolean | DataLink | EventProperty
105
- mapping?: string
106
- }
107
- | {
108
- input: 'realtimeAudioSeconds'
109
- value?: number | DataLink | EventProperty
110
- mapping?: string
111
- }
112
- | {
113
- input: 'realtimeAudioSliceSeconds'
114
- value?: number | DataLink | EventProperty
115
- mapping?: string
116
- }
117
- | {
118
- input: 'realtimeAudioMinSeconds'
119
- value?: number | DataLink | EventProperty
120
- mapping?: string
121
- }
122
- | {
123
- input: 'realtimeSaveAudio'
124
- value?: boolean | DataLink | EventProperty
125
- mapping?: string
126
- }
127
- | {
128
- input: 'realtimeVadEnabled'
129
- value?: boolean | DataLink | EventProperty
130
- mapping?: string
131
- }
132
- | {
133
- input: 'realtimeVadMs'
134
- value?: number | DataLink | EventProperty
135
- mapping?: string
136
- }
137
- | {
138
- input: 'realtimeVadThold'
139
- value?: number | DataLink | EventProperty
140
- mapping?: string
141
- }
142
- | {
143
- input: 'realtimeVadFreqThold'
144
- value?: number | DataLink | EventProperty
145
- mapping?: string
146
- }
147
- >
148
- }
149
-
150
- /* [Deprecated] Stop transcribing microphone audio source */
151
- export type GeneratorSpeechInferenceActionTranscribeRealtimeStop = Action & {
152
- __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME_STOP'
83
+ /* Stop current transcription */
84
+ export type GeneratorSpeechInferenceActionTranscribeStop = Action & {
85
+ __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_STOP'
153
86
  }
154
87
 
155
88
  /* Clear downloaded files (model, audio) & current jobs */
@@ -172,14 +105,7 @@ Default property:
172
105
  "modelUseCoreML": false,
173
106
  "modelUseGPU": true,
174
107
  "modelUseFlashAttn": false,
175
- "inferLanguage": "Auto",
176
- "inferRealtimeAudioSeconds": 30,
177
- "inferRealtimeAudioSliceSeconds": 30,
178
- "inferRealtimeSaveAudio": false,
179
- "inferRealtimeVadEnabled": false,
180
- "inferRealtimeVadMs": 2000,
181
- "inferRealtimeVadThold": 0.6,
182
- "inferRealtimeVadFreqThold": 100
108
+ "inferLanguage": "Auto"
183
109
  }
184
110
  */
185
111
  property?: {
@@ -381,23 +307,6 @@ Default property:
381
307
  inferFileUrl?: string | DataLink
382
308
  /* MD5 of file to be inferred */
383
309
  inferFileMd5?: string | DataLink
384
- /* Record duration in seconds. For performance, the value less than 30 seconds is recommended. */
385
- inferRealtimeAudioSeconds?: number | DataLink
386
- /* Optimize audio transcription performance by slicing audio samples when `Realtime Audio Seconds` > 30. */
387
- inferRealtimeAudioSliceSeconds?: number | DataLink
388
- /* Min duration of audio to start transcribe. Min: 0.5 seconds, Max: `Realtime Audio Slice Seconds`, Default: 1 second */
389
- inferRealtimeAudioMinSeconds?: number | DataLink
390
- /* Save recorded audio to file, the file path will be output to outlet. */
391
- inferRealtimeSaveAudio?: boolean | DataLink
392
- /* Start transcribe on recording when the audio volume is greater than the threshold by using VAD (Voice Activity Detection).
393
- The first VAD will be triggered after 2 second of recording. */
394
- inferRealtimeVadEnabled?: boolean | DataLink
395
- /* The length of the collected audio is used for VAD. (ms) */
396
- inferRealtimeVadMs?: number | DataLink
397
- /* VAD threshold */
398
- inferRealtimeVadThold?: number | DataLink
399
- /* Frequency to apply High-pass filter in VAD */
400
- inferRealtimeVadFreqThold?: number | DataLink
401
310
  /* Buttress connection settings for remote inference */
402
311
  buttressConnectionSettings?:
403
312
  | DataLink
@@ -0,0 +1,201 @@
1
+ /* Auto generated by build script */
2
+ import type { SwitchCondInnerStateCurrentCanvas, SwitchCondData, SwitchDef } from '../switch'
3
+ import type { Data, DataLink } from '../data'
4
+ import type {
5
+ Brick,
6
+ Generator,
7
+ EventAction,
8
+ ActionWithDataParams,
9
+ ActionWithParams,
10
+ Action,
11
+ EventProperty,
12
+ } from '../common'
13
+
14
+ /* Load the model */
15
+ export type GeneratorVadInferenceOnnxActionLoadModel = Action & {
16
+ __actionName: 'GENERATOR_ONNX_VAD_LOAD_MODEL'
17
+ }
18
+
19
+ /* Detect speech in audio file */
20
+ export type GeneratorVadInferenceOnnxActionDetectFile = ActionWithParams & {
21
+ __actionName: 'GENERATOR_ONNX_VAD_DETECT_FILE'
22
+ params?: Array<
23
+ | {
24
+ input: 'fileUrl'
25
+ value?: string | DataLink | EventProperty
26
+ mapping?: string
27
+ }
28
+ | {
29
+ input: 'threshold'
30
+ value?: number | DataLink | EventProperty
31
+ mapping?: string
32
+ }
33
+ | {
34
+ input: 'minSpeechDurationMs'
35
+ value?: number | DataLink | EventProperty
36
+ mapping?: string
37
+ }
38
+ | {
39
+ input: 'minSilenceDurationMs'
40
+ value?: number | DataLink | EventProperty
41
+ mapping?: string
42
+ }
43
+ | {
44
+ input: 'maxSpeechDurationS'
45
+ value?: number | DataLink | EventProperty
46
+ mapping?: string
47
+ }
48
+ | {
49
+ input: 'speechPadMs'
50
+ value?: number | DataLink | EventProperty
51
+ mapping?: string
52
+ }
53
+ >
54
+ }
55
+
56
+ /* Detect speech in audio data */
57
+ export type GeneratorVadInferenceOnnxActionDetectData = ActionWithParams & {
58
+ __actionName: 'GENERATOR_ONNX_VAD_DETECT_DATA'
59
+ params?: Array<
60
+ | {
61
+ input: 'data'
62
+ value?: any | EventProperty
63
+ mapping?: string
64
+ }
65
+ | {
66
+ input: 'threshold'
67
+ value?: number | DataLink | EventProperty
68
+ mapping?: string
69
+ }
70
+ | {
71
+ input: 'minSpeechDurationMs'
72
+ value?: number | DataLink | EventProperty
73
+ mapping?: string
74
+ }
75
+ | {
76
+ input: 'minSilenceDurationMs'
77
+ value?: number | DataLink | EventProperty
78
+ mapping?: string
79
+ }
80
+ | {
81
+ input: 'maxSpeechDurationS'
82
+ value?: number | DataLink | EventProperty
83
+ mapping?: string
84
+ }
85
+ | {
86
+ input: 'speechPadMs'
87
+ value?: number | DataLink | EventProperty
88
+ mapping?: string
89
+ }
90
+ >
91
+ }
92
+
93
+ /* Clean cache */
94
+ export type GeneratorVadInferenceOnnxActionCleanCache = Action & {
95
+ __actionName: 'GENERATOR_ONNX_VAD_CLEAN_CACHE'
96
+ }
97
+
98
+ /* Release context */
99
+ export type GeneratorVadInferenceOnnxActionReleaseContext = Action & {
100
+ __actionName: 'GENERATOR_ONNX_VAD_RELEASE_CONTEXT'
101
+ }
102
+
103
+ interface GeneratorVadInferenceOnnxDef {
104
+ /*
105
+ Default property:
106
+ {
107
+ "modelType": "auto",
108
+ "detectThreshold": 0.5,
109
+ "detectMinSpeechDurationMs": 250,
110
+ "detectMinSilenceDurationMs": 100,
111
+ "detectMaxSpeechDurationS": 30,
112
+ "detectSpeechPadMs": 30,
113
+ "executionMode": "sequential"
114
+ }
115
+ */
116
+ property?: {
117
+ /* Initialize the VAD context on generator initialization */
118
+ init?: boolean | DataLink
119
+ /* VAD model */
120
+ model?: string | DataLink
121
+ /* Model type (auto-detected from config.json) */
122
+ modelType?: string | DataLink
123
+ /* Quantize type */
124
+ quantizeType?:
125
+ | 'auto'
126
+ | 'none'
127
+ | 'fp16'
128
+ | 'q8'
129
+ | 'int8'
130
+ | 'uint8'
131
+ | 'q4'
132
+ | 'bnb4'
133
+ | 'q4f16'
134
+ | DataLink
135
+ /* Speech probability threshold (0.0-1.0) */
136
+ detectThreshold?: number | DataLink
137
+ /* Minimum speech duration in milliseconds */
138
+ detectMinSpeechDurationMs?: number | DataLink
139
+ /* Minimum silence duration in milliseconds */
140
+ detectMinSilenceDurationMs?: number | DataLink
141
+ /* Maximum speech duration in seconds */
142
+ detectMaxSpeechDurationS?: number | DataLink
143
+ /* Padding around speech segments in milliseconds */
144
+ detectSpeechPadMs?: number | DataLink
145
+ /* Executor candidates, descending order of priority
146
+ Default will be xnnpack, wasm, cpu */
147
+ executors?:
148
+ | Array<'qnn' | 'dml' | 'nnapi' | 'xnnpack' | 'coreml' | 'cpu' | 'wasm' | 'webgpu' | DataLink>
149
+ | DataLink
150
+ /* Execution mode
151
+ Usually when the model has many branches, setting this option to `parallel` will give you better performance. */
152
+ executionMode?: 'sequential' | 'parallel' | DataLink
153
+ /* QNN backend */
154
+ qnnBackend?: 'HTP' | 'HTA' | 'DSP' | 'GPU' | 'CPU' | DataLink
155
+ /* Enable FP16 for QNN HTP */
156
+ qnnHtpEnableFp16?: boolean | DataLink
157
+ /* Enable QNN debug */
158
+ qnnEnableDebug?: boolean | DataLink
159
+ }
160
+ events?: {
161
+ /* Event triggered when context state changes */
162
+ onContextStateChange?: Array<EventAction>
163
+ /* Event triggered when error occurs */
164
+ onError?: Array<EventAction>
165
+ /* Event triggered when a detection result is received */
166
+ onDetected?: Array<EventAction>
167
+ }
168
+ outlets?: {
169
+ /* Context state */
170
+ contextState?: () => Data
171
+ /* Is detecting */
172
+ isDetecting?: () => Data
173
+ /* Detection segments result */
174
+ detectionSegments?: () => Data
175
+ /* Detection details */
176
+ detectionDetails?: () => Data
177
+ }
178
+ }
179
+
180
+ /* Local Voice Activity Detection (VAD) inference based on [transformers.js](https://huggingface.co/docs/transformers.js)
181
+ You can use any compatible VAD model from HuggingFace (Silero VAD, smart-turn, etc.) */
182
+ export type GeneratorVadInferenceOnnx = Generator &
183
+ GeneratorVadInferenceOnnxDef & {
184
+ templateKey: 'GENERATOR_ONNX_VAD'
185
+ switches: Array<
186
+ SwitchDef &
187
+ GeneratorVadInferenceOnnxDef & {
188
+ conds?: Array<{
189
+ method: '==' | '!=' | '>' | '<' | '>=' | '<='
190
+ cond:
191
+ | SwitchCondInnerStateCurrentCanvas
192
+ | SwitchCondData
193
+ | {
194
+ __typename: 'SwitchCondInnerStateOutlet'
195
+ outlet: 'contextState' | 'isDetecting' | 'detectionSegments' | 'detectionDetails'
196
+ value: any
197
+ }
198
+ }>
199
+ }
200
+ >
201
+ }
@@ -0,0 +1,123 @@
1
+ /* Auto generated by build script */
2
+ import type { SwitchCondInnerStateCurrentCanvas, SwitchCondData, SwitchDef } from '../switch'
3
+ import type { Data, DataLink } from '../data'
4
+ import type {
5
+ Brick,
6
+ Generator,
7
+ EventAction,
8
+ ActionWithDataParams,
9
+ ActionWithParams,
10
+ Action,
11
+ EventProperty,
12
+ } from '../common'
13
+
14
+ /* Detect speech in audio file */
15
+ export type GeneratorVadInferenceTraditionalActionDetectFile = ActionWithParams & {
16
+ __actionName: 'GENERATOR_TRADITIONAL_VAD_DETECT_FILE'
17
+ params?: Array<{
18
+ input: 'fileUrl'
19
+ value?: string | DataLink | EventProperty
20
+ mapping?: string
21
+ }>
22
+ }
23
+
24
+ /* Detect speech in audio data stream */
25
+ export type GeneratorVadInferenceTraditionalActionDetectData = ActionWithParams & {
26
+ __actionName: 'GENERATOR_TRADITIONAL_VAD_DETECT_DATA'
27
+ params?: Array<{
28
+ input: 'data'
29
+ value?: any | EventProperty
30
+ mapping?: string
31
+ }>
32
+ }
33
+
34
+ interface GeneratorVadInferenceTraditionalDef {
35
+ /*
36
+ Default property:
37
+ {
38
+ "detectVocalFreqMin": 75,
39
+ "detectVocalFreqMax": 900,
40
+ "detectThreshold": 0.5,
41
+ "detectMinSpeechDurationMs": 250,
42
+ "detectMinSilenceDurationMs": 100,
43
+ "detectMaxSpeechDurationS": 30,
44
+ "detectSpeechPadMs": 30
45
+ }
46
+ */
47
+ property?: {
48
+ /* Minimum vocal frequency in Hz */
49
+ detectVocalFreqMin?: number | DataLink
50
+ /* Maximum vocal frequency in Hz */
51
+ detectVocalFreqMax?: number | DataLink
52
+ /* Volume threshold in dB */
53
+ detectVolumeThreshold?: number | DataLink
54
+ /* Speech probability threshold (0.0-1.0) - maps to frequency clarity */
55
+ detectThreshold?: number | DataLink
56
+ /* Minimum speech duration in milliseconds */
57
+ detectMinSpeechDurationMs?: number | DataLink
58
+ /* Minimum silence duration in milliseconds */
59
+ detectMinSilenceDurationMs?: number | DataLink
60
+ /* Maximum speech duration in seconds */
61
+ detectMaxSpeechDurationS?: number | DataLink
62
+ /* Padding around speech segments in milliseconds */
63
+ detectSpeechPadMs?: number | DataLink
64
+ /* The file URL or path to be analyzed */
65
+ detectFileUrl?: string | DataLink
66
+ /* MD5 of file to be analyzed */
67
+ detectFileMd5?: string | DataLink
68
+ }
69
+ events?: {
70
+ /* Event triggered when context state changes */
71
+ onContextStateChange?: Array<EventAction>
72
+ /* Event triggered when detection result is available */
73
+ onDetected?: Array<EventAction>
74
+ /* Event triggered when error occurs */
75
+ onError?: Array<EventAction>
76
+ }
77
+ outlets?: {
78
+ /* Context state */
79
+ contextState?: () => Data
80
+ /* Is detecting */
81
+ isDetecting?: () => Data
82
+ /* Is speaking (real-time) */
83
+ isSpeaking?: () => Data
84
+ /* Detection segments result */
85
+ detectionSegments?: () => Data
86
+ /* Current volume in dB */
87
+ currentVolume?: () => Data
88
+ /* Current frequency clarity (0-1) */
89
+ currentClarity?: () => Data
90
+ /* Current detected frequency in Hz */
91
+ currentFrequency?: () => Data
92
+ }
93
+ }
94
+
95
+ /* Traditional Voice Activity Detection (VAD) using pitch detection and RMS volume analysis
96
+ No model download required - pure algorithmic approach */
97
+ export type GeneratorVadInferenceTraditional = Generator &
98
+ GeneratorVadInferenceTraditionalDef & {
99
+ templateKey: 'GENERATOR_TRADITIONAL_VAD'
100
+ switches: Array<
101
+ SwitchDef &
102
+ GeneratorVadInferenceTraditionalDef & {
103
+ conds?: Array<{
104
+ method: '==' | '!=' | '>' | '<' | '>=' | '<='
105
+ cond:
106
+ | SwitchCondInnerStateCurrentCanvas
107
+ | SwitchCondData
108
+ | {
109
+ __typename: 'SwitchCondInnerStateOutlet'
110
+ outlet:
111
+ | 'contextState'
112
+ | 'isDetecting'
113
+ | 'isSpeaking'
114
+ | 'detectionSegments'
115
+ | 'currentVolume'
116
+ | 'currentClarity'
117
+ | 'currentFrequency'
118
+ value: any
119
+ }
120
+ }>
121
+ }
122
+ >
123
+ }
@@ -33,9 +33,11 @@ export * from './ThermalPrinter'
33
33
  export * from './SqLite'
34
34
  export * from './McpServer'
35
35
  export * from './Mcp'
36
+ export * from './VadTraditional'
36
37
  export * from './TextToSpeechOnnx'
37
38
  export * from './LlmOnnx'
38
39
  export * from './SpeechToTextOnnx'
40
+ export * from './VadOnnx'
39
41
  export * from './SpeechToTextGgml'
40
42
  export * from './VadGgml'
41
43
  export * from './RealtimeTranscription'
@@ -733,6 +733,18 @@ export const templateEventPropsMap = {
733
733
  'GENERATOR_MCP_ERROR_MESSAGE', // type: string
734
734
  ],
735
735
  },
736
+ GENERATOR_TRADITIONAL_VAD: {
737
+ onContextStateChange: [
738
+ 'GENERATOR_TRADITIONAL_VAD_CONTEXT_STATE', // type: string
739
+ ],
740
+ onDetected: [
741
+ 'GENERATOR_TRADITIONAL_VAD_DETECTION_SEGMENTS', // type: array
742
+ 'GENERATOR_TRADITIONAL_VAD_DETECTION_TIME', // type: number
743
+ ],
744
+ onError: [
745
+ 'GENERATOR_TRADITIONAL_VAD_ERROR', // type: string
746
+ ],
747
+ },
736
748
  GENERATOR_TTS: {
737
749
  onContextStateChange: [
738
750
  'GENERATOR_TTS_CONTEXT_STATE', // type: string
@@ -767,6 +779,18 @@ export const templateEventPropsMap = {
767
779
  'GENERATOR_ONNX_STT_ERROR', // type: string
768
780
  ],
769
781
  },
782
+ GENERATOR_ONNX_VAD: {
783
+ onContextStateChange: [
784
+ 'GENERATOR_ONNX_VAD_CONTEXT_STATE', // type: string
785
+ ],
786
+ onError: [
787
+ 'GENERATOR_ONNX_VAD_ERROR', // type: string
788
+ ],
789
+ onDetected: [
790
+ 'GENERATOR_ONNX_VAD_DETECTION_SEGMENTS', // type: array
791
+ 'GENERATOR_ONNX_VAD_DETECTION_TIME', // type: number
792
+ ],
793
+ },
770
794
  GENERATOR_SPEECH_INFERENCE: {
771
795
  onContextStateChange: [
772
796
  'GENERATOR_SPEECH_INFERENCE_CONTEXT_STATE', // type: string
@@ -823,6 +847,9 @@ export const templateEventPropsMap = {
823
847
  'GENERATOR_REALTIME_TRANSCRIPTION_STATS_TIMESTAMP', // type: number
824
848
  'GENERATOR_REALTIME_TRANSCRIPTION_STATS', // type: object
825
849
  ],
850
+ onStabilized: [
851
+ 'GENERATOR_REALTIME_TRANSCRIPTION_STABILIZED_TEXT', // type: string
852
+ ],
826
853
  onEnd: [
827
854
  'GENERATOR_REALTIME_TRANSCRIPTION_END_RESULTS', // type: array
828
855
  'GENERATOR_REALTIME_TRANSCRIPTION_END_AUDIO_OUTPUT_PATH', // type: string