npm - @fugood/bricks-project - Versions diffs - 2.24.0-beta.0 → 2.24.0-beta.2 - Mend

@fugood/bricks-project 2.24.0-beta.0 → 2.24.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/compile/action-name-map.ts +26 -14
package/compile/index.ts +3 -1
package/package.json +4 -3
package/tools/mcp-server.ts +9 -4
package/tools/postinstall.ts +47 -5
package/types/generators/RealtimeTranscription.ts +14 -6
package/types/generators/SpeechToTextGgml.ts +4 -95
package/types/generators/VadOnnx.ts +201 -0
package/types/generators/VadTraditional.ts +123 -0
package/types/generators/index.ts +2 -0
package/utils/event-props.ts +27 -0

package/compile/action-name-map.ts CHANGED Viewed

@@ -601,6 +601,14 @@ export const templateActionNameMap = {
       variables: 'GENERATOR_MCP_VARIABLES',
     },
   },
+  GENERATOR_TRADITIONAL_VAD: {
+    GENERATOR_TRADITIONAL_VAD_DETECT_FILE: {
+      fileUrl: 'GENERATOR_TRADITIONAL_VAD_FILE_URL',
+    },
+    GENERATOR_TRADITIONAL_VAD_DETECT_DATA: {
+      data: 'GENERATOR_TRADITIONAL_VAD_DATA',
+    },
+  },
   GENERATOR_TTS: {
     GENERATOR_TTS_GENERATE: {
       text: 'GENERATOR_TTS_TEXT',
@@ -621,6 +629,24 @@ export const templateActionNameMap = {
       audioUri: 'GENERATOR_ONNX_STT_AUDIO_URI',
     },
   },
+  GENERATOR_ONNX_VAD: {
+    GENERATOR_ONNX_VAD_DETECT_FILE: {
+      fileUrl: 'GENERATOR_ONNX_VAD_FILE_URL',
+      threshold: 'GENERATOR_ONNX_VAD_THRESHOLD',
+      minSpeechDurationMs: 'GENERATOR_ONNX_VAD_MIN_SPEECH_DURATION_MS',
+      minSilenceDurationMs: 'GENERATOR_ONNX_VAD_MIN_SILENCE_DURATION_MS',
+      maxSpeechDurationS: 'GENERATOR_ONNX_VAD_MAX_SPEECH_DURATION_S',
+      speechPadMs: 'GENERATOR_ONNX_VAD_SPEECH_PAD_MS',
+    },
+    GENERATOR_ONNX_VAD_DETECT_DATA: {
+      data: 'GENERATOR_ONNX_VAD_DATA',
+      threshold: 'GENERATOR_ONNX_VAD_THRESHOLD',
+      minSpeechDurationMs: 'GENERATOR_ONNX_VAD_MIN_SPEECH_DURATION_MS',
+      minSilenceDurationMs: 'GENERATOR_ONNX_VAD_MIN_SILENCE_DURATION_MS',
+      maxSpeechDurationS: 'GENERATOR_ONNX_VAD_MAX_SPEECH_DURATION_S',
+      speechPadMs: 'GENERATOR_ONNX_VAD_SPEECH_PAD_MS',
+    },
+  },
   GENERATOR_SPEECH_INFERENCE: {
     GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_FILE: {
       fileUrl: 'GENERATOR_SPEECH_INFERENCE_FILE_URL',
@@ -636,20 +662,6 @@ export const templateActionNameMap = {
       language: 'GENERATOR_SPEECH_INFERENCE_LANGUAGE',
       translate: 'GENERATOR_SPEECH_INFERENCE_TRANSLATE',
     },
-    GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME: {
-      prompt: 'GENERATOR_SPEECH_INFERENCE_PROMPT',
-      beamSize: 'GENERATOR_SPEECH_INFERENCE_BEAM_SIZE',
-      language: 'GENERATOR_SPEECH_INFERENCE_LANGUAGE',
-      translate: 'GENERATOR_SPEECH_INFERENCE_TRANSLATE',
-      realtimeAudioSeconds: 'GENERATOR_SPEECH_INFERENCE_REALTIME_AUDIO_SECONDS',
-      realtimeAudioSliceSeconds: 'GENERATOR_SPEECH_INFERENCE_REALTIME_AUDIO_SLICE_SECONDS',
-      realtimeAudioMinSeconds: 'GENERATOR_SPEECH_INFERENCE_REALTIME_AUDIO_MIN_SECONDS',
-      realtimeSaveAudio: 'GENERATOR_SPEECH_INFERENCE_REALTIME_SAVE_AUDIO',
-      realtimeVadEnabled: 'GENERATOR_SPEECH_INFERENCE_REALTIME_VAD_ENABLED',
-      realtimeVadMs: 'GENERATOR_SPEECH_INFERENCE_REALTIME_VAD_MS',
-      realtimeVadThold: 'GENERATOR_SPEECH_INFERENCE_REALTIME_VAD_THOLD',
-      realtimeVadFreqThold: 'GENERATOR_SPEECH_INFERENCE_REALTIME_VAD_FREQ_THOLD',
-    },
   },
   GENERATOR_VAD_INFERENCE: {
     GENERATOR_VAD_INFERENCE_DETECT_FILE: {

package/compile/index.ts CHANGED Viewed

@@ -421,8 +421,9 @@ const compileAutomation = (automationMap: AutomationMap) =>
 export const compile = async (app: Application) => {
   await new Promise((resolve) => setImmediate(resolve, 0))
+  const timestamp = Date.now()
   const config = {
-    title: app.name,
+    title: `${app.name || 'Unknown'}(${timestamp})`,
     subspace_map: app.subspaces.reduce((subspaceMap, subspace) => {
       subspaceMap[subspace.id] = {
         title: subspace.title,
@@ -969,6 +970,7 @@ export const compile = async (app: Application) => {
     automation_map: app.automationMap
       ? compileAutomation(app.automationMap)
       : app.metadata?.TEMP_automation_map || {},
+    update_timestamp: timestamp,
   }
   return config
 }

package/package.json CHANGED Viewed

@@ -1,13 +1,14 @@
 {
   "name": "@fugood/bricks-project",
-  "version": "2.24.0-beta.0",
+  "version": "2.24.0-beta.2",
   "main": "index.ts",
   "scripts": {
     "build": "bun scripts/build.js"
   },
   "dependencies": {
-    "@fugood/bricks-cli": "^2.23.0",
+    "@fugood/bricks-cli": "^2.24.0-beta.2",
     "@huggingface/gguf": "^0.3.2",
+    "@iarna/toml": "^3.0.0",
     "@modelcontextprotocol/sdk": "^1.15.0",
     "@toon-format/toon": "^2.1.0",
     "@types/escodegen": "^0.0.10",
@@ -18,5 +19,5 @@
     "lodash": "^4.17.4",
     "uuid": "^8.3.1"
   },
-  "gitHead": "27bd4a30a4dd412ab73eab13c49265f73fd76e6c"
+  "gitHead": "f63649f1cb9ae4c3d0c4ac687152bfc1be6528bd"
 }

package/tools/mcp-server.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
 import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
 import { z } from 'zod'
-import { $ } from 'bun'
+import { $, JSON5 } from 'bun'
 import * as TOON from '@toon-format/toon'
 import Fuse from 'fuse.js'
 import { gguf } from '@huggingface/gguf'
@@ -729,7 +729,7 @@ server.tool(
         // Return JSON for huggingface_select to allow direct parsing by consumers
         return {
-          content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
+          content: [{ type: 'text', text: JSON5.stringify(result, null, 2) }],
         }
       }
@@ -842,7 +842,7 @@ server.tool(
         // Return JSON for huggingface_select to allow direct parsing by consumers
         return {
-          content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
+          content: [{ type: 'text', text: JSON5.stringify(result, null, 2) }],
         }
       } else {
         const result = {
@@ -875,7 +875,12 @@ server.tool(
         // Return JSON5 text followed by a trailing `//` usage hint; the output is not strict JSON
         return {
-          content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
+          content: [
+            {
+              type: 'text',
+              text: `${JSON5.stringify(result, null, 2)} // Please use _hfRepoInfo undocumented field with @ts-ignore`,
+            },
+          ],
         }
       }
     } catch (err: any) {

package/tools/postinstall.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import { $ } from 'bun'
 import { stat, readFile, writeFile, readdir } from 'fs/promises'
+import TOML from '@iarna/toml'
 const cwd = process.cwd()
@@ -63,9 +64,7 @@ if (hasClaudeCode || hasAgentsMd) {
   await handleMcpConfigOverride(mcpConfigPath)
 }
-if (hasClaudeCode) {
-  // Install skills that don't already exist in the project
-  const skillsDir = `${cwd}/.claude/skills`
+const setupSkills = async (skillsDir) => {
   const packageSkillsDir = `${__dirname}/../skills`
   if (await exists(packageSkillsDir)) {
@@ -81,10 +80,53 @@ if (hasClaudeCode) {
           console.log(`Skill '${skill}' already exists, skipping`)
         } else {
           await $`cp -r ${packageSkillsDir}/${skill} ${targetSkillDir}`
-          console.log(`Installed skill '${skill}' to .claude/skills/`)
+          console.log(`Installed skill '${skill}' to ${skillsDir}/`)
         }
       }),
     )
   }
-  // TODO: .codex/skills, .cursor/skills if needed
 }
+if (hasClaudeCode) {
+  // Install skills that don't already exist in the project
+  await setupSkills(`${cwd}/.claude/skills`)
+}
+if (hasAgentsMd) {
+  // Handle codex skills
+  // Currently no signal file for codex skills, so we just check if AGENTS.md exists
+  await setupSkills(`${cwd}/.codex/skills`)
+  // Add (or refresh) the `bricks-project` entry under `mcp_servers` in a Codex TOML
+  // config file and write the file back.
+  // - Missing file: a config containing only the `bricks-project` server is written.
+  // - Valid TOML: every existing key is kept; `mcp_servers` is created when absent,
+  //   so unrelated user settings are never dropped.
+  // - Invalid TOML: best-effort fallback to a fresh config (with a warning), because
+  //   the existing content cannot be merged.
+  const handleCodexMcpConfigOverride = async (mcpConfigPath: string) => {
+    let mcpConfig: ReturnType<typeof TOML.parse> = {}
+    if (await exists(mcpConfigPath)) {
+      const configStr = await readFile(mcpConfigPath, 'utf-8')
+      try {
+        mcpConfig = TOML.parse(configStr)
+      } catch (e) {
+        console.warn(`Could not parse ${mcpConfigPath}; replacing it with a default config`)
+      }
+    }
+    const existingServers = mcpConfig.mcp_servers
+    // Only a TOML table can be merged into; any other value is replaced.
+    const serverTable =
+      typeof existingServers === 'object' &&
+      existingServers !== null &&
+      !Array.isArray(existingServers) &&
+      !(existingServers instanceof Date)
+        ? existingServers
+        : {}
+    // Spreading keeps the other servers (and their order) and overrides only ours.
+    mcpConfig.mcp_servers = { ...serverTable, 'bricks-project': projectMcpServer }
+    // @iarna/toml's stringify takes only the object to serialize.
+    await writeFile(mcpConfigPath, `${TOML.stringify(mcpConfig)}\n`)
+    console.log(`Updated ${mcpConfigPath}`)
+  }
+  // Setup MCP config (.codex/config.toml)
+  const codexConfigPath = `${cwd}/.codex/config.toml`
+  await handleCodexMcpConfigOverride(codexConfigPath)
+}
+// TODO: .cursor/skills if needed
+// TODO: User setting in application.json to avoid unnecessary skills/config setup

package/types/generators/RealtimeTranscription.ts CHANGED Viewed

@@ -41,10 +41,10 @@ Default property:
   "audioSliceSec": 30,
   "audioMinSec": 1,
   "maxSlicesInMemory": 5,
+  "transcribeProcessingPauseMs": 500,
+  "initTranscribeAfterMs": 500,
   "vadStrategy": "use-preset",
   "vadPreset": "default",
-  "autoSliceOnSpeechEnd": true,
-  "autoSliceThreshold": 2,
   "initialPrompt": "",
   "promptPreviousSlices": false,
   "saveAudio": true,
@@ -73,6 +73,10 @@ Default property:
     audioMinSec?: number | DataLink
     /* Maximum number of slices to keep in memory */
     maxSlicesInMemory?: number | DataLink
+    /* Transcribe processing interval in milliseconds */
+    transcribeProcessingPauseMs?: number | DataLink
+    /* Transcribe processing init after pause in milliseconds */
+    initTranscribeAfterMs?: number | DataLink
     /* VAD Strategy */
     vadStrategy?: 'use-preset' | 'use-generator-options' | DataLink
     /* VAD preset configuration */
@@ -86,10 +90,6 @@ Default property:
       | 'meeting'
       | 'noisy-environment'
       | DataLink
-    /* Auto slice on speech end */
-    autoSliceOnSpeechEnd?: boolean | DataLink
-    /* Auto slice threshold in seconds */
-    autoSliceThreshold?: number | DataLink
     /* Initial prompt for transcription */
     initialPrompt?: string | DataLink
     /* Include previous slices in prompt */
@@ -128,6 +128,8 @@ Default property:
     onStatusChange?: Array<EventAction>
     /* Event triggered when statistics update */
     onStatsUpdate?: Array<EventAction>
+    /* Event triggered when slice transcription is stabilized */
+    onStabilized?: Array<EventAction>
     /* Event triggered when transcription ends */
     onEnd?: Array<EventAction>
   }
@@ -140,12 +142,16 @@ Default property:
     results?: () => Data
     /* Current transcription result text */
     resultText?: () => Data
+    /* Last stabilized transcription result segment */
+    lastStabilizedSegment?: () => Data
     /* Current statistics */
     statistics?: () => Data
     /* Latest transcribe event */
     lastTranscribeEvent?: () => Data
     /* Latest VAD event */
     lastVadEvent?: () => Data
+    /* Stabilized transcription text from completed slices */
+    stabilizedText?: () => Data
     /* Audio output file path (auto-generated when saving audio) */
     audioOutputPath?: () => Data
   }
@@ -170,9 +176,11 @@ export type GeneratorRealtimeTranscription = Generator &
                     | 'isTranscribing'
                     | 'results'
                     | 'resultText'
+                    | 'lastStabilizedSegment'
                     | 'statistics'
                     | 'lastTranscribeEvent'
                     | 'lastVadEvent'
+                    | 'stabilizedText'
                     | 'audioOutputPath'
                   value: any
                 }

package/types/generators/SpeechToTextGgml.ts CHANGED Viewed

@@ -80,76 +80,9 @@ export type GeneratorSpeechInferenceActionTranscribeData = ActionWithParams & {
   >
 }
-/* [Deprecated] Transcribe microphone audio source */
-export type GeneratorSpeechInferenceActionTranscribeRealtime = ActionWithParams & {
-  __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME'
-  params?: Array<
-    | {
-        input: 'prompt'
-        value?: string | DataLink | EventProperty
-        mapping?: string
-      }
-    | {
-        input: 'beamSize'
-        value?: number | DataLink | EventProperty
-        mapping?: string
-      }
-    | {
-        input: 'language'
-        value?: string | DataLink | EventProperty
-        mapping?: string
-      }
-    | {
-        input: 'translate'
-        value?: boolean | DataLink | EventProperty
-        mapping?: string
-      }
-    | {
-        input: 'realtimeAudioSeconds'
-        value?: number | DataLink | EventProperty
-        mapping?: string
-      }
-    | {
-        input: 'realtimeAudioSliceSeconds'
-        value?: number | DataLink | EventProperty
-        mapping?: string
-      }
-    | {
-        input: 'realtimeAudioMinSeconds'
-        value?: number | DataLink | EventProperty
-        mapping?: string
-      }
-    | {
-        input: 'realtimeSaveAudio'
-        value?: boolean | DataLink | EventProperty
-        mapping?: string
-      }
-    | {
-        input: 'realtimeVadEnabled'
-        value?: boolean | DataLink | EventProperty
-        mapping?: string
-      }
-    | {
-        input: 'realtimeVadMs'
-        value?: number | DataLink | EventProperty
-        mapping?: string
-      }
-    | {
-        input: 'realtimeVadThold'
-        value?: number | DataLink | EventProperty
-        mapping?: string
-      }
-    | {
-        input: 'realtimeVadFreqThold'
-        value?: number | DataLink | EventProperty
-        mapping?: string
-      }
-  >
-}
-/* [Deprecated] Stop transcribing microphone audio source */
-export type GeneratorSpeechInferenceActionTranscribeRealtimeStop = Action & {
-  __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME_STOP'
+/* Stop current transcription */
+export type GeneratorSpeechInferenceActionTranscribeStop = Action & {
+  __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_STOP'
 }
 /* Clear downloaded files (model, audio) & current jobs */
@@ -172,14 +105,7 @@ Default property:
   "modelUseCoreML": false,
   "modelUseGPU": true,
   "modelUseFlashAttn": false,
-  "inferLanguage": "Auto",
-  "inferRealtimeAudioSeconds": 30,
-  "inferRealtimeAudioSliceSeconds": 30,
-  "inferRealtimeSaveAudio": false,
-  "inferRealtimeVadEnabled": false,
-  "inferRealtimeVadMs": 2000,
-  "inferRealtimeVadThold": 0.6,
-  "inferRealtimeVadFreqThold": 100
+  "inferLanguage": "Auto"
 }
   */
   property?: {
@@ -381,23 +307,6 @@ Default property:
     inferFileUrl?: string | DataLink
     /* MD5 of file to be inferred */
     inferFileMd5?: string | DataLink
-    /* Record duration in seconds. For performance, the value less than 30 seconds is recommended. */
-    inferRealtimeAudioSeconds?: number | DataLink
-    /* Optimize audio transcription performance by slicing audio samples when `Realtime Audio Seconds` > 30. */
-    inferRealtimeAudioSliceSeconds?: number | DataLink
-    /* Min duration of audio to start transcribe. Min: 0.5 seconds, Max: `Realtime Audio Slice Seconds`, Default: 1 second */
-    inferRealtimeAudioMinSeconds?: number | DataLink
-    /* Save recorded audio to file, the file path will be output to outlet. */
-    inferRealtimeSaveAudio?: boolean | DataLink
-    /* Start transcribe on recording when the audio volume is greater than the threshold by using VAD (Voice Activity Detection).
- The first VAD will be triggered after 2 second of recording. */
-    inferRealtimeVadEnabled?: boolean | DataLink
-    /* The length of the collected audio is used for VAD. (ms) */
-    inferRealtimeVadMs?: number | DataLink
-    /* VAD threshold */
-    inferRealtimeVadThold?: number | DataLink
-    /* Frequency to apply High-pass filter in VAD */
-    inferRealtimeVadFreqThold?: number | DataLink
     /* Buttress connection settings for remote inference */
     buttressConnectionSettings?:
       | DataLink

package/types/generators/VadOnnx.ts ADDED Viewed

@@ -0,0 +1,201 @@
+/* Auto generated by build script */
+import type { SwitchCondInnerStateCurrentCanvas, SwitchCondData, SwitchDef } from '../switch'
+import type { Data, DataLink } from '../data'
+import type {
+  Brick,
+  Generator,
+  EventAction,
+  ActionWithDataParams,
+  ActionWithParams,
+  Action,
+  EventProperty,
+} from '../common'
+/* Load the model.
+ Plain `Action` (no `params`) tagged with `__actionName: 'GENERATOR_ONNX_VAD_LOAD_MODEL'`.
+ NOTE(review): presumably loads the model named by the generator's `model` property - confirm against the runtime. */
+export type GeneratorVadInferenceOnnxActionLoadModel = Action & {
+  __actionName: 'GENERATOR_ONNX_VAD_LOAD_MODEL'
+}
+/* Detect speech in audio file.
+ Action tagged with `__actionName: 'GENERATOR_ONNX_VAD_DETECT_FILE'`. Each optional `params` entry
+ sets one input: `fileUrl` (string) or a numeric detection setting (`threshold`,
+ `minSpeechDurationMs`, `minSilenceDurationMs`, `maxSpeechDurationS`, `speechPadMs`).
+ Every value may instead be a `DataLink` or an `EventProperty`.
+ NOTE(review): the numeric inputs presumably override the matching `detect*` generator
+ properties for this call - confirm against the runtime. */
+export type GeneratorVadInferenceOnnxActionDetectFile = ActionWithParams & {
+  __actionName: 'GENERATOR_ONNX_VAD_DETECT_FILE'
+  params?: Array<
+    | {
+        input: 'fileUrl'
+        value?: string | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'threshold'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'minSpeechDurationMs'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'minSilenceDurationMs'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'maxSpeechDurationS'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'speechPadMs'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+  >
+}
+/* Detect speech in audio data.
+ Action tagged with `__actionName: 'GENERATOR_ONNX_VAD_DETECT_DATA'`. Each optional `params` entry
+ sets one input: `data` or a numeric detection setting (`threshold`, `minSpeechDurationMs`,
+ `minSilenceDurationMs`, `maxSpeechDurationS`, `speechPadMs`).
+ `data` is typed `any | EventProperty`, which is just `any`, so the expected audio data
+ format is not expressed by this type. */
+export type GeneratorVadInferenceOnnxActionDetectData = ActionWithParams & {
+  __actionName: 'GENERATOR_ONNX_VAD_DETECT_DATA'
+  params?: Array<
+    | {
+        input: 'data'
+        value?: any | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'threshold'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'minSpeechDurationMs'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'minSilenceDurationMs'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'maxSpeechDurationS'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+    | {
+        input: 'speechPadMs'
+        value?: number | DataLink | EventProperty
+        mapping?: string
+      }
+  >
+}
+/* Clean cache.
+ Plain `Action` (no `params`) tagged with `__actionName: 'GENERATOR_ONNX_VAD_CLEAN_CACHE'`.
+ NOTE(review): which cache is cleared is not visible from this type - confirm against the runtime. */
+export type GeneratorVadInferenceOnnxActionCleanCache = Action & {
+  __actionName: 'GENERATOR_ONNX_VAD_CLEAN_CACHE'
+}
+/* Release context.
+ Plain `Action` (no `params`) tagged with `__actionName: 'GENERATOR_ONNX_VAD_RELEASE_CONTEXT'`. */
+export type GeneratorVadInferenceOnnxActionReleaseContext = Action & {
+  __actionName: 'GENERATOR_ONNX_VAD_RELEASE_CONTEXT'
+}
+interface GeneratorVadInferenceOnnxDef {
+  /*
+Definition shared by the ONNX VAD generator type and each of its switches: optional
+`property`, `events` and `outlets` groups. Every property also accepts a `DataLink`.
+Properties missing from the defaults below (`init`, `model`, `quantizeType`, `executors`,
+`qnnBackend`, `qnnHtpEnableFp16`, `qnnEnableDebug`) have no default listed here.
+Default property:
+{
+  "modelType": "auto",
+  "detectThreshold": 0.5,
+  "detectMinSpeechDurationMs": 250,
+  "detectMinSilenceDurationMs": 100,
+  "detectMaxSpeechDurationS": 30,
+  "detectSpeechPadMs": 30,
+  "executionMode": "sequential"
+}
+  */
+  property?: {
+    /* Initialize the VAD context on generator initialization */
+    init?: boolean | DataLink
+    /* VAD model */
+    model?: string | DataLink
+    /* Model type (auto-detected from config.json) */
+    modelType?: string | DataLink
+    /* Quantize type */
+    quantizeType?:
+      | 'auto'
+      | 'none'
+      | 'fp16'
+      | 'q8'
+      | 'int8'
+      | 'uint8'
+      | 'q4'
+      | 'bnb4'
+      | 'q4f16'
+      | DataLink
+    /* Speech probability threshold (0.0-1.0) */
+    detectThreshold?: number | DataLink
+    /* Minimum speech duration in milliseconds */
+    detectMinSpeechDurationMs?: number | DataLink
+    /* Minimum silence duration in milliseconds */
+    detectMinSilenceDurationMs?: number | DataLink
+    /* Maximum speech duration in seconds */
+    detectMaxSpeechDurationS?: number | DataLink
+    /* Padding around speech segments in milliseconds */
+    detectSpeechPadMs?: number | DataLink
+    /* Executor candidates, descending order of priority
+  Default will be xnnpack, wasm, cpu */
+    executors?:
+      | Array<'qnn' | 'dml' | 'nnapi' | 'xnnpack' | 'coreml' | 'cpu' | 'wasm' | 'webgpu' | DataLink>
+      | DataLink
+    /* Execution mode
+  Usually when the model has many branches, setting this option to `parallel` will give you better performance. */
+    executionMode?: 'sequential' | 'parallel' | DataLink
+    /* QNN backend */
+    qnnBackend?: 'HTP' | 'HTA' | 'DSP' | 'GPU' | 'CPU' | DataLink
+    /* Enable FP16 for QNN HTP */
+    qnnHtpEnableFp16?: boolean | DataLink
+    /* Enable QNN debug */
+    qnnEnableDebug?: boolean | DataLink
+  }
+  events?: {
+    /* Event triggered when context state changes
+  (event property in `templateEventPropsMap`: GENERATOR_ONNX_VAD_CONTEXT_STATE, string) */
+    onContextStateChange?: Array<EventAction>
+    /* Event triggered when error occurs
+  (event property in `templateEventPropsMap`: GENERATOR_ONNX_VAD_ERROR, string) */
+    onError?: Array<EventAction>
+    /* Event triggered when got detection result
+  (event properties in `templateEventPropsMap`: GENERATOR_ONNX_VAD_DETECTION_SEGMENTS, array;
+  GENERATOR_ONNX_VAD_DETECTION_TIME, number) */
+    onDetected?: Array<EventAction>
+  }
+  outlets?: {
+    /* Context state */
+    contextState?: () => Data
+    /* Is detecting */
+    isDetecting?: () => Data
+    /* Detection segments result */
+    detectionSegments?: () => Data
+    /* Detection details */
+    detectionDetails?: () => Data
+  }
+}
+/* Local Voice Activity Detection (VAD) inference based on [transformers.js](https://huggingface.co/docs/transformers.js)
+ You can use any compatible VAD model from HuggingFace (Silero VAD, smart-turn, etc.)
+ The generator is identified by `templateKey: 'GENERATOR_ONNX_VAD'`. Each `switches` entry combines
+ `SwitchDef` with its own property/events/outlets definition and optional `conds`; a cond compares
+ (`==`, `!=`, `>`, `<`, `>=`, `<=`) a `SwitchCondInnerStateCurrentCanvas`, a `SwitchCondData`, or one of
+ this generator's outlets (`contextState`, `isDetecting`, `detectionSegments`, `detectionDetails`). */
+export type GeneratorVadInferenceOnnx = Generator &
+  GeneratorVadInferenceOnnxDef & {
+    templateKey: 'GENERATOR_ONNX_VAD'
+    switches: Array<
+      SwitchDef &
+        GeneratorVadInferenceOnnxDef & {
+          conds?: Array<{
+            method: '==' | '!=' | '>' | '<' | '>=' | '<='
+            cond:
+              | SwitchCondInnerStateCurrentCanvas
+              | SwitchCondData
+              | {
+                  __typename: 'SwitchCondInnerStateOutlet'
+                  outlet: 'contextState' | 'isDetecting' | 'detectionSegments' | 'detectionDetails'
+                  value: any
+                }
+          }>
+        }
+    >
+  }

package/types/generators/VadTraditional.ts ADDED Viewed

@@ -0,0 +1,123 @@
+/* Auto generated by build script */
+import type { SwitchCondInnerStateCurrentCanvas, SwitchCondData, SwitchDef } from '../switch'
+import type { Data, DataLink } from '../data'
+import type {
+  Brick,
+  Generator,
+  EventAction,
+  ActionWithDataParams,
+  ActionWithParams,
+  Action,
+  EventProperty,
+} from '../common'
+/* Detect speech in audio file.
+ Action tagged with `__actionName: 'GENERATOR_TRADITIONAL_VAD_DETECT_FILE'`. Its only input is
+ `fileUrl`, given as a string, a `DataLink` or an `EventProperty`. */
+export type GeneratorVadInferenceTraditionalActionDetectFile = ActionWithParams & {
+  __actionName: 'GENERATOR_TRADITIONAL_VAD_DETECT_FILE'
+  params?: Array<{
+    input: 'fileUrl'
+    value?: string | DataLink | EventProperty
+    mapping?: string
+  }>
+}
+/* Detect speech in audio data stream.
+ Action tagged with `__actionName: 'GENERATOR_TRADITIONAL_VAD_DETECT_DATA'`. Its only input is `data`,
+ typed `any | EventProperty` (which is just `any`), so the expected audio data format is not
+ expressed by this type. */
+export type GeneratorVadInferenceTraditionalActionDetectData = ActionWithParams & {
+  __actionName: 'GENERATOR_TRADITIONAL_VAD_DETECT_DATA'
+  params?: Array<{
+    input: 'data'
+    value?: any | EventProperty
+    mapping?: string
+  }>
+}
+interface GeneratorVadInferenceTraditionalDef {
+  /*
+Definition shared by the traditional VAD generator type and each of its switches: optional
+`property`, `events` and `outlets` groups. Every property also accepts a `DataLink`.
+Properties missing from the defaults below (`detectVolumeThreshold`, `detectFileUrl`,
+`detectFileMd5`) have no default listed here.
+Default property:
+{
+  "detectVocalFreqMin": 75,
+  "detectVocalFreqMax": 900,
+  "detectThreshold": 0.5,
+  "detectMinSpeechDurationMs": 250,
+  "detectMinSilenceDurationMs": 100,
+  "detectMaxSpeechDurationS": 30,
+  "detectSpeechPadMs": 30
+}
+  */
+  property?: {
+    /* Minimum vocal frequency in Hz */
+    detectVocalFreqMin?: number | DataLink
+    /* Maximum vocal frequency in Hz */
+    detectVocalFreqMax?: number | DataLink
+    /* Volume threshold in dB */
+    detectVolumeThreshold?: number | DataLink
+    /* Speech probability threshold (0.0-1.0) - maps to frequency clarity */
+    detectThreshold?: number | DataLink
+    /* Minimum speech duration in milliseconds */
+    detectMinSpeechDurationMs?: number | DataLink
+    /* Minimum silence duration in milliseconds */
+    detectMinSilenceDurationMs?: number | DataLink
+    /* Maximum speech duration in seconds */
+    detectMaxSpeechDurationS?: number | DataLink
+    /* Padding around speech segments in milliseconds */
+    detectSpeechPadMs?: number | DataLink
+    /* The file URL or path to be analyzed */
+    detectFileUrl?: string | DataLink
+    /* MD5 of file to be analyzed */
+    detectFileMd5?: string | DataLink
+  }
+  events?: {
+    /* Event triggered when context state changes
+  (event property in `templateEventPropsMap`: GENERATOR_TRADITIONAL_VAD_CONTEXT_STATE, string) */
+    onContextStateChange?: Array<EventAction>
+    /* Event triggered when detection result is available
+  (event properties in `templateEventPropsMap`: GENERATOR_TRADITIONAL_VAD_DETECTION_SEGMENTS, array;
+  GENERATOR_TRADITIONAL_VAD_DETECTION_TIME, number) */
+    onDetected?: Array<EventAction>
+    /* Event triggered when error occurs
+  (event property in `templateEventPropsMap`: GENERATOR_TRADITIONAL_VAD_ERROR, string) */
+    onError?: Array<EventAction>
+  }
+  outlets?: {
+    /* Context state */
+    contextState?: () => Data
+    /* Is detecting */
+    isDetecting?: () => Data
+    /* Is speaking (real-time) */
+    isSpeaking?: () => Data
+    /* Detection segments result */
+    detectionSegments?: () => Data
+    /* Current volume in dB */
+    currentVolume?: () => Data
+    /* Current frequency clarity (0-1) */
+    currentClarity?: () => Data
+    /* Current detected frequency in Hz */
+    currentFrequency?: () => Data
+  }
+}
+/* Traditional Voice Activity Detection (VAD) using pitch detection and RMS volume analysis
+ No model download required - pure algorithmic approach
+ The generator is identified by `templateKey: 'GENERATOR_TRADITIONAL_VAD'`. Each `switches` entry combines
+ `SwitchDef` with its own property/events/outlets definition and optional `conds`; a cond compares
+ (`==`, `!=`, `>`, `<`, `>=`, `<=`) a `SwitchCondInnerStateCurrentCanvas`, a `SwitchCondData`, or one of
+ this generator's outlets (`contextState`, `isDetecting`, `isSpeaking`, `detectionSegments`,
+ `currentVolume`, `currentClarity`, `currentFrequency`). */
+export type GeneratorVadInferenceTraditional = Generator &
+  GeneratorVadInferenceTraditionalDef & {
+    templateKey: 'GENERATOR_TRADITIONAL_VAD'
+    switches: Array<
+      SwitchDef &
+        GeneratorVadInferenceTraditionalDef & {
+          conds?: Array<{
+            method: '==' | '!=' | '>' | '<' | '>=' | '<='
+            cond:
+              | SwitchCondInnerStateCurrentCanvas
+              | SwitchCondData
+              | {
+                  __typename: 'SwitchCondInnerStateOutlet'
+                  outlet:
+                    | 'contextState'
+                    | 'isDetecting'
+                    | 'isSpeaking'
+                    | 'detectionSegments'
+                    | 'currentVolume'
+                    | 'currentClarity'
+                    | 'currentFrequency'
+                  value: any
+                }
+          }>
+        }
+    >
+  }

package/types/generators/index.ts CHANGED Viewed

@@ -33,9 +33,11 @@ export * from './ThermalPrinter'
 export * from './SqLite'
 export * from './McpServer'
 export * from './Mcp'
+export * from './VadTraditional'
 export * from './TextToSpeechOnnx'
 export * from './LlmOnnx'
 export * from './SpeechToTextOnnx'
+export * from './VadOnnx'
 export * from './SpeechToTextGgml'
 export * from './VadGgml'
 export * from './RealtimeTranscription'

package/utils/event-props.ts CHANGED Viewed

@@ -733,6 +733,18 @@ export const templateEventPropsMap = {
       'GENERATOR_MCP_ERROR_MESSAGE', // type: string
     ],
   },
+  GENERATOR_TRADITIONAL_VAD: {
+    onContextStateChange: [
+      'GENERATOR_TRADITIONAL_VAD_CONTEXT_STATE', // type: string
+    ],
+    onDetected: [
+      'GENERATOR_TRADITIONAL_VAD_DETECTION_SEGMENTS', // type: array
+      'GENERATOR_TRADITIONAL_VAD_DETECTION_TIME', // type: number
+    ],
+    onError: [
+      'GENERATOR_TRADITIONAL_VAD_ERROR', // type: string
+    ],
+  },
   GENERATOR_TTS: {
     onContextStateChange: [
       'GENERATOR_TTS_CONTEXT_STATE', // type: string
@@ -767,6 +779,18 @@ export const templateEventPropsMap = {
       'GENERATOR_ONNX_STT_ERROR', // type: string
     ],
   },
+  GENERATOR_ONNX_VAD: {
+    onContextStateChange: [
+      'GENERATOR_ONNX_VAD_CONTEXT_STATE', // type: string
+    ],
+    onError: [
+      'GENERATOR_ONNX_VAD_ERROR', // type: string
+    ],
+    onDetected: [
+      'GENERATOR_ONNX_VAD_DETECTION_SEGMENTS', // type: array
+      'GENERATOR_ONNX_VAD_DETECTION_TIME', // type: number
+    ],
+  },
   GENERATOR_SPEECH_INFERENCE: {
     onContextStateChange: [
       'GENERATOR_SPEECH_INFERENCE_CONTEXT_STATE', // type: string
@@ -823,6 +847,9 @@ export const templateEventPropsMap = {
       'GENERATOR_REALTIME_TRANSCRIPTION_STATS_TIMESTAMP', // type: number
       'GENERATOR_REALTIME_TRANSCRIPTION_STATS', // type: object
     ],
+    onStabilized: [
+      'GENERATOR_REALTIME_TRANSCRIPTION_STABILIZED_TEXT', // type: string
+    ],
     onEnd: [
       'GENERATOR_REALTIME_TRANSCRIPTION_END_RESULTS', // type: array
       'GENERATOR_REALTIME_TRANSCRIPTION_END_AUDIO_OUTPUT_PATH', // type: string