@fugood/bricks-project 2.23.0 → 2.23.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -601,6 +601,14 @@ export const templateActionNameMap = {
601
601
  variables: 'GENERATOR_MCP_VARIABLES',
602
602
  },
603
603
  },
604
+ GENERATOR_TRADITIONAL_VAD: {
605
+ GENERATOR_TRADITIONAL_VAD_DETECT_FILE: {
606
+ fileUrl: 'GENERATOR_TRADITIONAL_VAD_FILE_URL',
607
+ },
608
+ GENERATOR_TRADITIONAL_VAD_DETECT_DATA: {
609
+ data: 'GENERATOR_TRADITIONAL_VAD_DATA',
610
+ },
611
+ },
604
612
  GENERATOR_TTS: {
605
613
  GENERATOR_TTS_GENERATE: {
606
614
  text: 'GENERATOR_TTS_TEXT',
@@ -621,6 +629,24 @@ export const templateActionNameMap = {
621
629
  audioUri: 'GENERATOR_ONNX_STT_AUDIO_URI',
622
630
  },
623
631
  },
632
+ GENERATOR_ONNX_VAD: {
633
+ GENERATOR_ONNX_VAD_DETECT_FILE: {
634
+ fileUrl: 'GENERATOR_ONNX_VAD_FILE_URL',
635
+ threshold: 'GENERATOR_ONNX_VAD_THRESHOLD',
636
+ minSpeechDurationMs: 'GENERATOR_ONNX_VAD_MIN_SPEECH_DURATION_MS',
637
+ minSilenceDurationMs: 'GENERATOR_ONNX_VAD_MIN_SILENCE_DURATION_MS',
638
+ maxSpeechDurationS: 'GENERATOR_ONNX_VAD_MAX_SPEECH_DURATION_S',
639
+ speechPadMs: 'GENERATOR_ONNX_VAD_SPEECH_PAD_MS',
640
+ },
641
+ GENERATOR_ONNX_VAD_DETECT_DATA: {
642
+ data: 'GENERATOR_ONNX_VAD_DATA',
643
+ threshold: 'GENERATOR_ONNX_VAD_THRESHOLD',
644
+ minSpeechDurationMs: 'GENERATOR_ONNX_VAD_MIN_SPEECH_DURATION_MS',
645
+ minSilenceDurationMs: 'GENERATOR_ONNX_VAD_MIN_SILENCE_DURATION_MS',
646
+ maxSpeechDurationS: 'GENERATOR_ONNX_VAD_MAX_SPEECH_DURATION_S',
647
+ speechPadMs: 'GENERATOR_ONNX_VAD_SPEECH_PAD_MS',
648
+ },
649
+ },
624
650
  GENERATOR_SPEECH_INFERENCE: {
625
651
  GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_FILE: {
626
652
  fileUrl: 'GENERATOR_SPEECH_INFERENCE_FILE_URL',
package/compile/index.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  /* eslint-disable no-underscore-dangle -- Uses __typename, __actionName, etc. for type system */
2
- import _ from 'lodash'
2
+ import snakeCase from 'lodash/snakeCase'
3
+ import omit from 'lodash/omit'
3
4
  import { parse as parseAST } from 'acorn'
4
5
  import type { ExportNamedDeclaration, FunctionDeclaration } from 'acorn'
5
6
  import escodegen from 'escodegen'
@@ -70,7 +71,7 @@ const compileEventActionValue = (templateKey, eventKey, value, errorReference) =
70
71
  }
71
72
 
72
73
  const convertOutletKey = (templateKey: string, key: string) =>
73
- `${templateKey}_${_.snakeCase(key).toUpperCase()}`
74
+ `${templateKey}_${snakeCase(key).toUpperCase()}`
74
75
 
75
76
  const compileOutlets = (
76
77
  templateKey: string,
@@ -84,7 +85,7 @@ const compileOutlets = (
84
85
  }, {})
85
86
 
86
87
  const convertEventKey = (templateKey: string, key: string) =>
87
- `${templateKey ? `${templateKey}_` : ''}${_.snakeCase(key).toUpperCase()}`
88
+ `${templateKey ? `${templateKey}_` : ''}${snakeCase(key).toUpperCase()}`
88
89
 
89
90
  const basicAnimationEvents = ['show', 'standby', 'breatheStart']
90
91
 
@@ -421,8 +422,9 @@ const compileAutomation = (automationMap: AutomationMap) =>
421
422
 
422
423
  export const compile = async (app: Application) => {
423
424
  await new Promise((resolve) => setImmediate(resolve, 0))
425
+ const timestamp = Date.now()
424
426
  const config = {
425
- title: app.name,
427
+ title: `${app.name || 'Unknown'}(${timestamp})`,
426
428
  subspace_map: app.subspaces.reduce((subspaceMap, subspace) => {
427
429
  subspaceMap[subspace.id] = {
428
430
  title: subspace.title,
@@ -459,7 +461,7 @@ export const compile = async (app: Application) => {
459
461
  property: animationDef.property,
460
462
  type: animationTypeMap[animationDef.config.__type],
461
463
  config: compileProperty(
462
- _.omit(animationDef.config, '__type'),
464
+ omit(animationDef.config, '__type'),
463
465
  `(animation: ${animation.id}, subspace ${subspace.id})`,
464
466
  ),
465
467
  }
@@ -781,6 +783,7 @@ export const compile = async (app: Application) => {
781
783
  title: dataCalc.title,
782
784
  description: dataCalc.description,
783
785
  }
786
+ if (dataCalc.triggerMode) calc.trigger_type = dataCalc.triggerMode
784
787
  if (dataCalc.__typename === 'DataCalculationMap') {
785
788
  calc.type = 'general'
786
789
  const mapCalc = dataCalc as DataCalculationMap
@@ -915,6 +918,7 @@ export const compile = async (app: Application) => {
915
918
  note: scriptCalc.note,
916
919
  code,
917
920
  enable_async: scriptCalc.enableAsync,
921
+ trigger_mode: scriptCalc.triggerMode,
918
922
  inputs: scriptCalc.inputs.reduce((acc, input) => {
919
923
  acc[input.data().id] = input.key
920
924
  return acc
@@ -967,6 +971,7 @@ export const compile = async (app: Application) => {
967
971
  automation_map: app.automationMap
968
972
  ? compileAutomation(app.automationMap)
969
973
  : app.metadata?.TEMP_automation_map || {},
974
+ update_timestamp: timestamp,
970
975
  }
971
976
  return config
972
977
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fugood/bricks-project",
3
- "version": "2.23.0",
3
+ "version": "2.23.2",
4
4
  "main": "index.ts",
5
5
  "scripts": {
6
6
  "build": "bun scripts/build.js"
@@ -8,6 +8,7 @@
8
8
  "dependencies": {
9
9
  "@fugood/bricks-cli": "^2.23.0",
10
10
  "@huggingface/gguf": "^0.3.2",
11
+ "@iarna/toml": "^3.0.0",
11
12
  "@modelcontextprotocol/sdk": "^1.15.0",
12
13
  "@toon-format/toon": "^2.1.0",
13
14
  "@types/escodegen": "^0.0.10",
@@ -17,6 +18,5 @@
17
18
  "fuse.js": "^7.0.0",
18
19
  "lodash": "^4.17.4",
19
20
  "uuid": "^8.3.1"
20
- },
21
- "gitHead": "398352b9923f97e914ac60acab519ca014aa6fb8"
21
+ }
22
22
  }
@@ -4,15 +4,16 @@ E2E testing and scheduled execution for BRICKS applications. Simulates user beha
4
4
 
5
5
  ## Automation Types
6
6
 
7
- | Type | Description |
8
- |------|-------------|
9
- | `launch` | Run on application launch (restarts app when run manually) |
10
- | `anytime` | Execute anytime via manual trigger |
11
- | `cron` | Scheduled execution using crontab expressions |
7
+ | Type | Description |
8
+ | --------- | ---------------------------------------------------------- |
9
+ | `launch` | Run on application launch (restarts app when run manually) |
10
+ | `anytime` | Execute anytime via manual trigger |
11
+ | `cron` | Scheduled execution using crontab expressions |
12
12
 
13
13
  ## Simulation Actions
14
14
 
15
15
  Automations can simulate:
16
+
16
17
  - **Brick Press**: Tap/click on bricks
17
18
  - **Key Events**: Key up/down for keyboard input
18
19
  - **HTTP Request**: API calls
@@ -21,6 +22,7 @@ Automations can simulate:
21
22
  ## Assertions
22
23
 
23
24
  Automations can validate:
25
+
24
26
  - **Brick Exists**: Check if brick is rendered
25
27
  - **Event Triggered**: Verify event from Brick/Generator/Canvas
26
28
  - **Canvas Changed**: Confirm canvas navigation
@@ -100,25 +102,58 @@ const testLoginFlow: AutomationTest = {
100
102
 
101
103
  ## Test Methods
102
104
 
103
- | Method | Signature | Description |
104
- |--------|-----------|-------------|
105
- | `brick_press` | `[subspace, brick, options?]` | Simulate brick press |
106
- | `brick_exists` | `[subspace, brick, frame?]` | Check brick exists |
107
- | `wait_until_brick_exists` | `[subspace, brick, timeout?, frame?]` | Wait for brick |
108
- | `wait_until_event_trigger` | `[subspace, sender, eventKey, timeout?]` | Wait for event |
109
- | `wait_until_canvas_change` | `[subspace, canvas, timeout?]` | Wait for canvas |
110
- | `keydown` | `[keyCode, pressedKey?, flags?]` | Key down event |
111
- | `keyup` | `[keyCode, pressedKey?, flags?]` | Key up event |
112
- | `http_request` | `[url, options?]` | HTTP request |
113
- | `assert_property` | `[subspace, property, value]` | Assert data value |
114
- | `wait_until_property_change` | `[subspace, property, value, timeout?]` | Wait for value |
115
- | `execute_action` | `[subspace, handler, action, params?, options?]` | Execute action |
116
- | `match_screenshot` | `[name, threshold?, maxRetry?]` | Screenshot compare |
117
- | `delay` | `[subspace?, property?, defaultValue?]` | Delay execution |
105
+ | Method | Signature | Description |
106
+ | ---------------------------- | ------------------------------------------------ | -------------------- |
107
+ | `brick_press` | `[subspace, brick, options?]` | Simulate brick press |
108
+ | `brick_exists` | `[subspace, brick, frame?]` | Check brick exists |
109
+ | `wait_until_brick_exists` | `[subspace, brick, timeout?, frame?]` | Wait for brick |
110
+ | `wait_until_event_trigger` | `[subspace, sender, eventKey, timeout?]` | Wait for event |
111
+ | `wait_until_canvas_change` | `[subspace, canvas, timeout?]` | Wait for canvas |
112
+ | `keydown` | `[keyCode, pressedKey?, flags?]` | Key down event |
113
+ | `keyup` | `[keyCode, pressedKey?, flags?]` | Key up event |
114
+ | `http_request` | `[url, options?]` | HTTP request |
115
+ | `assert_property` | `[subspace, property, value]` | Assert data value |
116
+ | `wait_until_property_change` | `[subspace, property, value, timeout?]` | Wait for value |
117
+ | `execute_action` | `[subspace, handler, action, params?, options?]` | Execute action |
118
+ | `match_screenshot` | `[name, threshold?, maxRetry?]` | Screenshot compare |
119
+ | `delay` | `[subspace?, property?, defaultValue?]` | Delay execution |
120
+
121
+ ### execute_action Params
122
+
123
+ The `params` object in `execute_action` uses **runtime event property keys** from `event-props.ts`, NOT the action config `input` names from type definitions.
124
+
125
+ ```typescript
126
+ // CORRECT — use runtime event property key
127
+ run: ['execute_action', () => subspace0, bricks.bInput.id, 'BRICK_TEXT_INPUT_SET_TEXT',
128
+ { BRICK_TEXT_INPUT_TEXT: 'hello' }]
129
+
130
+ // WRONG — action config input name doesn't work in automation
131
+ run: ['execute_action', () => subspace0, bricks.bInput.id, 'BRICK_TEXT_INPUT_SET_TEXT',
132
+ { text: 'hello' }]
133
+ ```
134
+
135
+ Reference `event-props.ts` for the correct runtime keys (e.g., `BRICK_TEXT_INPUT_TEXT`, `GENERATOR_MQTT_PAYLOAD`).
136
+
137
+ ### Prefer UI Interactions Over Direct Generator Calls
138
+
139
+ For realistic E2E testing, prefer simulating user actions (set text input + press button) over calling generator actions directly:
140
+
141
+ ```typescript
142
+ // GOOD — simulates real user behavior
143
+ { run: ['execute_action', () => sub, bricks.bInput.id, 'BRICK_TEXT_INPUT_SET_TEXT',
144
+ { BRICK_TEXT_INPUT_TEXT: 'hello' }] },
145
+ { run: ['brick_press', () => sub, () => bricks.bSendBtn] },
146
+ { run: ['wait_until_property_change', () => sub, () => data.dPayload, 'hello', 10000] },
147
+
148
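+ // AVOID — bypasses UI, doesn't test the full flow (and if a generator must be called directly, params still need runtime keys such as GENERATOR_MQTT_PAYLOAD, not config input names like `payload`)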
149
+ { run: ['execute_action', () => sub, generators.gClient.id, 'GENERATOR_MQTT_PUBLISH',
150
+ { topic: 'test', payload: 'hello', qos: '0' }] },
151
+ ```
118
152
 
119
153
  ## Recording Automations
120
154
 
121
155
  In BRICKS Editor Preview mode:
156
+
122
157
  1. Perform operations normally
123
158
  2. Open menu (right-bottom corner)
124
159
  3. Select "Record Events as Automation"
@@ -127,12 +162,15 @@ In BRICKS Editor Preview mode:
127
162
  ## Running Automations
128
163
 
129
164
  ### Manual Run
165
+
130
166
  `Menu` → `Automations` → Select automation → `Run`
131
167
 
132
168
  ### On Launch
169
+
133
170
  `Bind Device` → `Select Automation` (only `launch` or `cron` types)
134
171
 
135
172
  ### Scheduled (Cron)
173
+
136
174
  `Bind Device` → `Cron Automation` (allows multi-select)
137
175
 
138
176
  Use [crontab.guru](https://crontab.guru) to build cron expressions.
@@ -156,6 +194,7 @@ Visual regression testing with screenshot comparison:
156
194
  ```
157
195
 
158
196
  Screenshots can be stored:
197
+
159
198
  - Local file system
160
199
  - Media Flow workspace
161
200
 
@@ -165,11 +204,18 @@ First run captures baseline. Use "Run with Update" to update baseline.
165
204
 
166
205
  Automations work with Modules. Use Manual Run in Preview mode for module testing.
167
206
 
207
+ ## Important Notes
208
+
209
+ - **Automation map key**: Always use `'AUTOMATION_MAP_DEFAULT'` as the automation map ID (not `makeId()`). The preview test runner reads from `automationMap['AUTOMATION_MAP_DEFAULT']?.map`.
210
+ - **Valid makeId types**: Use `'test'` for AutomationTest, `'test_case'` for TestCase, `'test_var'` for TestVariable. Do NOT use `'automation_test'` or `'automation_test_map'`.
211
+ - **handler in execute_action**: Pass the entity's `.id` string (e.g., `bricks.bInput.id`), not a getter function.
212
+
168
213
  ## Best Practices
169
214
 
170
215
  1. **Test culture**: Create automations for every significant flow
171
216
  2. **CI/CD integration**: Use `launch` automations for deployment validation
172
- 3. **Incremental waits**: Use `EXPECT_*` steps with appropriate timeouts
217
+ 3. **Incremental waits**: Use `wait_until_property_change` with appropriate timeouts
173
218
  4. **Visual testing**: Add screenshot comparisons for critical UI states
174
219
  5. **Cron monitoring**: Schedule health checks for production displays
175
220
  6. **Isolation**: Each automation should be independent and idempotent
221
+ 7. **UI-first testing**: Simulate real user interactions (text input, button press) rather than calling generators directly
@@ -1,7 +1,7 @@
1
1
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
2
2
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
3
3
  import { z } from 'zod'
4
- import { $ } from 'bun'
4
+ import { $, JSON5 } from 'bun'
5
5
  import * as TOON from '@toon-format/toon'
6
6
  import Fuse from 'fuse.js'
7
7
  import { gguf } from '@huggingface/gguf'
@@ -729,7 +729,7 @@ server.tool(
729
729
 
730
730
  // Return JSON5 text for huggingface_select; consumers should parse it with a JSON5-capable parser
731
731
  return {
732
- content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
732
+ content: [{ type: 'text', text: JSON5.stringify(result, null, 2) }],
733
733
  }
734
734
  }
735
735
 
@@ -842,7 +842,7 @@ server.tool(
842
842
 
843
843
  // Return JSON5 text for huggingface_select; consumers should parse it with a JSON5-capable parser
844
844
  return {
845
- content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
845
+ content: [{ type: 'text', text: JSON5.stringify(result, null, 2) }],
846
846
  }
847
847
  } else {
848
848
  const result = {
@@ -875,7 +875,12 @@ server.tool(
875
875
 
876
876
  // Return JSON5 text for huggingface_select; a trailing `//` hint comment is appended, so the output is not strict JSON
877
877
  return {
878
- content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
878
+ content: [
879
+ {
880
+ type: 'text',
881
+ text: `${JSON5.stringify(result, null, 2)} // Please use _hfRepoInfo undocumented field with @ts-ignore`,
882
+ },
883
+ ],
879
884
  }
880
885
  }
881
886
  } catch (err: any) {
@@ -1,5 +1,6 @@
1
1
  import { $ } from 'bun'
2
2
  import { stat, readFile, writeFile, readdir } from 'fs/promises'
3
+ import TOML from '@iarna/toml'
3
4
 
4
5
  const cwd = process.cwd()
5
6
 
@@ -63,9 +64,7 @@ if (hasClaudeCode || hasAgentsMd) {
63
64
  await handleMcpConfigOverride(mcpConfigPath)
64
65
  }
65
66
 
66
- if (hasClaudeCode) {
67
- // Install skills that don't already exist in the project
68
- const skillsDir = `${cwd}/.claude/skills`
67
+ const setupSkills = async (skillsDir) => {
69
68
  const packageSkillsDir = `${__dirname}/../skills`
70
69
 
71
70
  if (await exists(packageSkillsDir)) {
@@ -81,10 +80,53 @@ if (hasClaudeCode) {
81
80
  console.log(`Skill '${skill}' already exists, skipping`)
82
81
  } else {
83
82
  await $`cp -r ${packageSkillsDir}/${skill} ${targetSkillDir}`
84
- console.log(`Installed skill '${skill}' to .claude/skills/`)
83
+ console.log(`Installed skill '${skill}' to ${skillsDir}/`)
85
84
  }
86
85
  }),
87
86
  )
88
87
  }
89
- // TODO: .codex/skills, .cursor/skills if needed
90
88
  }
89
+
90
+ if (hasClaudeCode) {
91
+ // Install skills that don't already exist in the project
92
+ await setupSkills(`${cwd}/.claude/skills`)
93
+ }
94
+
95
+ if (hasAgentsMd) {
96
+ // Handle codex skills
97
+ // Currently no signal file for codex skills, so we just check if AGENTS.md exists
98
+ await setupSkills(`${cwd}/.codex/skills`)
99
+
100
+ const defaultCodexMcpConfig = {
101
+ mcp_servers: {
102
+ 'bricks-project': projectMcpServer,
103
+ },
104
+ }
105
+
106
+ const handleCodexMcpConfigOverride = async (mcpConfigPath: string) => {
107
+ let mcpConfig: { mcp_servers: Record<string, typeof projectMcpServer> } | null = null
108
+ if (await exists(mcpConfigPath)) {
109
+ const configStr = await readFile(mcpConfigPath, 'utf-8')
110
+ try {
111
+ mcpConfig = TOML.parse(configStr)
112
+ if (!mcpConfig?.mcp_servers) throw new Error('mcp_servers is not defined')
113
+ mcpConfig.mcp_servers['bricks-project'] = projectMcpServer
114
+ } catch (e) {
115
+ mcpConfig = defaultCodexMcpConfig
116
+ }
117
+ } else {
118
+ mcpConfig = defaultCodexMcpConfig
119
+ }
120
+
121
+ await writeFile(mcpConfigPath, `${TOML.stringify(mcpConfig, null, 2)}\n`)
122
+
123
+ console.log(`Updated ${mcpConfigPath}`)
124
+ }
125
+
126
+ // Setup MCP config (.codex/config.toml)
127
+ const codexConfigPath = `${cwd}/.codex/config.toml`
128
+ await handleCodexMcpConfigOverride(codexConfigPath)
129
+ }
130
+
131
+ // TODO: .cursor/skills if needed
132
+ // TODO: User setting in application.json to avoid unnecessary skills/config setup
@@ -42,13 +42,14 @@ let config = JSON.parse(await readFile(`${cwd}/.bricks/build/application-config.
42
42
  let testId = values['test-id'] || null
43
43
  if (!testId && values['test-title-like']) {
44
44
  const titleLike = values['test-title-like'].toLowerCase()
45
- const testMap = config.test_map || {}
46
- const found = Object.entries(testMap).find(([, test]) =>
47
- test.title?.toLowerCase().includes(titleLike),
48
- )
49
- if (found) {
50
- ;[testId] = found
51
- } else {
45
+ const automationMap = config.automation_map || {}
46
+ const matchedEntry = Object.values(automationMap)
47
+ .flatMap((group) => Object.entries(group.map || {}))
48
+ .find(([, test]) => test.title?.toLowerCase().includes(titleLike))
49
+ if (matchedEntry) {
50
+ ;[testId] = matchedEntry
51
+ }
52
+ if (!testId) {
52
53
  throw new Error(`No automation found matching title: ${values['test-title-like']}`)
53
54
  }
54
55
  }
package/tools/preview.ts CHANGED
@@ -2,7 +2,7 @@ import { $ } from 'bun'
2
2
  import { watch } from 'fs'
3
3
  import type { FSWatcher } from 'fs'
4
4
  import { parseArgs } from 'util'
5
- import { debounce } from 'lodash'
5
+ import debounce from 'lodash/debounce'
6
6
 
7
7
  const { values } = parseArgs({
8
8
  args: Bun.argv,
@@ -41,10 +41,10 @@ Default property:
41
41
  "audioSliceSec": 30,
42
42
  "audioMinSec": 1,
43
43
  "maxSlicesInMemory": 5,
44
+ "transcribeProcessingPauseMs": 500,
45
+ "initTranscribeAfterMs": 500,
44
46
  "vadStrategy": "use-preset",
45
47
  "vadPreset": "default",
46
- "autoSliceOnSpeechEnd": true,
47
- "autoSliceThreshold": 2,
48
48
  "initialPrompt": "",
49
49
  "promptPreviousSlices": false,
50
50
  "saveAudio": true,
@@ -73,6 +73,10 @@ Default property:
73
73
  audioMinSec?: number | DataLink
74
74
  /* Maximum number of slices to keep in memory */
75
75
  maxSlicesInMemory?: number | DataLink
76
+ /* Transcribe processing interval in milliseconds */
77
+ transcribeProcessingPauseMs?: number | DataLink
78
+ /* Pause in milliseconds before transcribe processing is initialized */
79
+ initTranscribeAfterMs?: number | DataLink
76
80
  /* VAD Strategy */
77
81
  vadStrategy?: 'use-preset' | 'use-generator-options' | DataLink
78
82
  /* VAD preset configuration */
@@ -86,10 +90,6 @@ Default property:
86
90
  | 'meeting'
87
91
  | 'noisy-environment'
88
92
  | DataLink
89
- /* Auto slice on speech end */
90
- autoSliceOnSpeechEnd?: boolean | DataLink
91
- /* Auto slice threshold in seconds */
92
- autoSliceThreshold?: number | DataLink
93
93
  /* Initial prompt for transcription */
94
94
  initialPrompt?: string | DataLink
95
95
  /* Include previous slices in prompt */
@@ -128,6 +128,8 @@ Default property:
128
128
  onStatusChange?: Array<EventAction>
129
129
  /* Event triggered when statistics update */
130
130
  onStatsUpdate?: Array<EventAction>
131
+ /* Event triggered when slice transcription is stabilized */
132
+ onStabilized?: Array<EventAction>
131
133
  /* Event triggered when transcription ends */
132
134
  onEnd?: Array<EventAction>
133
135
  }
@@ -140,12 +142,16 @@ Default property:
140
142
  results?: () => Data
141
143
  /* Current transcription result text */
142
144
  resultText?: () => Data
145
+ /* Last stabilized transcription result segment */
146
+ lastStabilizedSegment?: () => Data
143
147
  /* Current statistics */
144
148
  statistics?: () => Data
145
149
  /* Latest transcribe event */
146
150
  lastTranscribeEvent?: () => Data
147
151
  /* Latest VAD event */
148
152
  lastVadEvent?: () => Data
153
+ /* Stabilized transcription text from completed slices */
154
+ stabilizedText?: () => Data
149
155
  /* Audio output file path (auto-generated when saving audio) */
150
156
  audioOutputPath?: () => Data
151
157
  }
@@ -170,9 +176,11 @@ export type GeneratorRealtimeTranscription = Generator &
170
176
  | 'isTranscribing'
171
177
  | 'results'
172
178
  | 'resultText'
179
+ | 'lastStabilizedSegment'
173
180
  | 'statistics'
174
181
  | 'lastTranscribeEvent'
175
182
  | 'lastVadEvent'
183
+ | 'stabilizedText'
176
184
  | 'audioOutputPath'
177
185
  value: any
178
186
  }
@@ -147,6 +147,11 @@ export type GeneratorSpeechInferenceActionTranscribeRealtime = ActionWithParams
147
147
  >
148
148
  }
149
149
 
150
+ /* Stop current transcription */
151
+ export type GeneratorSpeechInferenceActionTranscribeStop = Action & {
152
+ __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_STOP'
153
+ }
154
+
150
155
  /* [Deprecated] Stop transcribing microphone audio source */
151
156
  export type GeneratorSpeechInferenceActionTranscribeRealtimeStop = Action & {
152
157
  __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME_STOP'
@@ -0,0 +1,201 @@
1
+ /* Auto generated by build script */
2
+ import type { SwitchCondInnerStateCurrentCanvas, SwitchCondData, SwitchDef } from '../switch'
3
+ import type { Data, DataLink } from '../data'
4
+ import type {
5
+ Brick,
6
+ Generator,
7
+ EventAction,
8
+ ActionWithDataParams,
9
+ ActionWithParams,
10
+ Action,
11
+ EventProperty,
12
+ } from '../common'
13
+
14
+ /* Load the model */
15
+ export type GeneratorVadInferenceOnnxActionLoadModel = Action & {
16
+ __actionName: 'GENERATOR_ONNX_VAD_LOAD_MODEL'
17
+ }
18
+
19
+ /* Detect speech in audio file */
20
+ export type GeneratorVadInferenceOnnxActionDetectFile = ActionWithParams & {
21
+ __actionName: 'GENERATOR_ONNX_VAD_DETECT_FILE'
22
+ params?: Array<
23
+ | {
24
+ input: 'fileUrl'
25
+ value?: string | DataLink | EventProperty
26
+ mapping?: string
27
+ }
28
+ | {
29
+ input: 'threshold'
30
+ value?: number | DataLink | EventProperty
31
+ mapping?: string
32
+ }
33
+ | {
34
+ input: 'minSpeechDurationMs'
35
+ value?: number | DataLink | EventProperty
36
+ mapping?: string
37
+ }
38
+ | {
39
+ input: 'minSilenceDurationMs'
40
+ value?: number | DataLink | EventProperty
41
+ mapping?: string
42
+ }
43
+ | {
44
+ input: 'maxSpeechDurationS'
45
+ value?: number | DataLink | EventProperty
46
+ mapping?: string
47
+ }
48
+ | {
49
+ input: 'speechPadMs'
50
+ value?: number | DataLink | EventProperty
51
+ mapping?: string
52
+ }
53
+ >
54
+ }
55
+
56
+ /* Detect speech in audio data */
57
+ export type GeneratorVadInferenceOnnxActionDetectData = ActionWithParams & {
58
+ __actionName: 'GENERATOR_ONNX_VAD_DETECT_DATA'
59
+ params?: Array<
60
+ | {
61
+ input: 'data'
62
+ value?: any | EventProperty
63
+ mapping?: string
64
+ }
65
+ | {
66
+ input: 'threshold'
67
+ value?: number | DataLink | EventProperty
68
+ mapping?: string
69
+ }
70
+ | {
71
+ input: 'minSpeechDurationMs'
72
+ value?: number | DataLink | EventProperty
73
+ mapping?: string
74
+ }
75
+ | {
76
+ input: 'minSilenceDurationMs'
77
+ value?: number | DataLink | EventProperty
78
+ mapping?: string
79
+ }
80
+ | {
81
+ input: 'maxSpeechDurationS'
82
+ value?: number | DataLink | EventProperty
83
+ mapping?: string
84
+ }
85
+ | {
86
+ input: 'speechPadMs'
87
+ value?: number | DataLink | EventProperty
88
+ mapping?: string
89
+ }
90
+ >
91
+ }
92
+
93
+ /* Clean cache */
94
+ export type GeneratorVadInferenceOnnxActionCleanCache = Action & {
95
+ __actionName: 'GENERATOR_ONNX_VAD_CLEAN_CACHE'
96
+ }
97
+
98
+ /* Release context */
99
+ export type GeneratorVadInferenceOnnxActionReleaseContext = Action & {
100
+ __actionName: 'GENERATOR_ONNX_VAD_RELEASE_CONTEXT'
101
+ }
102
+
103
+ interface GeneratorVadInferenceOnnxDef {
104
+ /*
105
+ Default property:
106
+ {
107
+ "modelType": "auto",
108
+ "detectThreshold": 0.5,
109
+ "detectMinSpeechDurationMs": 250,
110
+ "detectMinSilenceDurationMs": 100,
111
+ "detectMaxSpeechDurationS": 30,
112
+ "detectSpeechPadMs": 30,
113
+ "executionMode": "sequential"
114
+ }
115
+ */
116
+ property?: {
117
+ /* Initialize the VAD context on generator initialization */
118
+ init?: boolean | DataLink
119
+ /* VAD model */
120
+ model?: string | DataLink
121
+ /* Model type (auto-detected from config.json) */
122
+ modelType?: string | DataLink
123
+ /* Quantize type */
124
+ quantizeType?:
125
+ | 'auto'
126
+ | 'none'
127
+ | 'fp16'
128
+ | 'q8'
129
+ | 'int8'
130
+ | 'uint8'
131
+ | 'q4'
132
+ | 'bnb4'
133
+ | 'q4f16'
134
+ | DataLink
135
+ /* Speech probability threshold (0.0-1.0) */
136
+ detectThreshold?: number | DataLink
137
+ /* Minimum speech duration in milliseconds */
138
+ detectMinSpeechDurationMs?: number | DataLink
139
+ /* Minimum silence duration in milliseconds */
140
+ detectMinSilenceDurationMs?: number | DataLink
141
+ /* Maximum speech duration in seconds */
142
+ detectMaxSpeechDurationS?: number | DataLink
143
+ /* Padding around speech segments in milliseconds */
144
+ detectSpeechPadMs?: number | DataLink
145
+ /* Executor candidates, descending order of priority
146
+ Default will be xnnpack, wasm, cpu */
147
+ executors?:
148
+ | Array<'qnn' | 'dml' | 'nnapi' | 'xnnpack' | 'coreml' | 'cpu' | 'wasm' | 'webgpu' | DataLink>
149
+ | DataLink
150
+ /* Execution mode
151
+ Usually when the model has many branches, setting this option to `parallel` will give you better performance. */
152
+ executionMode?: 'sequential' | 'parallel' | DataLink
153
+ /* QNN backend */
154
+ qnnBackend?: 'HTP' | 'HTA' | 'DSP' | 'GPU' | 'CPU' | DataLink
155
+ /* Enable FP16 for QNN HTP */
156
+ qnnHtpEnableFp16?: boolean | DataLink
157
+ /* Enable QNN debug */
158
+ qnnEnableDebug?: boolean | DataLink
159
+ }
160
+ events?: {
161
+ /* Event triggered when context state changes */
162
+ onContextStateChange?: Array<EventAction>
163
+ /* Event triggered when error occurs */
164
+ onError?: Array<EventAction>
165
+ /* Event triggered when detection result is available */
166
+ onDetected?: Array<EventAction>
167
+ }
168
+ outlets?: {
169
+ /* Context state */
170
+ contextState?: () => Data
171
+ /* Is detecting */
172
+ isDetecting?: () => Data
173
+ /* Detection segments result */
174
+ detectionSegments?: () => Data
175
+ /* Detection details */
176
+ detectionDetails?: () => Data
177
+ }
178
+ }
179
+
180
+ /* Local Voice Activity Detection (VAD) inference based on [transformers.js](https://huggingface.co/docs/transformers.js)
181
+ You can use any compatible VAD model from HuggingFace (Silero VAD, smart-turn, etc.) */
182
+ export type GeneratorVadInferenceOnnx = Generator &
183
+ GeneratorVadInferenceOnnxDef & {
184
+ templateKey: 'GENERATOR_ONNX_VAD'
185
+ switches: Array<
186
+ SwitchDef &
187
+ GeneratorVadInferenceOnnxDef & {
188
+ conds?: Array<{
189
+ method: '==' | '!=' | '>' | '<' | '>=' | '<='
190
+ cond:
191
+ | SwitchCondInnerStateCurrentCanvas
192
+ | SwitchCondData
193
+ | {
194
+ __typename: 'SwitchCondInnerStateOutlet'
195
+ outlet: 'contextState' | 'isDetecting' | 'detectionSegments' | 'detectionDetails'
196
+ value: any
197
+ }
198
+ }>
199
+ }
200
+ >
201
+ }
@@ -0,0 +1,123 @@
1
+ /* Auto generated by build script */
2
+ import type { SwitchCondInnerStateCurrentCanvas, SwitchCondData, SwitchDef } from '../switch'
3
+ import type { Data, DataLink } from '../data'
4
+ import type {
5
+ Brick,
6
+ Generator,
7
+ EventAction,
8
+ ActionWithDataParams,
9
+ ActionWithParams,
10
+ Action,
11
+ EventProperty,
12
+ } from '../common'
13
+
14
+ /* Detect speech in audio file */
15
+ export type GeneratorVadInferenceTraditionalActionDetectFile = ActionWithParams & {
16
+ __actionName: 'GENERATOR_TRADITIONAL_VAD_DETECT_FILE'
17
+ params?: Array<{
18
+ input: 'fileUrl'
19
+ value?: string | DataLink | EventProperty
20
+ mapping?: string
21
+ }>
22
+ }
23
+
24
+ /* Detect speech in audio data stream */
25
+ export type GeneratorVadInferenceTraditionalActionDetectData = ActionWithParams & {
26
+ __actionName: 'GENERATOR_TRADITIONAL_VAD_DETECT_DATA'
27
+ params?: Array<{
28
+ input: 'data'
29
+ value?: any | EventProperty
30
+ mapping?: string
31
+ }>
32
+ }
33
+
34
+ interface GeneratorVadInferenceTraditionalDef {
35
+ /*
36
+ Default property:
37
+ {
38
+ "detectVocalFreqMin": 75,
39
+ "detectVocalFreqMax": 900,
40
+ "detectThreshold": 0.5,
41
+ "detectMinSpeechDurationMs": 250,
42
+ "detectMinSilenceDurationMs": 100,
43
+ "detectMaxSpeechDurationS": 30,
44
+ "detectSpeechPadMs": 30
45
+ }
46
+ */
47
+ property?: {
48
+ /* Minimum vocal frequency in Hz */
49
+ detectVocalFreqMin?: number | DataLink
50
+ /* Maximum vocal frequency in Hz */
51
+ detectVocalFreqMax?: number | DataLink
52
+ /* Volume threshold in dB */
53
+ detectVolumeThreshold?: number | DataLink
54
+ /* Speech probability threshold (0.0-1.0) - maps to frequency clarity */
55
+ detectThreshold?: number | DataLink
56
+ /* Minimum speech duration in milliseconds */
57
+ detectMinSpeechDurationMs?: number | DataLink
58
+ /* Minimum silence duration in milliseconds */
59
+ detectMinSilenceDurationMs?: number | DataLink
60
+ /* Maximum speech duration in seconds */
61
+ detectMaxSpeechDurationS?: number | DataLink
62
+ /* Padding around speech segments in milliseconds */
63
+ detectSpeechPadMs?: number | DataLink
64
+ /* The file URL or path to be analyzed */
65
+ detectFileUrl?: string | DataLink
66
+ /* MD5 of file to be analyzed */
67
+ detectFileMd5?: string | DataLink
68
+ }
69
+ events?: {
70
+ /* Event triggered when context state changes */
71
+ onContextStateChange?: Array<EventAction>
72
+ /* Event triggered when detection result is available */
73
+ onDetected?: Array<EventAction>
74
+ /* Event triggered when error occurs */
75
+ onError?: Array<EventAction>
76
+ }
77
+ outlets?: {
78
+ /* Context state */
79
+ contextState?: () => Data
80
+ /* Is detecting */
81
+ isDetecting?: () => Data
82
+ /* Is speaking (real-time) */
83
+ isSpeaking?: () => Data
84
+ /* Detection segments result */
85
+ detectionSegments?: () => Data
86
+ /* Current volume in dB */
87
+ currentVolume?: () => Data
88
+ /* Current frequency clarity (0-1) */
89
+ currentClarity?: () => Data
90
+ /* Current detected frequency in Hz */
91
+ currentFrequency?: () => Data
92
+ }
93
+ }
94
+
95
+ /* Traditional Voice Activity Detection (VAD) using pitch detection and RMS volume analysis
96
+ No model download required - pure algorithmic approach */
97
+ export type GeneratorVadInferenceTraditional = Generator &
98
+ GeneratorVadInferenceTraditionalDef & {
99
+ templateKey: 'GENERATOR_TRADITIONAL_VAD'
100
+ switches: Array<
101
+ SwitchDef &
102
+ GeneratorVadInferenceTraditionalDef & {
103
+ conds?: Array<{
104
+ method: '==' | '!=' | '>' | '<' | '>=' | '<='
105
+ cond:
106
+ | SwitchCondInnerStateCurrentCanvas
107
+ | SwitchCondData
108
+ | {
109
+ __typename: 'SwitchCondInnerStateOutlet'
110
+ outlet:
111
+ | 'contextState'
112
+ | 'isDetecting'
113
+ | 'isSpeaking'
114
+ | 'detectionSegments'
115
+ | 'currentVolume'
116
+ | 'currentClarity'
117
+ | 'currentFrequency'
118
+ value: any
119
+ }
120
+ }>
121
+ }
122
+ >
123
+ }
@@ -33,9 +33,11 @@ export * from './ThermalPrinter'
33
33
  export * from './SqLite'
34
34
  export * from './McpServer'
35
35
  export * from './Mcp'
36
+ export * from './VadTraditional'
36
37
  export * from './TextToSpeechOnnx'
37
38
  export * from './LlmOnnx'
38
39
  export * from './SpeechToTextOnnx'
40
+ export * from './VadOnnx'
39
41
  export * from './SpeechToTextGgml'
40
42
  export * from './VadGgml'
41
43
  export * from './RealtimeTranscription'
package/utils/calc.ts CHANGED
@@ -33,10 +33,11 @@ export const generateDataCalculationMapEditorInfo = (
33
33
  nodes.forEach((node) => {
34
34
  // Count and track inputs
35
35
  if ('inputs' in node) {
36
- const inputs = node.inputs
37
- .filter((input) => input !== null)
38
- .map((input) => (Array.isArray(input) ? input.length : 1))
39
- .reduce((sum, count) => sum + count, 0)
36
+ const inputs = node.inputs.reduce((count, input) => {
37
+ if (input === null) return count
38
+ if (Array.isArray(input)) return count + input.length
39
+ return count + 1
40
+ }, 0)
40
41
  inputCounts.set(node, inputs)
41
42
 
42
43
  // Track connections
@@ -59,10 +60,11 @@ export const generateDataCalculationMapEditorInfo = (
59
60
 
60
61
  // Count outputs
61
62
  if ('outputs' in node) {
62
- const outputs = node.outputs
63
- .filter((output) => output !== null)
64
- .map((output) => (Array.isArray(output) ? output.length : 1))
65
- .reduce((sum, count) => sum + count, 0)
63
+ const outputs = node.outputs.reduce((count, output) => {
64
+ if (output === null) return count
65
+ if (Array.isArray(output)) return count + output.length
66
+ return count + 1
67
+ }, 0)
66
68
  outputCounts.set(node, outputs)
67
69
  } else {
68
70
  outputCounts.set(node, 0)
@@ -733,6 +733,18 @@ export const templateEventPropsMap = {
733
733
  'GENERATOR_MCP_ERROR_MESSAGE', // type: string
734
734
  ],
735
735
  },
736
+ GENERATOR_TRADITIONAL_VAD: {
737
+ onContextStateChange: [
738
+ 'GENERATOR_TRADITIONAL_VAD_CONTEXT_STATE', // type: string
739
+ ],
740
+ onDetected: [
741
+ 'GENERATOR_TRADITIONAL_VAD_DETECTION_SEGMENTS', // type: array
742
+ 'GENERATOR_TRADITIONAL_VAD_DETECTION_TIME', // type: number
743
+ ],
744
+ onError: [
745
+ 'GENERATOR_TRADITIONAL_VAD_ERROR', // type: string
746
+ ],
747
+ },
736
748
  GENERATOR_TTS: {
737
749
  onContextStateChange: [
738
750
  'GENERATOR_TTS_CONTEXT_STATE', // type: string
@@ -767,6 +779,18 @@ export const templateEventPropsMap = {
767
779
  'GENERATOR_ONNX_STT_ERROR', // type: string
768
780
  ],
769
781
  },
782
+ GENERATOR_ONNX_VAD: {
783
+ onContextStateChange: [
784
+ 'GENERATOR_ONNX_VAD_CONTEXT_STATE', // type: string
785
+ ],
786
+ onError: [
787
+ 'GENERATOR_ONNX_VAD_ERROR', // type: string
788
+ ],
789
+ onDetected: [
790
+ 'GENERATOR_ONNX_VAD_DETECTION_SEGMENTS', // type: array
791
+ 'GENERATOR_ONNX_VAD_DETECTION_TIME', // type: number
792
+ ],
793
+ },
770
794
  GENERATOR_SPEECH_INFERENCE: {
771
795
  onContextStateChange: [
772
796
  'GENERATOR_SPEECH_INFERENCE_CONTEXT_STATE', // type: string
@@ -823,6 +847,9 @@ export const templateEventPropsMap = {
823
847
  'GENERATOR_REALTIME_TRANSCRIPTION_STATS_TIMESTAMP', // type: number
824
848
  'GENERATOR_REALTIME_TRANSCRIPTION_STATS', // type: object
825
849
  ],
850
+ onStabilized: [
851
+ 'GENERATOR_REALTIME_TRANSCRIPTION_STABILIZED_TEXT', // type: string
852
+ ],
826
853
  onEnd: [
827
854
  'GENERATOR_REALTIME_TRANSCRIPTION_END_RESULTS', // type: array
828
855
  'GENERATOR_REALTIME_TRANSCRIPTION_END_AUDIO_OUTPUT_PATH', // type: string