@dxos/plugin-transformer 0.8.4-main.d05673bc65 → 0.8.4-main.d9fc60f731
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +102 -5
- package/PLUGIN.mdl +326 -0
- package/README.md +1 -1
- package/dist/lib/neutral/TransformerPlugin.mjs +38 -0
- package/dist/lib/neutral/TransformerPlugin.mjs.map +7 -0
- package/dist/lib/neutral/chunk-SFEKXMAC.mjs +36 -0
- package/dist/lib/neutral/chunk-SFEKXMAC.mjs.map +7 -0
- package/dist/lib/neutral/components/index.mjs +136 -0
- package/dist/lib/neutral/components/index.mjs.map +7 -0
- package/dist/lib/neutral/hooks/index.mjs +339 -0
- package/dist/lib/neutral/hooks/index.mjs.map +7 -0
- package/dist/lib/neutral/index.mjs +7 -0
- package/dist/lib/neutral/meta.json +1 -0
- package/dist/lib/neutral/meta.mjs +7 -0
- package/dist/lib/neutral/plugin.mjs +11 -0
- package/dist/lib/neutral/plugin.mjs.map +7 -0
- package/dist/lib/neutral/translations.mjs +15 -0
- package/dist/lib/neutral/translations.mjs.map +7 -0
- package/dist/types/src/TransformerPlugin.d.ts +1 -0
- package/dist/types/src/TransformerPlugin.d.ts.map +1 -1
- package/dist/types/src/TransformerPlugin.test.d.ts +2 -0
- package/dist/types/src/TransformerPlugin.test.d.ts.map +1 -0
- package/dist/types/src/components/Voice/Voice.d.ts.map +1 -1
- package/dist/types/src/components/Voice/Voice.stories.d.ts.map +1 -1
- package/dist/types/src/hooks/useAudioStream.d.ts.map +1 -1
- package/dist/types/src/hooks/usePipeline.d.ts +1 -16
- package/dist/types/src/hooks/usePipeline.d.ts.map +1 -1
- package/dist/types/src/index.d.ts +0 -1
- package/dist/types/src/index.d.ts.map +1 -1
- package/dist/types/src/meta.d.ts +1 -1
- package/dist/types/src/meta.d.ts.map +1 -1
- package/dist/types/src/plugin.d.ts +3 -0
- package/dist/types/src/plugin.d.ts.map +1 -0
- package/dist/types/src/testing/node-pipeline.d.ts +1 -1
- package/dist/types/src/testing/node-pipeline.d.ts.map +1 -1
- package/dist/types/src/testing/pipeline.d.ts.map +1 -1
- package/dist/types/src/testing/web-pipeline.d.ts +1 -1
- package/dist/types/src/testing/web-pipeline.d.ts.map +1 -1
- package/dist/types/src/translations.d.ts +2 -3
- package/dist/types/src/translations.d.ts.map +1 -1
- package/dist/types/tsconfig.tsbuildinfo +1 -1
- package/package.json +60 -43
- package/src/TransformerPlugin.test.ts +25 -0
- package/src/TransformerPlugin.tsx +11 -3
- package/src/components/Voice/Voice.tsx +1 -1
- package/src/hooks/usePipeline.ts +9 -33
- package/src/index.ts +0 -2
- package/src/meta.ts +21 -6
- package/src/plugin.ts +9 -0
- package/src/translations.ts +2 -2
- package/src/vite-env.d.ts +10 -0
- package/dist/lib/browser/index.mjs +0 -52
- package/dist/lib/browser/index.mjs.map +0 -7
- package/dist/lib/browser/meta.json +0 -1
- package/dist/lib/browser/types/index.mjs +0 -1
- package/dist/lib/node-esm/index.mjs +0 -54
- package/dist/lib/node-esm/index.mjs.map +0 -7
- package/dist/lib/node-esm/meta.json +0 -1
- package/dist/lib/node-esm/types/index.mjs +0 -2
- package/dist/types/src/capabilities/index.d.ts +0 -1
- package/dist/types/src/capabilities/index.d.ts.map +0 -1
- package/src/capabilities/index.ts +0 -3
- /package/dist/lib/{browser/types → neutral}/index.mjs.map +0 -0
- /package/dist/lib/{node-esm/types/index.mjs.map → neutral/meta.mjs.map} +0 -0
package/LICENSE
CHANGED
|
@@ -1,8 +1,105 @@
|
|
|
1
|
-
|
|
2
|
-
Copyright (c) 2022 DXOS
|
|
1
|
+
# Functional Source License, Version 1.1, ALv2 Future License
|
|
3
2
|
|
|
4
|
-
|
|
3
|
+
## Abbreviation
|
|
5
4
|
|
|
6
|
-
|
|
5
|
+
FSL-1.1-Apache-2.0
|
|
7
6
|
|
|
8
|
-
|
|
7
|
+
## Notice
|
|
8
|
+
|
|
9
|
+
Copyright 2026 DXOS
|
|
10
|
+
|
|
11
|
+
## Terms and Conditions
|
|
12
|
+
|
|
13
|
+
### Licensor ("We")
|
|
14
|
+
|
|
15
|
+
The party offering the Software under these Terms and Conditions.
|
|
16
|
+
|
|
17
|
+
### The Software
|
|
18
|
+
|
|
19
|
+
The "Software" is each version of the software that we make available under
|
|
20
|
+
these Terms and Conditions, as indicated by our inclusion of these Terms and
|
|
21
|
+
Conditions with the Software.
|
|
22
|
+
|
|
23
|
+
### License Grant
|
|
24
|
+
|
|
25
|
+
Subject to your compliance with this License Grant and the Patents,
|
|
26
|
+
Redistribution and Trademark clauses below, we hereby grant you the right to
|
|
27
|
+
use, copy, modify, create derivative works, publicly perform, publicly display
|
|
28
|
+
and redistribute the Software for any Permitted Purpose identified below.
|
|
29
|
+
|
|
30
|
+
### Permitted Purpose
|
|
31
|
+
|
|
32
|
+
A Permitted Purpose is any purpose other than a Competing Use. A Competing Use
|
|
33
|
+
means making the Software available to others in a commercial product or
|
|
34
|
+
service that:
|
|
35
|
+
|
|
36
|
+
1. substitutes for the Software;
|
|
37
|
+
|
|
38
|
+
2. substitutes for any other product or service we offer using the Software
|
|
39
|
+
that exists as of the date we make the Software available; or
|
|
40
|
+
|
|
41
|
+
3. offers the same or substantially similar functionality as the Software.
|
|
42
|
+
|
|
43
|
+
Permitted Purposes specifically include using the Software:
|
|
44
|
+
|
|
45
|
+
1. for your internal use and access;
|
|
46
|
+
|
|
47
|
+
2. for non-commercial education;
|
|
48
|
+
|
|
49
|
+
3. for non-commercial research; and
|
|
50
|
+
|
|
51
|
+
4. in connection with professional services that you provide to a licensee
|
|
52
|
+
using the Software in accordance with these Terms and Conditions.
|
|
53
|
+
|
|
54
|
+
### Patents
|
|
55
|
+
|
|
56
|
+
To the extent your use for a Permitted Purpose would necessarily infringe our
|
|
57
|
+
patents, the license grant above includes a license under our patents. If you
|
|
58
|
+
make a claim against any party that the Software infringes or contributes to
|
|
59
|
+
the infringement of any patent, then your patent license to the Software ends
|
|
60
|
+
immediately.
|
|
61
|
+
|
|
62
|
+
### Redistribution
|
|
63
|
+
|
|
64
|
+
The Terms and Conditions apply to all copies, modifications and derivatives of
|
|
65
|
+
the Software.
|
|
66
|
+
|
|
67
|
+
If you redistribute any copies, modifications or derivatives of the Software,
|
|
68
|
+
you must include a copy of or a link to these Terms and Conditions and not
|
|
69
|
+
remove any copyright notices provided in or with the Software.
|
|
70
|
+
|
|
71
|
+
### Disclaimer
|
|
72
|
+
|
|
73
|
+
THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR
|
|
74
|
+
IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR
|
|
75
|
+
PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT.
|
|
76
|
+
|
|
77
|
+
IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE
|
|
78
|
+
SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES,
|
|
79
|
+
EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE.
|
|
80
|
+
|
|
81
|
+
### Trademarks
|
|
82
|
+
|
|
83
|
+
Except for displaying the License Details and identifying us as the origin of
|
|
84
|
+
the Software, you have no right under these Terms and Conditions to use our
|
|
85
|
+
trademarks, trade names, service marks or product names.
|
|
86
|
+
|
|
87
|
+
## Grant of Future License
|
|
88
|
+
|
|
89
|
+
We hereby irrevocably grant you an additional license to use the Software under
|
|
90
|
+
the Apache License, Version 2.0 that is effective on the second anniversary of
|
|
91
|
+
the date we make the Software available. On or after that date, you may use the
|
|
92
|
+
Software under the Apache License, Version 2.0, in which case the following
|
|
93
|
+
will apply:
|
|
94
|
+
|
|
95
|
+
Licensed under the Apache License, Version 2.0 (the "License"); you may not use
|
|
96
|
+
this file except in compliance with the License.
|
|
97
|
+
|
|
98
|
+
You may obtain a copy of the License at
|
|
99
|
+
|
|
100
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
101
|
+
|
|
102
|
+
Unless required by applicable law or agreed to in writing, software distributed
|
|
103
|
+
under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
|
104
|
+
CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
|
105
|
+
specific language governing permissions and limitations under the License.
|
package/PLUGIN.mdl
ADDED
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
---
|
|
2
|
+
id: org.dxos.plugin.transformer
|
|
3
|
+
name: TransformerPlugin
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
A browser-based machine learning plugin for DXOS Composer that runs Hugging Face Transformers.js
|
|
8
|
+
models entirely in-browser via WebAssembly and WebGPU. It provides automatic speech recognition
|
|
9
|
+
(Whisper) through a React hook and component layer, enabling real-time voice transcription without
|
|
10
|
+
any server-side inference infrastructure.
|
|
11
|
+
|
|
12
|
+
## Extensions
|
|
13
|
+
|
|
14
|
+
The following extension dialects are used in this document.
|
|
15
|
+
Each extension is defined in the Appendix or resolved via its URI.
|
|
16
|
+
|
|
17
|
+
| Term | URI |
|
|
18
|
+
|-------------|--------------------------------|
|
|
19
|
+
| `type` | `org.dxos.mdl.type@1.0` |
|
|
20
|
+
| `feat` | `org.dxos.mdl.feat@1.0` |
|
|
21
|
+
| `test` | `org.dxos.mdl.test@1.0` |
|
|
22
|
+
| `component` | `org.dxos.mdl.component@1.0` |
|
|
23
|
+
| `op` | `org.dxos.mdl.op@1.0` |
|
|
24
|
+
|
|
25
|
+
## Types
|
|
26
|
+
|
|
27
|
+
```mdl
|
|
28
|
+
type PipelineConfig
|
|
29
|
+
fields:
|
|
30
|
+
model: string # Hugging Face model id, e.g. 'Xenova/whisper-base'
|
|
31
|
+
active?: boolean # whether to load and run the pipeline
|
|
32
|
+
debug?: boolean # enable verbose logging
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
```mdl
|
|
36
|
+
type PipelineState
|
|
37
|
+
fields:
|
|
38
|
+
gpuInfo: string # WebGPU adapter description, or fallback message
|
|
39
|
+
isLoaded: boolean # true once the model is fully initialised
|
|
40
|
+
isLoading: boolean # true while the model is being downloaded/compiled
|
|
41
|
+
error: string | null # error message if loading failed
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
```mdl
|
|
45
|
+
type AudioStreamConfig
|
|
46
|
+
fields:
|
|
47
|
+
active?: boolean # start/stop microphone capture
|
|
48
|
+
debug?: boolean
|
|
49
|
+
onAudioData?(audioData: Float32Array): Promise<void>
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
```mdl
|
|
53
|
+
type AudioStreamState
|
|
54
|
+
fields:
|
|
55
|
+
stream: MediaStream | null # live microphone stream, or null when inactive
|
|
56
|
+
error: string | null # error from getUserMedia or audio processing
|
|
57
|
+
audioLevel: number # 0-255 RMS level for visualisation
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
```mdl
|
|
61
|
+
type TranscriptionOptions
|
|
62
|
+
fields:
|
|
63
|
+
sampling_rate: number # audio sample rate in Hz (16000)
|
|
64
|
+
chunk_length_s: number # seconds of audio per chunk
|
|
65
|
+
stride_length_s: number # overlap between adjacent chunks
|
|
66
|
+
return_timestamps: boolean # include word-level timestamps in output
|
|
67
|
+
language: string # target language hint, e.g. 'english'
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
```mdl
|
|
71
|
+
type EmbeddingOutput
|
|
72
|
+
fields:
|
|
73
|
+
data: number[] # embedding vector as a flat float array
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Components
|
|
77
|
+
|
|
78
|
+
```mdl
|
|
79
|
+
component Voice
|
|
80
|
+
desc: |
|
|
81
|
+
Composite component that wires usePipeline and useAudioStream together to provide
|
|
82
|
+
live Whisper transcription from the microphone. Accumulates the running transcript
|
|
83
|
+
in local state and renders a DebugInfo panel showing model status, audio level, and
|
|
84
|
+
the current transcription text.
|
|
85
|
+
props:
|
|
86
|
+
active?: boolean # start/stop the entire pipeline
|
|
87
|
+
debug?: boolean
|
|
88
|
+
model?: string # defaults to 'Xenova/whisper-base'
|
|
89
|
+
state:
|
|
90
|
+
isTranscribing: boolean # true while a transcription call is in-flight
|
|
91
|
+
transcription: string # accumulated transcript text
|
|
92
|
+
layout: |
|
|
93
|
+
┌──────────────────────────────────────┐
|
|
94
|
+
│ [DebugInfo panel] │
|
|
95
|
+
│ model: Xenova/whisper-base │
|
|
96
|
+
│ gpu: GPU Available │
|
|
97
|
+
│ stream: active / inactive │
|
|
98
|
+
│ level: ███░░░░ (audioLevel bar) │
|
|
99
|
+
│ ────────────────────────────── │
|
|
100
|
+
│ [transcription text] │
|
|
101
|
+
└──────────────────────────────────────┘
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Operations
|
|
105
|
+
|
|
106
|
+
```mdl
|
|
107
|
+
op LoadModel
|
|
108
|
+
desc: |
|
|
109
|
+
Loads a Transformers.js automatic-speech-recognition pipeline for the given model id.
|
|
110
|
+
Configures the ONNX backend to use WebGPU when it is available, and falls back to WASM/CPU otherwise.
|
|
111
|
+
Invoked automatically when the Voice component mounts with active = true.
|
|
112
|
+
input: void
|
|
113
|
+
output: void
|
|
114
|
+
effects: [http, fs]
|
|
115
|
+
note: Model weights are cached in ./.cache by the Transformers.js env settings.
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
```mdl
|
|
119
|
+
op Transcribe
|
|
120
|
+
desc: |
|
|
121
|
+
Runs the loaded ASR pipeline against a Float32Array of 16 kHz mono audio.
|
|
122
|
+
Returns the transcribed text string from the pipeline result.
|
|
123
|
+
input: Float32Array # 16 kHz mono audio samples to transcribe
|
|
124
|
+
output: string
|
|
125
|
+
effects: [gpu]
|
|
126
|
+
note: Guarded by isTranscribing flag to prevent concurrent calls.
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
```mdl
|
|
130
|
+
op CaptureAudio
|
|
131
|
+
desc: |
|
|
132
|
+
Requests microphone permission via getUserMedia and wires an AudioWorklet that
|
|
133
|
+
accumulates 2-second chunks (32000 samples at 16 kHz) before forwarding them to
|
|
134
|
+
the onAudioData callback.
|
|
135
|
+
input: void
|
|
136
|
+
output: void
|
|
137
|
+
effects: [audio]
|
|
138
|
+
note: Cleaned up automatically when the component unmounts or active becomes false.
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Features
|
|
142
|
+
|
|
143
|
+
```mdl
|
|
144
|
+
feat F-1: In-Browser ASR Pipeline
|
|
145
|
+
|
|
146
|
+
req F-1.1:
|
|
147
|
+
when: Voice component mounts with active = true
|
|
148
|
+
then: usePipeline loads the Whisper model via Transformers.js; isLoading is true until complete
|
|
149
|
+
|
|
150
|
+
req F-1.2:
|
|
151
|
+
when: WebGPU is available in the browser
|
|
152
|
+
then: the ONNX backend is configured for WebGPU execution and gpuInfo reflects the adapter
|
|
153
|
+
|
|
154
|
+
req F-1.3:
|
|
155
|
+
when: WebGPU is not available
|
|
156
|
+
then: the pipeline falls back to WASM/CPU and gpuInfo is set to a 'not supported' message
|
|
157
|
+
|
|
158
|
+
req F-1.4:
|
|
159
|
+
when: model loading fails
|
|
160
|
+
then: PipelineState.error contains a human-readable message; isLoading is false
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
```mdl
|
|
164
|
+
feat F-2: Microphone Capture
|
|
165
|
+
|
|
166
|
+
req F-2.1:
|
|
167
|
+
when: useAudioStream is activated
|
|
168
|
+
then: getUserMedia is called with mono 16 kHz audio and echo/noise cancellation enabled
|
|
169
|
+
|
|
170
|
+
req F-2.2:
|
|
171
|
+
when: audio is captured
|
|
172
|
+
then: an AudioWorklet accumulates samples and emits 2-second chunks via the onAudioData callback
|
|
173
|
+
|
|
174
|
+
req F-2.3:
|
|
175
|
+
when: active is set to false or the component unmounts
|
|
176
|
+
then: all MediaStream tracks are stopped, the AudioContext is closed, and any pending animation-frame callback is cancelled
|
|
177
|
+
|
|
178
|
+
req F-2.4:
|
|
179
|
+
when: getUserMedia is denied
|
|
180
|
+
then: AudioStreamState.error is set with the underlying error message
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
```mdl
|
|
184
|
+
feat F-3: Live Transcription
|
|
185
|
+
|
|
186
|
+
req F-3.1:
|
|
187
|
+
when: a 2-second audio chunk arrives and the model is loaded
|
|
188
|
+
then: usePipeline.transcribe is called with 16 kHz mono options and the result appended to the transcript
|
|
189
|
+
|
|
190
|
+
req F-3.2:
|
|
191
|
+
when: transcription is already in progress (isTranscribing = true)
|
|
192
|
+
then: the incoming audio chunk is dropped to avoid concurrent inference
|
|
193
|
+
|
|
194
|
+
req F-3.3:
|
|
195
|
+
when: transcription returns an empty or whitespace-only string
|
|
196
|
+
then: the transcript state is not updated
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
```mdl
|
|
200
|
+
feat F-4: RAG Embedding Pipeline (Testing)
|
|
201
|
+
|
|
202
|
+
req F-4.1:
|
|
203
|
+
when: RagPipeline.generateCompletions is called with an input and a knowledge base
|
|
204
|
+
then: embeddings are generated for both the input and each knowledge-base entry
|
|
205
|
+
|
|
206
|
+
req F-4.2:
|
|
207
|
+
when: embeddings are available
|
|
208
|
+
then: cosine similarity is computed and the top-3 most similar contexts are selected
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
## Acceptance
|
|
212
|
+
|
|
213
|
+
```mdl
|
|
214
|
+
test T-1: Model loads and transitions isLoaded
|
|
215
|
+
given: Voice component mounts with active = true and model = 'Xenova/whisper-base'
|
|
216
|
+
when: model download and ONNX compilation complete
|
|
217
|
+
then:
|
|
218
|
+
- isLoading transitions from true to false
|
|
219
|
+
- isLoaded is true
|
|
220
|
+
- error is null
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
```mdl
|
|
224
|
+
test T-2: Model load failure sets error state
|
|
225
|
+
given: the model download throws a network error
|
|
226
|
+
when: usePipeline attempts to load the model
|
|
227
|
+
then:
|
|
228
|
+
- isLoading is false
|
|
229
|
+
- error contains the error message string
|
|
230
|
+
- isLoaded is false
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
```mdl
|
|
234
|
+
test T-3: Microphone capture populates stream state
|
|
235
|
+
given: getUserMedia is granted
|
|
236
|
+
when: useAudioStream activates
|
|
237
|
+
then:
|
|
238
|
+
- AudioStreamState.stream is a non-null MediaStream
|
|
239
|
+
- AudioStreamState.error is null
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
```mdl
|
|
243
|
+
test T-4: Audio chunk triggers transcription
|
|
244
|
+
given: model is loaded and stream is active
|
|
245
|
+
when: AudioWorklet posts a 32000-sample chunk
|
|
246
|
+
then:
|
|
247
|
+
- isTranscribing becomes true during the pipeline call
|
|
248
|
+
- transcription string is updated with the returned text
|
|
249
|
+
- isTranscribing returns to false after completion
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
```mdl
|
|
253
|
+
test T-5: Concurrent transcription calls are dropped
|
|
254
|
+
given: isTranscribing is true
|
|
255
|
+
when: a second audio chunk arrives
|
|
256
|
+
then: the chunk is discarded and no second pipeline call is made
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
---
|
|
260
|
+
|
|
261
|
+
## Appendix: Extension Definitions
|
|
262
|
+
|
|
263
|
+
Extension block types used in this document are defined below using
|
|
264
|
+
the core `ext` primitive — the only construct the base language provides.
|
|
265
|
+
|
|
266
|
+
```mdl
|
|
267
|
+
ext type
|
|
268
|
+
uri: org.dxos.mdl.type@1.0
|
|
269
|
+
desc: A named data structure with typed fields and optional literals.
|
|
270
|
+
fields:
|
|
271
|
+
desc?: Prose
|
|
272
|
+
fields?: FieldMap # name[?]: TypeExpr (# inline comment)
|
|
273
|
+
literals?: UnionList # a | b | c
|
|
274
|
+
extends?: TypeRef[]
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
```mdl
|
|
278
|
+
ext feat
|
|
279
|
+
uri: org.dxos.mdl.feat@1.0
|
|
280
|
+
desc: A named feature grouping one or more requirements.
|
|
281
|
+
fields:
|
|
282
|
+
desc?: Prose
|
|
283
|
+
req: RequirementList
|
|
284
|
+
nesting: self # feat blocks may contain feat blocks
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
```mdl
|
|
288
|
+
ext test
|
|
289
|
+
uri: org.dxos.mdl.test@1.0
|
|
290
|
+
desc: An acceptance scenario expressed as given / when / then steps.
|
|
291
|
+
fields:
|
|
292
|
+
given?: Step | Step[]
|
|
293
|
+
when?: Step | Step[]
|
|
294
|
+
then: Step | Step[]
|
|
295
|
+
tags?: TagList
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
```mdl
|
|
299
|
+
ext component
|
|
300
|
+
uri: org.dxos.mdl.component@1.0
|
|
301
|
+
desc: A UI component with props, internal state, slots, actions, and events.
|
|
302
|
+
fields:
|
|
303
|
+
desc?: Prose
|
|
304
|
+
props?: FieldMap # external inputs (immutable inside component)
|
|
305
|
+
state?: FieldMap # internal reactive state
|
|
306
|
+
slots?: FieldMap # named ReactNode injection points
|
|
307
|
+
actions?: ActionMap # methods the component exposes or handles
|
|
308
|
+
emits?: EventMap # events the component raises to its parent
|
|
309
|
+
layout?: CodeBlock # ASCII sketch of visual structure (non-normative)
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
```mdl
|
|
313
|
+
ext op
|
|
314
|
+
uri: org.dxos.mdl.op@1.0
|
|
315
|
+
desc: |
|
|
316
|
+
A named operation with typed inputs, outputs, and declared errors.
|
|
317
|
+
Pure ops have no effects or requires. Effectful ops declare both.
|
|
318
|
+
fields:
|
|
319
|
+
desc?: Prose
|
|
320
|
+
input?: FieldMap # named input parameters
|
|
321
|
+
output?: TypeExpr # return type
|
|
322
|
+
errors?: ErrorMap # name: Prose (when this error occurs)
|
|
323
|
+
effects?: EffectList # echo:read | echo:write | http | fs | ...
|
|
324
|
+
requires?: ServiceList # injected service dependencies
|
|
325
|
+
note?: Prose # implementation guidance (non-normative)
|
|
326
|
+
```
|
package/README.md
CHANGED
|
@@ -12,4 +12,4 @@ Run local transformers.
|
|
|
12
12
|
|
|
13
13
|
Your ideas, issues, and code are most welcome. Please take a look at our [community code of conduct](https://github.com/dxos/dxos/blob/main/CODE_OF_CONDUCT.md), the [issue guide](https://github.com/dxos/dxos/blob/main/CONTRIBUTING.md#submitting-issues), and the [PR contribution guide](https://github.com/dxos/dxos/blob/main/CONTRIBUTING.md#submitting-prs).
|
|
14
14
|
|
|
15
|
-
License: [
|
|
15
|
+
License: [FSL-1.1-Apache-2.0](./LICENSE) Copyright 2023 © DXOS
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
// src/TransformerPlugin.tsx
|
|
2
|
+
import { Plugin } from "@dxos/app-framework";
|
|
3
|
+
import { AppPlugin } from "@dxos/app-toolkit";
|
|
4
|
+
import { meta } from "#meta";
|
|
5
|
+
import { translations } from "#translations";
|
|
6
|
+
|
|
7
|
+
// raw-loader:/__w/dxos/dxos/packages/plugins/plugin-transformer/PLUGIN.mdl?raw
|
|
8
|
+
var PLUGIN_default = "---\nid: org.dxos.plugin.transformer\nname: TransformerPlugin\nversion: 0.1.0\n---\n\nA browser-based machine learning plugin for DXOS Composer that runs Hugging Face Transformers.js\nmodels entirely in-browser via WebAssembly and WebGPU. It provides automatic speech recognition\n(Whisper) through a React hook and component layer, enabling real-time voice transcription without\nany server-side inference infrastructure.\n\n## Extensions\n\nThe following extension dialects are used in this document.\nEach extension is defined in the Appendix or resolved via its URI.\n\n| Term | URI |\n|-------------|--------------------------------|\n| `type` | `org.dxos.mdl.type@1.0` |\n| `feat` | `org.dxos.mdl.feat@1.0` |\n| `test` | `org.dxos.mdl.test@1.0` |\n| `component` | `org.dxos.mdl.component@1.0` |\n| `op` | `org.dxos.mdl.op@1.0` |\n\n## Types\n\n```mdl\ntype PipelineConfig\n fields:\n model: string # Hugging Face model id, e.g. 'Xenova/whisper-base'\n active?: boolean # whether to load and run the pipeline\n debug?: boolean # enable verbose logging\n```\n\n```mdl\ntype PipelineState\n fields:\n gpuInfo: string # WebGPU adapter description, or fallback message\n isLoaded: boolean # true once the model is fully initialised\n isLoading: boolean # true while the model is being downloaded/compiled\n error: string | null # error message if loading failed\n```\n\n```mdl\ntype AudioStreamConfig\n fields:\n active?: boolean # start/stop microphone capture\n debug?: boolean\n onAudioData?(audioData: Float32Array): Promise<void>\n```\n\n```mdl\ntype AudioStreamState\n fields:\n stream: MediaStream | null # live microphone stream, or null when inactive\n error: string | null # error from getUserMedia or audio processing\n audioLevel: number # 0-255 RMS level for visualisation\n```\n\n```mdl\ntype TranscriptionOptions\n fields:\n sampling_rate: number # audio sample rate in Hz (16000)\n chunk_length_s: number # seconds of audio per chunk\n stride_length_s: number 
# overlap between adjacent chunks\n return_timestamps: boolean # include word-level timestamps in output\n language: string # target language hint, e.g. 'english'\n```\n\n```mdl\ntype EmbeddingOutput\n fields:\n data: number[] # embedding vector as a flat float array\n```\n\n## Components\n\n```mdl\ncomponent Voice\n desc: |\n Composite component that wires usePipeline and useAudioStream together to provide\n live Whisper transcription from the microphone. Accumulates the running transcript\n in local state and renders a DebugInfo panel showing model status, audio level, and\n the current transcription text.\n props:\n active?: boolean # start/stop the entire pipeline\n debug?: boolean\n model?: string # defaults to 'Xenova/whisper-base'\n state:\n isTranscribing: boolean # true while a transcription call is in-flight\n transcription: string # accumulated transcript text\n layout: |\n \u250C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n \u2502 [DebugInfo panel] \u2502\n \u2502 model: Xenova/whisper-base \u2502\n \u2502 gpu: GPU Available \u2502\n \u2502 stream: active / inactive \u2502\n \u2502 level: \u2588\u2588\u2588\u2591\u2591\u2591\u2591 (audioLevel bar) \u2502\n \u2502 \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 \u2502\n \u2502 [transcription text] \u2502\n \u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n```\n\n## Operations\n\n```mdl\nop LoadModel\n desc: |\n Loads a Transformers.js automatic-speech-recognition pipeline for the given model id.\n 
Configures ONNX backend to use WebGPU if available, falling back to WASM/CPU.\n Invoked automatically when the Voice component mounts with active = true.\n input: void\n output: void\n effects: [http, fs]\n note: Model weights are cached in ./.cache by the Transformers.js env settings.\n```\n\n```mdl\nop Transcribe\n desc: |\n Runs the loaded ASR pipeline against a Float32Array of 16 kHz mono audio.\n Returns the transcribed text string from the pipeline result.\n input: void\n output: string\n effects: [gpu]\n note: Guarded by isTranscribing flag to prevent concurrent calls.\n```\n\n```mdl\nop CaptureAudio\n desc: |\n Requests microphone permission via getUserMedia and wires an AudioWorklet that\n accumulates 2-second chunks (32000 samples at 16 kHz) before forwarding them to\n the onAudioData callback.\n input: void\n output: void\n effects: [audio]\n note: Cleaned up automatically when the component unmounts or active becomes false.\n```\n\n## Features\n\n```mdl\nfeat F-1: In-Browser ASR Pipeline\n\n req F-1.1:\n when: Voice component mounts with active = true\n then: usePipeline loads the Whisper model via Transformers.js; isLoading is true until complete\n\n req F-1.2:\n when: WebGPU is available in the browser\n then: the ONNX backend is configured for WebGPU execution and gpuInfo reflects the adapter\n\n req F-1.3:\n when: WebGPU is not available\n then: the pipeline falls back to WASM/CPU and gpuInfo is set to a 'not supported' message\n\n req F-1.4:\n when: model loading fails\n then: PipelineState.error contains a human-readable message; isLoading is false\n```\n\n```mdl\nfeat F-2: Microphone Capture\n\n req F-2.1:\n when: useAudioStream is activated\n then: getUserMedia is called with mono 16 kHz audio and echo/noise cancellation enabled\n\n req F-2.2:\n when: audio is captured\n then: an AudioWorklet accumulates samples and emits 2-second chunks via the onAudioData callback\n\n req F-2.3:\n when: active is set to false or the component unmounts\n then: 
all MediaStream tracks are stopped, AudioContext is closed, and AnimationFrame is cancelled\n\n req F-2.4:\n when: getUserMedia is denied\n then: AudioStreamState.error is set with the underlying error message\n```\n\n```mdl\nfeat F-3: Live Transcription\n\n req F-3.1:\n when: a 2-second audio chunk arrives and the model is loaded\n then: usePipeline.transcribe is called with 16 kHz mono options and the result appended to the transcript\n\n req F-3.2:\n when: transcription is already in progress (isTranscribing = true)\n then: the incoming audio chunk is dropped to avoid concurrent inference\n\n req F-3.3:\n when: transcription returns an empty or whitespace-only string\n then: the transcript state is not updated\n```\n\n```mdl\nfeat F-4: RAG Embedding Pipeline (Testing)\n\n req F-4.1:\n when: RagPipeline.generateCompletions is called with an input and a knowledge base\n then: embeddings are generated for both the input and each knowledge-base entry\n\n req F-4.2:\n when: embeddings are available\n then: cosine similarity is computed and the top-3 most similar contexts are selected\n```\n\n## Acceptance\n\n```mdl\ntest T-1: Model loads and transitions isLoaded\n given: Voice component mounts with active = true and model = 'Xenova/whisper-base'\n when: model download and ONNX compilation complete\n then:\n - isLoading transitions from true to false\n - isLoaded is true\n - error is null\n```\n\n```mdl\ntest T-2: Model load failure sets error state\n given: the model download throws a network error\n when: usePipeline attempts to load the model\n then:\n - isLoading is false\n - error contains the error message string\n - isLoaded is false\n```\n\n```mdl\ntest T-3: Microphone capture populates stream state\n given: getUserMedia is granted\n when: useAudioStream activates\n then:\n - AudioStreamState.stream is a non-null MediaStream\n - AudioStreamState.error is null\n```\n\n```mdl\ntest T-4: Audio chunk triggers transcription\n given: model is loaded and stream is 
active\n when: AudioWorklet posts a 32000-sample chunk\n then:\n - isTranscribing becomes true during the pipeline call\n - transcription string is updated with the returned text\n - isTranscribing returns to false after completion\n```\n\n```mdl\ntest T-5: Concurrent transcription calls are dropped\n given: isTranscribing is true\n when: a second audio chunk arrives\n then: the chunk is discarded and no second pipeline call is made\n```\n\n---\n\n## Appendix: Extension Definitions\n\nExtension block types used in this document are defined below using\nthe core `ext` primitive \u2014 the only construct the base language provides.\n\n```mdl\next type\n uri: org.dxos.mdl.type@1.0\n desc: A named data structure with typed fields and optional literals.\n fields:\n desc?: Prose\n fields?: FieldMap # name[?]: TypeExpr (# inline comment)\n literals?: UnionList # a | b | c\n extends?: TypeRef[]\n```\n\n```mdl\next feat\n uri: org.dxos.mdl.feat@1.0\n desc: A named feature grouping one or more requirements.\n fields:\n desc?: Prose\n req: RequirementList\n nesting: self # feat blocks may contain feat blocks\n```\n\n```mdl\next test\n uri: org.dxos.mdl.test@1.0\n desc: An acceptance scenario expressed as given / when / then steps.\n fields:\n given?: Step | Step[]\n when?: Step | Step[]\n then: Step | Step[]\n tags?: TagList\n```\n\n```mdl\next component\n uri: org.dxos.mdl.component@1.0\n desc: A UI component with props, internal state, slots, actions, and events.\n fields:\n desc?: Prose\n props?: FieldMap # external inputs (immutable inside component)\n state?: FieldMap # internal reactive state\n slots?: FieldMap # named ReactNode injection points\n actions?: ActionMap # methods the component exposes or handles\n emits?: EventMap # events the component raises to its parent\n layout?: CodeBlock # ASCII sketch of visual structure (non-normative)\n```\n\n```mdl\next op\n uri: org.dxos.mdl.op@1.0\n desc: |\n A named operation with typed inputs, outputs, and declared errors.\n 
Pure ops have no effects or requires. Effectful ops declare both.\n fields:\n desc?: Prose\n input?: FieldMap # named input parameters\n output?: TypeExpr # return type\n errors?: ErrorMap # name: Prose (when this error occurs)\n effects?: EffectList # echo:read | echo:write | http | fs | ...\n requires?: ServiceList # injected service dependencies\n note?: Prose # implementation guidance (non-normative)\n```\n";
|
|
9
|
+
|
|
10
|
+
// src/TransformerPlugin.tsx
|
|
11
|
+
var TransformerPlugin = Plugin.define(meta).pipe(
|
|
12
|
+
AppPlugin.addSchemaModule({
|
|
13
|
+
schema: []
|
|
14
|
+
}),
|
|
15
|
+
AppPlugin.addTranslationsModule({
|
|
16
|
+
translations
|
|
17
|
+
}),
|
|
18
|
+
// Plugin.addModule({
|
|
19
|
+
// id: 'intent-resolver',
|
|
20
|
+
// activatesOn: Events.SetupIntentResolver,
|
|
21
|
+
// activate: IntentResolver,
|
|
22
|
+
// }),
|
|
23
|
+
AppPlugin.addPluginAssetModule({
|
|
24
|
+
asset: {
|
|
25
|
+
pluginId: meta.id,
|
|
26
|
+
path: "PLUGIN.mdl",
|
|
27
|
+
content: PLUGIN_default,
|
|
28
|
+
mimeType: "application/x-mdl"
|
|
29
|
+
}
|
|
30
|
+
}),
|
|
31
|
+
Plugin.make
|
|
32
|
+
);
|
|
33
|
+
var TransformerPlugin_default = TransformerPlugin;
|
|
34
|
+
export {
|
|
35
|
+
TransformerPlugin,
|
|
36
|
+
TransformerPlugin_default as default
|
|
37
|
+
};
|
|
38
|
+
//# sourceMappingURL=TransformerPlugin.mjs.map
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 3,
|
|
3
|
+
"sources": ["../../../src/TransformerPlugin.tsx", "raw-loader:/__w/dxos/dxos/packages/plugins/plugin-transformer/PLUGIN.mdl?raw"],
|
|
4
|
+
"sourcesContent": ["//\n// Copyright 2024 DXOS.org\n//\n\nimport { Plugin } from '@dxos/app-framework';\nimport { AppPlugin } from '@dxos/app-toolkit';\n\n// import { IntentResolver } from './capabilities';\nimport { meta } from '#meta';\nimport { translations } from '#translations';\n\n// eslint-disable-next-line import/no-relative-packages\nimport pluginSpec from '../PLUGIN.mdl?raw';\n\nexport const TransformerPlugin = Plugin.define(meta).pipe(\n AppPlugin.addSchemaModule({ schema: [] }),\n AppPlugin.addTranslationsModule({ translations }),\n // Plugin.addModule({\n // id: 'intent-resolver',\n // activatesOn: Events.SetupIntentResolver,\n // activate: IntentResolver,\n // }),\n AppPlugin.addPluginAssetModule({\n asset: { pluginId: meta.id, path: 'PLUGIN.mdl', content: pluginSpec, mimeType: 'application/x-mdl' },\n }),\n Plugin.make,\n);\n\nexport default TransformerPlugin;\n", "---\nid: org.dxos.plugin.transformer\nname: TransformerPlugin\nversion: 0.1.0\n---\n\nA browser-based machine learning plugin for DXOS Composer that runs Hugging Face Transformers.js\nmodels entirely in-browser via WebAssembly and WebGPU. It provides automatic speech recognition\n(Whisper) through a React hook and component layer, enabling real-time voice transcription without\nany server-side inference infrastructure.\n\n## Extensions\n\nThe following extension dialects are used in this document.\nEach extension is defined in the Appendix or resolved via its URI.\n\n| Term | URI |\n|-------------|--------------------------------|\n| `type` | `org.dxos.mdl.type@1.0` |\n| `feat` | `org.dxos.mdl.feat@1.0` |\n| `test` | `org.dxos.mdl.test@1.0` |\n| `component` | `org.dxos.mdl.component@1.0` |\n| `op` | `org.dxos.mdl.op@1.0` |\n\n## Types\n\n```mdl\ntype PipelineConfig\n fields:\n model: string # Hugging Face model id, e.g. 
'Xenova/whisper-base'\n active?: boolean # whether to load and run the pipeline\n debug?: boolean # enable verbose logging\n```\n\n```mdl\ntype PipelineState\n fields:\n gpuInfo: string # WebGPU adapter description, or fallback message\n isLoaded: boolean # true once the model is fully initialised\n isLoading: boolean # true while the model is being downloaded/compiled\n error: string | null # error message if loading failed\n```\n\n```mdl\ntype AudioStreamConfig\n fields:\n active?: boolean # start/stop microphone capture\n debug?: boolean\n onAudioData?(audioData: Float32Array): Promise<void>\n```\n\n```mdl\ntype AudioStreamState\n fields:\n stream: MediaStream | null # live microphone stream, or null when inactive\n error: string | null # error from getUserMedia or audio processing\n audioLevel: number # 0-255 RMS level for visualisation\n```\n\n```mdl\ntype TranscriptionOptions\n fields:\n sampling_rate: number # audio sample rate in Hz (16000)\n chunk_length_s: number # seconds of audio per chunk\n stride_length_s: number # overlap between adjacent chunks\n return_timestamps: boolean # include word-level timestamps in output\n language: string # target language hint, e.g. 'english'\n```\n\n```mdl\ntype EmbeddingOutput\n fields:\n data: number[] # embedding vector as a flat float array\n```\n\n## Components\n\n```mdl\ncomponent Voice\n desc: |\n Composite component that wires usePipeline and useAudioStream together to provide\n live Whisper transcription from the microphone. 
Accumulates the running transcript\n in local state and renders a DebugInfo panel showing model status, audio level, and\n the current transcription text.\n props:\n active?: boolean # start/stop the entire pipeline\n debug?: boolean\n model?: string # defaults to 'Xenova/whisper-base'\n state:\n isTranscribing: boolean # true while a transcription call is in-flight\n transcription: string # accumulated transcript text\n layout: |\n \u250C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n \u2502 [DebugInfo panel] \u2502\n \u2502 model: Xenova/whisper-base \u2502\n \u2502 gpu: GPU Available \u2502\n \u2502 stream: active / inactive \u2502\n \u2502 level: \u2588\u2588\u2588\u2591\u2591\u2591\u2591 (audioLevel bar) \u2502\n \u2502 \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 \u2502\n \u2502 [transcription text] \u2502\n \u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n```\n\n## Operations\n\n```mdl\nop LoadModel\n desc: |\n Loads a Transformers.js automatic-speech-recognition pipeline for the given model id.\n Configures ONNX backend to use WebGPU if available, falling back to WASM/CPU.\n Invoked automatically when the Voice component mounts with active = true.\n input: void\n output: void\n effects: [http, fs]\n note: Model weights are cached in ./.cache by the Transformers.js env settings.\n```\n\n```mdl\nop Transcribe\n desc: |\n Runs the loaded ASR pipeline against a Float32Array of 16 kHz mono audio.\n Returns the transcribed text string from the pipeline 
result.\n input: void\n output: string\n effects: [gpu]\n note: Guarded by isTranscribing flag to prevent concurrent calls.\n```\n\n```mdl\nop CaptureAudio\n desc: |\n Requests microphone permission via getUserMedia and wires an AudioWorklet that\n accumulates 2-second chunks (32000 samples at 16 kHz) before forwarding them to\n the onAudioData callback.\n input: void\n output: void\n effects: [audio]\n note: Cleaned up automatically when the component unmounts or active becomes false.\n```\n\n## Features\n\n```mdl\nfeat F-1: In-Browser ASR Pipeline\n\n req F-1.1:\n when: Voice component mounts with active = true\n then: usePipeline loads the Whisper model via Transformers.js; isLoading is true until complete\n\n req F-1.2:\n when: WebGPU is available in the browser\n then: the ONNX backend is configured for WebGPU execution and gpuInfo reflects the adapter\n\n req F-1.3:\n when: WebGPU is not available\n then: the pipeline falls back to WASM/CPU and gpuInfo is set to a 'not supported' message\n\n req F-1.4:\n when: model loading fails\n then: PipelineState.error contains a human-readable message; isLoading is false\n```\n\n```mdl\nfeat F-2: Microphone Capture\n\n req F-2.1:\n when: useAudioStream is activated\n then: getUserMedia is called with mono 16 kHz audio and echo/noise cancellation enabled\n\n req F-2.2:\n when: audio is captured\n then: an AudioWorklet accumulates samples and emits 2-second chunks via the onAudioData callback\n\n req F-2.3:\n when: active is set to false or the component unmounts\n then: all MediaStream tracks are stopped, AudioContext is closed, and AnimationFrame is cancelled\n\n req F-2.4:\n when: getUserMedia is denied\n then: AudioStreamState.error is set with the underlying error message\n```\n\n```mdl\nfeat F-3: Live Transcription\n\n req F-3.1:\n when: a 2-second audio chunk arrives and the model is loaded\n then: usePipeline.transcribe is called with 16 kHz mono options and the result appended to the transcript\n\n req F-3.2:\n 
when: transcription is already in progress (isTranscribing = true)\n then: the incoming audio chunk is dropped to avoid concurrent inference\n\n req F-3.3:\n when: transcription returns an empty or whitespace-only string\n then: the transcript state is not updated\n```\n\n```mdl\nfeat F-4: RAG Embedding Pipeline (Testing)\n\n req F-4.1:\n when: RagPipeline.generateCompletions is called with an input and a knowledge base\n then: embeddings are generated for both the input and each knowledge-base entry\n\n req F-4.2:\n when: embeddings are available\n then: cosine similarity is computed and the top-3 most similar contexts are selected\n```\n\n## Acceptance\n\n```mdl\ntest T-1: Model loads and transitions isLoaded\n given: Voice component mounts with active = true and model = 'Xenova/whisper-base'\n when: model download and ONNX compilation complete\n then:\n - isLoading transitions from true to false\n - isLoaded is true\n - error is null\n```\n\n```mdl\ntest T-2: Model load failure sets error state\n given: the model download throws a network error\n when: usePipeline attempts to load the model\n then:\n - isLoading is false\n - error contains the error message string\n - isLoaded is false\n```\n\n```mdl\ntest T-3: Microphone capture populates stream state\n given: getUserMedia is granted\n when: useAudioStream activates\n then:\n - AudioStreamState.stream is a non-null MediaStream\n - AudioStreamState.error is null\n```\n\n```mdl\ntest T-4: Audio chunk triggers transcription\n given: model is loaded and stream is active\n when: AudioWorklet posts a 32000-sample chunk\n then:\n - isTranscribing becomes true during the pipeline call\n - transcription string is updated with the returned text\n - isTranscribing returns to false after completion\n```\n\n```mdl\ntest T-5: Concurrent transcription calls are dropped\n given: isTranscribing is true\n when: a second audio chunk arrives\n then: the chunk is discarded and no second pipeline call is made\n```\n\n---\n\n## 
Appendix: Extension Definitions\n\nExtension block types used in this document are defined below using\nthe core `ext` primitive \u2014 the only construct the base language provides.\n\n```mdl\next type\n uri: org.dxos.mdl.type@1.0\n desc: A named data structure with typed fields and optional literals.\n fields:\n desc?: Prose\n fields?: FieldMap # name[?]: TypeExpr (# inline comment)\n literals?: UnionList # a | b | c\n extends?: TypeRef[]\n```\n\n```mdl\next feat\n uri: org.dxos.mdl.feat@1.0\n desc: A named feature grouping one or more requirements.\n fields:\n desc?: Prose\n req: RequirementList\n nesting: self # feat blocks may contain feat blocks\n```\n\n```mdl\next test\n uri: org.dxos.mdl.test@1.0\n desc: An acceptance scenario expressed as given / when / then steps.\n fields:\n given?: Step | Step[]\n when?: Step | Step[]\n then: Step | Step[]\n tags?: TagList\n```\n\n```mdl\next component\n uri: org.dxos.mdl.component@1.0\n desc: A UI component with props, internal state, slots, actions, and events.\n fields:\n desc?: Prose\n props?: FieldMap # external inputs (immutable inside component)\n state?: FieldMap # internal reactive state\n slots?: FieldMap # named ReactNode injection points\n actions?: ActionMap # methods the component exposes or handles\n emits?: EventMap # events the component raises to its parent\n layout?: CodeBlock # ASCII sketch of visual structure (non-normative)\n```\n\n```mdl\next op\n uri: org.dxos.mdl.op@1.0\n desc: |\n A named operation with typed inputs, outputs, and declared errors.\n Pure ops have no effects or requires. Effectful ops declare both.\n fields:\n desc?: Prose\n input?: FieldMap # named input parameters\n output?: TypeExpr # return type\n errors?: ErrorMap # name: Prose (when this error occurs)\n effects?: EffectList # echo:read | echo:write | http | fs | ...\n requires?: ServiceList # injected service dependencies\n note?: Prose # implementation guidance (non-normative)\n```\n"],
|
|
5
|
+
"mappings": ";AAIA,SAASA,cAAc;AACvB,SAASC,iBAAiB;AAG1B,SAASC,YAAY;AACrB,SAASC,oBAAoB;;;ACT7B;;;ADcO,IAAMC,oBAAoBC,OAAOC,OAAOC,IAAAA,EAAMC;EACnDC,UAAUC,gBAAgB;IAAEC,QAAQ,CAAA;EAAG,CAAA;EACvCF,UAAUG,sBAAsB;IAAEC;EAAa,CAAA;;;;;;EAM/CJ,UAAUK,qBAAqB;IAC7BC,OAAO;MAAEC,UAAUT,KAAKU;MAAIC,MAAM;MAAcC,SAASC;MAAYC,UAAU;IAAoB;EACrG,CAAA;EACAhB,OAAOiB;AAAI;AAGb,IAAA,4BAAelB;",
|
|
6
|
+
"names": ["Plugin", "AppPlugin", "meta", "translations", "TransformerPlugin", "Plugin", "define", "meta", "pipe", "AppPlugin", "addSchemaModule", "schema", "addTranslationsModule", "translations", "addPluginAssetModule", "asset", "pluginId", "id", "path", "content", "pluginSpec", "mimeType", "make"]
|
|
7
|
+
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
// src/meta.ts
|
|
2
|
+
import { Plugin } from "@dxos/app-framework";
|
|
3
|
+
import { DXN } from "@dxos/keys";
|
|
4
|
+
import { trim } from "@dxos/util";
|
|
5
|
+
var meta = Plugin.makeMeta({
|
|
6
|
+
key: DXN.make("org.dxos.plugin.transformer"),
|
|
7
|
+
name: "Transformer",
|
|
8
|
+
author: "DXOS",
|
|
9
|
+
spec: "PLUGIN.mdl",
|
|
10
|
+
description: trim`
|
|
11
|
+
Browser-based machine learning plugin that runs Hugging Face Transformers.js models
|
|
12
|
+
entirely in-browser via WebAssembly and WebGPU — no server-side inference required.
|
|
13
|
+
|
|
14
|
+
Provides automatic speech recognition through a Whisper pipeline hook (usePipeline)
|
|
15
|
+
and a microphone capture hook (useAudioStream) that buffers 16 kHz mono audio into
|
|
16
|
+
2-second chunks before forwarding them to the model.
|
|
17
|
+
|
|
18
|
+
Exposes a Voice component that wires the two hooks together to deliver live
|
|
19
|
+
transcription, accumulating the running transcript in local state and rendering
|
|
20
|
+
a debug panel with model status, GPU info, and audio level visualisation.
|
|
21
|
+
|
|
22
|
+
Includes a RAG embedding pipeline base class for retrieval-augmented generation
|
|
23
|
+
experiments, with cosine similarity ranking for selecting the most relevant
|
|
24
|
+
knowledge-base contexts before text generation.
|
|
25
|
+
`,
|
|
26
|
+
icon: "ph--cpu--regular",
|
|
27
|
+
source: "https://github.com/dxos/dxos/tree/main/packages/plugins/plugin-transformer",
|
|
28
|
+
tags: [
|
|
29
|
+
"labs"
|
|
30
|
+
]
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
export {
|
|
34
|
+
meta
|
|
35
|
+
};
|
|
36
|
+
//# sourceMappingURL=chunk-SFEKXMAC.mjs.map
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 3,
|
|
3
|
+
"sources": ["../../../src/meta.ts"],
|
|
4
|
+
"sourcesContent": ["//\n// Copyright 2023 DXOS.org\n//\n\nimport { Plugin } from '@dxos/app-framework';\nimport { DXN } from '@dxos/keys';\nimport { trim } from '@dxos/util';\n\nexport const meta = Plugin.makeMeta({\n key: DXN.make('org.dxos.plugin.transformer'),\n name: 'Transformer',\n author: 'DXOS',\n spec: 'PLUGIN.mdl',\n description: trim`\n Browser-based machine learning plugin that runs Hugging Face Transformers.js models\n entirely in-browser via WebAssembly and WebGPU — no server-side inference required.\n\n Provides automatic speech recognition through a Whisper pipeline hook (usePipeline)\n and a microphone capture hook (useAudioStream) that buffers 16 kHz mono audio into\n 2-second chunks before forwarding them to the model.\n\n Exposes a Voice component that wires the two hooks together to deliver live\n transcription, accumulating the running transcript in local state and rendering\n a debug panel with model status, GPU info, and audio level visualisation.\n\n Includes a RAG embedding pipeline base class for retrieval-augmented generation\n experiments, with cosine similarity ranking for selecting the most relevant\n knowledge-base contexts before text generation.\n `,\n icon: 'ph--cpu--regular',\n source: 'https://github.com/dxos/dxos/tree/main/packages/plugins/plugin-transformer',\n tags: ['labs'],\n});\n"],
|
|
5
|
+
"mappings": ";AAIA,SAASA,cAAc;AACvB,SAASC,WAAW;AACpB,SAASC,YAAY;AAEd,IAAMC,OAAOH,OAAOI,SAAS;EAClCC,KAAKJ,IAAIK,KAAK,6BAAA;EACdC,MAAM;EACNC,QAAQ;EACRC,MAAM;EACNC,aAAaR;;;;;;;;;;;;;;;;EAgBbS,MAAM;EACNC,QAAQ;EACRC,MAAM;IAAC;;AACT,CAAA;",
|
|
6
|
+
"names": ["Plugin", "DXN", "trim", "meta", "makeMeta", "key", "make", "name", "author", "spec", "description", "icon", "source", "tags"]
|
|
7
|
+
}
|