streaming-sortformer-node 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/DiarizeStream.d.ts +45 -0
- package/dist/DiarizeStream.d.ts.map +1 -0
- package/dist/DiarizeStream.js +189 -0
- package/dist/DiarizeStream.js.map +1 -0
- package/dist/Sortformer.d.ts +8 -4
- package/dist/Sortformer.d.ts.map +1 -1
- package/dist/Sortformer.js +35 -37
- package/dist/Sortformer.js.map +1 -1
- package/dist/StreamingSession.d.ts +2 -2
- package/dist/StreamingSession.d.ts.map +1 -1
- package/dist/StreamingSession.js +4 -4
- package/dist/StreamingSession.js.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/types.d.ts +42 -23
- package/dist/types.d.ts.map +1 -1
- package/package.json +3 -3
- package/src/DiarizeStream.ts +263 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { StreamingSession } from './StreamingSession.js';
|
|
2
|
+
import type { DiarizeStreamOptions, DiarizeStreamFeedResult, DiarizeStreamFlushResult } from './types.js';
|
|
3
|
+
/**
|
|
4
|
+
* Real-time diarization stream that wraps StreamingSession and performs
|
|
5
|
+
* threshold → median filter → segment extraction, emitting completed
|
|
6
|
+
* speaker segments via callbacks as audio is fed incrementally.
|
|
7
|
+
*/
|
|
8
|
+
export declare class DiarizeStream {
|
|
9
|
+
/** Wrapped StreamingSession; feed/flush/reset/close are delegated to it. */
private session;
|
|
10
|
+
/** Activity cutoff: a prediction strictly greater than this counts as active (defaults to 0.5). */
private threshold;
|
|
11
|
+
/** Median filter window length in frames (defaults to 11). */
private medianFilterWin;
|
|
12
|
+
/** floor(medianFilterWin / 2): how many trailing frames are held back until look-ahead arrives. */
private half;
|
|
13
|
+
/** Optional callback invoked with each completed segment. */
private onSegment?;
|
|
14
|
+
/** Optional callback invoked with the raw predictions and frame count of each inference result. */
private onFrames?;
|
|
15
|
+
/** Thresholded 0/1 frames that are not settled yet, stored frame-major with one slot per speaker. */
private pendingBinary;
|
|
16
|
+
/** Number of frames currently held in pendingBinary. */
private pendingLen;
|
|
17
|
+
/** Number of frames settled so far, i.e. the global index of the next frame to settle. */
private globalFrameOffset;
|
|
18
|
+
/** Per-speaker flag: true while a segment is open for that speaker. */
private speakerActive;
|
|
19
|
+
/** Per-speaker global frame index at which the currently open segment started. */
private segmentStart;
|
|
20
|
+
/** Set once close() has run. */
private _closed;
|
|
21
|
+
/**
* @param session - Streaming session that produces per-frame speaker predictions.
* @param options - Optional threshold, medianFilter, onSegment and onFrames settings.
* @throws Error if options.medianFilter is less than 1 or even.
*/
constructor(session: StreamingSession, options?: DiarizeStreamOptions);
|
|
22
|
+
/**
|
|
23
|
+
* Feed audio samples, run inference, and post-process predictions.
|
|
24
|
+
* Returns segments that completed during this call.
|
|
25
|
+
* @throws Error if the stream is closed.
*/
|
|
26
|
+
feed(audio: Float32Array): Promise<DiarizeStreamFeedResult>;
|
|
27
|
+
/**
|
|
28
|
+
* Flush remaining buffered frames at end of stream.
|
|
29
|
+
* Zero-pads future frames to settle all pending predictions.
|
|
30
|
+
* Any segment still open afterwards is closed and included in the result.
* @throws Error if the stream is closed.
*/
|
|
31
|
+
flush(): Promise<DiarizeStreamFlushResult>;
|
|
32
|
+
/**
* Reset the wrapped session and clear pending frames, the frame offset and
* all open-segment state.
* @throws Error if the stream is closed.
*/
reset(): void;
|
|
33
|
+
/** Close the wrapped session and mark this stream closed; later calls are no-ops. */
close(): void;
|
|
34
|
+
/** Frame total reported by the wrapped session, or 0 once the stream is closed. */
get totalFrames(): number;
|
|
35
|
+
/** True once close() has been called. */
get isClosed(): boolean;
|
|
36
|
+
/** Converts raw predictions into 0/1 activity flags using the configured threshold. */
private thresholdToBinary;
|
|
37
|
+
/**
|
|
38
|
+
* Median filter + segment detection on settled frames.
|
|
39
|
+
* `combined` has `combinedLen` binary frames; only first `settledCount` are emitted.
|
|
40
|
+
* Remaining frames provide look-ahead context. Zero-pads out-of-bounds (matches C impl).
|
|
41
|
+
*/
|
|
42
|
+
private processSettledFrames;
|
|
43
|
+
/** Ends every open segment at the current frame offset, reports each via onSegment and returns them. */
private closeAllOpenSegments;
|
|
44
|
+
}
|
|
45
|
+
//# sourceMappingURL=DiarizeStream.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DiarizeStream.d.ts","sourceRoot":"","sources":["../src/DiarizeStream.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,KAAK,EAIV,oBAAoB,EACpB,uBAAuB,EACvB,wBAAwB,EACzB,MAAM,YAAY,CAAC;AAKpB;;;;GAIG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,OAAO,CAAmB;IAClC,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,eAAe,CAAS;IAChC,OAAO,CAAC,IAAI,CAAS;IACrB,OAAO,CAAC,SAAS,CAAC,CAAkB;IACpC,OAAO,CAAC,QAAQ,CAAC,CAAgB;IAEjC,OAAO,CAAC,aAAa,CAAa;IAClC,OAAO,CAAC,UAAU,CAAS;IAE3B,OAAO,CAAC,iBAAiB,CAAS;IAElC,OAAO,CAAC,aAAa,CAAY;IACjC,OAAO,CAAC,YAAY,CAAW;IAE/B,OAAO,CAAC,OAAO,CAAU;gBAEb,OAAO,EAAE,gBAAgB,EAAE,OAAO,GAAE,oBAAyB;IAqBzE;;;OAGG;IACG,IAAI,CAAC,KAAK,EAAE,YAAY,GAAG,OAAO,CAAC,uBAAuB,CAAC;IA8CjE;;;OAGG;IACG,KAAK,IAAI,OAAO,CAAC,wBAAwB,CAAC;IA8ChD,KAAK,IAAI,IAAI;IAYb,KAAK,IAAI,IAAI;IAOb,IAAI,WAAW,IAAI,MAAM,CAGxB;IAED,IAAI,QAAQ,IAAI,OAAO,CAEtB;IAED,OAAO,CAAC,iBAAiB;IASzB;;;;OAIG;IACH,OAAO,CAAC,oBAAoB;IA6C5B,OAAO,CAAC,oBAAoB;CAkB7B"}
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
const NUM_SPEAKERS = 4;
const FRAME_DURATION = 0.08; // 80ms per frame (160 samples * 8x subsampling / 16kHz)
/**
 * Real-time diarization stream that wraps StreamingSession and performs
 * threshold → median filter → segment extraction, emitting completed
 * speaker segments via callbacks as audio is fed incrementally.
 *
 * Frames are stored frame-major: the flag for speaker `s` of frame `f` lives
 * at index `f * NUM_SPEAKERS + s`.
 */
export class DiarizeStream {
    /**
     * @param session - Object exposing async `feed(audio)` / `flush()` that
     *   resolve to `{ predictions, frameCount }`, plus `reset()`, `close()`
     *   and `totalFrames`.
     * @param options - Optional `threshold` (default 0.5), `medianFilter`
     *   window length (default 11), `onSegment` and `onFrames` callbacks.
     * @throws Error if `medianFilter` is not a positive odd integer.
     */
    constructor(session, options = {}) {
        this.session = session;
        this.threshold = options.threshold ?? 0.5;
        this.medianFilterWin = options.medianFilter ?? 11;
        this.onSegment = options.onSegment;
        this.onFrames = options.onFrames;
        // Number.isInteger also rejects NaN and fractional windows such as 3.5,
        // which the bare `< 1 || % 2 === 0` test let through.
        if (!Number.isInteger(this.medianFilterWin) ||
            this.medianFilterWin < 1 ||
            this.medianFilterWin % 2 === 0) {
            throw new Error('medianFilter must be a positive odd integer');
        }
        this.half = Math.floor(this.medianFilterWin / 2);
        // Frames received but not yet settled (they still need look-ahead).
        this.pendingBinary = new Uint8Array(this.half * NUM_SPEAKERS);
        this.pendingLen = 0;
        // Last settled frames, kept only as left context for the median window
        // so that results do not depend on how the audio was chunked.
        this.historyBinary = new Uint8Array(this.half * NUM_SPEAKERS);
        this.historyLen = 0;
        this.globalFrameOffset = 0;
        this.speakerActive = new Array(NUM_SPEAKERS).fill(false);
        this.segmentStart = new Array(NUM_SPEAKERS).fill(0);
        this._closed = false;
    }
    /**
     * Feed audio samples, run inference, and post-process predictions.
     * Returns segments that completed during this call.
     *
     * @param audio - Audio samples forwarded unchanged to the session.
     * @returns `{ frameCount, segments }` for this call.
     * @throws Error if the stream is closed.
     */
    async feed(audio) {
        if (this._closed) {
            throw new Error('DiarizeStream is closed');
        }
        const result = await this.session.feed(audio);
        if (this.onFrames) {
            this.onFrames(result.predictions, result.frameCount);
        }
        if (result.frameCount === 0) {
            return { frameCount: 0, segments: [] };
        }
        const newBinary = this.thresholdToBinary(result.predictions, result.frameCount);
        const contextLen = this.historyLen;
        const liveLen = this.pendingLen + result.frameCount;
        const combinedLen = contextLen + liveLen;
        const combined = this.joinFrames(newBinary, result.frameCount);
        // Last `half` frames are unsettled (need future context); rest are settled
        const settledCount = Math.max(0, liveLen - this.half);
        const segments = this.processSettledFrames(combined, combinedLen, settledCount, contextLen);
        this.retainTail(combined, combinedLen, contextLen + settledCount);
        return { frameCount: result.frameCount, segments };
    }
    /**
     * Flush remaining buffered frames at end of stream.
     * Zero-pads future frames to settle all pending predictions, then closes
     * every segment that is still open.
     *
     * @returns `{ frameCount, segments }` for the flushed frames.
     * @throws Error if the stream is closed.
     */
    async flush() {
        if (this._closed) {
            throw new Error('DiarizeStream is closed');
        }
        const result = await this.session.flush();
        if (this.onFrames && result.frameCount > 0) {
            this.onFrames(result.predictions, result.frameCount);
        }
        const newBinary = result.frameCount > 0
            ? this.thresholdToBinary(result.predictions, result.frameCount)
            : new Uint8Array(0);
        const contextLen = this.historyLen;
        const liveLen = this.pendingLen + result.frameCount;
        if (liveLen === 0) {
            this.historyLen = 0;
            return { frameCount: 0, segments: this.closeAllOpenSegments() };
        }
        const combinedLen = contextLen + liveLen;
        const combined = this.joinFrames(newBinary, result.frameCount);
        // On flush, all frames settle (zero-pad future, matching C edge behavior)
        const segments = this.processSettledFrames(combined, combinedLen, liveLen, contextLen);
        // End of stream: nothing is pending and frames fed later start with a
        // zero-padded left edge again.
        this.pendingLen = 0;
        this.historyLen = 0;
        for (const seg of this.closeAllOpenSegments()) {
            segments.push(seg);
        }
        return { frameCount: result.frameCount, segments };
    }
    /**
     * Reset the session and drop all buffered frames and open-segment state.
     *
     * @throws Error if the stream is closed.
     */
    reset() {
        if (this._closed) {
            throw new Error('DiarizeStream is closed');
        }
        this.session.reset();
        this.pendingLen = 0;
        this.pendingBinary.fill(0);
        this.historyLen = 0;
        this.historyBinary.fill(0);
        this.globalFrameOffset = 0;
        this.speakerActive.fill(false);
        this.segmentStart.fill(0);
    }
    /** Close the wrapped session; calling it again is a no-op. */
    close() {
        if (!this._closed) {
            this.session.close();
            this._closed = true;
        }
    }
    /** Frame total reported by the session, or 0 once the stream is closed. */
    get totalFrames() {
        if (this._closed)
            return 0;
        return this.session.totalFrames;
    }
    /** True once close() has been called. */
    get isClosed() {
        return this._closed;
    }
    /**
     * Convert raw predictions to 0/1 flags; a value is active only when it is
     * strictly greater than the threshold.
     */
    thresholdToBinary(predictions, frameCount) {
        const binary = new Uint8Array(frameCount * NUM_SPEAKERS);
        const thresh = this.threshold;
        for (let i = 0; i < frameCount * NUM_SPEAKERS; i++) {
            binary[i] = predictions[i] > thresh ? 1 : 0;
        }
        return binary;
    }
    /**
     * Build a fresh buffer laid out as [history | pending | new frames].
     */
    joinFrames(newBinary, newCount) {
        const historyCells = this.historyLen * NUM_SPEAKERS;
        const pendingCells = this.pendingLen * NUM_SPEAKERS;
        const combined = new Uint8Array(historyCells + pendingCells + newCount * NUM_SPEAKERS);
        combined.set(this.historyBinary.subarray(0, historyCells), 0);
        combined.set(this.pendingBinary.subarray(0, pendingCells), historyCells);
        combined.set(newBinary, historyCells + pendingCells);
        return combined;
    }
    /**
     * Store the frames needed by the next call: up to `half` frames before
     * `unsettledStart` become history, everything from `unsettledStart` on
     * stays pending. `combined` is never one of the retained buffers, so the
     * copies cannot overlap.
     */
    retainTail(combined, combinedLen, unsettledStart) {
        const keep = Math.min(this.half, unsettledStart);
        this.historyBinary.set(combined.subarray((unsettledStart - keep) * NUM_SPEAKERS, unsettledStart * NUM_SPEAKERS), 0);
        this.historyLen = keep;
        this.pendingBinary.set(combined.subarray(unsettledStart * NUM_SPEAKERS, combinedLen * NUM_SPEAKERS), 0);
        this.pendingLen = combinedLen - unsettledStart;
    }
    /**
     * Median filter + segment detection on settled frames.
     * `combined` has `combinedLen` binary frames. The first `contextLen` of
     * them are already-settled history used only as left context; the next
     * `settledCount` frames are emitted; the rest provide look-ahead.
     * Positions outside the buffer count as zero (matches C impl).
     *
     * @param contextLen - Leading history frames to skip; defaults to 0.
     * @returns Segments that ended within the settled frames.
     */
    processSettledFrames(combined, combinedLen, settledCount, contextLen = 0) {
        const segments = [];
        const win = this.medianFilterWin;
        const half = this.half;
        for (let s = 0; s < NUM_SPEAKERS; s++) {
            for (let k = 0; k < settledCount; k++) {
                const center = contextLen + k;
                // Clamping the window is equivalent to zero-padding it.
                const lo = Math.max(0, center - half);
                const hi = Math.min(combinedLen, center + half + 1);
                let ones = 0;
                for (let j = lo; j < hi; j++) {
                    ones += combined[j * NUM_SPEAKERS + s];
                }
                // Majority vote over the full window length.
                const active = ones * 2 > win;
                const globalFrame = this.globalFrameOffset + k;
                if (active && !this.speakerActive[s]) {
                    this.speakerActive[s] = true;
                    this.segmentStart[s] = globalFrame;
                }
                else if (!active && this.speakerActive[s]) {
                    segments.push(this.finishSegment(s, globalFrame));
                }
            }
        }
        this.globalFrameOffset += settledCount;
        return segments;
    }
    /**
     * Mark `speaker` inactive, build its segment ending at `endFrame`
     * (exclusive) and report it through `onSegment` when set.
     */
    finishSegment(speaker, endFrame) {
        this.speakerActive[speaker] = false;
        const seg = {
            speaker,
            start: this.segmentStart[speaker] * FRAME_DURATION,
            duration: (endFrame - this.segmentStart[speaker]) * FRAME_DURATION,
        };
        if (this.onSegment) {
            this.onSegment(seg);
        }
        return seg;
    }
    /** End every open segment at the current frame offset and return them. */
    closeAllOpenSegments() {
        const segments = [];
        for (let s = 0; s < NUM_SPEAKERS; s++) {
            if (this.speakerActive[s]) {
                segments.push(this.finishSegment(s, this.globalFrameOffset));
            }
        }
        return segments;
    }
}
|
|
189
|
+
//# sourceMappingURL=DiarizeStream.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DiarizeStream.js","sourceRoot":"","sources":["../src/DiarizeStream.ts"],"names":[],"mappings":"AAUA,MAAM,YAAY,GAAG,CAAC,CAAC;AACvB,MAAM,cAAc,GAAG,IAAI,CAAC,CAAC,wDAAwD;AAErF;;;;GAIG;AACH,MAAM,OAAO,aAAa;IAkBxB,YAAY,OAAyB,EAAE,UAAgC,EAAE;QACvE,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,GAAG,CAAC;QAC1C,IAAI,CAAC,eAAe,GAAG,OAAO,CAAC,YAAY,IAAI,EAAE,CAAC;QAClD,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;QACnC,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;QAEjC,IAAI,IAAI,CAAC,eAAe,GAAG,CAAC,IAAI,IAAI,CAAC,eAAe,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YAC/D,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;QACjE,CAAC;QAED,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,eAAe,GAAG,CAAC,CAAC,CAAC;QACjD,IAAI,CAAC,aAAa,GAAG,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,GAAG,YAAY,CAAC,CAAC;QAC9D,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC;QACpB,IAAI,CAAC,iBAAiB,GAAG,CAAC,CAAC;QAE3B,IAAI,CAAC,aAAa,GAAG,IAAI,KAAK,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACzD,IAAI,CAAC,YAAY,GAAG,IAAI,KAAK,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpD,IAAI,CAAC,OAAO,GAAG,KAAK,CAAC;IACvB,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,IAAI,CAAC,KAAmB;QAC5B,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC7C,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAE9C,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;QACvD,CAAC;QAED,IAAI,MAAM,CAAC,UAAU,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO,EAAE,UAAU,EAAE,CAAC,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;QACzC,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;QAEhF,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;QACxD,MAAM,QAAQ,GAAG,IAAI,UAAU,CAAC,WAAW,GAAG,YAAY,CAAC,CAAC;QAE5D,IAAI,IAAI,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC;YACxB,QAAQ,CAAC,GAAG,CACV,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC,EAAE,IAAI,CAAC,UAAU,GAAG,YAAY,CAAC,EAC9D,CAAC,CACF,CAAC;QACJ,CAAC;QACD,QAAQ,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,UAAU,GAAG,YAAY,CAAC,CAAC;QAExD,2EAA2E;QAC3E,MAAM,YAAY,GAAG,
IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1D,MAAM,cAAc,GAAG,YAAY,CAAC;QACpC,MAAM,cAAc,GAAG,WAAW,GAAG,YAAY,CAAC;QAElD,MAAM,QAAQ,GAAG,IAAI,CAAC,oBAAoB,CAAC,QAAQ,EAAE,WAAW,EAAE,YAAY,CAAC,CAAC;QAEhF,IAAI,CAAC,UAAU,GAAG,cAAc,CAAC;QACjC,IAAI,cAAc,GAAG,CAAC,EAAE,CAAC;YACvB,IAAI,CAAC,aAAa,CAAC,GAAG,CACpB,QAAQ,CAAC,QAAQ,CAAC,cAAc,GAAG,YAAY,EAAE,WAAW,GAAG,YAAY,CAAC,EAC5E,CAAC,CACF,CAAC;QACJ,CAAC;QAED,OAAO,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,QAAQ,EAAE,CAAC;IACrD,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,KAAK;QACT,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC7C,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QAE1C,IAAI,IAAI,CAAC,QAAQ,IAAI,MAAM,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC;YAC3C,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;QACvD,CAAC;QAED,MAAM,SAAS,GAAG,MAAM,CAAC,UAAU,GAAG,CAAC;YACrC,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,UAAU,CAAC;YAC/D,CAAC,CAAC,IAAI,UAAU,CAAC,CAAC,CAAC,CAAC;QAEtB,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;QAExD,IAAI,WAAW,KAAK,CAAC,EAAE,CAAC;YACtB,MAAM,QAAQ,GAAG,IAAI,CAAC,oBAAoB,EAAE,CAAC;YAC7C,OAAO,EAAE,UAAU,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC;QACrC,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,UAAU,CAAC,WAAW,GAAG,YAAY,CAAC,CAAC;QAC5D,IAAI,IAAI,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC;YACxB,QAAQ,CAAC,GAAG,CACV,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC,EAAE,IAAI,CAAC,UAAU,GAAG,YAAY,CAAC,EAC9D,CAAC,CACF,CAAC;QACJ,CAAC;QACD,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,QAAQ,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,UAAU,GAAG,YAAY,CAAC,CAAC;QAC1D,CAAC;QAED,0EAA0E;QAC1E,MAAM,QAAQ,GAAG,IAAI,CAAC,oBAAoB,CAAC,QAAQ,EAAE,WAAW,EAAE,WAAW,CAAC,CAAC;QAE/E,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC;QAEpB,MAAM,eAAe,GAAG,IAAI,CAAC,oBAAoB,EAAE,CAAC;QACpD,KAAK,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;YAClC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACrB,CAAC;QAED,OAAO,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,QAAQ,EAAE,CAAC;IACrD,CAAC;IAED,KAAK;QACH,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC7C,CAAC;QACD,IAAI,CA
AC,OAAO,CAAC,KAAK,EAAE,CAAC;QACrB,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC;QACpB,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3B,IAAI,CAAC,iBAAiB,GAAG,CAAC,CAAC;QAC3B,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC/B,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC5B,CAAC;IAED,KAAK;QACH,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAClB,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YACrB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;IACH,CAAC;IAED,IAAI,WAAW;QACb,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO,CAAC,CAAC;QAC3B,OAAO,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC;IAClC,CAAC;IAED,IAAI,QAAQ;QACV,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;IAEO,iBAAiB,CAAC,WAAyB,EAAE,UAAkB;QACrE,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,UAAU,GAAG,YAAY,CAAC,CAAC;QACzD,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC;QAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,GAAG,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;YACnD,MAAM,CAAC,CAAC,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9C,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;;OAIG;IACK,oBAAoB,CAC1B,QAAoB,EACpB,WAAmB,EACnB,YAAoB;QAEpB,MAAM,QAAQ,GAAqB,EAAE,CAAC;QACtC,MAAM,GAAG,GAAG,IAAI,CAAC,eAAe,CAAC;QACjC,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;QAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;gBACtC,IAAI,IAAI,GAAG,CAAC,CAAC;gBACb,MAAM,EAAE,GAAG,CAAC,GAAG,IAAI,CAAC;gBACpB,MAAM,EAAE,GAAG,EAAE,GAAG,GAAG,CAAC;gBACpB,KAAK,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC7B,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,WAAW,EAAE,CAAC;wBAC9B,IAAI,IAAI,QAAQ,CAAC,CAAC,GAAG,YAAY,GAAG,CAAC,CAAC,CAAC;oBACzC,CAAC;gBACH,CAAC;gBACD,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,GAAG,CAAC;gBAE9B,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,GAAG,CAAC,CAAC;gBAE/C,IAAI,MAAM,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC;oBACrC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;oBAC7B,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,GAAG,WAAW,CAAC;gBACrC,CAAC;qBAAM,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC5C,IAAI,CAAC,aAAa,CAAC,CAAC,C
AAC,GAAG,KAAK,CAAC;oBAC9B,MAAM,GAAG,GAAmB;wBAC1B,OAAO,EAAE,CAAC;wBACV,KAAK,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,GAAG,cAAc;wBAC5C,QAAQ,EAAE,CAAC,WAAW,GAAG,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,GAAG,cAAc;qBAChE,CAAC;oBACF,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;oBACnB,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;wBACnB,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;oBACtB,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;QAED,IAAI,CAAC,iBAAiB,IAAI,YAAY,CAAC;QACvC,OAAO,QAAQ,CAAC;IAClB,CAAC;IAEO,oBAAoB;QAC1B,MAAM,QAAQ,GAAqB,EAAE,CAAC;QACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,IAAI,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC1B,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC;gBAC9B,MAAM,GAAG,GAAmB;oBAC1B,OAAO,EAAE,CAAC;oBACV,KAAK,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,GAAG,cAAc;oBAC5C,QAAQ,EAAE,CAAC,IAAI,CAAC,iBAAiB,GAAG,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,GAAG,cAAc;iBAC3E,CAAC;gBACF,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACnB,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;oBACnB,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;gBACtB,CAAC;YACH,CAAC;QACH,CAAC;QACD,OAAO,QAAQ,CAAC;IAClB,CAAC;CACF"}
|
package/dist/Sortformer.d.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* TypeScript wrapper for the native SortFormer speaker diarization model
|
|
3
3
|
*/
|
|
4
|
-
import type { LoadOptions, DiarizeOptions, DiarizeResult, StreamingSessionOptions } from './types.js';
|
|
4
|
+
import type { LoadOptions, DiarizeOptions, DiarizeResult, StreamingSessionOptions, DiarizeStreamOptions } from './types.js';
|
|
5
5
|
import { StreamingSession } from './StreamingSession.js';
|
|
6
|
+
import { DiarizeStream } from './DiarizeStream.js';
|
|
6
7
|
/**
|
|
7
8
|
* SortFormer speaker diarization model wrapper
|
|
8
9
|
*
|
|
@@ -40,7 +41,11 @@ export declare class Sortformer {
|
|
|
40
41
|
*/
|
|
41
42
|
static load(modelPath: string, options?: LoadOptions): Promise<Sortformer>;
|
|
42
43
|
/**
|
|
43
|
-
* Run diarization inference on audio samples
|
|
44
|
+
* Run offline diarization inference on audio samples
|
|
45
|
+
*
|
|
46
|
+
* This method processes the entire audio at once in offline mode.
|
|
47
|
+
* For streaming diarization with latency control, use createStreamingSession()
|
|
48
|
+
* or createDiarizeStream() instead.
|
|
44
49
|
*
|
|
45
50
|
* @param audio - Audio samples as Float32Array (16kHz mono)
|
|
46
51
|
* @param options - Optional diarization configuration
|
|
@@ -50,8 +55,6 @@ export declare class Sortformer {
|
|
|
50
55
|
* @example
|
|
51
56
|
* ```typescript
|
|
52
57
|
* const result = await model.diarize(audioData, {
|
|
53
|
-
* mode: 'streaming',
|
|
54
|
-
* latency: '2s',
|
|
55
58
|
* threshold: 0.5,
|
|
56
59
|
* medianFilter: 11
|
|
57
60
|
* });
|
|
@@ -100,5 +103,6 @@ export declare class Sortformer {
|
|
|
100
103
|
* ```
|
|
101
104
|
*/
|
|
102
105
|
createStreamingSession(options?: StreamingSessionOptions): StreamingSession;
|
|
106
|
+
createDiarizeStream(options?: DiarizeStreamOptions): DiarizeStream;
|
|
103
107
|
}
|
|
104
108
|
//# sourceMappingURL=Sortformer.d.ts.map
|
package/dist/Sortformer.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"Sortformer.d.ts","sourceRoot":"","sources":["../src/Sortformer.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,cAAc,EAAE,aAAa,EAAE,uBAAuB,EAAmB,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"Sortformer.d.ts","sourceRoot":"","sources":["../src/Sortformer.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,cAAc,EAAE,aAAa,EAAE,uBAAuB,EAAmB,oBAAoB,EAAE,MAAM,YAAY,CAAC;AAG7I,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAEnD;;;;;;;;;;;;;GAaG;AACH,qBAAa,UAAU;IACrB,OAAO,CAAC,MAAM,CAAM;IACpB,OAAO,CAAC,MAAM,CAAkB;IAEhC;;;OAGG;IACH,OAAO;IAIP;;;;;;;;;;;;OAYG;WACU,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,UAAU,CAAC;IAehF;;;;;;;;;;;;;;;;;;;OAmBG;IACG,OAAO,CAAC,KAAK,EAAE,YAAY,EAAE,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,aAAa,CAAC;IAiEpF;;;;;;;;;;OAUG;IACH,KAAK,IAAI,IAAI;IASb;;;OAGG;IACH,QAAQ,IAAI,OAAO;IAInB;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACH,sBAAsB,CAAC,OAAO,CAAC,EAAE,uBAAuB,GAAG,gBAAgB;IA2B3E,mBAAmB,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,aAAa;CAyBnE"}
|
package/dist/Sortformer.js
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* TypeScript wrapper for the native SortFormer speaker diarization model
|
|
3
3
|
*/
|
|
4
|
-
import {
|
|
4
|
+
import { OFFLINE_PARAMS } from './presets.js';
|
|
5
5
|
import { getBinding } from './binding.js';
|
|
6
6
|
import { StreamingSession } from './StreamingSession.js';
|
|
7
|
+
import { DiarizeStream } from './DiarizeStream.js';
|
|
7
8
|
/**
|
|
8
9
|
* SortFormer speaker diarization model wrapper
|
|
9
10
|
*
|
|
@@ -47,18 +48,16 @@ export class Sortformer {
|
|
|
47
48
|
}
|
|
48
49
|
// Get native binding
|
|
49
50
|
const binding = getBinding();
|
|
50
|
-
// Create native model instance
|
|
51
|
-
// Default to 4 threads if not specified
|
|
52
|
-
const threads = options?.threads ?? 4;
|
|
53
|
-
if (threads < 1 || !Number.isInteger(threads)) {
|
|
54
|
-
throw new Error('threads must be a positive integer');
|
|
55
|
-
}
|
|
56
51
|
// Instantiate native model
|
|
57
|
-
const native = new binding.SortformerModel(modelPath
|
|
52
|
+
const native = new binding.SortformerModel(modelPath);
|
|
58
53
|
return new Sortformer(native);
|
|
59
54
|
}
|
|
60
55
|
/**
|
|
61
|
-
* Run diarization inference on audio samples
|
|
56
|
+
* Run offline diarization inference on audio samples
|
|
57
|
+
*
|
|
58
|
+
* This method processes the entire audio at once in offline mode.
|
|
59
|
+
* For streaming diarization with latency control, use createStreamingSession()
|
|
60
|
+
* or createDiarizeStream() instead.
|
|
62
61
|
*
|
|
63
62
|
* @param audio - Audio samples as Float32Array (16kHz mono)
|
|
64
63
|
* @param options - Optional diarization configuration
|
|
@@ -68,8 +67,6 @@ export class Sortformer {
|
|
|
68
67
|
* @example
|
|
69
68
|
* ```typescript
|
|
70
69
|
* const result = await model.diarize(audioData, {
|
|
71
|
-
* mode: 'streaming',
|
|
72
|
-
* latency: '2s',
|
|
73
70
|
* threshold: 0.5,
|
|
74
71
|
* medianFilter: 11
|
|
75
72
|
* });
|
|
@@ -98,34 +95,15 @@ export class Sortformer {
|
|
|
98
95
|
throw new Error('medianFilter must be a positive odd integer');
|
|
99
96
|
}
|
|
100
97
|
}
|
|
101
|
-
// Map user-friendly options to native format
|
|
102
|
-
const mode = options?.mode ?? 'offline';
|
|
98
|
+
// Map user-friendly options to native format (offline mode only)
|
|
103
99
|
const nativeOptions = {
|
|
104
100
|
threshold: options?.threshold ?? 0.5,
|
|
105
101
|
medianFilter: options?.medianFilter ?? 11,
|
|
102
|
+
chunkLen: OFFLINE_PARAMS.chunkLen,
|
|
103
|
+
rightContext: OFFLINE_PARAMS.rightContext,
|
|
104
|
+
fifoLen: OFFLINE_PARAMS.fifoLen,
|
|
105
|
+
spkcacheUpdatePeriod: OFFLINE_PARAMS.spkcacheUpdatePeriod,
|
|
106
106
|
};
|
|
107
|
-
// Add streaming-specific parameters if in streaming mode
|
|
108
|
-
if (mode === 'streaming') {
|
|
109
|
-
const latency = options?.latency ?? '2s';
|
|
110
|
-
const presetParams = LATENCY_PRESETS[latency];
|
|
111
|
-
if (!presetParams) {
|
|
112
|
-
throw new Error(`Unknown latency preset: ${latency}`);
|
|
113
|
-
}
|
|
114
|
-
nativeOptions.chunkLen = presetParams.chunkLen;
|
|
115
|
-
nativeOptions.rightContext = presetParams.rightContext;
|
|
116
|
-
nativeOptions.fifoLen = presetParams.fifoLen;
|
|
117
|
-
nativeOptions.spkcacheUpdatePeriod = presetParams.spkcacheUpdatePeriod;
|
|
118
|
-
}
|
|
119
|
-
else if (mode === 'offline') {
|
|
120
|
-
// Use offline parameters
|
|
121
|
-
nativeOptions.chunkLen = OFFLINE_PARAMS.chunkLen;
|
|
122
|
-
nativeOptions.rightContext = OFFLINE_PARAMS.rightContext;
|
|
123
|
-
nativeOptions.fifoLen = OFFLINE_PARAMS.fifoLen;
|
|
124
|
-
nativeOptions.spkcacheUpdatePeriod = OFFLINE_PARAMS.spkcacheUpdatePeriod;
|
|
125
|
-
}
|
|
126
|
-
else {
|
|
127
|
-
throw new Error(`Unknown diarization mode: ${mode}`);
|
|
128
|
-
}
|
|
129
107
|
// Call native diarization
|
|
130
108
|
const result = await this.native.diarize(audio, nativeOptions);
|
|
131
109
|
// Validate result structure
|
|
@@ -141,8 +119,8 @@ export class Sortformer {
|
|
|
141
119
|
if (!Number.isInteger(result.frameCount) || result.frameCount < 0) {
|
|
142
120
|
throw new Error('Native diarization result frameCount must be non-negative integer');
|
|
143
121
|
}
|
|
144
|
-
if (!Number.isInteger(result.
|
|
145
|
-
throw new Error('Native diarization result
|
|
122
|
+
if (!Number.isInteger(result.maxSpeakers) || result.maxSpeakers < 1 || result.maxSpeakers > 4) {
|
|
123
|
+
throw new Error('Native diarization result maxSpeakers must be 1-4');
|
|
146
124
|
}
|
|
147
125
|
return result;
|
|
148
126
|
}
|
|
@@ -217,5 +195,25 @@ export class Sortformer {
|
|
|
217
195
|
const nativeSession = new binding.StreamingSession(this.native, presetNum);
|
|
218
196
|
return new StreamingSession(nativeSession);
|
|
219
197
|
}
|
|
198
|
+
createDiarizeStream(options) {
|
|
199
|
+
if (this.closed) {
|
|
200
|
+
throw new Error('Model is closed. Cannot create diarize stream.');
|
|
201
|
+
}
|
|
202
|
+
const preset = options?.preset ?? '2s';
|
|
203
|
+
const presetMap = {
|
|
204
|
+
'low': 0,
|
|
205
|
+
'2s': 1,
|
|
206
|
+
'3s': 2,
|
|
207
|
+
'5s': 3,
|
|
208
|
+
};
|
|
209
|
+
const presetNum = presetMap[preset];
|
|
210
|
+
if (presetNum === undefined) {
|
|
211
|
+
throw new Error(`Unknown preset: ${preset}`);
|
|
212
|
+
}
|
|
213
|
+
const binding = getBinding();
|
|
214
|
+
const nativeSession = new binding.StreamingSession(this.native, presetNum);
|
|
215
|
+
const session = new StreamingSession(nativeSession);
|
|
216
|
+
return new DiarizeStream(session, options);
|
|
217
|
+
}
|
|
220
218
|
}
|
|
221
219
|
//# sourceMappingURL=Sortformer.js.map
|
package/dist/Sortformer.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"Sortformer.js","sourceRoot":"","sources":["../src/Sortformer.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"Sortformer.js","sourceRoot":"","sources":["../src/Sortformer.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAC1C,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAEnD;;;;;;;;;;;;;GAaG;AACH,MAAM,OAAO,UAAU;IAIrB;;;OAGG;IACH,YAAoB,MAAW;QANvB,WAAM,GAAY,KAAK,CAAC;QAO9B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAiB,EAAE,OAAqB;QACxD,iBAAiB;QACjB,IAAI,CAAC,SAAS,IAAI,OAAO,SAAS,KAAK,QAAQ,EAAE,CAAC;YAChD,MAAM,IAAI,SAAS,CAAC,sCAAsC,CAAC,CAAC;QAC9D,CAAC;QAEA,qBAAqB;QACrB,MAAM,OAAO,GAAG,UAAU,EAAE,CAAC;QAE7B,2BAA2B;QAC3B,MAAM,MAAM,GAAG,IAAI,OAAO,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;QAEvD,OAAO,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC;IAChC,CAAC;IAED;;;;;;;;;;;;;;;;;;;OAmBG;IACH,KAAK,CAAC,OAAO,CAAC,KAAmB,EAAE,OAAwB;QACzD,2BAA2B;QAC3B,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;QAED,uBAAuB;QACvB,IAAI,CAAC,CAAC,KAAK,YAAY,YAAY,CAAC,EAAE,CAAC;YACrC,MAAM,IAAI,SAAS,CAAC,8BAA8B,CAAC,CAAC;QACtD,CAAC;QAED,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CAAC,uBAAuB,CAAC,CAAC;QAC3C,CAAC;QAED,mBAAmB;QACnB,IAAI,OAAO,EAAE,SAAS,KAAK,SAAS,EAAE,CAAC;YACrC,IAAI,OAAO,OAAO,CAAC,SAAS,KAAK,QAAQ,IAAI,OAAO,CAAC,SAAS,GAAG,CAAC,IAAI,OAAO,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;gBAC5F,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;YAChE,CAAC;QACH,CAAC;QAED,IAAI,OAAO,EAAE,YAAY,KAAK,SAAS,EAAE,CAAC;YACxC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,OAAO,CAAC,YAAY,CAAC,IAAI,OAAO,CAAC,YAAY,GAAG,CAAC,IAAI,OAAO,CAAC,YAAY,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC1G,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;YACjE,CAAC;QACH,CAAC;QAEA,iEAAiE;QACjE,MAAM,aAAa,GAAQ;YACzB,SAAS,EAAE,OAAO,EAAE,SAAS,IAAI,GAAG;YACpC,YAAY,EAAE,OAAO,EAAE,YAAY,IAAI,EAAE;YACzC,QAAQ,EAAE,cAAc,CAAC,QAAQ;YACjC,YAAY,EAAE,cAAc,CAAC,YAAY;YACzC,OAAO,EAAE,cAAc,CAAC,OAAO;YAC/B,oBAAoB,EAAE,cAAc,CAAC,oBAAoB;SAC1D,CAAC;QAEH,0BAA0B;QAC1B,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,KAA
K,EAAE,aAAa,CAAC,CAAC;QAE/D,4BAA4B;QAC5B,IAAI,CAAC,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;YAC1C,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;QAChE,CAAC;QAED,IAAI,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACpC,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;QACnE,CAAC;QAED,IAAI,CAAC,CAAC,MAAM,CAAC,WAAW,YAAY,YAAY,CAAC,EAAE,CAAC;YAClD,MAAM,IAAI,KAAK,CAAC,4DAA4D,CAAC,CAAC;QAChF,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,UAAU,CAAC,IAAI,MAAM,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC;YAClE,MAAM,IAAI,KAAK,CAAC,mEAAmE,CAAC,CAAC;QACvF,CAAC;QAEA,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,MAAM,CAAC,WAAW,GAAG,CAAC,IAAI,MAAM,CAAC,WAAW,GAAG,CAAC,EAAE,CAAC;YAC9F,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;QACvE,CAAC;QAEF,OAAO,MAAuB,CAAC;IACjC,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK;QACH,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACjB,IAAI,IAAI,CAAC,MAAM,IAAI,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,KAAK,UAAU,EAAE,CAAC;gBAC3D,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;YACtB,CAAC;YACD,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;QACrB,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,QAAQ;QACN,OAAO,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACH,sBAAsB,CAAC,OAAiC;QACtD,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;QACvE,CAAC;QAED,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,IAAI,CAAC;QAEvC,kCAAkC;QAClC,MAAM,SAAS,GAAoC;YACjD,KAAK,EAAE,CAAC,EAAG,gCAAgC;YAC3C,IAAI,EAAE,CAAC,EAAI,uBAAuB;YAClC,IAAI,EAAE,CAAC,EAAI,uBAAuB;YAClC,IAAI,EAAE,CAAC,EAAI,uBAAuB;SACnC,CAAC;QAEF,MAAM,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QACpC,IAAI,SAAS,KAAK,SAAS,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CAAC,mBAAmB,MAAM,EAAE,CAAC,CAAC;QAC/C,CAAC;QAED,wCAAwC;QACxC,MAAM,OAAO,GAAG,UAAU,EAAE,CAAC;QAC7B,MAAM,aAAa,GAAG,IAAI,OAAO,CAAC,gBAAgB,CAAC,IAAI,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;QAE3E,OAAO,IAAI,gBAAgB,CAAC,aAAa,CAAC,CAAC;IAC7C,CAAC;IAED,mBAAmB,CAAC,OAA8B;QAChD,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;QACpE,CAAC;QAED,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,IAAI,CAAC;QAEvC,MAAM,SAAS,GAAoC;YACjD,KAAK,EAAE,CAAC;YACR,IAAI,EAAE,CAAC;YACP,IAAI,EAAE,CAAC;YACP,
IAAI,EAAE,CAAC;SACR,CAAC;QAEF,MAAM,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QACpC,IAAI,SAAS,KAAK,SAAS,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CAAC,mBAAmB,MAAM,EAAE,CAAC,CAAC;QAC/C,CAAC;QAED,MAAM,OAAO,GAAG,UAAU,EAAE,CAAC;QAC7B,MAAM,aAAa,GAAG,IAAI,OAAO,CAAC,gBAAgB,CAAC,IAAI,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;QAC3E,MAAM,OAAO,GAAG,IAAI,gBAAgB,CAAC,aAAa,CAAC,CAAC;QAEpD,OAAO,IAAI,aAAa,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IAC7C,CAAC;CACF"}
|
|
@@ -49,7 +49,7 @@ export declare class StreamingSession {
|
|
|
49
49
|
* console.log(`Got ${result.frameCount} new frames`);
|
|
50
50
|
* ```
|
|
51
51
|
*/
|
|
52
|
-
feed(audio: Float32Array): FeedResult
|
|
52
|
+
feed(audio: Float32Array): Promise<FeedResult>;
|
|
53
53
|
/**
|
|
54
54
|
* Flush remaining buffered audio at end of stream
|
|
55
55
|
*
|
|
@@ -59,7 +59,7 @@ export declare class StreamingSession {
|
|
|
59
59
|
* @returns Final predictions for buffered audio
|
|
60
60
|
* @throws Error if session is closed
|
|
61
61
|
*/
|
|
62
|
-
flush(): FeedResult
|
|
62
|
+
flush(): Promise<FeedResult>;
|
|
63
63
|
/**
|
|
64
64
|
* Reset the streaming state for a new audio stream
|
|
65
65
|
*
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"StreamingSession.d.ts","sourceRoot":"","sources":["../src/StreamingSession.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,UAAU,EAAmB,MAAM,YAAY,CAAC;AAE9D;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,MAAM,CAAM;IACpB,OAAO,CAAC,OAAO,CAAkB;IAEjC;;;;OAIG;gBACS,MAAM,EAAE,GAAG;IAIvB;;;;;;;;;;;;;OAaG;
|
|
1
|
+
{"version":3,"file":"StreamingSession.d.ts","sourceRoot":"","sources":["../src/StreamingSession.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,UAAU,EAAmB,MAAM,YAAY,CAAC;AAE9D;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,MAAM,CAAM;IACpB,OAAO,CAAC,OAAO,CAAkB;IAEjC;;;;OAIG;gBACS,MAAM,EAAE,GAAG;IAIvB;;;;;;;;;;;;;OAaG;IACG,IAAI,CAAC,KAAK,EAAE,YAAY,GAAG,OAAO,CAAC,UAAU,CAAC;IAiBpD;;;;;;;;OAQG;IACG,KAAK,IAAI,OAAO,CAAC,UAAU,CAAC;IAalC;;;;;;;OAOG;IACH,KAAK,IAAI,IAAI;IAOb;;;;;OAKG;IACH,KAAK,IAAI,IAAI;IASb;;OAEG;IACH,IAAI,WAAW,IAAI,MAAM,CAKxB;IAED;;OAEG;IACH,IAAI,QAAQ,IAAI,OAAO,CAEtB;CACF"}
|
package/dist/StreamingSession.js
CHANGED
|
@@ -49,14 +49,14 @@ export class StreamingSession {
|
|
|
49
49
|
* console.log(`Got ${result.frameCount} new frames`);
|
|
50
50
|
* ```
|
|
51
51
|
*/
|
|
52
|
-
feed(audio) {
|
|
52
|
+
async feed(audio) {
|
|
53
53
|
if (this._closed) {
|
|
54
54
|
throw new Error('Session is closed');
|
|
55
55
|
}
|
|
56
56
|
if (!(audio instanceof Float32Array)) {
|
|
57
57
|
throw new TypeError('audio must be a Float32Array');
|
|
58
58
|
}
|
|
59
|
-
const result = this.native.feed(audio);
|
|
59
|
+
const result = await this.native.feed(audio);
|
|
60
60
|
return {
|
|
61
61
|
predictions: result.predictions,
|
|
62
62
|
frameCount: result.frameCount,
|
|
@@ -71,11 +71,11 @@ export class StreamingSession {
|
|
|
71
71
|
* @returns Final predictions for buffered audio
|
|
72
72
|
* @throws Error if session is closed
|
|
73
73
|
*/
|
|
74
|
-
flush() {
|
|
74
|
+
async flush() {
|
|
75
75
|
if (this._closed) {
|
|
76
76
|
throw new Error('Session is closed');
|
|
77
77
|
}
|
|
78
|
-
const result = this.native.flush();
|
|
78
|
+
const result = await this.native.flush();
|
|
79
79
|
return {
|
|
80
80
|
predictions: result.predictions,
|
|
81
81
|
frameCount: result.frameCount,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"StreamingSession.js","sourceRoot":"","sources":["../src/StreamingSession.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,OAAO,gBAAgB;IAI3B;;;;OAIG;IACH,YAAY,MAAW;QAPf,YAAO,GAAY,KAAK,CAAC;QAQ/B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;;;;;;;;;;;;OAaG;IACH,IAAI,CAAC,KAAmB;
|
|
1
|
+
{"version":3,"file":"StreamingSession.js","sourceRoot":"","sources":["../src/StreamingSession.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,OAAO,gBAAgB;IAI3B;;;;OAIG;IACH,YAAY,MAAW;QAPf,YAAO,GAAY,KAAK,CAAC;QAQ/B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;;;;;;;;;;;;OAaG;IACH,KAAK,CAAC,IAAI,CAAC,KAAmB;QAC5B,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,CAAC;QACvC,CAAC;QAED,IAAI,CAAC,CAAC,KAAK,YAAY,YAAY,CAAC,EAAE,CAAC;YACrC,MAAM,IAAI,SAAS,CAAC,8BAA8B,CAAC,CAAC;QACtD,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAE7C,OAAO;YACL,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,UAAU,EAAE,MAAM,CAAC,UAAU;SAC9B,CAAC;IACJ,CAAC;IAED;;;;;;;;OAQG;IACH,KAAK,CAAC,KAAK;QACT,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,CAAC;QACvC,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;QAEzC,OAAO;YACL,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,UAAU,EAAE,MAAM,CAAC,UAAU;SAC9B,CAAC;IACJ,CAAC;IAED;;;;;;;OAOG;IACH,KAAK;QACH,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,CAAC;QACvC,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;IACtB,CAAC;IAED;;;;;OAKG;IACH,KAAK;QACH,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAClB,IAAI,IAAI,CAAC,MAAM,IAAI,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,KAAK,UAAU,EAAE,CAAC;gBAC3D,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;YACtB,CAAC;YACD,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACH,IAAI,WAAW;QACb,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,OAAO,CAAC,CAAC;QACX,CAAC;QACD,OAAO,IAAI,CAAC,MAAM,CAAC,cAAc,EAAE,CAAC;IACtC,CAAC;IAED;;OAEG;IACH,IAAI,QAAQ;QACV,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;CACF"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
export { Sortformer } from './Sortformer.js';
|
|
2
2
|
export { StreamingSession } from './StreamingSession.js';
|
|
3
|
-
export
|
|
3
|
+
export { DiarizeStream } from './DiarizeStream.js';
|
|
4
|
+
export type { LoadOptions, DiarizeOptions, DiarizeResult, LatencyPreset, StreamingPreset, StreamingSessionOptions, FeedResult, SpeakerSegment, SegmentCallback, FrameCallback, DiarizeStreamOptions, DiarizeStreamFeedResult, DiarizeStreamFlushResult, } from './types.js';
|
|
4
5
|
export { LATENCY_PRESETS, OFFLINE_PARAMS } from './presets.js';
|
|
5
6
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,YAAY,EACV,WAAW,EACX,cAAc,EACd,aAAa,EACb,aAAa,EACb,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,YAAY,EACV,WAAW,EACX,cAAc,EACd,aAAa,EACb,aAAa,EACb,eAAe,EACf,uBAAuB,EACvB,UAAU,EACV,cAAc,EACd,eAAe,EACf,aAAa,EACb,oBAAoB,EACpB,uBAAuB,EACvB,wBAAwB,GACzB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC"}
|
package/dist/index.js
CHANGED
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AACzD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAgBnD,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC"}
|
package/dist/types.d.ts
CHANGED
|
@@ -1,11 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* TypeScript type definitions for streaming-sortformer-node
|
|
3
3
|
*/
|
|
4
|
-
/**
|
|
5
|
-
* Diarization mode: offline processes entire audio at once,
|
|
6
|
-
* streaming processes audio in chunks with latency control
|
|
7
|
-
*/
|
|
8
|
-
export type DiarizeMode = 'offline' | 'streaming';
|
|
9
4
|
/**
|
|
10
5
|
* Latency preset for streaming mode
|
|
11
6
|
* - 'low': ~188ms latency, minimal buffering
|
|
@@ -18,27 +13,11 @@ export type LatencyPreset = 'low' | '2s' | '3s' | '5s';
|
|
|
18
13
|
* Options for loading a SortFormer model
|
|
19
14
|
*/
|
|
20
15
|
export interface LoadOptions {
|
|
21
|
-
/**
|
|
22
|
-
* Number of CPU threads to use for inference
|
|
23
|
-
* @default auto-detected based on CPU cores
|
|
24
|
-
*/
|
|
25
|
-
threads?: number;
|
|
26
16
|
}
|
|
27
17
|
/**
|
|
28
18
|
* Options for diarization inference
|
|
29
19
|
*/
|
|
30
20
|
export interface DiarizeOptions {
|
|
31
|
-
/**
|
|
32
|
-
* Diarization mode: 'offline' or 'streaming'
|
|
33
|
-
* @default 'offline'
|
|
34
|
-
*/
|
|
35
|
-
mode?: DiarizeMode;
|
|
36
|
-
/**
|
|
37
|
-
* Latency preset for streaming mode
|
|
38
|
-
* Only used when mode='streaming'
|
|
39
|
-
* @default '2s'
|
|
40
|
-
*/
|
|
41
|
-
latency?: LatencyPreset;
|
|
42
21
|
/**
|
|
43
22
|
* Speaker activity threshold (0.0 to 1.0)
|
|
44
23
|
* Frames with prediction >= threshold are considered active
|
|
@@ -72,9 +51,9 @@ export interface DiarizeResult {
|
|
|
72
51
|
*/
|
|
73
52
|
frameCount: number;
|
|
74
53
|
/**
|
|
75
|
-
*
|
|
54
|
+
* Maximum number of speakers the model outputs (always 4)
|
|
76
55
|
*/
|
|
77
|
-
|
|
56
|
+
maxSpeakers: number;
|
|
78
57
|
}
|
|
79
58
|
/**
|
|
80
59
|
* Streaming preset type
|
|
@@ -104,4 +83,44 @@ export interface FeedResult {
|
|
|
104
83
|
*/
|
|
105
84
|
frameCount: number;
|
|
106
85
|
}
|
|
86
|
+
/** A completed speaker segment emitted during streaming */
|
|
87
|
+
export interface SpeakerSegment {
|
|
88
|
+
/** Speaker index (0-3) */
|
|
89
|
+
speaker: number;
|
|
90
|
+
/** Segment start time in seconds */
|
|
91
|
+
start: number;
|
|
92
|
+
/** Segment duration in seconds */
|
|
93
|
+
duration: number;
|
|
94
|
+
}
|
|
95
|
+
/** Callback for completed segments */
|
|
96
|
+
export type SegmentCallback = (segment: SpeakerSegment) => void;
|
|
97
|
+
/** Callback for raw frame predictions */
|
|
98
|
+
export type FrameCallback = (predictions: Float32Array, frameCount: number) => void;
|
|
99
|
+
/** Options for creating a DiarizeStream */
|
|
100
|
+
export interface DiarizeStreamOptions {
|
|
101
|
+
/** Latency preset (default: '2s') */
|
|
102
|
+
preset?: StreamingPreset;
|
|
103
|
+
/** Speaker activity threshold 0.0-1.0 (default: 0.5) */
|
|
104
|
+
threshold?: number;
|
|
105
|
+
/** Median filter window size, must be odd >= 1 (default: 11) */
|
|
106
|
+
medianFilter?: number;
|
|
107
|
+
/** Called when a speaker segment completes */
|
|
108
|
+
onSegment?: SegmentCallback;
|
|
109
|
+
/** Called with raw frame predictions from each feed() (optional, for power users) */
|
|
110
|
+
onFrames?: FrameCallback;
|
|
111
|
+
}
|
|
112
|
+
/** Result from DiarizeStream.feed() */
|
|
113
|
+
export interface DiarizeStreamFeedResult {
|
|
114
|
+
/** Number of new frames from model inference */
|
|
115
|
+
frameCount: number;
|
|
116
|
+
/** Segments completed during this feed (also delivered via onSegment callback) */
|
|
117
|
+
segments: SpeakerSegment[];
|
|
118
|
+
}
|
|
119
|
+
/** Result from DiarizeStream.flush() */
|
|
120
|
+
export interface DiarizeStreamFlushResult {
|
|
121
|
+
/** Number of final frames */
|
|
122
|
+
frameCount: number;
|
|
123
|
+
/** All segments that completed during flush */
|
|
124
|
+
segments: SpeakerSegment[];
|
|
125
|
+
}
|
|
107
126
|
//# sourceMappingURL=types.d.ts.map
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;;;;;GAMG;AACH,MAAM,MAAM,aAAa,GAAG,KAAK,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC;AAEvD;;GAEG;AACH,MAAM,WAAW,WAAW;CAAG;AAE/B;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;;;OAIG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B;;;OAGG;IACH,IAAI,EAAE,MAAM,CAAC;IAEb;;;;OAIG;IACH,WAAW,EAAE,YAAY,CAAC;IAE1B;;OAEG;IACH,UAAU,EAAE,MAAM,CAAC;IAElB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,MAAM,eAAe,GAAG,KAAK,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC;AAEzD;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACtC;;;OAGG;IACH,MAAM,CAAC,EAAE,eAAe,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB;;;OAGG;IACH,WAAW,EAAE,YAAY,CAAC;IAE1B;;OAEG;IACH,UAAU,EAAE,MAAM,CAAC;CACpB;AAMD,2DAA2D;AAC3D,MAAM,WAAW,cAAc;IAC7B,0BAA0B;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,oCAAoC;IACpC,KAAK,EAAE,MAAM,CAAC;IACd,kCAAkC;IAClC,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,sCAAsC;AACtC,MAAM,MAAM,eAAe,GAAG,CAAC,OAAO,EAAE,cAAc,KAAK,IAAI,CAAC;AAEhE,yCAAyC;AACzC,MAAM,MAAM,aAAa,GAAG,CAAC,WAAW,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,KAAK,IAAI,CAAC;AAEpF,2CAA2C;AAC3C,MAAM,WAAW,oBAAoB;IACnC,qCAAqC;IACrC,MAAM,CAAC,EAAE,eAAe,CAAC;IACzB,wDAAwD;IACxD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gEAAgE;IAChE,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,8CAA8C;IAC9C,SAAS,CAAC,EAAE,eAAe,CAAC;IAC5B,qFAAqF;IACrF,QAAQ,CAAC,EAAE,aAAa,CAAC;CAC1B;AAED,uCAAuC;AACvC,MAAM,WAAW,uBAAuB;IACtC,gDAAgD;IAChD,UAAU,EAAE,MAAM,CAAC;IACnB,kFAAkF;IAClF,QAAQ,EAAE,cAAc,EAAE,CAAC;CAC5B;AAED,wCAAwC;AACxC,MAAM,WAAW,wBAAwB;IACvC,6BAA6B;IAC7B,UAAU,EAAE,MAAM,CAAC;IACnB,+CAA+C;IAC/C,QAAQ,EAAE,cAAc,EAAE,CAAC;CAC5B"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "streaming-sortformer-node",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.1",
|
|
4
4
|
"description": "Node.js bindings for SortFormer streaming speaker diarization",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -17,8 +17,8 @@
|
|
|
17
17
|
"cmake-js": "^7.0.0"
|
|
18
18
|
},
|
|
19
19
|
"optionalDependencies": {
|
|
20
|
-
"@streaming-sortformer-node/darwin-
|
|
21
|
-
"@streaming-sortformer-node/darwin-
|
|
20
|
+
"@streaming-sortformer-node/darwin-arm64": "0.1.1",
|
|
21
|
+
"@streaming-sortformer-node/darwin-x64": "0.1.0"
|
|
22
22
|
},
|
|
23
23
|
"engines": {
|
|
24
24
|
"node": ">=18.0.0"
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
import { StreamingSession } from './StreamingSession.js';
|
|
2
|
+
import type {
|
|
3
|
+
SpeakerSegment,
|
|
4
|
+
SegmentCallback,
|
|
5
|
+
FrameCallback,
|
|
6
|
+
DiarizeStreamOptions,
|
|
7
|
+
DiarizeStreamFeedResult,
|
|
8
|
+
DiarizeStreamFlushResult,
|
|
9
|
+
} from './types.js';
|
|
10
|
+
|
|
11
|
+
const NUM_SPEAKERS = 4;
|
|
12
|
+
const FRAME_DURATION = 0.08; // 80ms per frame (160 samples * 8x subsampling / 16kHz)
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Real-time diarization stream that wraps StreamingSession and performs
|
|
16
|
+
* threshold → median filter → segment extraction, emitting completed
|
|
17
|
+
* speaker segments via callbacks as audio is fed incrementally.
|
|
18
|
+
*/
|
|
19
|
+
export class DiarizeStream {
|
|
20
|
+
private session: StreamingSession;
|
|
21
|
+
private threshold: number;
|
|
22
|
+
private medianFilterWin: number;
|
|
23
|
+
private half: number;
|
|
24
|
+
private onSegment?: SegmentCallback;
|
|
25
|
+
private onFrames?: FrameCallback;
|
|
26
|
+
|
|
27
|
+
private pendingBinary: Uint8Array; // unsettled binary frames (half × NUM_SPEAKERS, row-major)
|
|
28
|
+
private pendingLen: number;
|
|
29
|
+
|
|
30
|
+
private globalFrameOffset: number;
|
|
31
|
+
|
|
32
|
+
private speakerActive: boolean[];
|
|
33
|
+
private segmentStart: number[];
|
|
34
|
+
|
|
35
|
+
private _closed: boolean;
|
|
36
|
+
|
|
37
|
+
constructor(session: StreamingSession, options: DiarizeStreamOptions = {}) {
|
|
38
|
+
this.session = session;
|
|
39
|
+
this.threshold = options.threshold ?? 0.5;
|
|
40
|
+
this.medianFilterWin = options.medianFilter ?? 11;
|
|
41
|
+
this.onSegment = options.onSegment;
|
|
42
|
+
this.onFrames = options.onFrames;
|
|
43
|
+
|
|
44
|
+
if (this.medianFilterWin < 1 || this.medianFilterWin % 2 === 0) {
|
|
45
|
+
throw new Error('medianFilter must be a positive odd integer');
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
this.half = Math.floor(this.medianFilterWin / 2);
|
|
49
|
+
this.pendingBinary = new Uint8Array(this.half * NUM_SPEAKERS);
|
|
50
|
+
this.pendingLen = 0;
|
|
51
|
+
this.globalFrameOffset = 0;
|
|
52
|
+
|
|
53
|
+
this.speakerActive = new Array(NUM_SPEAKERS).fill(false);
|
|
54
|
+
this.segmentStart = new Array(NUM_SPEAKERS).fill(0);
|
|
55
|
+
this._closed = false;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Feed audio samples, run inference, and post-process predictions.
|
|
60
|
+
* Returns segments that completed during this call.
|
|
61
|
+
*/
|
|
62
|
+
async feed(audio: Float32Array): Promise<DiarizeStreamFeedResult> {
|
|
63
|
+
if (this._closed) {
|
|
64
|
+
throw new Error('DiarizeStream is closed');
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
const result = await this.session.feed(audio);
|
|
68
|
+
|
|
69
|
+
if (this.onFrames) {
|
|
70
|
+
this.onFrames(result.predictions, result.frameCount);
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
if (result.frameCount === 0) {
|
|
74
|
+
return { frameCount: 0, segments: [] };
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
const newBinary = this.thresholdToBinary(result.predictions, result.frameCount);
|
|
78
|
+
|
|
79
|
+
const combinedLen = this.pendingLen + result.frameCount;
|
|
80
|
+
const combined = new Uint8Array(combinedLen * NUM_SPEAKERS);
|
|
81
|
+
|
|
82
|
+
if (this.pendingLen > 0) {
|
|
83
|
+
combined.set(
|
|
84
|
+
this.pendingBinary.subarray(0, this.pendingLen * NUM_SPEAKERS),
|
|
85
|
+
0,
|
|
86
|
+
);
|
|
87
|
+
}
|
|
88
|
+
combined.set(newBinary, this.pendingLen * NUM_SPEAKERS);
|
|
89
|
+
|
|
90
|
+
// Last `half` frames are unsettled (need future context); rest are settled
|
|
91
|
+
const settledCount = Math.max(0, combinedLen - this.half);
|
|
92
|
+
const unsettledStart = settledCount;
|
|
93
|
+
const unsettledCount = combinedLen - settledCount;
|
|
94
|
+
|
|
95
|
+
const segments = this.processSettledFrames(combined, combinedLen, settledCount);
|
|
96
|
+
|
|
97
|
+
this.pendingLen = unsettledCount;
|
|
98
|
+
if (unsettledCount > 0) {
|
|
99
|
+
this.pendingBinary.set(
|
|
100
|
+
combined.subarray(unsettledStart * NUM_SPEAKERS, combinedLen * NUM_SPEAKERS),
|
|
101
|
+
0,
|
|
102
|
+
);
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
return { frameCount: result.frameCount, segments };
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
/**
|
|
109
|
+
* Flush remaining buffered frames at end of stream.
|
|
110
|
+
* Zero-pads future frames to settle all pending predictions.
|
|
111
|
+
*/
|
|
112
|
+
async flush(): Promise<DiarizeStreamFlushResult> {
|
|
113
|
+
if (this._closed) {
|
|
114
|
+
throw new Error('DiarizeStream is closed');
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
const result = await this.session.flush();
|
|
118
|
+
|
|
119
|
+
if (this.onFrames && result.frameCount > 0) {
|
|
120
|
+
this.onFrames(result.predictions, result.frameCount);
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
const newBinary = result.frameCount > 0
|
|
124
|
+
? this.thresholdToBinary(result.predictions, result.frameCount)
|
|
125
|
+
: new Uint8Array(0);
|
|
126
|
+
|
|
127
|
+
const combinedLen = this.pendingLen + result.frameCount;
|
|
128
|
+
|
|
129
|
+
if (combinedLen === 0) {
|
|
130
|
+
const segments = this.closeAllOpenSegments();
|
|
131
|
+
return { frameCount: 0, segments };
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
const combined = new Uint8Array(combinedLen * NUM_SPEAKERS);
|
|
135
|
+
if (this.pendingLen > 0) {
|
|
136
|
+
combined.set(
|
|
137
|
+
this.pendingBinary.subarray(0, this.pendingLen * NUM_SPEAKERS),
|
|
138
|
+
0,
|
|
139
|
+
);
|
|
140
|
+
}
|
|
141
|
+
if (newBinary.length > 0) {
|
|
142
|
+
combined.set(newBinary, this.pendingLen * NUM_SPEAKERS);
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
// On flush, all frames settle (zero-pad future, matching C edge behavior)
|
|
146
|
+
const segments = this.processSettledFrames(combined, combinedLen, combinedLen);
|
|
147
|
+
|
|
148
|
+
this.pendingLen = 0;
|
|
149
|
+
|
|
150
|
+
const closingSegments = this.closeAllOpenSegments();
|
|
151
|
+
for (const seg of closingSegments) {
|
|
152
|
+
segments.push(seg);
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
return { frameCount: result.frameCount, segments };
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
reset(): void {
|
|
159
|
+
if (this._closed) {
|
|
160
|
+
throw new Error('DiarizeStream is closed');
|
|
161
|
+
}
|
|
162
|
+
this.session.reset();
|
|
163
|
+
this.pendingLen = 0;
|
|
164
|
+
this.pendingBinary.fill(0);
|
|
165
|
+
this.globalFrameOffset = 0;
|
|
166
|
+
this.speakerActive.fill(false);
|
|
167
|
+
this.segmentStart.fill(0);
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
close(): void {
|
|
171
|
+
if (!this._closed) {
|
|
172
|
+
this.session.close();
|
|
173
|
+
this._closed = true;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
get totalFrames(): number {
|
|
178
|
+
if (this._closed) return 0;
|
|
179
|
+
return this.session.totalFrames;
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
get isClosed(): boolean {
|
|
183
|
+
return this._closed;
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
private thresholdToBinary(predictions: Float32Array, frameCount: number): Uint8Array {
|
|
187
|
+
const binary = new Uint8Array(frameCount * NUM_SPEAKERS);
|
|
188
|
+
const thresh = this.threshold;
|
|
189
|
+
for (let i = 0; i < frameCount * NUM_SPEAKERS; i++) {
|
|
190
|
+
binary[i] = predictions[i] > thresh ? 1 : 0;
|
|
191
|
+
}
|
|
192
|
+
return binary;
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
/**
|
|
196
|
+
* Median filter + segment detection on settled frames.
|
|
197
|
+
* `combined` has `combinedLen` binary frames; only first `settledCount` are emitted.
|
|
198
|
+
* Remaining frames provide look-ahead context. Zero-pads out-of-bounds (matches C impl).
|
|
199
|
+
*/
|
|
200
|
+
private processSettledFrames(
|
|
201
|
+
combined: Uint8Array,
|
|
202
|
+
combinedLen: number,
|
|
203
|
+
settledCount: number,
|
|
204
|
+
): SpeakerSegment[] {
|
|
205
|
+
const segments: SpeakerSegment[] = [];
|
|
206
|
+
const win = this.medianFilterWin;
|
|
207
|
+
const half = this.half;
|
|
208
|
+
|
|
209
|
+
for (let s = 0; s < NUM_SPEAKERS; s++) {
|
|
210
|
+
for (let i = 0; i < settledCount; i++) {
|
|
211
|
+
let ones = 0;
|
|
212
|
+
const lo = i - half;
|
|
213
|
+
const hi = lo + win;
|
|
214
|
+
for (let j = lo; j < hi; j++) {
|
|
215
|
+
if (j >= 0 && j < combinedLen) {
|
|
216
|
+
ones += combined[j * NUM_SPEAKERS + s];
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
const active = ones * 2 > win;
|
|
220
|
+
|
|
221
|
+
const globalFrame = this.globalFrameOffset + i;
|
|
222
|
+
|
|
223
|
+
if (active && !this.speakerActive[s]) {
|
|
224
|
+
this.speakerActive[s] = true;
|
|
225
|
+
this.segmentStart[s] = globalFrame;
|
|
226
|
+
} else if (!active && this.speakerActive[s]) {
|
|
227
|
+
this.speakerActive[s] = false;
|
|
228
|
+
const seg: SpeakerSegment = {
|
|
229
|
+
speaker: s,
|
|
230
|
+
start: this.segmentStart[s] * FRAME_DURATION,
|
|
231
|
+
duration: (globalFrame - this.segmentStart[s]) * FRAME_DURATION,
|
|
232
|
+
};
|
|
233
|
+
segments.push(seg);
|
|
234
|
+
if (this.onSegment) {
|
|
235
|
+
this.onSegment(seg);
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
this.globalFrameOffset += settledCount;
|
|
242
|
+
return segments;
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
private closeAllOpenSegments(): SpeakerSegment[] {
|
|
246
|
+
const segments: SpeakerSegment[] = [];
|
|
247
|
+
for (let s = 0; s < NUM_SPEAKERS; s++) {
|
|
248
|
+
if (this.speakerActive[s]) {
|
|
249
|
+
this.speakerActive[s] = false;
|
|
250
|
+
const seg: SpeakerSegment = {
|
|
251
|
+
speaker: s,
|
|
252
|
+
start: this.segmentStart[s] * FRAME_DURATION,
|
|
253
|
+
duration: (this.globalFrameOffset - this.segmentStart[s]) * FRAME_DURATION,
|
|
254
|
+
};
|
|
255
|
+
segments.push(seg);
|
|
256
|
+
if (this.onSegment) {
|
|
257
|
+
this.onSegment(seg);
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
return segments;
|
|
262
|
+
}
|
|
263
|
+
}
|