@storyteller-platform/ghost-story 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +611 -0
- package/README.md +18 -0
- package/dist/api/APIOptions.cjs +16 -0
- package/dist/api/APIOptions.d.cts +18 -0
- package/dist/api/APIOptions.d.ts +18 -0
- package/dist/api/APIOptions.js +0 -0
- package/dist/api/Recognition.cjs +263 -0
- package/dist/api/Recognition.d.cts +77 -0
- package/dist/api/Recognition.d.ts +77 -0
- package/dist/api/Recognition.js +233 -0
- package/dist/api/VoiceActivityDetection.cjs +77 -0
- package/dist/api/VoiceActivityDetection.d.cts +24 -0
- package/dist/api/VoiceActivityDetection.d.ts +24 -0
- package/dist/api/VoiceActivityDetection.js +43 -0
- package/dist/audio/AudioConverter.cjs +331 -0
- package/dist/audio/AudioConverter.d.cts +53 -0
- package/dist/audio/AudioConverter.d.ts +53 -0
- package/dist/audio/AudioConverter.js +310 -0
- package/dist/audio/AudioFormat.cjs +151 -0
- package/dist/audio/AudioFormat.d.cts +25 -0
- package/dist/audio/AudioFormat.d.ts +25 -0
- package/dist/audio/AudioFormat.js +123 -0
- package/dist/audio/AudioSource.cjs +119 -0
- package/dist/audio/AudioSource.d.cts +33 -0
- package/dist/audio/AudioSource.d.ts +33 -0
- package/dist/audio/AudioSource.js +88 -0
- package/dist/audio/index.cjs +74 -0
- package/dist/audio/index.d.cts +6 -0
- package/dist/audio/index.d.ts +6 -0
- package/dist/audio/index.js +54 -0
- package/dist/cli/bin.cjs +277 -0
- package/dist/cli/bin.d.cts +1 -0
- package/dist/cli/bin.d.ts +1 -0
- package/dist/cli/bin.js +275 -0
- package/dist/cli/config.cjs +347 -0
- package/dist/cli/config.d.cts +33 -0
- package/dist/cli/config.d.ts +33 -0
- package/dist/cli/config.js +285 -0
- package/dist/cli/install.cjs +334 -0
- package/dist/cli/install.d.cts +62 -0
- package/dist/cli/install.d.ts +62 -0
- package/dist/cli/install.js +316 -0
- package/dist/cli/whisper-server.cjs +172 -0
- package/dist/cli/whisper-server.d.cts +24 -0
- package/dist/cli/whisper-server.d.ts +24 -0
- package/dist/cli/whisper-server.js +152 -0
- package/dist/config.cjs +60 -0
- package/dist/config.d.cts +12 -0
- package/dist/config.d.ts +12 -0
- package/dist/config.js +32 -0
- package/dist/convert.cjs +88 -0
- package/dist/convert.d.cts +12 -0
- package/dist/convert.d.ts +12 -0
- package/dist/convert.js +63 -0
- package/dist/encodings/Ascii.cjs +75 -0
- package/dist/encodings/Ascii.d.cts +13 -0
- package/dist/encodings/Ascii.d.ts +13 -0
- package/dist/encodings/Ascii.js +48 -0
- package/dist/encodings/Base64.cjs +155 -0
- package/dist/encodings/Base64.d.cts +5 -0
- package/dist/encodings/Base64.d.ts +5 -0
- package/dist/encodings/Base64.js +129 -0
- package/dist/encodings/TextEncodingsCommon.cjs +16 -0
- package/dist/encodings/TextEncodingsCommon.d.cts +6 -0
- package/dist/encodings/TextEncodingsCommon.d.ts +6 -0
- package/dist/encodings/TextEncodingsCommon.js +0 -0
- package/dist/index.cjs +153 -0
- package/dist/index.d.cts +15 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +140 -0
- package/dist/recognition/AmazonTranscribeSTT.cjs +188 -0
- package/dist/recognition/AmazonTranscribeSTT.d.cts +21 -0
- package/dist/recognition/AmazonTranscribeSTT.d.ts +21 -0
- package/dist/recognition/AmazonTranscribeSTT.js +160 -0
- package/dist/recognition/AzureCognitiveServicesSTT.cjs +124 -0
- package/dist/recognition/AzureCognitiveServicesSTT.d.cts +21 -0
- package/dist/recognition/AzureCognitiveServicesSTT.d.ts +21 -0
- package/dist/recognition/AzureCognitiveServicesSTT.js +95 -0
- package/dist/recognition/DeepgramSTT.cjs +172 -0
- package/dist/recognition/DeepgramSTT.d.cts +23 -0
- package/dist/recognition/DeepgramSTT.d.ts +23 -0
- package/dist/recognition/DeepgramSTT.js +153 -0
- package/dist/recognition/GoogleCloudSTT.cjs +125 -0
- package/dist/recognition/GoogleCloudSTT.d.cts +35 -0
- package/dist/recognition/GoogleCloudSTT.d.ts +35 -0
- package/dist/recognition/GoogleCloudSTT.js +107 -0
- package/dist/recognition/OpenAICloudSTT.cjs +180 -0
- package/dist/recognition/OpenAICloudSTT.d.cts +29 -0
- package/dist/recognition/OpenAICloudSTT.d.ts +29 -0
- package/dist/recognition/OpenAICloudSTT.js +150 -0
- package/dist/recognition/WhisperCppSTT.cjs +296 -0
- package/dist/recognition/WhisperCppSTT.d.cts +40 -0
- package/dist/recognition/WhisperCppSTT.d.ts +40 -0
- package/dist/recognition/WhisperCppSTT.js +275 -0
- package/dist/recognition/WhisperServerSTT.cjs +119 -0
- package/dist/recognition/WhisperServerSTT.d.cts +24 -0
- package/dist/recognition/WhisperServerSTT.d.ts +24 -0
- package/dist/recognition/WhisperServerSTT.js +105 -0
- package/dist/utilities/FileSystem.cjs +54 -0
- package/dist/utilities/FileSystem.d.cts +3 -0
- package/dist/utilities/FileSystem.d.ts +3 -0
- package/dist/utilities/FileSystem.js +20 -0
- package/dist/utilities/Locale.cjs +46 -0
- package/dist/utilities/Locale.d.cts +9 -0
- package/dist/utilities/Locale.d.ts +9 -0
- package/dist/utilities/Locale.js +20 -0
- package/dist/utilities/ObjectUtilities.cjs +41 -0
- package/dist/utilities/ObjectUtilities.d.cts +3 -0
- package/dist/utilities/ObjectUtilities.d.ts +3 -0
- package/dist/utilities/ObjectUtilities.js +7 -0
- package/dist/utilities/Timeline.cjs +120 -0
- package/dist/utilities/Timeline.d.cts +23 -0
- package/dist/utilities/Timeline.d.ts +23 -0
- package/dist/utilities/Timeline.js +94 -0
- package/dist/utilities/Timing.cjs +287 -0
- package/dist/utilities/Timing.d.cts +64 -0
- package/dist/utilities/Timing.d.ts +64 -0
- package/dist/utilities/Timing.js +256 -0
- package/dist/utilities/WhisperTimeline.cjs +344 -0
- package/dist/utilities/WhisperTimeline.d.cts +86 -0
- package/dist/utilities/WhisperTimeline.d.ts +86 -0
- package/dist/utilities/WhisperTimeline.js +313 -0
- package/dist/vad/ActiveGate.cjs +357 -0
- package/dist/vad/ActiveGate.d.cts +53 -0
- package/dist/vad/ActiveGate.d.ts +53 -0
- package/dist/vad/ActiveGate.js +329 -0
- package/dist/vad/ActiveGateOg.cjs +1366 -0
- package/dist/vad/ActiveGateOg.d.cts +33 -0
- package/dist/vad/ActiveGateOg.d.ts +33 -0
- package/dist/vad/ActiveGateOg.js +1341 -0
- package/dist/vad/Silero.cjs +174 -0
- package/dist/vad/Silero.d.cts +25 -0
- package/dist/vad/Silero.d.ts +25 -0
- package/dist/vad/Silero.js +153 -0
- package/package.json +125 -0
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// --- CommonJS interop glue (bundler-generated helpers) ----------------------
// Cached references to the Object built-ins used by the helpers below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Defines every key of `all` on `target` as an enumerable getter.
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

// Copies the own properties of `from` onto `to` as live getters, skipping keys
// that `to` already owns and the optional `except` key.
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (isCopyable) {
    for (let key of __getOwnPropNames(from)) {
      if (__hasOwnProp.call(to, key) || key === except) continue;
      desc = __getOwnPropDesc(from, key);
      __defProp(to, key, {
        get: () => from[key],
        // Enumerable unless the source descriptor says otherwise.
        enumerable: !desc || desc.enumerable
      });
    }
  }
  return to;
};

// Wraps a namespace object into a CommonJS export object flagged `__esModule`.
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};

// Public surface of this module; getters resolve to the hoisted function
// declarations further down.
var WhisperTimeline_exports = {};
__export(WhisperTimeline_exports, {
  calculateEffectiveProcessors: () => calculateEffectiveProcessors,
  calculateWhisperSplits: () => calculateWhisperSplits,
  compareTimelines: () => compareTimelines,
  countProcessorBoundaries: () => countProcessorBoundaries,
  extractCorrectedTimeline: () => extractCorrectedTimeline,
  parseWhisperCppOutput: () => parseWhisperCppOutput,
  parseWhisperServerOutput: () => parseWhisperServerOutput,
  scoreTimeline: () => scoreTimeline
});
module.exports = __toCommonJS(WhisperTimeline_exports);
|
|
31
|
+
/** Default sample rate (Hz) assumed when converting sample counts to seconds. */
const WHISPER_SAMPLE_RATE = 16e3;

/**
 * Computes the times, in seconds, at which the audio is divided between
 * processors when it is cut into `numProcessors` equal sample ranges.
 *
 * @param durationSeconds Total audio length in seconds.
 * @param numProcessors Number of processors the audio is divided between.
 * @param sampleRate Samples per second; defaults to WHISPER_SAMPLE_RATE.
 * @returns The `numProcessors - 1` interior split times, or an empty array
 *   when a single processor (or fewer) is used.
 */
function calculateWhisperSplits(durationSeconds, numProcessors, sampleRate = WHISPER_SAMPLE_RATE) {
  if (numProcessors <= 1) {
    return [];
  }
  // Both values are floored, so splits land on whole-sample positions.
  const sampleCount = Math.floor(durationSeconds * sampleRate);
  const chunkSamples = Math.floor(sampleCount / numProcessors);

  const boundaries = [];
  let processorIndex = 1;
  while (processorIndex < numProcessors) {
    boundaries.push((processorIndex * chunkSamples) / sampleRate);
    processorIndex += 1;
  }
  return boundaries;
}
|
|
44
|
+
/**
 * Matches control tokens embedded in token text: bracketed markers that start
 * with `[_` and angle-bracket markers such as `<|word|>`.
 *
 * The bracketed form stops at the first `]` so that ordinary text sitting
 * between two markers in the same string is not removed along with them.
 */
const specialTokenPattern = /\[_[^\]]+\]|<\|[a-z_]+\|>/g;

/**
 * Converts whisper.cpp transcription segments into the module's raw segment
 * shape, turning millisecond offsets into seconds.
 *
 * Tokens whose text is empty after stripping control tokens are dropped.
 * A token without `offsets` is given a zero-length span at the end offset of
 * the most recent kept token that had offsets (0 if there was none).
 *
 * @param transcription Segments as emitted by whisper.cpp (each with
 *   `text`, `offsets` and `tokens`).
 * @returns One `{ text, segmentStart, segmentEnd, words }` object per segment.
 */
function parseWhisperCppOutput(transcription) {
  return transcription.map((segment) => {
    const words = [];
    let lastTokenEndMs = 0;
    for (const token of segment.tokens) {
      const cleanedText = token.text.replace(specialTokenPattern, "");
      if (cleanedText.trim().length === 0) continue;

      const offsetFrom = token.offsets?.from ?? lastTokenEndMs;
      const offsetTo = token.offsets?.to ?? lastTokenEndMs;
      if (token.offsets) {
        lastTokenEndMs = token.offsets.to;
      }

      words.push({
        // Leading whitespace is kept on purpose: later stages use it to tell
        // a new word from a continuation of the previous one.
        text: cleanedText,
        start: offsetFrom / 1e3,
        end: offsetTo / 1e3,
        confidence: token.p
      });
    }
    return {
      text: segment.text,
      segmentStart: segment.offsets.from / 1e3,
      segmentEnd: segment.offsets.to / 1e3,
      words
    };
  });
}
|
|
73
|
+
/**
 * Converts whisper server segments into the module's raw segment shape.
 *
 * Times are passed through unchanged. A missing `words` list becomes an empty
 * array and a missing word `probability` becomes a confidence of 0.
 *
 * @param segments Segments with `text`, `start`, `end` and optional `words`.
 * @returns One `{ text, segmentStart, segmentEnd, words }` object per segment.
 */
function parseWhisperServerOutput(segments) {
  const toRawWord = (entry) => ({
    text: entry.word,
    start: entry.start,
    end: entry.end,
    confidence: entry.probability ?? 0
  });

  return segments.map((segment) => {
    const sourceWords = segment.words ?? [];
    return {
      text: segment.text,
      segmentStart: segment.start,
      segmentEnd: segment.end,
      words: sourceWords.map((entry) => toRawWord(entry))
    };
  });
}
|
|
89
|
+
/**
 * Estimated speaking time per character, in SECONDS (0.15 s per character).
 * The result of estimateReasonableDuration is added directly to timestamps
 * expressed in seconds, so this is not a millisecond value.
 */
const SECONDS_PER_CHAR = 0.15;
/** Word durations above this many seconds are treated as implausible. */
const MAX_REASONABLE_WORD_DURATION = 5;
/** Confidence below this value marks a word as low-confidence. */
const LOW_CONFIDENCE_THRESHOLD = 0.3;

/**
 * Estimates how long a piece of text plausibly takes to say.
 *
 * @param text Word text; surrounding whitespace is ignored.
 * @returns Duration in seconds, proportional to the trimmed length and never
 *   less than 0.1.
 */
function estimateReasonableDuration(text) {
  const charCount = text.trim().length;
  return Math.max(0.1, charCount * SECONDS_PER_CHAR);
}
|
|
96
|
+
function detectProcessorBoundary(segment, state) {
|
|
97
|
+
if (segment.words.length === 0) return false;
|
|
98
|
+
const firstWord = segment.words[0];
|
|
99
|
+
if (!firstWord) return false;
|
|
100
|
+
const wordStartsNearZero = firstWord.start < 1;
|
|
101
|
+
if (!wordStartsNearZero) return false;
|
|
102
|
+
const segmentJumpsForward = segment.segmentStart > state.lastSegmentEnd + 1;
|
|
103
|
+
if (segmentJumpsForward) return true;
|
|
104
|
+
const segmentGoesBackwards = segment.segmentEnd < segment.segmentStart;
|
|
105
|
+
if (segmentGoesBackwards) return true;
|
|
106
|
+
const significantTimeAccumulated = state.lastWordEnd > 30;
|
|
107
|
+
if (significantTimeAccumulated && firstWord.start < 0.5) return true;
|
|
108
|
+
return false;
|
|
109
|
+
}
|
|
110
|
+
/**
 * Counts how many segments detectProcessorBoundary flags as the start of a
 * new processor's output, walking the segments in order.
 *
 * @param segments Raw segments in output order.
 * @returns Number of detected boundaries (0 for an empty list).
 */
function countProcessorBoundaries(segments) {
  if (segments.length === 0) {
    return 0;
  }

  const tracker = {
    cumulativeOffset: 0,
    lastSegmentEnd: 0,
    lastWordEnd: 0
  };
  let detected = 0;

  for (const current of segments) {
    detected += detectProcessorBoundary(current, tracker) ? 1 : 0;

    // A segment whose end precedes its start is treated as zero-length.
    if (current.segmentEnd < current.segmentStart) {
      tracker.lastSegmentEnd = current.segmentStart;
    } else {
      tracker.lastSegmentEnd = current.segmentEnd;
    }

    const finalWord = current.words[current.words.length - 1];
    if (finalWord) {
      // Raw (uncorrected) word ends are compared, so this only ever grows.
      tracker.lastWordEnd = Math.max(tracker.lastWordEnd, finalWord.end);
    }
  }

  return detected;
}
|
|
131
|
+
/** Minimum amount of audio, in seconds, that each processor should receive. */
const MIN_SECONDS_PER_PROCESSOR = 30;

/**
 * Caps the requested processor count so that each processor gets at least
 * MIN_SECONDS_PER_PROCESSOR seconds of audio.
 *
 * The result is always a whole number of at least 1: a NaN duration is
 * treated as allowing a single processor, and a NaN, zero, negative or
 * fractional request is rounded down and clamped up to 1.
 *
 * @param durationSeconds Audio length in seconds.
 * @param requestedProcessors Processor count the caller asked for.
 * @returns The processor count to actually use (>= 1).
 */
function calculateEffectiveProcessors(durationSeconds, requestedProcessors) {
  const allowedByDuration = Math.floor(durationSeconds / MIN_SECONDS_PER_PROCESSOR);
  const maxProcessors = Number.isNaN(allowedByDuration) ? 1 : Math.max(1, allowedByDuration);

  // Infinity is left alone so "as many as possible" still resolves to the cap.
  const requested = Number.isNaN(requestedProcessors) ? 1 : Math.floor(requestedProcessors);

  return Math.max(1, Math.min(requested, maxProcessors));
}
|
|
139
|
+
/**
 * Shifts one word's timestamps by the current cumulative offset and repairs
 * the result so it is plausible and does not precede the previous word.
 *
 * Steps, in order:
 *  1. add `state.cumulativeOffset` to start and end;
 *  2. if the word is longer than MAX_REASONABLE_WORD_DURATION and below
 *     LOW_CONFIDENCE_THRESHOLD, replace its length with an estimate based on
 *     its text;
 *  3. if it starts before `state.lastWordEnd`, slide it forward by the overlap;
 *  4. never let the end precede the start;
 *  5. for segments with positive length, cap the end at the (offset) segment
 *     end, but not below the start.
 *
 * @param word Raw word with `start`, `end`, `text`, `confidence`.
 * @param state Running state; reads `cumulativeOffset` and `lastWordEnd`.
 * @param segmentBounds `{ start, end }` of the enclosing segment, un-offset.
 * @returns `{ startTime, endTime }` after correction.
 */
function correctWordTimestamps(word, state, segmentBounds) {
  const offset = state.cumulativeOffset;
  let startTime = word.start + offset;
  let endTime = word.end + offset;

  const looksStretched =
    endTime - startTime > MAX_REASONABLE_WORD_DURATION &&
    word.confidence < LOW_CONFIDENCE_THRESHOLD;
  if (looksStretched) {
    endTime = startTime + estimateReasonableDuration(word.text);
  }

  if (startTime < state.lastWordEnd) {
    const overlap = state.lastWordEnd - startTime;
    startTime = state.lastWordEnd;
    endTime = endTime + overlap;
  }

  endTime = endTime < startTime ? startTime : endTime;

  const shiftedSegmentEnd = segmentBounds.end + offset;
  const segmentHasLength = segmentBounds.end - segmentBounds.start > 0;
  if (segmentHasLength && endTime > shiftedSegmentEnd) {
    endTime = Math.max(startTime, shiftedSegmentEnd);
  }

  return { startTime, endTime };
}
|
|
161
|
+
/**
 * Builds a word-level timeline from raw segments, re-basing timestamps that
 * restart at processor boundaries and merging word fragments.
 *
 * For each segment flagged by detectProcessorBoundary, the cumulative offset
 * is reset so the segment's first word lands on a chosen anchor: the first
 * not-yet-used entry of `options.splitBoundaries` lying within 30 s of either
 * the previous word end or the segment start, otherwise the previous word end
 * (or the segment start when no word has been emitted yet).
 *
 * Words that are blank or contain "BLANK_AUDIO" are skipped. A word whose text
 * does not begin with a space is appended to the previous timeline entry.
 * The result is passed through ensureMonotonicTimeline.
 *
 * @param segments Raw segments in output order.
 * @param options Optional `{ splitBoundaries }` (expected split times).
 * @returns Timeline entries `{ type: "word", text, startTime, endTime, confidence }`.
 */
function extractCorrectedTimeline(segments, options = {}) {
  if (segments.length === 0) return [];

  const { splitBoundaries = [] } = options;
  const consumedSplits = /* @__PURE__ */ new Set();
  const entries = [];
  const state = {
    cumulativeOffset: 0,
    lastSegmentEnd: 0,
    lastWordEnd: 0
  };

  for (const segment of segments) {
    const segmentStart = segment.segmentStart;
    // A segment whose end precedes its start is treated as zero-length.
    const segmentEnd = segment.segmentEnd < segment.segmentStart ? segment.segmentStart : segment.segmentEnd;

    if (detectProcessorBoundary(segment, state)) {
      const firstWordStart = segment.words[0]?.start ?? 0;
      const anchor = state.lastWordEnd > 0 ? state.lastWordEnd : segmentStart;

      // NOTE(review): this takes the FIRST unused split within 30 s, not the
      // closest one — confirm that is intended when splits are < 60 s apart.
      const matchedSplit = splitBoundaries.find(
        (split) =>
          !consumedSplits.has(split) &&
          (Math.abs(split - anchor) < 30 || Math.abs(split - segmentStart) < 30)
      );

      if (matchedSplit !== undefined && matchedSplit !== null) {
        consumedSplits.add(matchedSplit);
        state.cumulativeOffset = matchedSplit - firstWordStart;
      } else {
        state.cumulativeOffset = anchor - firstWordStart;
      }
    }

    const bounds = { start: segmentStart, end: segmentEnd };

    for (const word of segment.words) {
      const trimmedText = word.text.trim();
      if (trimmedText.length === 0 || trimmedText.includes("BLANK_AUDIO")) continue;

      const { startTime, endTime } = correctWordTimestamps(word, state, bounds);
      const previous = entries[entries.length - 1];

      // NOTE(review): relies on word text carrying a leading space at word
      // starts; input without leading spaces would be merged into one entry.
      if (previous && !word.text.startsWith(" ")) {
        previous.text += trimmedText;
        if (previous.confidence !== undefined) {
          previous.confidence = Math.min(previous.confidence, word.confidence);
        }
        const mergedDuration = endTime - previous.startTime;
        const mergedConfidence = previous.confidence ?? 1;
        const implausible =
          mergedDuration > MAX_REASONABLE_WORD_DURATION &&
          mergedConfidence < LOW_CONFIDENCE_THRESHOLD;
        previous.endTime = implausible
          ? previous.startTime + estimateReasonableDuration(previous.text)
          : endTime;
      } else {
        entries.push({
          type: "word",
          text: trimmedText,
          startTime,
          endTime,
          confidence: word.confidence
        });
      }

      state.lastWordEnd = entries[entries.length - 1]?.endTime ?? endTime;
    }

    state.lastSegmentEnd = segmentEnd;
  }

  return ensureMonotonicTimeline(entries);
}
|
|
240
|
+
/**
 * Returns a copy of the timeline, sorted by start time, in which no entry
 * starts before the previous one ends and no entry ends before it starts.
 *
 * An overlapping entry is slid forward by the overlap, keeping its length.
 * The input array and its entries are not modified.
 *
 * @param timeline Entries with numeric `startTime` and `endTime`.
 * @returns New array of new entry objects.
 */
function ensureMonotonicTimeline(timeline) {
  if (timeline.length === 0) {
    return [];
  }

  const ordered = Array.from(timeline).sort((left, right) => left.startTime - right.startTime);
  let cursor = 0; // end time of the previously emitted entry

  return ordered.map((entry) => {
    let { startTime, endTime } = entry;

    if (startTime < cursor) {
      const overlap = cursor - startTime;
      startTime = cursor;
      endTime = endTime + overlap;
    }
    endTime = endTime < startTime ? startTime : endTime;

    cursor = endTime;
    return { ...entry, startTime, endTime };
  });
}
|
|
265
|
+
/**
 * Summarises word-duration statistics for a timeline and lists entries that
 * look suspicious (longer than 3 s with confidence below
 * LOW_CONFIDENCE_THRESHOLD; a missing confidence counts as 1).
 *
 * `medianWordDuration` is the element at index floor(n / 2) of the sorted
 * durations (the upper middle for even n). `totalDuration` is the end time of
 * the LAST entry, so the timeline is expected to be in time order.
 *
 * @param timeline Entries with `text`, `startTime`, `endTime`, optional `confidence`.
 * @returns Metrics object; all-zero metrics for an empty timeline.
 */
function scoreTimeline(timeline) {
  if (timeline.length === 0) {
    return {
      totalWords: 0,
      maxWordDuration: 0,
      averageWordDuration: 0,
      medianWordDuration: 0,
      suspiciousTokenCount: 0,
      suspiciousTokens: [],
      totalDuration: 0
    };
  }

  const durations = [];
  const suspiciousTokens = [];
  let durationSum = 0;
  // Tracked incrementally instead of Math.max(...durations): spreading a very
  // long timeline into call arguments can exceed the engine's argument limit.
  let longest = -Infinity;

  for (const entry of timeline) {
    const duration = entry.endTime - entry.startTime;
    durations.push(duration);
    durationSum += duration;
    longest = Math.max(longest, duration);

    const confidence = entry.confidence ?? 1;
    if (duration > 3 && confidence < LOW_CONFIDENCE_THRESHOLD) {
      suspiciousTokens.push({
        text: entry.text,
        duration,
        confidence,
        startTime: entry.startTime
      });
    }
  }

  const sortedDurations = [...durations].sort((a, b) => a - b);
  const medianIndex = Math.floor(sortedDurations.length / 2);
  const lastEntry = timeline[timeline.length - 1];

  return {
    totalWords: timeline.length,
    maxWordDuration: longest,
    averageWordDuration: durationSum / durations.length,
    medianWordDuration: sortedDurations[medianIndex] ?? 0,
    suspiciousTokenCount: suspiciousTokens.length,
    suspiciousTokens,
    totalDuration: lastEntry ? lastEntry.endTime : 0
  };
}
|
|
306
|
+
/**
 * Compares a test timeline against a baseline.
 *
 * Text similarity is the Jaccard index of the two lower-cased,
 * whitespace-split word sets. The remaining fields are absolute differences
 * of the corresponding scoreTimeline metrics.
 *
 * `isAcceptable` requires similarity above 0.8, no suspicious tokens in the
 * test timeline, and a max-word-duration difference under 2 seconds.
 *
 * @param baseline Reference timeline.
 * @param test Timeline being evaluated.
 * @returns `{ textSimilarity, durationDifference, wordCountDifference,
 *   maxDurationDifference, isAcceptable }`.
 */
function compareTimelines(baseline, test) {
  const vocabularyOf = (timeline) =>
    new Set(
      timeline
        .map((e) => e.text)
        .join(" ")
        .toLowerCase()
        .split(/\s+/)
    );

  const baselineWords = vocabularyOf(baseline);
  const testWords = vocabularyOf(test);

  let sharedCount = 0;
  for (const candidate of baselineWords) {
    if (testWords.has(candidate)) sharedCount += 1;
  }
  const union = /* @__PURE__ */ new Set(baselineWords);
  for (const candidate of testWords) {
    union.add(candidate);
  }
  const textSimilarity = sharedCount / union.size;

  const baselineMetrics = scoreTimeline(baseline);
  const testMetrics = scoreTimeline(test);
  const gap = (a, b) => Math.abs(a - b);

  const durationDifference = gap(baselineMetrics.totalDuration, testMetrics.totalDuration);
  const wordCountDifference = gap(baselineMetrics.totalWords, testMetrics.totalWords);
  const maxDurationDifference = gap(baselineMetrics.maxWordDuration, testMetrics.maxWordDuration);

  const isAcceptable =
    textSimilarity > 0.8 &&
    testMetrics.suspiciousTokenCount === 0 &&
    maxDurationDifference < 2;

  return {
    textSimilarity,
    durationDifference,
    wordCountDifference,
    maxDurationDifference,
    isAcceptable
  };
}
|
|
334
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
335
|
+
0 && (module.exports = {
|
|
336
|
+
calculateEffectiveProcessors,
|
|
337
|
+
calculateWhisperSplits,
|
|
338
|
+
compareTimelines,
|
|
339
|
+
countProcessorBoundaries,
|
|
340
|
+
extractCorrectedTimeline,
|
|
341
|
+
parseWhisperCppOutput,
|
|
342
|
+
parseWhisperServerOutput,
|
|
343
|
+
scoreTimeline
|
|
344
|
+
});
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import { Timeline } from './Timeline.cjs';
|
|
2
|
+
|
|
3
|
+
/**
 * Returns the interior split times (seconds) for audio divided evenly, by
 * whole samples, between `numProcessors` processors. Empty when
 * `numProcessors <= 1`. `sampleRate` defaults to 16000.
 */
declare function calculateWhisperSplits(durationSeconds: number, numProcessors: number, sampleRate?: number): number[];

/** Backend-independent segment produced by the `parseWhisper*Output` helpers. */
interface RawWhisperSegment {
    text: string;
    segmentStart: number;
    segmentEnd: number;
    words: RawWhisperWord[];
}

/** A single word/token with its raw (uncorrected) timing. */
interface RawWhisperWord {
    /** Word text; a leading space marks the start of a new word. */
    text: string;
    start: number;
    end: number;
    confidence: number;
}

/** One segment of whisper.cpp output as consumed by `parseWhisperCppOutput`. */
interface WhisperCppTranscriptionSegment {
    text: string;
    timestamps: {
        from: string;
        to: string;
    };
    /** Numeric offsets; the parser divides them by 1000. */
    offsets: {
        from: number;
        to: number;
    };
    tokens: WhisperCppToken[];
}

/** One token of a whisper.cpp segment. */
interface WhisperCppToken {
    text: string;
    timestamps: {
        from: string;
        to: string;
    };
    /** May be absent; the parser then reuses the previous token's end offset. */
    offsets?: {
        from: number;
        to: number;
    };
    t_dtw: number;
    /** Used as the word confidence by the parser. */
    p: number;
    id: number;
}

/** One segment of whisper server output as consumed by `parseWhisperServerOutput`. */
interface WhisperServerSegment {
    text: string;
    start: number;
    end: number;
    words?: WhisperServerWord[];
}

/** One word of a whisper server segment. */
interface WhisperServerWord {
    word: string;
    start: number;
    end: number;
    /** Treated as 0 by the parser when missing. */
    probability?: number;
}

/** Converts whisper.cpp segments to raw segments, dropping control tokens. */
declare function parseWhisperCppOutput(transcription: WhisperCppTranscriptionSegment[]): RawWhisperSegment[];
/** Converts whisper server segments to raw segments; times pass through unchanged. */
declare function parseWhisperServerOutput(segments: WhisperServerSegment[]): RawWhisperSegment[];
/** Counts segments whose word timestamps appear to restart near zero. */
declare function countProcessorBoundaries(segments: RawWhisperSegment[]): number;
/** Caps the requested processor count so each processor gets at least 30 s of audio. */
declare function calculateEffectiveProcessors(durationSeconds: number, requestedProcessors: number): number;

/** Options for `extractCorrectedTimeline`. */
interface TimelineCorrectionOptions {
    /** Expected split times, e.g. from `calculateWhisperSplits`. */
    splitBoundaries?: number[] | undefined;
}

/** Builds a monotonic word timeline from raw segments, re-basing restarted timestamps. */
declare function extractCorrectedTimeline(segments: RawWhisperSegment[], options?: TimelineCorrectionOptions): Timeline;

/** Duration statistics returned by `scoreTimeline`. */
interface TimelineQualityMetrics {
    totalWords: number;
    maxWordDuration: number;
    averageWordDuration: number;
    medianWordDuration: number;
    suspiciousTokenCount: number;
    /** Entries longer than 3 s whose confidence is below 0.3. */
    suspiciousTokens: {
        text: string;
        duration: number;
        confidence: number;
        startTime: number;
    }[];
    /** End time of the last timeline entry. */
    totalDuration: number;
}

/** Computes duration statistics and suspicious entries for a timeline. */
declare function scoreTimeline(timeline: Timeline): TimelineQualityMetrics;

/** Result of `compareTimelines`. */
interface TimelineComparisonResult {
    /** Jaccard index of the two lower-cased word sets. */
    textSimilarity: number;
    durationDifference: number;
    wordCountDifference: number;
    maxDurationDifference: number;
    /** Similarity > 0.8, no suspicious test tokens, max-duration difference < 2. */
    isAcceptable: boolean;
}

/** Compares a test timeline against a baseline. */
declare function compareTimelines(baseline: Timeline, test: Timeline): TimelineComparisonResult;
|
|
85
|
+
|
|
86
|
+
export { type RawWhisperSegment, type RawWhisperWord, type TimelineComparisonResult, type TimelineCorrectionOptions, type TimelineQualityMetrics, type WhisperCppToken, type WhisperCppTranscriptionSegment, type WhisperServerSegment, type WhisperServerWord, calculateEffectiveProcessors, calculateWhisperSplits, compareTimelines, countProcessorBoundaries, extractCorrectedTimeline, parseWhisperCppOutput, parseWhisperServerOutput, scoreTimeline };
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import { Timeline } from './Timeline.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Returns the interior split times (seconds) for audio divided evenly, by
 * whole samples, between `numProcessors` processors. Empty when
 * `numProcessors <= 1`. `sampleRate` defaults to 16000.
 */
declare function calculateWhisperSplits(durationSeconds: number, numProcessors: number, sampleRate?: number): number[];

/** Backend-independent segment produced by the `parseWhisper*Output` helpers. */
interface RawWhisperSegment {
    text: string;
    segmentStart: number;
    segmentEnd: number;
    words: RawWhisperWord[];
}

/** A single word/token with its raw (uncorrected) timing. */
interface RawWhisperWord {
    /** Word text; a leading space marks the start of a new word. */
    text: string;
    start: number;
    end: number;
    confidence: number;
}

/** One segment of whisper.cpp output as consumed by `parseWhisperCppOutput`. */
interface WhisperCppTranscriptionSegment {
    text: string;
    timestamps: {
        from: string;
        to: string;
    };
    /** Numeric offsets; the parser divides them by 1000. */
    offsets: {
        from: number;
        to: number;
    };
    tokens: WhisperCppToken[];
}

/** One token of a whisper.cpp segment. */
interface WhisperCppToken {
    text: string;
    timestamps: {
        from: string;
        to: string;
    };
    /** May be absent; the parser then reuses the previous token's end offset. */
    offsets?: {
        from: number;
        to: number;
    };
    t_dtw: number;
    /** Used as the word confidence by the parser. */
    p: number;
    id: number;
}

/** One segment of whisper server output as consumed by `parseWhisperServerOutput`. */
interface WhisperServerSegment {
    text: string;
    start: number;
    end: number;
    words?: WhisperServerWord[];
}

/** One word of a whisper server segment. */
interface WhisperServerWord {
    word: string;
    start: number;
    end: number;
    /** Treated as 0 by the parser when missing. */
    probability?: number;
}

/** Converts whisper.cpp segments to raw segments, dropping control tokens. */
declare function parseWhisperCppOutput(transcription: WhisperCppTranscriptionSegment[]): RawWhisperSegment[];
/** Converts whisper server segments to raw segments; times pass through unchanged. */
declare function parseWhisperServerOutput(segments: WhisperServerSegment[]): RawWhisperSegment[];
/** Counts segments whose word timestamps appear to restart near zero. */
declare function countProcessorBoundaries(segments: RawWhisperSegment[]): number;
/** Caps the requested processor count so each processor gets at least 30 s of audio. */
declare function calculateEffectiveProcessors(durationSeconds: number, requestedProcessors: number): number;

/** Options for `extractCorrectedTimeline`. */
interface TimelineCorrectionOptions {
    /** Expected split times, e.g. from `calculateWhisperSplits`. */
    splitBoundaries?: number[] | undefined;
}

/** Builds a monotonic word timeline from raw segments, re-basing restarted timestamps. */
declare function extractCorrectedTimeline(segments: RawWhisperSegment[], options?: TimelineCorrectionOptions): Timeline;

/** Duration statistics returned by `scoreTimeline`. */
interface TimelineQualityMetrics {
    totalWords: number;
    maxWordDuration: number;
    averageWordDuration: number;
    medianWordDuration: number;
    suspiciousTokenCount: number;
    /** Entries longer than 3 s whose confidence is below 0.3. */
    suspiciousTokens: {
        text: string;
        duration: number;
        confidence: number;
        startTime: number;
    }[];
    /** End time of the last timeline entry. */
    totalDuration: number;
}

/** Computes duration statistics and suspicious entries for a timeline. */
declare function scoreTimeline(timeline: Timeline): TimelineQualityMetrics;

/** Result of `compareTimelines`. */
interface TimelineComparisonResult {
    /** Jaccard index of the two lower-cased word sets. */
    textSimilarity: number;
    durationDifference: number;
    wordCountDifference: number;
    maxDurationDifference: number;
    /** Similarity > 0.8, no suspicious test tokens, max-duration difference < 2. */
    isAcceptable: boolean;
}

/** Compares a test timeline against a baseline. */
declare function compareTimelines(baseline: Timeline, test: Timeline): TimelineComparisonResult;
|
|
85
|
+
|
|
86
|
+
export { type RawWhisperSegment, type RawWhisperWord, type TimelineComparisonResult, type TimelineCorrectionOptions, type TimelineQualityMetrics, type WhisperCppToken, type WhisperCppTranscriptionSegment, type WhisperServerSegment, type WhisperServerWord, calculateEffectiveProcessors, calculateWhisperSplits, compareTimelines, countProcessorBoundaries, extractCorrectedTimeline, parseWhisperCppOutput, parseWhisperServerOutput, scoreTimeline };
|