@storyteller-platform/ghost-story 0.1.10 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +7 -0
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +8 -0
- package/dist/recognition/WhisperServerSTT.cjs +46 -4
- package/dist/recognition/WhisperServerSTT.js +47 -4
- package/dist/utilities/WhisperTimeline.cjs +22 -8
- package/dist/utilities/WhisperTimeline.js +22 -8
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -72,6 +72,9 @@ __export(index_exports, {
|
|
|
72
72
|
serviceCapabilities: () => import_audio.serviceCapabilities,
|
|
73
73
|
setConversionMode: () => import_config2.setConversionMode,
|
|
74
74
|
setTimingEnabled: () => import_config2.setTimingEnabled,
|
|
75
|
+
spacelessScriptPattern: () => import_SpacelessScripts.spacelessScriptPattern,
|
|
76
|
+
spacelessScripts: () => import_SpacelessScripts.spacelessScripts,
|
|
77
|
+
startsWithSpacelessScript: () => import_SpacelessScripts.startsWithSpacelessScript,
|
|
75
78
|
toBuffer: () => import_audio.toBuffer,
|
|
76
79
|
toFilePath: () => import_audio.toFilePath,
|
|
77
80
|
toReadStream: () => import_audio.toReadStream,
|
|
@@ -87,6 +90,7 @@ var import_config2 = require("./config.cjs");
|
|
|
87
90
|
var import_OpenAICloudSTT = require("./recognition/OpenAICloudSTT.cjs");
|
|
88
91
|
var import_WhisperCppSTT = require("./recognition/WhisperCppSTT.cjs");
|
|
89
92
|
var import_WhisperServerSTT = require("./recognition/WhisperServerSTT.cjs");
|
|
93
|
+
var import_SpacelessScripts = require("./utilities/SpacelessScripts.cjs");
|
|
90
94
|
var import_Timing = require("./utilities/Timing.cjs");
|
|
91
95
|
var import_Silero = require("./vad/Silero.cjs");
|
|
92
96
|
// Annotate the CommonJS export names for ESM import in node:
|
|
@@ -145,6 +149,9 @@ var import_Silero = require("./vad/Silero.cjs");
|
|
|
145
149
|
serviceCapabilities,
|
|
146
150
|
setConversionMode,
|
|
147
151
|
setTimingEnabled,
|
|
152
|
+
spacelessScriptPattern,
|
|
153
|
+
spacelessScripts,
|
|
154
|
+
startsWithSpacelessScript,
|
|
148
155
|
toBuffer,
|
|
149
156
|
toFilePath,
|
|
150
157
|
toReadStream,
|
package/dist/index.d.cts
CHANGED
|
@@ -9,6 +9,7 @@ export { OpenAICloudSTTOptions, RecognitionResult as OpenAIResult, inputPreferen
|
|
|
9
9
|
export { WhisperCppModelId, WhisperCppOptions, RecognitionResult as WhisperCppResult, recognize as recognizeWhisperCpp, inputPreference as whisperCppInputPreference } from './recognition/WhisperCppSTT.cjs';
|
|
10
10
|
export { WhisperServerOptions, RecognitionResult as WhisperServerResult, recognize as recognizeWhisperServer, inputPreference as whisperServerInputPreference } from './recognition/WhisperServerSTT.cjs';
|
|
11
11
|
export { Timeline, TimelineEntry, TimelineEntryType } from './utilities/Timeline.cjs';
|
|
12
|
+
export { spacelessScriptPattern, spacelessScripts, startsWithSpacelessScript } from './utilities/SpacelessScripts.cjs';
|
|
12
13
|
export { AggregatedStats, PhaseTiming, Timing, TimingAggregator, TimingSpan, TimingSummary, createAggregator, createTiming, formatDuration, formatPercentage, formatSingleReport, printSingleReport } from './utilities/Timing.cjs';
|
|
13
14
|
export { SileroOptions, VadSegment, detectVoiceActivity, ensureVadInstalled, segmentsToTimeline } from './vad/Silero.cjs';
|
|
14
15
|
import 'node:fs';
|
package/dist/index.d.ts
CHANGED
|
@@ -9,6 +9,7 @@ export { OpenAICloudSTTOptions, RecognitionResult as OpenAIResult, inputPreferen
|
|
|
9
9
|
export { WhisperCppModelId, WhisperCppOptions, RecognitionResult as WhisperCppResult, recognize as recognizeWhisperCpp, inputPreference as whisperCppInputPreference } from './recognition/WhisperCppSTT.js';
|
|
10
10
|
export { WhisperServerOptions, RecognitionResult as WhisperServerResult, recognize as recognizeWhisperServer, inputPreference as whisperServerInputPreference } from './recognition/WhisperServerSTT.js';
|
|
11
11
|
export { Timeline, TimelineEntry, TimelineEntryType } from './utilities/Timeline.js';
|
|
12
|
+
export { spacelessScriptPattern, spacelessScripts, startsWithSpacelessScript } from './utilities/SpacelessScripts.js';
|
|
12
13
|
export { AggregatedStats, PhaseTiming, Timing, TimingAggregator, TimingSpan, TimingSummary, createAggregator, createTiming, formatDuration, formatPercentage, formatSingleReport, printSingleReport } from './utilities/Timing.js';
|
|
13
14
|
export { SileroOptions, VadSegment, detectVoiceActivity, ensureVadInstalled, segmentsToTimeline } from './vad/Silero.js';
|
|
14
15
|
import 'node:fs';
|
package/dist/index.js
CHANGED
|
@@ -62,6 +62,11 @@ import {
|
|
|
62
62
|
inputPreference as inputPreference3,
|
|
63
63
|
recognize as recognize4
|
|
64
64
|
} from "./recognition/WhisperServerSTT.js";
|
|
65
|
+
import {
|
|
66
|
+
spacelessScriptPattern,
|
|
67
|
+
spacelessScripts,
|
|
68
|
+
startsWithSpacelessScript
|
|
69
|
+
} from "./utilities/SpacelessScripts.js";
|
|
65
70
|
import {
|
|
66
71
|
Timing,
|
|
67
72
|
TimingAggregator,
|
|
@@ -132,6 +137,9 @@ export {
|
|
|
132
137
|
serviceCapabilities,
|
|
133
138
|
setConversionMode,
|
|
134
139
|
setTimingEnabled,
|
|
140
|
+
spacelessScriptPattern,
|
|
141
|
+
spacelessScripts,
|
|
142
|
+
startsWithSpacelessScript,
|
|
135
143
|
toBuffer,
|
|
136
144
|
toFilePath,
|
|
137
145
|
toReadStream,
|
|
package/dist/recognition/WhisperServerSTT.cjs
CHANGED
|
@@ -59,11 +59,49 @@ async function recognize(input, languageCode, timing, options) {
|
|
|
59
59
|
if (languageCode) {
|
|
60
60
|
form.append("language", languageCode);
|
|
61
61
|
}
|
|
62
|
-
const
|
|
62
|
+
const baseUrl = opts.baseURL.replace(/\/+$/g, "");
|
|
63
|
+
const url = `${baseUrl}${opts.inferencePath}`;
|
|
63
64
|
const headers = {};
|
|
64
65
|
if (opts.apiKey) {
|
|
65
66
|
headers["Authorization"] = `Bearer ${opts.apiKey}`;
|
|
66
67
|
}
|
|
68
|
+
const configResponse = await fetch(`${baseUrl}/config`, {
|
|
69
|
+
headers,
|
|
70
|
+
dispatcher: (0, import_fetch.createTimeoutAgent)(opts.timeout)
|
|
71
|
+
});
|
|
72
|
+
let whisperConfig = null;
|
|
73
|
+
if (configResponse.ok) {
|
|
74
|
+
try {
|
|
75
|
+
const [config, audioLength] = await Promise.all([
|
|
76
|
+
configResponse.json(),
|
|
77
|
+
(0, import_audio.getAudioDuration)(filePath)
|
|
78
|
+
]);
|
|
79
|
+
whisperConfig = {
|
|
80
|
+
...config,
|
|
81
|
+
audioDuration: audioLength
|
|
82
|
+
};
|
|
83
|
+
const effectiveProcessors = (0, import_WhisperTimeline.calculateEffectiveProcessors)(
|
|
84
|
+
audioLength,
|
|
85
|
+
whisperConfig.maxProcessors
|
|
86
|
+
);
|
|
87
|
+
if (effectiveProcessors !== whisperConfig.processors) {
|
|
88
|
+
const configForm = new FormData();
|
|
89
|
+
configForm.append("processors", String(effectiveProcessors));
|
|
90
|
+
configForm.append("threads", String(whisperConfig.threads));
|
|
91
|
+
await fetch(`${baseUrl}/config`, {
|
|
92
|
+
method: "POST",
|
|
93
|
+
headers,
|
|
94
|
+
body: configForm
|
|
95
|
+
});
|
|
96
|
+
whisperConfig.processors = effectiveProcessors;
|
|
97
|
+
}
|
|
98
|
+
} catch (e) {
|
|
99
|
+
console.warn(
|
|
100
|
+
`Failed to get config from Whisper server, continuing with default config. If you aren't using ghost-story server, this is expected`,
|
|
101
|
+
e
|
|
102
|
+
);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
67
105
|
const response = await timing.timeAsync(
|
|
68
106
|
"upload",
|
|
69
107
|
async () => fetch(url, {
|
|
@@ -81,7 +119,8 @@ async function recognize(input, languageCode, timing, options) {
|
|
|
81
119
|
const data = await response.json();
|
|
82
120
|
const { timeline, transcript } = await extractTimelineAndTranscript(
|
|
83
121
|
data,
|
|
84
|
-
filePath
|
|
122
|
+
filePath,
|
|
123
|
+
whisperConfig
|
|
85
124
|
);
|
|
86
125
|
if (!timeline) {
|
|
87
126
|
throw new Error(
|
|
@@ -93,7 +132,7 @@ async function recognize(input, languageCode, timing, options) {
|
|
|
93
132
|
await prepared.cleanup();
|
|
94
133
|
}
|
|
95
134
|
}
|
|
96
|
-
async function extractTimelineAndTranscript(response, audioPath) {
|
|
135
|
+
async function extractTimelineAndTranscript(response, audioPath, whisperConfig) {
|
|
97
136
|
var _a, _b, _c, _d;
|
|
98
137
|
if (response.segments.length === 0) {
|
|
99
138
|
return { timeline: [], transcript: ((_a = response.text) == null ? void 0 : _a.trim()) ?? "" };
|
|
@@ -101,7 +140,10 @@ async function extractTimelineAndTranscript(response, audioPath) {
|
|
|
101
140
|
const hasNestedWords = (((_c = (_b = response.segments[0]) == null ? void 0 : _b.words) == null ? void 0 : _c.length) ?? 0) > 0;
|
|
102
141
|
if (hasNestedWords) {
|
|
103
142
|
const rawSegments = (0, import_WhisperTimeline.parseWhisperServerOutput)(response.segments);
|
|
104
|
-
const splitBoundaries =
|
|
143
|
+
const splitBoundaries = (whisperConfig == null ? void 0 : whisperConfig.audioDuration) ? (0, import_WhisperTimeline.calculateWhisperSplits)(
|
|
144
|
+
whisperConfig.audioDuration,
|
|
145
|
+
whisperConfig.processors
|
|
146
|
+
) : await detectSplitBoundaries(rawSegments, audioPath);
|
|
105
147
|
const timeline2 = (0, import_WhisperTimeline.extractCorrectedTimeline)(rawSegments, { splitBoundaries });
|
|
106
148
|
const transcript = timeline2.map((entry) => entry.text).join(" ");
|
|
107
149
|
return { timeline: timeline2, transcript };
|
|
package/dist/recognition/WhisperServerSTT.js
CHANGED
|
@@ -9,6 +9,7 @@ import {
|
|
|
9
9
|
} from "../audio/index.js";
|
|
10
10
|
import { createTimeoutAgent } from "../fetch.js";
|
|
11
11
|
import {
|
|
12
|
+
calculateEffectiveProcessors,
|
|
12
13
|
calculateWhisperSplits,
|
|
13
14
|
countProcessorBoundaries,
|
|
14
15
|
extractCorrectedTimeline,
|
|
@@ -46,11 +47,49 @@ async function recognize(input, languageCode, timing, options) {
|
|
|
46
47
|
if (languageCode) {
|
|
47
48
|
form.append("language", languageCode);
|
|
48
49
|
}
|
|
49
|
-
const
|
|
50
|
+
const baseUrl = opts.baseURL.replace(/\/+$/g, "");
|
|
51
|
+
const url = `${baseUrl}${opts.inferencePath}`;
|
|
50
52
|
const headers = {};
|
|
51
53
|
if (opts.apiKey) {
|
|
52
54
|
headers["Authorization"] = `Bearer ${opts.apiKey}`;
|
|
53
55
|
}
|
|
56
|
+
const configResponse = await fetch(`${baseUrl}/config`, {
|
|
57
|
+
headers,
|
|
58
|
+
dispatcher: createTimeoutAgent(opts.timeout)
|
|
59
|
+
});
|
|
60
|
+
let whisperConfig = null;
|
|
61
|
+
if (configResponse.ok) {
|
|
62
|
+
try {
|
|
63
|
+
const [config, audioLength] = await Promise.all([
|
|
64
|
+
configResponse.json(),
|
|
65
|
+
getAudioDuration(filePath)
|
|
66
|
+
]);
|
|
67
|
+
whisperConfig = {
|
|
68
|
+
...config,
|
|
69
|
+
audioDuration: audioLength
|
|
70
|
+
};
|
|
71
|
+
const effectiveProcessors = calculateEffectiveProcessors(
|
|
72
|
+
audioLength,
|
|
73
|
+
whisperConfig.maxProcessors
|
|
74
|
+
);
|
|
75
|
+
if (effectiveProcessors !== whisperConfig.processors) {
|
|
76
|
+
const configForm = new FormData();
|
|
77
|
+
configForm.append("processors", String(effectiveProcessors));
|
|
78
|
+
configForm.append("threads", String(whisperConfig.threads));
|
|
79
|
+
await fetch(`${baseUrl}/config`, {
|
|
80
|
+
method: "POST",
|
|
81
|
+
headers,
|
|
82
|
+
body: configForm
|
|
83
|
+
});
|
|
84
|
+
whisperConfig.processors = effectiveProcessors;
|
|
85
|
+
}
|
|
86
|
+
} catch (e) {
|
|
87
|
+
console.warn(
|
|
88
|
+
`Failed to get config from Whisper server, continuing with default config. If you aren't using ghost-story server, this is expected`,
|
|
89
|
+
e
|
|
90
|
+
);
|
|
91
|
+
}
|
|
92
|
+
}
|
|
54
93
|
const response = await timing.timeAsync(
|
|
55
94
|
"upload",
|
|
56
95
|
async () => fetch(url, {
|
|
@@ -68,7 +107,8 @@ async function recognize(input, languageCode, timing, options) {
|
|
|
68
107
|
const data = await response.json();
|
|
69
108
|
const { timeline, transcript } = await extractTimelineAndTranscript(
|
|
70
109
|
data,
|
|
71
|
-
filePath
|
|
110
|
+
filePath,
|
|
111
|
+
whisperConfig
|
|
72
112
|
);
|
|
73
113
|
if (!timeline) {
|
|
74
114
|
throw new Error(
|
|
@@ -80,7 +120,7 @@ async function recognize(input, languageCode, timing, options) {
|
|
|
80
120
|
await prepared.cleanup();
|
|
81
121
|
}
|
|
82
122
|
}
|
|
83
|
-
async function extractTimelineAndTranscript(response, audioPath) {
|
|
123
|
+
async function extractTimelineAndTranscript(response, audioPath, whisperConfig) {
|
|
84
124
|
var _a, _b, _c, _d;
|
|
85
125
|
if (response.segments.length === 0) {
|
|
86
126
|
return { timeline: [], transcript: ((_a = response.text) == null ? void 0 : _a.trim()) ?? "" };
|
|
@@ -88,7 +128,10 @@ async function extractTimelineAndTranscript(response, audioPath) {
|
|
|
88
128
|
const hasNestedWords = (((_c = (_b = response.segments[0]) == null ? void 0 : _b.words) == null ? void 0 : _c.length) ?? 0) > 0;
|
|
89
129
|
if (hasNestedWords) {
|
|
90
130
|
const rawSegments = parseWhisperServerOutput(response.segments);
|
|
91
|
-
const splitBoundaries =
|
|
131
|
+
const splitBoundaries = (whisperConfig == null ? void 0 : whisperConfig.audioDuration) ? calculateWhisperSplits(
|
|
132
|
+
whisperConfig.audioDuration,
|
|
133
|
+
whisperConfig.processors
|
|
134
|
+
) : await detectSplitBoundaries(rawSegments, audioPath);
|
|
92
135
|
const timeline2 = extractCorrectedTimeline(rawSegments, { splitBoundaries });
|
|
93
136
|
const transcript = timeline2.map((entry) => entry.text).join(" ");
|
|
94
137
|
return { timeline: timeline2, transcript };
|
|
package/dist/utilities/WhisperTimeline.cjs
CHANGED
|
@@ -302,14 +302,28 @@ function extractCorrectedTimeline(segments, options = {}) {
|
|
|
302
302
|
if (!segment) continue;
|
|
303
303
|
const segmentStart = segment.segmentStart;
|
|
304
304
|
const segmentEnd = segment.segmentEnd < segment.segmentStart ? segment.segmentStart : segment.segmentEnd;
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
305
|
+
if (splitBoundaries.length > 0) {
|
|
306
|
+
const boundary = splitBoundaries.find((boundary2) => {
|
|
307
|
+
return Math.abs(boundary2 - segmentStart) < 2;
|
|
308
|
+
});
|
|
309
|
+
if (boundary) {
|
|
310
|
+
state.cumulativeOffset = getBetterCumulativeOffset(
|
|
311
|
+
state,
|
|
312
|
+
segment,
|
|
313
|
+
splitBoundaries,
|
|
314
|
+
usedSplits
|
|
315
|
+
);
|
|
316
|
+
}
|
|
317
|
+
} else {
|
|
318
|
+
const boundary = detectProcessorBoundary(segment, state);
|
|
319
|
+
if (boundary.isBoundary) {
|
|
320
|
+
state.cumulativeOffset = getBetterCumulativeOffset(
|
|
321
|
+
state,
|
|
322
|
+
segment,
|
|
323
|
+
splitBoundaries,
|
|
324
|
+
usedSplits
|
|
325
|
+
);
|
|
326
|
+
}
|
|
313
327
|
}
|
|
314
328
|
const nextSegment = segments[i + 1] ?? null;
|
|
315
329
|
if (nextSegment && isTimeTravelingSegment(nextSegment)) {
|
|
package/dist/utilities/WhisperTimeline.js
CHANGED
|
@@ -272,14 +272,28 @@ function extractCorrectedTimeline(segments, options = {}) {
|
|
|
272
272
|
if (!segment) continue;
|
|
273
273
|
const segmentStart = segment.segmentStart;
|
|
274
274
|
const segmentEnd = segment.segmentEnd < segment.segmentStart ? segment.segmentStart : segment.segmentEnd;
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
275
|
+
if (splitBoundaries.length > 0) {
|
|
276
|
+
const boundary = splitBoundaries.find((boundary2) => {
|
|
277
|
+
return Math.abs(boundary2 - segmentStart) < 2;
|
|
278
|
+
});
|
|
279
|
+
if (boundary) {
|
|
280
|
+
state.cumulativeOffset = getBetterCumulativeOffset(
|
|
281
|
+
state,
|
|
282
|
+
segment,
|
|
283
|
+
splitBoundaries,
|
|
284
|
+
usedSplits
|
|
285
|
+
);
|
|
286
|
+
}
|
|
287
|
+
} else {
|
|
288
|
+
const boundary = detectProcessorBoundary(segment, state);
|
|
289
|
+
if (boundary.isBoundary) {
|
|
290
|
+
state.cumulativeOffset = getBetterCumulativeOffset(
|
|
291
|
+
state,
|
|
292
|
+
segment,
|
|
293
|
+
splitBoundaries,
|
|
294
|
+
usedSplits
|
|
295
|
+
);
|
|
296
|
+
}
|
|
283
297
|
}
|
|
284
298
|
const nextSegment = segments[i + 1] ?? null;
|
|
285
299
|
if (nextSegment && isTimeTravelingSegment(nextSegment)) {
|
package/package.json
CHANGED