@storyteller-platform/ghost-story 0.1.10 → 0.1.11

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -72,6 +72,9 @@ __export(index_exports, {
72
72
  serviceCapabilities: () => import_audio.serviceCapabilities,
73
73
  setConversionMode: () => import_config2.setConversionMode,
74
74
  setTimingEnabled: () => import_config2.setTimingEnabled,
75
+ spacelessScriptPattern: () => import_SpacelessScripts.spacelessScriptPattern,
76
+ spacelessScripts: () => import_SpacelessScripts.spacelessScripts,
77
+ startsWithSpacelessScript: () => import_SpacelessScripts.startsWithSpacelessScript,
75
78
  toBuffer: () => import_audio.toBuffer,
76
79
  toFilePath: () => import_audio.toFilePath,
77
80
  toReadStream: () => import_audio.toReadStream,
@@ -87,6 +90,7 @@ var import_config2 = require("./config.cjs");
87
90
  var import_OpenAICloudSTT = require("./recognition/OpenAICloudSTT.cjs");
88
91
  var import_WhisperCppSTT = require("./recognition/WhisperCppSTT.cjs");
89
92
  var import_WhisperServerSTT = require("./recognition/WhisperServerSTT.cjs");
93
+ var import_SpacelessScripts = require("./utilities/SpacelessScripts.cjs");
90
94
  var import_Timing = require("./utilities/Timing.cjs");
91
95
  var import_Silero = require("./vad/Silero.cjs");
92
96
  // Annotate the CommonJS export names for ESM import in node:
@@ -145,6 +149,9 @@ var import_Silero = require("./vad/Silero.cjs");
145
149
  serviceCapabilities,
146
150
  setConversionMode,
147
151
  setTimingEnabled,
152
+ spacelessScriptPattern,
153
+ spacelessScripts,
154
+ startsWithSpacelessScript,
148
155
  toBuffer,
149
156
  toFilePath,
150
157
  toReadStream,
package/dist/index.d.cts CHANGED
@@ -9,6 +9,7 @@ export { OpenAICloudSTTOptions, RecognitionResult as OpenAIResult, inputPreferen
9
9
  export { WhisperCppModelId, WhisperCppOptions, RecognitionResult as WhisperCppResult, recognize as recognizeWhisperCpp, inputPreference as whisperCppInputPreference } from './recognition/WhisperCppSTT.cjs';
10
10
  export { WhisperServerOptions, RecognitionResult as WhisperServerResult, recognize as recognizeWhisperServer, inputPreference as whisperServerInputPreference } from './recognition/WhisperServerSTT.cjs';
11
11
  export { Timeline, TimelineEntry, TimelineEntryType } from './utilities/Timeline.cjs';
12
+ export { spacelessScriptPattern, spacelessScripts, startsWithSpacelessScript } from './utilities/SpacelessScripts.cjs';
12
13
  export { AggregatedStats, PhaseTiming, Timing, TimingAggregator, TimingSpan, TimingSummary, createAggregator, createTiming, formatDuration, formatPercentage, formatSingleReport, printSingleReport } from './utilities/Timing.cjs';
13
14
  export { SileroOptions, VadSegment, detectVoiceActivity, ensureVadInstalled, segmentsToTimeline } from './vad/Silero.cjs';
14
15
  import 'node:fs';
package/dist/index.d.ts CHANGED
@@ -9,6 +9,7 @@ export { OpenAICloudSTTOptions, RecognitionResult as OpenAIResult, inputPreferen
9
9
  export { WhisperCppModelId, WhisperCppOptions, RecognitionResult as WhisperCppResult, recognize as recognizeWhisperCpp, inputPreference as whisperCppInputPreference } from './recognition/WhisperCppSTT.js';
10
10
  export { WhisperServerOptions, RecognitionResult as WhisperServerResult, recognize as recognizeWhisperServer, inputPreference as whisperServerInputPreference } from './recognition/WhisperServerSTT.js';
11
11
  export { Timeline, TimelineEntry, TimelineEntryType } from './utilities/Timeline.js';
12
+ export { spacelessScriptPattern, spacelessScripts, startsWithSpacelessScript } from './utilities/SpacelessScripts.js';
12
13
  export { AggregatedStats, PhaseTiming, Timing, TimingAggregator, TimingSpan, TimingSummary, createAggregator, createTiming, formatDuration, formatPercentage, formatSingleReport, printSingleReport } from './utilities/Timing.js';
13
14
  export { SileroOptions, VadSegment, detectVoiceActivity, ensureVadInstalled, segmentsToTimeline } from './vad/Silero.js';
14
15
  import 'node:fs';
package/dist/index.js CHANGED
@@ -62,6 +62,11 @@ import {
62
62
  inputPreference as inputPreference3,
63
63
  recognize as recognize4
64
64
  } from "./recognition/WhisperServerSTT.js";
65
+ import {
66
+ spacelessScriptPattern,
67
+ spacelessScripts,
68
+ startsWithSpacelessScript
69
+ } from "./utilities/SpacelessScripts.js";
65
70
  import {
66
71
  Timing,
67
72
  TimingAggregator,
@@ -132,6 +137,9 @@ export {
132
137
  serviceCapabilities,
133
138
  setConversionMode,
134
139
  setTimingEnabled,
140
+ spacelessScriptPattern,
141
+ spacelessScripts,
142
+ startsWithSpacelessScript,
135
143
  toBuffer,
136
144
  toFilePath,
137
145
  toReadStream,
@@ -59,11 +59,49 @@ async function recognize(input, languageCode, timing, options) {
59
59
  if (languageCode) {
60
60
  form.append("language", languageCode);
61
61
  }
62
- const url = `${opts.baseURL}${opts.inferencePath}`;
62
+ const baseUrl = opts.baseURL.replace(/\/+$/g, "");
63
+ const url = `${baseUrl}${opts.inferencePath}`;
63
64
  const headers = {};
64
65
  if (opts.apiKey) {
65
66
  headers["Authorization"] = `Bearer ${opts.apiKey}`;
66
67
  }
68
+ const configResponse = await fetch(`${baseUrl}/config`, {
69
+ headers,
70
+ dispatcher: (0, import_fetch.createTimeoutAgent)(opts.timeout)
71
+ });
72
+ let whisperConfig = null;
73
+ if (configResponse.ok) {
74
+ try {
75
+ const [config, audioLength] = await Promise.all([
76
+ configResponse.json(),
77
+ (0, import_audio.getAudioDuration)(filePath)
78
+ ]);
79
+ whisperConfig = {
80
+ ...config,
81
+ audioDuration: audioLength
82
+ };
83
+ const effectiveProcessors = (0, import_WhisperTimeline.calculateEffectiveProcessors)(
84
+ audioLength,
85
+ whisperConfig.maxProcessors
86
+ );
87
+ if (effectiveProcessors !== whisperConfig.processors) {
88
+ const configForm = new FormData();
89
+ configForm.append("processors", String(effectiveProcessors));
90
+ configForm.append("threads", String(whisperConfig.threads));
91
+ await fetch(`${baseUrl}/config`, {
92
+ method: "POST",
93
+ headers,
94
+ body: configForm
95
+ });
96
+ whisperConfig.processors = effectiveProcessors;
97
+ }
98
+ } catch (e) {
99
+ console.warn(
100
+ `Failed to get config from Whisper server, continuing with default config. If you aren't using ghost-story server, this is expected`,
101
+ e
102
+ );
103
+ }
104
+ }
67
105
  const response = await timing.timeAsync(
68
106
  "upload",
69
107
  async () => fetch(url, {
@@ -81,7 +119,8 @@ async function recognize(input, languageCode, timing, options) {
81
119
  const data = await response.json();
82
120
  const { timeline, transcript } = await extractTimelineAndTranscript(
83
121
  data,
84
- filePath
122
+ filePath,
123
+ whisperConfig
85
124
  );
86
125
  if (!timeline) {
87
126
  throw new Error(
@@ -93,7 +132,7 @@ async function recognize(input, languageCode, timing, options) {
93
132
  await prepared.cleanup();
94
133
  }
95
134
  }
96
- async function extractTimelineAndTranscript(response, audioPath) {
135
+ async function extractTimelineAndTranscript(response, audioPath, whisperConfig) {
97
136
  var _a, _b, _c, _d;
98
137
  if (response.segments.length === 0) {
99
138
  return { timeline: [], transcript: ((_a = response.text) == null ? void 0 : _a.trim()) ?? "" };
@@ -101,7 +140,10 @@ async function extractTimelineAndTranscript(response, audioPath) {
101
140
  const hasNestedWords = (((_c = (_b = response.segments[0]) == null ? void 0 : _b.words) == null ? void 0 : _c.length) ?? 0) > 0;
102
141
  if (hasNestedWords) {
103
142
  const rawSegments = (0, import_WhisperTimeline.parseWhisperServerOutput)(response.segments);
104
- const splitBoundaries = await detectSplitBoundaries(rawSegments, audioPath);
143
+ const splitBoundaries = (whisperConfig == null ? void 0 : whisperConfig.audioDuration) ? (0, import_WhisperTimeline.calculateWhisperSplits)(
144
+ whisperConfig.audioDuration,
145
+ whisperConfig.processors
146
+ ) : await detectSplitBoundaries(rawSegments, audioPath);
105
147
  const timeline2 = (0, import_WhisperTimeline.extractCorrectedTimeline)(rawSegments, { splitBoundaries });
106
148
  const transcript = timeline2.map((entry) => entry.text).join(" ");
107
149
  return { timeline: timeline2, transcript };
@@ -9,6 +9,7 @@ import {
9
9
  } from "../audio/index.js";
10
10
  import { createTimeoutAgent } from "../fetch.js";
11
11
  import {
12
+ calculateEffectiveProcessors,
12
13
  calculateWhisperSplits,
13
14
  countProcessorBoundaries,
14
15
  extractCorrectedTimeline,
@@ -46,11 +47,49 @@ async function recognize(input, languageCode, timing, options) {
46
47
  if (languageCode) {
47
48
  form.append("language", languageCode);
48
49
  }
49
- const url = `${opts.baseURL}${opts.inferencePath}`;
50
+ const baseUrl = opts.baseURL.replace(/\/+$/g, "");
51
+ const url = `${baseUrl}${opts.inferencePath}`;
50
52
  const headers = {};
51
53
  if (opts.apiKey) {
52
54
  headers["Authorization"] = `Bearer ${opts.apiKey}`;
53
55
  }
56
+ const configResponse = await fetch(`${baseUrl}/config`, {
57
+ headers,
58
+ dispatcher: createTimeoutAgent(opts.timeout)
59
+ });
60
+ let whisperConfig = null;
61
+ if (configResponse.ok) {
62
+ try {
63
+ const [config, audioLength] = await Promise.all([
64
+ configResponse.json(),
65
+ getAudioDuration(filePath)
66
+ ]);
67
+ whisperConfig = {
68
+ ...config,
69
+ audioDuration: audioLength
70
+ };
71
+ const effectiveProcessors = calculateEffectiveProcessors(
72
+ audioLength,
73
+ whisperConfig.maxProcessors
74
+ );
75
+ if (effectiveProcessors !== whisperConfig.processors) {
76
+ const configForm = new FormData();
77
+ configForm.append("processors", String(effectiveProcessors));
78
+ configForm.append("threads", String(whisperConfig.threads));
79
+ await fetch(`${baseUrl}/config`, {
80
+ method: "POST",
81
+ headers,
82
+ body: configForm
83
+ });
84
+ whisperConfig.processors = effectiveProcessors;
85
+ }
86
+ } catch (e) {
87
+ console.warn(
88
+ `Failed to get config from Whisper server, continuing with default config. If you aren't using ghost-story server, this is expected`,
89
+ e
90
+ );
91
+ }
92
+ }
54
93
  const response = await timing.timeAsync(
55
94
  "upload",
56
95
  async () => fetch(url, {
@@ -68,7 +107,8 @@ async function recognize(input, languageCode, timing, options) {
68
107
  const data = await response.json();
69
108
  const { timeline, transcript } = await extractTimelineAndTranscript(
70
109
  data,
71
- filePath
110
+ filePath,
111
+ whisperConfig
72
112
  );
73
113
  if (!timeline) {
74
114
  throw new Error(
@@ -80,7 +120,7 @@ async function recognize(input, languageCode, timing, options) {
80
120
  await prepared.cleanup();
81
121
  }
82
122
  }
83
- async function extractTimelineAndTranscript(response, audioPath) {
123
+ async function extractTimelineAndTranscript(response, audioPath, whisperConfig) {
84
124
  var _a, _b, _c, _d;
85
125
  if (response.segments.length === 0) {
86
126
  return { timeline: [], transcript: ((_a = response.text) == null ? void 0 : _a.trim()) ?? "" };
@@ -88,7 +128,10 @@ async function extractTimelineAndTranscript(response, audioPath) {
88
128
  const hasNestedWords = (((_c = (_b = response.segments[0]) == null ? void 0 : _b.words) == null ? void 0 : _c.length) ?? 0) > 0;
89
129
  if (hasNestedWords) {
90
130
  const rawSegments = parseWhisperServerOutput(response.segments);
91
- const splitBoundaries = await detectSplitBoundaries(rawSegments, audioPath);
131
+ const splitBoundaries = (whisperConfig == null ? void 0 : whisperConfig.audioDuration) ? calculateWhisperSplits(
132
+ whisperConfig.audioDuration,
133
+ whisperConfig.processors
134
+ ) : await detectSplitBoundaries(rawSegments, audioPath);
92
135
  const timeline2 = extractCorrectedTimeline(rawSegments, { splitBoundaries });
93
136
  const transcript = timeline2.map((entry) => entry.text).join(" ");
94
137
  return { timeline: timeline2, transcript };
@@ -302,14 +302,28 @@ function extractCorrectedTimeline(segments, options = {}) {
302
302
  if (!segment) continue;
303
303
  const segmentStart = segment.segmentStart;
304
304
  const segmentEnd = segment.segmentEnd < segment.segmentStart ? segment.segmentStart : segment.segmentEnd;
305
- const boundary = detectProcessorBoundary(segment, state);
306
- if (boundary.isBoundary) {
307
- state.cumulativeOffset = getBetterCumulativeOffset(
308
- state,
309
- segment,
310
- splitBoundaries,
311
- usedSplits
312
- );
305
+ if (splitBoundaries.length > 0) {
306
+ const boundary = splitBoundaries.find((boundary2) => {
307
+ return Math.abs(boundary2 - segmentStart) < 2;
308
+ });
309
+ if (boundary) {
310
+ state.cumulativeOffset = getBetterCumulativeOffset(
311
+ state,
312
+ segment,
313
+ splitBoundaries,
314
+ usedSplits
315
+ );
316
+ }
317
+ } else {
318
+ const boundary = detectProcessorBoundary(segment, state);
319
+ if (boundary.isBoundary) {
320
+ state.cumulativeOffset = getBetterCumulativeOffset(
321
+ state,
322
+ segment,
323
+ splitBoundaries,
324
+ usedSplits
325
+ );
326
+ }
313
327
  }
314
328
  const nextSegment = segments[i + 1] ?? null;
315
329
  if (nextSegment && isTimeTravelingSegment(nextSegment)) {
@@ -272,14 +272,28 @@ function extractCorrectedTimeline(segments, options = {}) {
272
272
  if (!segment) continue;
273
273
  const segmentStart = segment.segmentStart;
274
274
  const segmentEnd = segment.segmentEnd < segment.segmentStart ? segment.segmentStart : segment.segmentEnd;
275
- const boundary = detectProcessorBoundary(segment, state);
276
- if (boundary.isBoundary) {
277
- state.cumulativeOffset = getBetterCumulativeOffset(
278
- state,
279
- segment,
280
- splitBoundaries,
281
- usedSplits
282
- );
275
+ if (splitBoundaries.length > 0) {
276
+ const boundary = splitBoundaries.find((boundary2) => {
277
+ return Math.abs(boundary2 - segmentStart) < 2;
278
+ });
279
+ if (boundary) {
280
+ state.cumulativeOffset = getBetterCumulativeOffset(
281
+ state,
282
+ segment,
283
+ splitBoundaries,
284
+ usedSplits
285
+ );
286
+ }
287
+ } else {
288
+ const boundary = detectProcessorBoundary(segment, state);
289
+ if (boundary.isBoundary) {
290
+ state.cumulativeOffset = getBetterCumulativeOffset(
291
+ state,
292
+ segment,
293
+ splitBoundaries,
294
+ usedSplits
295
+ );
296
+ }
283
297
  }
284
298
  const nextSegment = segments[i + 1] ?? null;
285
299
  if (nextSegment && isTimeTravelingSegment(nextSegment)) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@storyteller-platform/ghost-story",
3
- "version": "0.1.10",
3
+ "version": "0.1.11",
4
4
  "description": "An easy-to-use speech toolset. Fork of the original echogarden project.",
5
5
  "author": "Thomas F. K. Jorna",
6
6
  "license": "GPL-3.0",