@storyteller-platform/ghost-story 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/LICENSE.md +611 -0
  2. package/README.md +18 -0
  3. package/dist/api/APIOptions.cjs +16 -0
  4. package/dist/api/APIOptions.d.cts +18 -0
  5. package/dist/api/APIOptions.d.ts +18 -0
  6. package/dist/api/APIOptions.js +0 -0
  7. package/dist/api/Recognition.cjs +263 -0
  8. package/dist/api/Recognition.d.cts +77 -0
  9. package/dist/api/Recognition.d.ts +77 -0
  10. package/dist/api/Recognition.js +233 -0
  11. package/dist/api/VoiceActivityDetection.cjs +77 -0
  12. package/dist/api/VoiceActivityDetection.d.cts +24 -0
  13. package/dist/api/VoiceActivityDetection.d.ts +24 -0
  14. package/dist/api/VoiceActivityDetection.js +43 -0
  15. package/dist/audio/AudioConverter.cjs +331 -0
  16. package/dist/audio/AudioConverter.d.cts +53 -0
  17. package/dist/audio/AudioConverter.d.ts +53 -0
  18. package/dist/audio/AudioConverter.js +310 -0
  19. package/dist/audio/AudioFormat.cjs +151 -0
  20. package/dist/audio/AudioFormat.d.cts +25 -0
  21. package/dist/audio/AudioFormat.d.ts +25 -0
  22. package/dist/audio/AudioFormat.js +123 -0
  23. package/dist/audio/AudioSource.cjs +119 -0
  24. package/dist/audio/AudioSource.d.cts +33 -0
  25. package/dist/audio/AudioSource.d.ts +33 -0
  26. package/dist/audio/AudioSource.js +88 -0
  27. package/dist/audio/index.cjs +74 -0
  28. package/dist/audio/index.d.cts +6 -0
  29. package/dist/audio/index.d.ts +6 -0
  30. package/dist/audio/index.js +54 -0
  31. package/dist/cli/bin.cjs +277 -0
  32. package/dist/cli/bin.d.cts +1 -0
  33. package/dist/cli/bin.d.ts +1 -0
  34. package/dist/cli/bin.js +275 -0
  35. package/dist/cli/config.cjs +347 -0
  36. package/dist/cli/config.d.cts +33 -0
  37. package/dist/cli/config.d.ts +33 -0
  38. package/dist/cli/config.js +285 -0
  39. package/dist/cli/install.cjs +334 -0
  40. package/dist/cli/install.d.cts +62 -0
  41. package/dist/cli/install.d.ts +62 -0
  42. package/dist/cli/install.js +316 -0
  43. package/dist/cli/whisper-server.cjs +172 -0
  44. package/dist/cli/whisper-server.d.cts +24 -0
  45. package/dist/cli/whisper-server.d.ts +24 -0
  46. package/dist/cli/whisper-server.js +152 -0
  47. package/dist/config.cjs +60 -0
  48. package/dist/config.d.cts +12 -0
  49. package/dist/config.d.ts +12 -0
  50. package/dist/config.js +32 -0
  51. package/dist/convert.cjs +88 -0
  52. package/dist/convert.d.cts +12 -0
  53. package/dist/convert.d.ts +12 -0
  54. package/dist/convert.js +63 -0
  55. package/dist/encodings/Ascii.cjs +75 -0
  56. package/dist/encodings/Ascii.d.cts +13 -0
  57. package/dist/encodings/Ascii.d.ts +13 -0
  58. package/dist/encodings/Ascii.js +48 -0
  59. package/dist/encodings/Base64.cjs +155 -0
  60. package/dist/encodings/Base64.d.cts +5 -0
  61. package/dist/encodings/Base64.d.ts +5 -0
  62. package/dist/encodings/Base64.js +129 -0
  63. package/dist/encodings/TextEncodingsCommon.cjs +16 -0
  64. package/dist/encodings/TextEncodingsCommon.d.cts +6 -0
  65. package/dist/encodings/TextEncodingsCommon.d.ts +6 -0
  66. package/dist/encodings/TextEncodingsCommon.js +0 -0
  67. package/dist/index.cjs +153 -0
  68. package/dist/index.d.cts +15 -0
  69. package/dist/index.d.ts +15 -0
  70. package/dist/index.js +140 -0
  71. package/dist/recognition/AmazonTranscribeSTT.cjs +188 -0
  72. package/dist/recognition/AmazonTranscribeSTT.d.cts +21 -0
  73. package/dist/recognition/AmazonTranscribeSTT.d.ts +21 -0
  74. package/dist/recognition/AmazonTranscribeSTT.js +160 -0
  75. package/dist/recognition/AzureCognitiveServicesSTT.cjs +124 -0
  76. package/dist/recognition/AzureCognitiveServicesSTT.d.cts +21 -0
  77. package/dist/recognition/AzureCognitiveServicesSTT.d.ts +21 -0
  78. package/dist/recognition/AzureCognitiveServicesSTT.js +95 -0
  79. package/dist/recognition/DeepgramSTT.cjs +172 -0
  80. package/dist/recognition/DeepgramSTT.d.cts +23 -0
  81. package/dist/recognition/DeepgramSTT.d.ts +23 -0
  82. package/dist/recognition/DeepgramSTT.js +153 -0
  83. package/dist/recognition/GoogleCloudSTT.cjs +125 -0
  84. package/dist/recognition/GoogleCloudSTT.d.cts +35 -0
  85. package/dist/recognition/GoogleCloudSTT.d.ts +35 -0
  86. package/dist/recognition/GoogleCloudSTT.js +107 -0
  87. package/dist/recognition/OpenAICloudSTT.cjs +180 -0
  88. package/dist/recognition/OpenAICloudSTT.d.cts +29 -0
  89. package/dist/recognition/OpenAICloudSTT.d.ts +29 -0
  90. package/dist/recognition/OpenAICloudSTT.js +150 -0
  91. package/dist/recognition/WhisperCppSTT.cjs +296 -0
  92. package/dist/recognition/WhisperCppSTT.d.cts +40 -0
  93. package/dist/recognition/WhisperCppSTT.d.ts +40 -0
  94. package/dist/recognition/WhisperCppSTT.js +275 -0
  95. package/dist/recognition/WhisperServerSTT.cjs +119 -0
  96. package/dist/recognition/WhisperServerSTT.d.cts +24 -0
  97. package/dist/recognition/WhisperServerSTT.d.ts +24 -0
  98. package/dist/recognition/WhisperServerSTT.js +105 -0
  99. package/dist/utilities/FileSystem.cjs +54 -0
  100. package/dist/utilities/FileSystem.d.cts +3 -0
  101. package/dist/utilities/FileSystem.d.ts +3 -0
  102. package/dist/utilities/FileSystem.js +20 -0
  103. package/dist/utilities/Locale.cjs +46 -0
  104. package/dist/utilities/Locale.d.cts +9 -0
  105. package/dist/utilities/Locale.d.ts +9 -0
  106. package/dist/utilities/Locale.js +20 -0
  107. package/dist/utilities/ObjectUtilities.cjs +41 -0
  108. package/dist/utilities/ObjectUtilities.d.cts +3 -0
  109. package/dist/utilities/ObjectUtilities.d.ts +3 -0
  110. package/dist/utilities/ObjectUtilities.js +7 -0
  111. package/dist/utilities/Timeline.cjs +120 -0
  112. package/dist/utilities/Timeline.d.cts +23 -0
  113. package/dist/utilities/Timeline.d.ts +23 -0
  114. package/dist/utilities/Timeline.js +94 -0
  115. package/dist/utilities/Timing.cjs +287 -0
  116. package/dist/utilities/Timing.d.cts +64 -0
  117. package/dist/utilities/Timing.d.ts +64 -0
  118. package/dist/utilities/Timing.js +256 -0
  119. package/dist/utilities/WhisperTimeline.cjs +344 -0
  120. package/dist/utilities/WhisperTimeline.d.cts +86 -0
  121. package/dist/utilities/WhisperTimeline.d.ts +86 -0
  122. package/dist/utilities/WhisperTimeline.js +313 -0
  123. package/dist/vad/ActiveGate.cjs +357 -0
  124. package/dist/vad/ActiveGate.d.cts +53 -0
  125. package/dist/vad/ActiveGate.d.ts +53 -0
  126. package/dist/vad/ActiveGate.js +329 -0
  127. package/dist/vad/ActiveGateOg.cjs +1366 -0
  128. package/dist/vad/ActiveGateOg.d.cts +33 -0
  129. package/dist/vad/ActiveGateOg.d.ts +33 -0
  130. package/dist/vad/ActiveGateOg.js +1341 -0
  131. package/dist/vad/Silero.cjs +174 -0
  132. package/dist/vad/Silero.d.cts +25 -0
  133. package/dist/vad/Silero.d.ts +25 -0
  134. package/dist/vad/Silero.js +153 -0
  135. package/package.json +125 -0
@@ -0,0 +1,180 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __export = (target, all) => {
9
+ for (var name in all)
10
+ __defProp(target, name, { get: all[name], enumerable: true });
11
+ };
12
+ var __copyProps = (to, from, except, desc) => {
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except)
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
+ }
18
+ return to;
19
+ };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
28
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
+ var OpenAICloudSTT_exports = {};
30
+ __export(OpenAICloudSTT_exports, {
31
+ inputPreference: () => inputPreference,
32
+ recognize: () => recognize
33
+ });
34
+ module.exports = __toCommonJS(OpenAICloudSTT_exports);
35
+ var import_node_fs = require("node:fs");
36
+ var import_audio = require("../audio/index.cjs");
37
+ var import_ObjectUtilities = require("../utilities/ObjectUtilities.cjs");
38
+ const SERVICE_ID = "openai-cloud";
39
+ const inputPreference = "stream";
40
+ const defaultOptions = {
41
+ apiKey: void 0,
42
+ organization: void 0,
43
+ baseURL: void 0,
44
+ model: void 0,
45
+ temperature: 0,
46
+ prompt: void 0,
47
+ timeout: void 0,
48
+ maxRetries: 10,
49
+ requestWordTimestamps: void 0
50
+ };
51
/**
 * Transcribes audio through the OpenAI audio transcription endpoint (or a
 * compatible provider when `options.baseURL` is set).
 *
 * @param input Raw audio input or an existing audio source.
 * @param languageCode Language code forwarded as the request's `language`.
 * @param options Service options merged over `defaultOptions`.
 * @returns {Promise<{transcript: string, timeline: Array}>}
 * @throws {Error} When a custom `baseURL` is used without a `model`, when the
 *   audio cannot be prepared as a file, or when no timeline can be extracted.
 */
async function recognize(input, languageCode, options) {
  const opts = (0, import_ObjectUtilities.extendDeep)(defaultOptions, options);
  const { timing } = opts;
  const usesDefaultProvider = opts.baseURL === void 0;

  // Word timestamps are only requested by default from the stock endpoint.
  if (opts.requestWordTimestamps === void 0) {
    opts.requestWordTimestamps = usesDefaultProvider;
  }

  if (opts.model === void 0) {
    if (!usesDefaultProvider) {
      throw new Error(
        "A custom provider for the OpenAI Cloud API requires specifying a model name"
      );
    }
    opts.model = "whisper-1";
  }

  // Runs a task through the timing recorder when one was supplied.
  const timed = (label, task) => (timing ? timing.timeAsync(label, task) : task());

  const source = (0, import_audio.isAudioSource)(input)
    ? input
    : (0, import_audio.normalizeToAudioSource)(input, opts.inputFormat);

  const prepared = await timed(
    "conversion",
    () => (0, import_audio.prepareForService)(source, { service: SERVICE_ID, preferFile: true })
  );

  const conversionOccurred = source.format !== prepared.source.format;
  timing?.setMetadata("conversionRequired", conversionOccurred);
  timing?.setMetadata("targetFormat", prepared.source.format);

  try {
    const { default: OpenAI } = await import("openai");
    const client = new OpenAI(opts);

    const filePath = (0, import_audio.toFilePath)(prepared.source);
    if (!filePath) {
      throw new Error(
        "OpenAI Cloud STT requires a file path. The audio could not be prepared as a file."
      );
    }

    const request = {
      file: (0, import_node_fs.createReadStream)(filePath),
      model: opts.model,
      language: languageCode,
      prompt: opts.prompt,
      response_format: "verbose_json",
      temperature: opts.temperature,
      timestamp_granularities: opts.requestWordTimestamps ? ["word", "segment"] : void 0,
      stream: false
    };
    const response = await timed(
      "upload",
      () => client.audio.transcriptions.create(request)
    );

    const transcript = response.text.trim();
    const timeline = extractTimeline(response);
    if (!timeline) {
      throw new Error("Failed to extract timeline from OpenAI Cloud response");
    }
    return { transcript, timeline };
  } finally {
    // Always release whatever the preparation step created.
    await prepared.cleanup();
  }
}
105
/**
 * Builds a timeline from a `verbose_json` transcription response.
 *
 * Preference order:
 *  1. top-level `response.words` -> word entries;
 *  2. words nested in `response.segments` (decided by the first segment) ->
 *     word entries via `extractWordTimelineFromSegments`;
 *  3. plain `response.segments` -> segment entries.
 *
 * @param response Parsed transcription response.
 * @returns {Array|undefined} Timeline entries, or `undefined` when the
 *   response carries neither `words` nor a `segments` array, so the caller
 *   can raise its own descriptive error instead of hitting a TypeError.
 */
function extractTimeline(response) {
  if (response.words) {
    return response.words.map((entry) => ({
      type: "word",
      text: entry.word,
      startTime: entry.start,
      endTime: entry.end
    }));
  }

  const segments = response.segments;
  // Some providers omit `segments`; report "no timeline" instead of crashing
  // on `undefined.length`.
  if (!Array.isArray(segments)) {
    return undefined;
  }

  const firstSegmentWords = segments[0]?.words;
  if (firstSegmentWords && firstSegmentWords.length > 0) {
    return extractWordTimelineFromSegments(segments);
  }

  return segments.map((entry) => ({
    type: "segment",
    text: entry.text,
    startTime: entry.start,
    endTime: entry.end
  }));
}
126
/**
 * Flattens the words nested inside segments into a single word timeline.
 *
 * Each word's start/end is shifted by the offset of the split (as computed by
 * `findSplitOffsets`) that its segment belongs to. Empty words and words
 * containing "BLANK_AUDIO" are dropped. A missing `probability` becomes a
 * confidence of 0.
 *
 * @param segments Response segments, each optionally carrying `words`.
 * @returns {Array} Word timeline entries.
 */
function extractWordTimelineFromSegments(segments) {
  if (segments.length === 0) {
    return [];
  }

  const offsets = findSplitOffsets(segments);
  const timeline = [];
  let cursor = 0;

  for (let index = 0; index < segments.length; index++) {
    const words = segments[index]?.words;
    if (!words || words.length === 0 || !words[0]) {
      continue;
    }

    // Advance to the last split whose first segment is at or before this one.
    while (
      cursor < offsets.length - 1 &&
      index >= (offsets[cursor + 1]?.segmentIndex ?? -1)
    ) {
      cursor++;
    }
    const shift = cursor < offsets.length ? (offsets[cursor]?.offset ?? 0) : 0;

    for (const word of words) {
      const text = word.word.trim();
      const isNoise = text === "" || text.includes("BLANK_AUDIO");
      if (isNoise) {
        continue;
      }
      timeline.push({
        type: "word",
        text,
        startTime: word.start + shift,
        endTime: word.end + shift,
        confidence: word.probability ?? 0
      });
    }
  }

  return timeline;
}
159
/**
 * Finds the segments at which word timestamps restart.
 *
 * A split is recorded for the first segment that has words, and for every
 * later segment whose first word starts more than 0.5 before the end of the
 * previous segment's last word. Segments without words are ignored.
 *
 * @param segments Response segments, each optionally carrying `words`.
 * @returns {Array<{segmentIndex: number, offset: number}>} One entry per
 *   split, where `offset` is the `start` of the split's first segment.
 */
function findSplitOffsets(segments) {
  const splits = [];
  let previousEnd = -1; // -1 means "no words seen yet"

  segments.forEach((segment, segmentIndex) => {
    const words = segment?.words;
    if (!words || words.length === 0 || !words[0]) {
      return;
    }

    const startsNewSplit = previousEnd === -1 || words[0].start < previousEnd - 0.5;
    if (startsNewSplit) {
      splits.push({ segmentIndex, offset: segment.start });
    }

    previousEnd = words[words.length - 1]?.end ?? -1;
  });

  return splits;
}
176
+ // Annotate the CommonJS export names for ESM import in node:
177
+ 0 && (module.exports = {
178
+ inputPreference,
179
+ recognize
180
+ });
@@ -0,0 +1,29 @@
1
+ import { AudioFormat } from '../audio/AudioFormat.cjs';
2
+ import { RawAudioInput, AudioSource } from '../audio/AudioSource.cjs';
3
+ import { Timeline } from '../utilities/Timeline.cjs';
4
+ import { Timing } from '../utilities/Timing.cjs';
5
+ import 'node:fs';
6
+ import 'node:stream';
7
+
8
+ type InputPreference = "stream";
9
+ declare const inputPreference: InputPreference;
10
/** Options accepted by the OpenAI Cloud speech-to-text `recognize` function. */
interface OpenAICloudSTTOptions {
    /**
     * Model name sent with the transcription request. Defaults to "whisper-1"
     * when `baseURL` is not set; it is required when `baseURL` is set, so any
     * provider-specific model name is accepted.
     */
    model?: "whisper-1" | (string & {}) | undefined;
    /** API key handed to the OpenAI client. */
    apiKey?: string | undefined;
    /** Organization handed to the OpenAI client. */
    organization?: string | undefined;
    /** Base URL of a custom OpenAI-compatible provider. */
    baseURL?: string | undefined;
    /** Sampling temperature sent with the request. Defaults to 0. */
    temperature?: number | undefined;
    /** Prompt sent with the request. */
    prompt?: string | undefined;
    /** Timeout handed to the OpenAI client. */
    timeout?: number | undefined;
    /** Retry limit handed to the OpenAI client. Defaults to 10. */
    maxRetries?: number | undefined;
    /**
     * Whether to request word-level timestamps. When left undefined it is
     * enabled only if `baseURL` is not set.
     */
    requestWordTimestamps?: boolean | undefined;
    /** Format used when the input is not already an `AudioSource`. */
    inputFormat?: AudioFormat;
    /** Optional recorder for the "conversion" and "upload" phases. */
    timing?: Timing | undefined;
}
23
+ interface RecognitionResult {
24
+ transcript: string;
25
+ timeline?: Timeline;
26
+ }
27
+ declare function recognize(input: RawAudioInput | AudioSource, languageCode: string, options: OpenAICloudSTTOptions): Promise<RecognitionResult>;
28
+
29
+ export { type InputPreference, type OpenAICloudSTTOptions, type RecognitionResult, inputPreference, recognize };
@@ -0,0 +1,29 @@
1
+ import { AudioFormat } from '../audio/AudioFormat.js';
2
+ import { RawAudioInput, AudioSource } from '../audio/AudioSource.js';
3
+ import { Timeline } from '../utilities/Timeline.js';
4
+ import { Timing } from '../utilities/Timing.js';
5
+ import 'node:fs';
6
+ import 'node:stream';
7
+
8
+ type InputPreference = "stream";
9
+ declare const inputPreference: InputPreference;
10
/** Options accepted by the OpenAI Cloud speech-to-text `recognize` function. */
interface OpenAICloudSTTOptions {
    /**
     * Model name sent with the transcription request. Defaults to "whisper-1"
     * when `baseURL` is not set; it is required when `baseURL` is set, so any
     * provider-specific model name is accepted.
     */
    model?: "whisper-1" | (string & {}) | undefined;
    /** API key handed to the OpenAI client. */
    apiKey?: string | undefined;
    /** Organization handed to the OpenAI client. */
    organization?: string | undefined;
    /** Base URL of a custom OpenAI-compatible provider. */
    baseURL?: string | undefined;
    /** Sampling temperature sent with the request. Defaults to 0. */
    temperature?: number | undefined;
    /** Prompt sent with the request. */
    prompt?: string | undefined;
    /** Timeout handed to the OpenAI client. */
    timeout?: number | undefined;
    /** Retry limit handed to the OpenAI client. Defaults to 10. */
    maxRetries?: number | undefined;
    /**
     * Whether to request word-level timestamps. When left undefined it is
     * enabled only if `baseURL` is not set.
     */
    requestWordTimestamps?: boolean | undefined;
    /** Format used when the input is not already an `AudioSource`. */
    inputFormat?: AudioFormat;
    /** Optional recorder for the "conversion" and "upload" phases. */
    timing?: Timing | undefined;
}
23
+ interface RecognitionResult {
24
+ transcript: string;
25
+ timeline?: Timeline;
26
+ }
27
+ declare function recognize(input: RawAudioInput | AudioSource, languageCode: string, options: OpenAICloudSTTOptions): Promise<RecognitionResult>;
28
+
29
+ export { type InputPreference, type OpenAICloudSTTOptions, type RecognitionResult, inputPreference, recognize };
@@ -0,0 +1,150 @@
1
+ import { createReadStream } from "node:fs";
2
+ import {
3
+ isAudioSource,
4
+ normalizeToAudioSource,
5
+ prepareForService,
6
+ toFilePath
7
+ } from "../audio/index.js";
8
+ import { extendDeep } from "../utilities/ObjectUtilities.js";
9
+ const SERVICE_ID = "openai-cloud";
10
+ const inputPreference = "stream";
11
+ const defaultOptions = {
12
+ apiKey: void 0,
13
+ organization: void 0,
14
+ baseURL: void 0,
15
+ model: void 0,
16
+ temperature: 0,
17
+ prompt: void 0,
18
+ timeout: void 0,
19
+ maxRetries: 10,
20
+ requestWordTimestamps: void 0
21
+ };
22
/**
 * Transcribes audio through the OpenAI audio transcription endpoint (or a
 * compatible provider when `options.baseURL` is set).
 *
 * @param input Raw audio input or an existing audio source.
 * @param languageCode Language code forwarded as the request's `language`.
 * @param options Service options merged over `defaultOptions`.
 * @returns {Promise<{transcript: string, timeline: Array}>}
 * @throws {Error} When a custom `baseURL` is used without a `model`, when the
 *   audio cannot be prepared as a file, or when no timeline can be extracted.
 */
async function recognize(input, languageCode, options) {
  const opts = extendDeep(defaultOptions, options);
  const { timing } = opts;
  const usesDefaultProvider = opts.baseURL === void 0;

  // Word timestamps are only requested by default from the stock endpoint.
  if (opts.requestWordTimestamps === void 0) {
    opts.requestWordTimestamps = usesDefaultProvider;
  }

  if (opts.model === void 0) {
    if (!usesDefaultProvider) {
      throw new Error(
        "A custom provider for the OpenAI Cloud API requires specifying a model name"
      );
    }
    opts.model = "whisper-1";
  }

  // Runs a task through the timing recorder when one was supplied.
  const timed = (label, task) => (timing ? timing.timeAsync(label, task) : task());

  const source = isAudioSource(input)
    ? input
    : normalizeToAudioSource(input, opts.inputFormat);

  const prepared = await timed(
    "conversion",
    () => prepareForService(source, { service: SERVICE_ID, preferFile: true })
  );

  const conversionOccurred = source.format !== prepared.source.format;
  timing?.setMetadata("conversionRequired", conversionOccurred);
  timing?.setMetadata("targetFormat", prepared.source.format);

  try {
    const { default: OpenAI } = await import("openai");
    const client = new OpenAI(opts);

    const filePath = toFilePath(prepared.source);
    if (!filePath) {
      throw new Error(
        "OpenAI Cloud STT requires a file path. The audio could not be prepared as a file."
      );
    }

    const request = {
      file: createReadStream(filePath),
      model: opts.model,
      language: languageCode,
      prompt: opts.prompt,
      response_format: "verbose_json",
      temperature: opts.temperature,
      timestamp_granularities: opts.requestWordTimestamps ? ["word", "segment"] : void 0,
      stream: false
    };
    const response = await timed(
      "upload",
      () => client.audio.transcriptions.create(request)
    );

    const transcript = response.text.trim();
    const timeline = extractTimeline(response);
    if (!timeline) {
      throw new Error("Failed to extract timeline from OpenAI Cloud response");
    }
    return { transcript, timeline };
  } finally {
    // Always release whatever the preparation step created.
    await prepared.cleanup();
  }
}
76
/**
 * Builds a timeline from a `verbose_json` transcription response.
 *
 * Preference order:
 *  1. top-level `response.words` -> word entries;
 *  2. words nested in `response.segments` (decided by the first segment) ->
 *     word entries via `extractWordTimelineFromSegments`;
 *  3. plain `response.segments` -> segment entries.
 *
 * @param response Parsed transcription response.
 * @returns {Array|undefined} Timeline entries, or `undefined` when the
 *   response carries neither `words` nor a `segments` array, so the caller
 *   can raise its own descriptive error instead of hitting a TypeError.
 */
function extractTimeline(response) {
  if (response.words) {
    return response.words.map((entry) => ({
      type: "word",
      text: entry.word,
      startTime: entry.start,
      endTime: entry.end
    }));
  }

  const segments = response.segments;
  // Some providers omit `segments`; report "no timeline" instead of crashing
  // on `undefined.length`.
  if (!Array.isArray(segments)) {
    return undefined;
  }

  const firstSegmentWords = segments[0]?.words;
  if (firstSegmentWords && firstSegmentWords.length > 0) {
    return extractWordTimelineFromSegments(segments);
  }

  return segments.map((entry) => ({
    type: "segment",
    text: entry.text,
    startTime: entry.start,
    endTime: entry.end
  }));
}
97
/**
 * Flattens the words nested inside segments into a single word timeline.
 *
 * Each word's start/end is shifted by the offset of the split (as computed by
 * `findSplitOffsets`) that its segment belongs to. Empty words and words
 * containing "BLANK_AUDIO" are dropped. A missing `probability` becomes a
 * confidence of 0.
 *
 * @param segments Response segments, each optionally carrying `words`.
 * @returns {Array} Word timeline entries.
 */
function extractWordTimelineFromSegments(segments) {
  if (segments.length === 0) {
    return [];
  }

  const offsets = findSplitOffsets(segments);
  const timeline = [];
  let cursor = 0;

  for (let index = 0; index < segments.length; index++) {
    const words = segments[index]?.words;
    if (!words || words.length === 0 || !words[0]) {
      continue;
    }

    // Advance to the last split whose first segment is at or before this one.
    while (
      cursor < offsets.length - 1 &&
      index >= (offsets[cursor + 1]?.segmentIndex ?? -1)
    ) {
      cursor++;
    }
    const shift = cursor < offsets.length ? (offsets[cursor]?.offset ?? 0) : 0;

    for (const word of words) {
      const text = word.word.trim();
      const isNoise = text === "" || text.includes("BLANK_AUDIO");
      if (isNoise) {
        continue;
      }
      timeline.push({
        type: "word",
        text,
        startTime: word.start + shift,
        endTime: word.end + shift,
        confidence: word.probability ?? 0
      });
    }
  }

  return timeline;
}
130
/**
 * Finds the segments at which word timestamps restart.
 *
 * A split is recorded for the first segment that has words, and for every
 * later segment whose first word starts more than 0.5 before the end of the
 * previous segment's last word. Segments without words are ignored.
 *
 * @param segments Response segments, each optionally carrying `words`.
 * @returns {Array<{segmentIndex: number, offset: number}>} One entry per
 *   split, where `offset` is the `start` of the split's first segment.
 */
function findSplitOffsets(segments) {
  const splits = [];
  let previousEnd = -1; // -1 means "no words seen yet"

  segments.forEach((segment, segmentIndex) => {
    const words = segment?.words;
    if (!words || words.length === 0 || !words[0]) {
      return;
    }

    const startsNewSplit = previousEnd === -1 || words[0].start < previousEnd - 0.5;
    if (startsNewSplit) {
      splits.push({ segmentIndex, offset: segment.start });
    }

    previousEnd = words[words.length - 1]?.end ?? -1;
  });

  return splits;
}
147
+ export {
148
+ inputPreference,
149
+ recognize
150
+ };
@@ -0,0 +1,296 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __export = (target, all) => {
9
+ for (var name in all)
10
+ __defProp(target, name, { get: all[name], enumerable: true });
11
+ };
12
+ var __copyProps = (to, from, except, desc) => {
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except)
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
+ }
18
+ return to;
19
+ };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
28
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
+ var WhisperCppSTT_exports = {};
30
+ __export(WhisperCppSTT_exports, {
31
+ ensureModelDownloaded: () => ensureModelDownloaded,
32
+ ensureWhisperCppInstalled: () => ensureWhisperCppInstalled,
33
+ inputPreference: () => inputPreference,
34
+ recognize: () => recognize
35
+ });
36
+ module.exports = __toCommonJS(WhisperCppSTT_exports);
37
+ var import_node_child_process = require("node:child_process");
38
+ var import_node_fs = __toESM(require("node:fs"), 1);
39
+ var import_node_os = __toESM(require("node:os"), 1);
40
+ var import_node_path = __toESM(require("node:path"), 1);
41
+ var import_fs_extra = require("fs-extra");
42
+ var import_audio = require("../audio/index.cjs");
43
+ var import_config = require("../cli/config.cjs");
44
+ var import_install = require("../cli/install.cjs");
45
+ var import_WhisperTimeline = require("../utilities/WhisperTimeline.cjs");
46
+ const inputPreference = "file";
47
+ const defaultOptions = {
48
+ processors: 1,
49
+ threads: 4,
50
+ flashAttention: true,
51
+ suppressNonSpeechTokens: true,
52
+ tokenLevelTimestamps: true,
53
+ printOutput: false,
54
+ model: "tiny.en",
55
+ autoInstall: true
56
+ };
57
+ const acceptedFormats = ["wav", "flac", "ogg", "mp3"];
58
/**
 * Transcribes audio with a local whisper.cpp executable.
 *
 * Steps: optionally install the binary and model, convert the input to a
 * 16 kHz mono WAV when its format is not in `acceptedFormats`, run the
 * transcription, then build a corrected timeline from the raw output.
 *
 * @param input Raw audio input or an existing audio source.
 * @param options Options merged (shallowly) over `defaultOptions`.
 * @returns {Promise<{transcript: string, timeline: *, language: *}>}
 * @throws {Error} When the prepared audio has no file path or the file does
 *   not exist; errors from installation and transcription propagate.
 */
async function recognize(input, options) {
  const opts = { ...defaultOptions, ...options };
  const { timing } = opts;
  const modelDir = opts.modelDir ?? (0, import_config.getModelDir)();
  const installDir = opts.installDir ?? (0, import_config.getInstallDir)();
  const source = (0, import_audio.isAudioSource)(input)
    ? input
    : (0, import_audio.normalizeToAudioSource)(input, opts.inputFormat);

  await (0, import_fs_extra.ensureDir)(modelDir);

  // Runs a task through the timing recorder when one was supplied.
  const timed = (label, task) => (timing ? timing.timeAsync(label, task) : task());

  if (opts.autoInstall) {
    await timed("installation", async () => {
      await ensureWhisperCppInstalled();
      await ensureModelDownloaded(modelDir, opts.model, opts.printOutput);
    });
  }

  const needsConversion = !acceptedFormats.includes(source.format);
  timing?.setMetadata("conversionRequired", needsConversion);
  timing?.setMetadata("targetFormat", needsConversion ? "wav" : source.format);

  const prepared = await timed("conversion", async () => {
    if (needsConversion) {
      return (0, import_audio.prepareWavForService)(source, { sampleRate: 16e3, channels: 1 });
    }
    // Nothing was created, so there is nothing to clean up.
    return { source, cleanup: async () => {} };
  });

  try {
    const inputPath = (0, import_audio.toFilePath)(prepared.source);
    if (!inputPath) {
      throw new Error(
        "whisper.cpp requires a file path. The audio could not be prepared as a file."
      );
    }
    if (!(0, import_node_fs.existsSync)(inputPath)) {
      throw new Error(`Input file does not exist: ${inputPath}`);
    }

    const audioDuration = await (0, import_audio.getAudioDuration)(inputPath);
    const effectiveProcessors = (0, import_WhisperTimeline.calculateEffectiveProcessors)(
      audioDuration,
      opts.processors
    );

    const transcription = await timed("transcription", () =>
      transcribe({
        inputPath,
        model: opts.model,
        installDir,
        modelFolder: modelDir,
        language: opts.language ?? null,
        tokenLevelTimestamps: opts.tokenLevelTimestamps,
        printOutput: opts.printOutput,
        flashAttention: opts.flashAttention,
        suppressNonSpeechTokens: opts.suppressNonSpeechTokens,
        processors: effectiveProcessors,
        threads: opts.threads,
        onProgress: opts.onProgress ?? null,
        signal: opts.signal ?? null
      })
    );

    const rawSegments = (0, import_WhisperTimeline.parseWhisperCppOutput)(
      transcription.transcription
    );

    // Split boundaries are only computed when more than one processor ran.
    let splitBoundaries = [];
    if (effectiveProcessors > 1) {
      splitBoundaries = (0, import_WhisperTimeline.calculateWhisperSplits)(
        audioDuration,
        effectiveProcessors
      );
    }
    const timeline = (0, import_WhisperTimeline.extractCorrectedTimeline)(rawSegments, {
      splitBoundaries: splitBoundaries.length > 0 ? splitBoundaries : void 0
    });

    const transcript = transcription.transcription
      .map((segment) => segment.text)
      .join("")
      .trim();

    return {
      transcript,
      timeline,
      language: transcription.result.language
    };
  } finally {
    await prepared.cleanup();
  }
}
131
/**
 * Delegates to `installBinary` with its output disabled.
 *
 * @returns {Promise<void>}
 */
async function ensureWhisperCppInstalled() {
  const installOptions = { printOutput: false };
  await (0, import_install.installBinary)(installOptions);
}
134
/**
 * Installs the named model unless its file already exists on disk.
 *
 * @param modelDir Directory the model is stored in.
 * @param modelName Model name.
 * @param printOutput Forwarded to `installModel`.
 * @returns {Promise<void>}
 */
async function ensureModelDownloaded(modelDir, modelName, printOutput) {
  const modelPath = (0, import_config.getModelPath)(modelName, modelDir);
  const alreadyPresent = (0, import_node_fs.existsSync)(modelPath);
  if (!alreadyPresent) {
    await (0, import_install.installModel)({
      model: modelName,
      modelDir,
      printOutput
    });
  }
}
145
/**
 * Returns the path of a model file inside a folder.
 *
 * @param folder Directory containing the model files.
 * @param model Model name, e.g. "tiny.en".
 * @returns {string} `<folder>/ggml-<model>.bin`, joined for the platform.
 */
function getModelPath(folder, model) {
  const fileName = `ggml-${model}.bin`;
  return import_node_path.default.join(folder, fileName);
}
148
/**
 * Runs the whisper executable on one audio file and returns its parsed JSON
 * output.
 *
 * The JSON is written to a temporary file under
 * `<os tmpdir>/ghost-story-whisper`, read back, and then deleted.
 *
 * @param options Transcription settings: `inputPath`, `model`, `installDir`,
 *   `modelFolder`, `language`, `tokenLevelTimestamps`, `printOutput`,
 *   `flashAttention`, `suppressNonSpeechTokens`, `processors`, `threads`,
 *   `onProgress`, `signal`.
 * @returns {Promise<object>} The parsed JSON document.
 * @throws {Error} When the executable or model file is missing, when the
 *   process fails, or when the output cannot be read or parsed.
 */
async function transcribe(options) {
  const {
    inputPath,
    model,
    installDir,
    modelFolder,
    language,
    tokenLevelTimestamps,
    printOutput,
    flashAttention,
    suppressNonSpeechTokens,
    processors,
    threads,
    onProgress,
    signal
  } = options;

  const executable = (0, import_config.getWhisperExecutablePath)(installDir);
  const modelPath = getModelPath(modelFolder, model);
  if (!(0, import_node_fs.existsSync)(executable)) {
    throw new Error(`Whisper executable not found at ${executable}`);
  }
  if (!(0, import_node_fs.existsSync)(modelPath)) {
    throw new Error(`Model not found at ${modelPath}`);
  }

  const tmpDir = import_node_path.default.join(import_node_os.default.tmpdir(), "ghost-story-whisper");
  await (0, import_fs_extra.ensureDir)(tmpDir);

  // The directory is shared by every run (and every process), so a timestamp
  // alone can collide when two transcriptions start in the same millisecond.
  const uniqueSuffix = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2, 10)}`;
  const tmpJsonPath = import_node_path.default.join(tmpDir, `transcription-${uniqueSuffix}`);
  // The ".json" extension is not part of the path passed as --output-file.
  const expectedOutputPath = `${tmpJsonPath}.json`;

  const args = buildTranscribeArgs({
    inputPath,
    modelPath,
    outputPath: tmpJsonPath,
    model,
    language,
    tokenLevelTimestamps,
    flashAttention,
    suppressNonSpeechTokens,
    processors,
    threads
  });

  try {
    const outputPath = await runWhisperProcess({
      executable,
      args,
      cwd: installDir,
      printOutput,
      onProgress,
      signal,
      expectedOutputPath
    });
    return JSON.parse(
      await import_node_fs.default.promises.readFile(outputPath, "utf8")
    );
  } finally {
    // Best-effort cleanup on success and failure alike; the file may not
    // exist if the process never produced it, so unlink errors are ignored.
    await import_node_fs.default.promises.unlink(expectedOutputPath).catch(() => {
    });
  }
}
209
/**
 * Builds the command-line argument list for the whisper executable.
 *
 * Always emitted: --file, --output-file, --output-json-full, --model,
 * --print-progress, --processors, --threads. Conditionally emitted:
 * --language (lower-cased) when a language is given, --flash-attn when
 * `flashAttention` is set, and --suppress-nst together with --no-prints when
 * `suppressNonSpeechTokens` is set.
 *
 * @param options Object with `inputPath`, `outputPath`, `modelPath`,
 *   `language`, `flashAttention`, `suppressNonSpeechTokens`, `processors`
 *   and `threads`.
 * @returns {string[]} Flat argument list with null entries removed.
 */
function buildTranscribeArgs(options) {
  const {
    inputPath,
    outputPath,
    modelPath,
    language,
    flashAttention,
    suppressNonSpeechTokens,
    processors,
    threads
  } = options;

  const languageArgs = language ? ["--language", language.toLowerCase()] : [];
  const flashArgs = flashAttention ? ["--flash-attn"] : [];
  const suppressArgs = suppressNonSpeechTokens ? ["--suppress-nst", "--no-prints"] : [];

  const parts = [
    "--file",
    inputPath,
    "--output-file",
    outputPath,
    "--output-json-full",
    "--model",
    modelPath,
    "--print-progress",
    languageArgs,
    flashArgs,
    suppressArgs,
    ["--processors", String(processors)],
    ["--threads", String(threads)]
  ];

  return parts.flat().filter((arg) => arg !== null);
}
227
/**
 * Spawns the whisper executable and resolves with the path of its output
 * file.
 *
 * Both stdout and stderr are accumulated for error messages and scanned for
 * "progress = N" lines, which are reported to `onProgress` as N / 100.
 * On exit the promise resolves if `expectedOutputPath` exists (regardless of
 * the exit code) and rejects otherwise.
 *
 * @param options Object with `executable`, `args`, `cwd`, `printOutput`,
 *   `onProgress`, `signal` and `expectedOutputPath`.
 * @returns {Promise<string>} Resolves with `expectedOutputPath`.
 */
function runWhisperProcess(options) {
  const {
    executable,
    args,
    cwd,
    printOutput,
    onProgress,
    signal,
    expectedOutputPath
  } = options;

  if (signal?.aborted) {
    return Promise.reject(new Error("Signal aborted"));
  }

  return new Promise((resolve, reject) => {
    const child = (0, import_node_child_process.spawn)(executable, args, {
      cwd,
      signal: signal ?? void 0
    });
    let captured = "";

    // Records a chunk and forwards any progress figure it contains.
    const consume = (chunk) => {
      const text = chunk.toString("utf-8");
      captured += text;
      if (!text.includes("progress =")) {
        return;
      }
      const percent = text.match(/progress\s*=\s*([\d.]+)/)?.[1];
      if (percent) {
        onProgress?.(parseFloat(percent) / 100);
      }
    };

    // Wires one child stream to the recorder and, optionally, to our own output.
    const attach = (stream, echo) => {
      stream.on("data", (chunk) => {
        consume(chunk);
        if (printOutput) {
          echo(chunk);
        }
      });
    };
    attach(child.stdout, (chunk) => process.stdout.write(chunk));
    attach(child.stderr, (chunk) => process.stderr.write(chunk));

    child.on("exit", (code, exitSignal) => {
      // The presence of the output file is the success criterion.
      if ((0, import_node_fs.existsSync)(expectedOutputPath)) {
        onProgress?.(1);
        resolve(expectedOutputPath);
      } else if (exitSignal) {
        reject(new Error(`Process killed with signal ${exitSignal}: ${captured}`));
      } else if (captured.includes("must be 16 kHz")) {
        reject(
          new Error(
            "Audio file must be 16 kHz. Convert your audio to 16-bit, 16KHz WAV format."
          )
        );
      } else {
        reject(new Error(`Transcription failed (exit code ${code}): ${captured}`));
      }
    });

    child.on("error", (err) => {
      reject(new Error(`Failed to start whisper process: ${err.message}`));
    });
  });
}
290
+ // Annotate the CommonJS export names for ESM import in node:
291
+ 0 && (module.exports = {
292
+ ensureModelDownloaded,
293
+ ensureWhisperCppInstalled,
294
+ inputPreference,
295
+ recognize
296
+ });