@storyteller-platform/ghost-story 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/config.cjs +7 -1
- package/dist/cli/config.d.cts +4 -2
- package/dist/cli/config.d.ts +4 -2
- package/dist/cli/config.js +5 -1
- package/dist/utilities/SpacelessScripts.cjs +48 -0
- package/dist/utilities/SpacelessScripts.d.cts +10 -0
- package/dist/utilities/SpacelessScripts.d.ts +10 -0
- package/dist/utilities/SpacelessScripts.js +22 -0
- package/dist/utilities/Timeline.cjs +12 -5
- package/dist/utilities/Timeline.js +12 -5
- package/dist/utilities/WhisperTimeline.cjs +118 -22
- package/dist/utilities/WhisperTimeline.js +118 -22
- package/package.json +1 -1
package/dist/cli/config.cjs
CHANGED
|
@@ -35,6 +35,8 @@ __export(config_exports, {
|
|
|
35
35
|
MODEL_SIZES: () => MODEL_SIZES,
|
|
36
36
|
RECOGNITION_ENGINES: () => RECOGNITION_ENGINES,
|
|
37
37
|
SILERO_VAD_VERSION: () => SILERO_VAD_VERSION,
|
|
38
|
+
WHISPER_CPP_PATCH_LEVEL: () => WHISPER_CPP_PATCH_LEVEL,
|
|
39
|
+
WHISPER_CPP_UPSTREAM_VERSION: () => WHISPER_CPP_UPSTREAM_VERSION,
|
|
38
40
|
WHISPER_CPP_VERSION: () => WHISPER_CPP_VERSION,
|
|
39
41
|
WHISPER_MODELS: () => WHISPER_MODELS,
|
|
40
42
|
WHISPER_MODEL_VERSION: () => WHISPER_MODEL_VERSION,
|
|
@@ -72,7 +74,9 @@ var import_node_os = __toESM(require("node:os"), 1);
|
|
|
72
74
|
var import_node_path = __toESM(require("node:path"), 1);
|
|
73
75
|
var import_zod = __toESM(require("zod"), 1);
|
|
74
76
|
var import_FileSystem = require("../utilities/FileSystem.cjs");
|
|
75
|
-
const
|
|
77
|
+
const WHISPER_CPP_UPSTREAM_VERSION = "1.8.3";
|
|
78
|
+
const WHISPER_CPP_PATCH_LEVEL = 2;
|
|
79
|
+
const WHISPER_CPP_VERSION = `${WHISPER_CPP_UPSTREAM_VERSION}-st.${WHISPER_CPP_PATCH_LEVEL}`;
|
|
76
80
|
const WHISPER_MODEL_VERSION = "1.0.0";
|
|
77
81
|
const SILERO_VAD_VERSION = "6.2.0";
|
|
78
82
|
const GITLAB_PROJECT_PATH = "storyteller-platform/storyteller";
|
|
@@ -478,6 +482,8 @@ function writeConfig(config) {
|
|
|
478
482
|
MODEL_SIZES,
|
|
479
483
|
RECOGNITION_ENGINES,
|
|
480
484
|
SILERO_VAD_VERSION,
|
|
485
|
+
WHISPER_CPP_PATCH_LEVEL,
|
|
486
|
+
WHISPER_CPP_UPSTREAM_VERSION,
|
|
481
487
|
WHISPER_CPP_VERSION,
|
|
482
488
|
WHISPER_MODELS,
|
|
483
489
|
WHISPER_MODEL_VERSION,
|
package/dist/cli/config.d.cts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import z from 'zod';
|
|
2
2
|
|
|
3
|
-
declare const
|
|
3
|
+
declare const WHISPER_CPP_UPSTREAM_VERSION = "1.8.3";
|
|
4
|
+
declare const WHISPER_CPP_PATCH_LEVEL = 2;
|
|
5
|
+
declare const WHISPER_CPP_VERSION = "1.8.3-st.2";
|
|
4
6
|
declare const WHISPER_MODEL_VERSION = "1.0.0";
|
|
5
7
|
declare const SILERO_VAD_VERSION = "6.2.0";
|
|
6
8
|
declare const GITLAB_PROJECT_PATH = "storyteller-platform/storyteller";
|
|
@@ -73,4 +75,4 @@ type CLIConfig = z.infer<typeof cliConfigSchema>;
|
|
|
73
75
|
declare function readConfig(): CLIConfig;
|
|
74
76
|
declare function writeConfig(config: Partial<CLIConfig>): void;
|
|
75
77
|
|
|
76
|
-
export { BUILD_VARIANTS, type BuildVariant, type CLIConfig, GITLAB_PROJECT_ID, GITLAB_PROJECT_PATH, GITLAB_WHIPSER_ML_ID, MODEL_SIZES, RECOGNITION_ENGINES, SILERO_VAD_VERSION, WHISPER_CPP_VERSION, WHISPER_MODELS, WHISPER_MODEL_VERSION, type WhisperModel, applyLegacyCpuFallback, cliConfigSchema, detectPlatform, getBinaryDownloadUrl, getCompatibleVariants, getConfiguredVariant, getCoremlModelDownloadUrl, getCoremlModelPath, getInstallDir, getInstalledVariant, getModelDir, getModelDownloadUrl, getModelPath, getVadExecutablePath, getVadModelDownloadUrl, getVadModelPath, getWhisperBaseDir, getWhisperExecutablePath, getWhisperServerExecutablePath, isValidModel, isValidVariant, isVariantCompatibleWithCurrentPlatform, needsCoremlModel, readConfig, resolveVariant, writeConfig };
|
|
78
|
+
export { BUILD_VARIANTS, type BuildVariant, type CLIConfig, GITLAB_PROJECT_ID, GITLAB_PROJECT_PATH, GITLAB_WHIPSER_ML_ID, MODEL_SIZES, RECOGNITION_ENGINES, SILERO_VAD_VERSION, WHISPER_CPP_PATCH_LEVEL, WHISPER_CPP_UPSTREAM_VERSION, WHISPER_CPP_VERSION, WHISPER_MODELS, WHISPER_MODEL_VERSION, type WhisperModel, applyLegacyCpuFallback, cliConfigSchema, detectPlatform, getBinaryDownloadUrl, getCompatibleVariants, getConfiguredVariant, getCoremlModelDownloadUrl, getCoremlModelPath, getInstallDir, getInstalledVariant, getModelDir, getModelDownloadUrl, getModelPath, getVadExecutablePath, getVadModelDownloadUrl, getVadModelPath, getWhisperBaseDir, getWhisperExecutablePath, getWhisperServerExecutablePath, isValidModel, isValidVariant, isVariantCompatibleWithCurrentPlatform, needsCoremlModel, readConfig, resolveVariant, writeConfig };
|
package/dist/cli/config.d.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import z from 'zod';
|
|
2
2
|
|
|
3
|
-
declare const
|
|
3
|
+
declare const WHISPER_CPP_UPSTREAM_VERSION = "1.8.3";
|
|
4
|
+
declare const WHISPER_CPP_PATCH_LEVEL = 2;
|
|
5
|
+
declare const WHISPER_CPP_VERSION = "1.8.3-st.2";
|
|
4
6
|
declare const WHISPER_MODEL_VERSION = "1.0.0";
|
|
5
7
|
declare const SILERO_VAD_VERSION = "6.2.0";
|
|
6
8
|
declare const GITLAB_PROJECT_PATH = "storyteller-platform/storyteller";
|
|
@@ -73,4 +75,4 @@ type CLIConfig = z.infer<typeof cliConfigSchema>;
|
|
|
73
75
|
declare function readConfig(): CLIConfig;
|
|
74
76
|
declare function writeConfig(config: Partial<CLIConfig>): void;
|
|
75
77
|
|
|
76
|
-
export { BUILD_VARIANTS, type BuildVariant, type CLIConfig, GITLAB_PROJECT_ID, GITLAB_PROJECT_PATH, GITLAB_WHIPSER_ML_ID, MODEL_SIZES, RECOGNITION_ENGINES, SILERO_VAD_VERSION, WHISPER_CPP_VERSION, WHISPER_MODELS, WHISPER_MODEL_VERSION, type WhisperModel, applyLegacyCpuFallback, cliConfigSchema, detectPlatform, getBinaryDownloadUrl, getCompatibleVariants, getConfiguredVariant, getCoremlModelDownloadUrl, getCoremlModelPath, getInstallDir, getInstalledVariant, getModelDir, getModelDownloadUrl, getModelPath, getVadExecutablePath, getVadModelDownloadUrl, getVadModelPath, getWhisperBaseDir, getWhisperExecutablePath, getWhisperServerExecutablePath, isValidModel, isValidVariant, isVariantCompatibleWithCurrentPlatform, needsCoremlModel, readConfig, resolveVariant, writeConfig };
|
|
78
|
+
export { BUILD_VARIANTS, type BuildVariant, type CLIConfig, GITLAB_PROJECT_ID, GITLAB_PROJECT_PATH, GITLAB_WHIPSER_ML_ID, MODEL_SIZES, RECOGNITION_ENGINES, SILERO_VAD_VERSION, WHISPER_CPP_PATCH_LEVEL, WHISPER_CPP_UPSTREAM_VERSION, WHISPER_CPP_VERSION, WHISPER_MODELS, WHISPER_MODEL_VERSION, type WhisperModel, applyLegacyCpuFallback, cliConfigSchema, detectPlatform, getBinaryDownloadUrl, getCompatibleVariants, getConfiguredVariant, getCoremlModelDownloadUrl, getCoremlModelPath, getInstallDir, getInstalledVariant, getModelDir, getModelDownloadUrl, getModelPath, getVadExecutablePath, getVadModelDownloadUrl, getVadModelPath, getWhisperBaseDir, getWhisperExecutablePath, getWhisperServerExecutablePath, isValidModel, isValidVariant, isVariantCompatibleWithCurrentPlatform, needsCoremlModel, readConfig, resolveVariant, writeConfig };
|
package/dist/cli/config.js
CHANGED
|
@@ -4,7 +4,9 @@ import os from "node:os";
|
|
|
4
4
|
import path from "node:path";
|
|
5
5
|
import z from "zod";
|
|
6
6
|
import { getAppDataDir } from "../utilities/FileSystem.js";
|
|
7
|
-
const
|
|
7
|
+
const WHISPER_CPP_UPSTREAM_VERSION = "1.8.3";
|
|
8
|
+
const WHISPER_CPP_PATCH_LEVEL = 2;
|
|
9
|
+
const WHISPER_CPP_VERSION = `${WHISPER_CPP_UPSTREAM_VERSION}-st.${WHISPER_CPP_PATCH_LEVEL}`;
|
|
8
10
|
const WHISPER_MODEL_VERSION = "1.0.0";
|
|
9
11
|
const SILERO_VAD_VERSION = "6.2.0";
|
|
10
12
|
const GITLAB_PROJECT_PATH = "storyteller-platform/storyteller";
|
|
@@ -409,6 +411,8 @@ export {
|
|
|
409
411
|
MODEL_SIZES,
|
|
410
412
|
RECOGNITION_ENGINES,
|
|
411
413
|
SILERO_VAD_VERSION,
|
|
414
|
+
WHISPER_CPP_PATCH_LEVEL,
|
|
415
|
+
WHISPER_CPP_UPSTREAM_VERSION,
|
|
412
416
|
WHISPER_CPP_VERSION,
|
|
413
417
|
WHISPER_MODELS,
|
|
414
418
|
WHISPER_MODEL_VERSION,
|
package/dist/utilities/SpacelessScripts.cjs
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
var SpacelessScripts_exports = {};
|
|
20
|
+
__export(SpacelessScripts_exports, {
|
|
21
|
+
spacelessScriptPattern: () => spacelessScriptPattern,
|
|
22
|
+
spacelessScripts: () => spacelessScripts,
|
|
23
|
+
startsWithSpacelessScript: () => startsWithSpacelessScript
|
|
24
|
+
});
|
|
25
|
+
module.exports = __toCommonJS(SpacelessScripts_exports);
|
|
26
|
+
const spacelessScripts = [
|
|
27
|
+
{ name: "thai", from: 3585, to: 3663 },
|
|
28
|
+
{ name: "lao", from: 3713, to: 3807 },
|
|
29
|
+
{ name: "tibetan", from: 3840, to: 4095 },
|
|
30
|
+
{ name: "myanmar", from: 4096, to: 4255 },
|
|
31
|
+
{ name: "khmer", from: 6016, to: 6143 },
|
|
32
|
+
{ name: "hiragana", from: 12352, to: 12447 },
|
|
33
|
+
{ name: "katakana", from: 12448, to: 12543 },
|
|
34
|
+
{ name: "cjk-ext-a", from: 13312, to: 19903 },
|
|
35
|
+
{ name: "cjk-unified", from: 19968, to: 40959 },
|
|
36
|
+
{ name: "cjk-compat", from: 63744, to: 64255 }
|
|
37
|
+
];
|
|
38
|
+
const charClass = spacelessScripts.map((s) => `${String.fromCharCode(s.from)}-${String.fromCharCode(s.to)}`).join("");
|
|
39
|
+
const spacelessScriptPattern = new RegExp(`[${charClass}]`);
|
|
40
|
+
function startsWithSpacelessScript(text) {
|
|
41
|
+
return spacelessScriptPattern.test(text.charAt(0));
|
|
42
|
+
}
|
|
43
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
44
|
+
0 && (module.exports = {
|
|
45
|
+
spacelessScriptPattern,
|
|
46
|
+
spacelessScripts,
|
|
47
|
+
startsWithSpacelessScript
|
|
48
|
+
});
|
package/dist/utilities/SpacelessScripts.d.cts
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
interface ScriptRange {
|
|
2
|
+
name: string;
|
|
3
|
+
from: number;
|
|
4
|
+
to: number;
|
|
5
|
+
}
|
|
6
|
+
declare const spacelessScripts: readonly ScriptRange[];
|
|
7
|
+
declare const spacelessScriptPattern: RegExp;
|
|
8
|
+
declare function startsWithSpacelessScript(text: string): boolean;
|
|
9
|
+
|
|
10
|
+
export { spacelessScriptPattern, spacelessScripts, startsWithSpacelessScript };
|
package/dist/utilities/SpacelessScripts.d.ts
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
interface ScriptRange {
|
|
2
|
+
name: string;
|
|
3
|
+
from: number;
|
|
4
|
+
to: number;
|
|
5
|
+
}
|
|
6
|
+
declare const spacelessScripts: readonly ScriptRange[];
|
|
7
|
+
declare const spacelessScriptPattern: RegExp;
|
|
8
|
+
declare function startsWithSpacelessScript(text: string): boolean;
|
|
9
|
+
|
|
10
|
+
export { spacelessScriptPattern, spacelessScripts, startsWithSpacelessScript };
|
package/dist/utilities/SpacelessScripts.js
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
const spacelessScripts = [
|
|
2
|
+
{ name: "thai", from: 3585, to: 3663 },
|
|
3
|
+
{ name: "lao", from: 3713, to: 3807 },
|
|
4
|
+
{ name: "tibetan", from: 3840, to: 4095 },
|
|
5
|
+
{ name: "myanmar", from: 4096, to: 4255 },
|
|
6
|
+
{ name: "khmer", from: 6016, to: 6143 },
|
|
7
|
+
{ name: "hiragana", from: 12352, to: 12447 },
|
|
8
|
+
{ name: "katakana", from: 12448, to: 12543 },
|
|
9
|
+
{ name: "cjk-ext-a", from: 13312, to: 19903 },
|
|
10
|
+
{ name: "cjk-unified", from: 19968, to: 40959 },
|
|
11
|
+
{ name: "cjk-compat", from: 63744, to: 64255 }
|
|
12
|
+
];
|
|
13
|
+
const charClass = spacelessScripts.map((s) => `${String.fromCharCode(s.from)}-${String.fromCharCode(s.to)}`).join("");
|
|
14
|
+
const spacelessScriptPattern = new RegExp(`[${charClass}]`);
|
|
15
|
+
function startsWithSpacelessScript(text) {
|
|
16
|
+
return spacelessScriptPattern.test(text.charAt(0));
|
|
17
|
+
}
|
|
18
|
+
export {
|
|
19
|
+
spacelessScriptPattern,
|
|
20
|
+
spacelessScripts,
|
|
21
|
+
startsWithSpacelessScript
|
|
22
|
+
};
|
package/dist/utilities/Timeline.cjs
CHANGED
|
@@ -24,6 +24,7 @@ __export(Timeline_exports, {
|
|
|
24
24
|
getUTF32Chars: () => getUTF32Chars
|
|
25
25
|
});
|
|
26
26
|
module.exports = __toCommonJS(Timeline_exports);
|
|
27
|
+
var import_SpacelessScripts = require("./SpacelessScripts.cjs");
|
|
27
28
|
function addWordTextOffsetsToTimelineInPlace(timeline, text) {
|
|
28
29
|
const { utf16To32Mapping } = getUTF32Chars(text);
|
|
29
30
|
let currentOffset = 0;
|
|
@@ -96,16 +97,22 @@ function getUTF32Chars(str) {
|
|
|
96
97
|
function buildTranscriptFromTimeline(timeline) {
|
|
97
98
|
const words = [];
|
|
98
99
|
function collectWords(entries) {
|
|
99
|
-
for (
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
} else if (entry.timeline) {
|
|
100
|
+
for (let i = 0; i < entries.length; i++) {
|
|
101
|
+
const entry = entries[i];
|
|
102
|
+
if (entry.timeline) {
|
|
103
103
|
collectWords(entry.timeline);
|
|
104
|
+
continue;
|
|
105
|
+
}
|
|
106
|
+
if (!(0, import_SpacelessScripts.startsWithSpacelessScript)(entry.text) && i !== entries.length - 1) {
|
|
107
|
+
words.push(entry.text);
|
|
108
|
+
words.push(" ");
|
|
109
|
+
continue;
|
|
104
110
|
}
|
|
111
|
+
words.push(entry.text);
|
|
105
112
|
}
|
|
106
113
|
}
|
|
107
114
|
collectWords(timeline);
|
|
108
|
-
return words.join("
|
|
115
|
+
return words.join("");
|
|
109
116
|
}
|
|
110
117
|
function addTimeOffsetToTimeline(targetTimeline, timeOffset) {
|
|
111
118
|
const newTimeline = structuredClone(targetTimeline);
|
package/dist/utilities/Timeline.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { startsWithSpacelessScript } from "./SpacelessScripts.js";
|
|
1
2
|
function addWordTextOffsetsToTimelineInPlace(timeline, text) {
|
|
2
3
|
const { utf16To32Mapping } = getUTF32Chars(text);
|
|
3
4
|
let currentOffset = 0;
|
|
@@ -70,16 +71,22 @@ function getUTF32Chars(str) {
|
|
|
70
71
|
function buildTranscriptFromTimeline(timeline) {
|
|
71
72
|
const words = [];
|
|
72
73
|
function collectWords(entries) {
|
|
73
|
-
for (
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
} else if (entry.timeline) {
|
|
74
|
+
for (let i = 0; i < entries.length; i++) {
|
|
75
|
+
const entry = entries[i];
|
|
76
|
+
if (entry.timeline) {
|
|
77
77
|
collectWords(entry.timeline);
|
|
78
|
+
continue;
|
|
79
|
+
}
|
|
80
|
+
if (!startsWithSpacelessScript(entry.text) && i !== entries.length - 1) {
|
|
81
|
+
words.push(entry.text);
|
|
82
|
+
words.push(" ");
|
|
83
|
+
continue;
|
|
78
84
|
}
|
|
85
|
+
words.push(entry.text);
|
|
79
86
|
}
|
|
80
87
|
}
|
|
81
88
|
collectWords(timeline);
|
|
82
|
-
return words.join("
|
|
89
|
+
return words.join("");
|
|
83
90
|
}
|
|
84
91
|
function addTimeOffsetToTimeline(targetTimeline, timeOffset) {
|
|
85
92
|
const newTimeline = structuredClone(targetTimeline);
|
package/dist/utilities/WhisperTimeline.cjs
CHANGED
|
@@ -28,6 +28,7 @@ __export(WhisperTimeline_exports, {
|
|
|
28
28
|
scoreTimeline: () => scoreTimeline
|
|
29
29
|
});
|
|
30
30
|
module.exports = __toCommonJS(WhisperTimeline_exports);
|
|
31
|
+
var import_SpacelessScripts = require("./SpacelessScripts.cjs");
|
|
31
32
|
const WHISPER_SAMPLE_RATE = 16e3;
|
|
32
33
|
function calculateWhisperSplits(durationSeconds, numProcessors, sampleRate = WHISPER_SAMPLE_RATE) {
|
|
33
34
|
if (numProcessors <= 1) return [];
|
|
@@ -42,26 +43,107 @@ function calculateWhisperSplits(durationSeconds, numProcessors, sampleRate = WHI
|
|
|
42
43
|
return splits;
|
|
43
44
|
}
|
|
44
45
|
const specialTokenPattern = /\[_.+\]|<\|[a-z_]+\|>/g;
|
|
46
|
+
const REPLACEMENT_CHAR = "\uFFFD";
|
|
47
|
+
function isSpecialToken(text) {
|
|
48
|
+
return text.startsWith("[_") || text.startsWith("<|");
|
|
49
|
+
}
|
|
50
|
+
function hasUtf8Corruption(text) {
|
|
51
|
+
return text.includes(REPLACEMENT_CHAR);
|
|
52
|
+
}
|
|
53
|
+
function buildAnchor(items, startIdx, options, maxItems = 5) {
|
|
54
|
+
let anchor = "";
|
|
55
|
+
let count = 0;
|
|
56
|
+
for (let j = startIdx; j < items.length && count < maxItems; j++) {
|
|
57
|
+
const item = items[j];
|
|
58
|
+
if (!item) break;
|
|
59
|
+
const text = options.getText(item);
|
|
60
|
+
if (options.shouldSkipInAnchor(item)) continue;
|
|
61
|
+
if (hasUtf8Corruption(text)) break;
|
|
62
|
+
anchor += text;
|
|
63
|
+
count++;
|
|
64
|
+
}
|
|
65
|
+
return anchor;
|
|
66
|
+
}
|
|
67
|
+
function forEachMergedUtf8Run(items, segmentText, options, emit) {
|
|
68
|
+
let segPos = 0;
|
|
69
|
+
let i = 0;
|
|
70
|
+
while (i < items.length) {
|
|
71
|
+
const item = items[i];
|
|
72
|
+
if (!item) break;
|
|
73
|
+
const text = options.getText(item);
|
|
74
|
+
const isSkippable = options.shouldSkipInAnchor(item);
|
|
75
|
+
if (isSkippable || !hasUtf8Corruption(text)) {
|
|
76
|
+
if (!isSkippable) {
|
|
77
|
+
segPos += text.length;
|
|
78
|
+
}
|
|
79
|
+
emit({
|
|
80
|
+
first: item,
|
|
81
|
+
last: item,
|
|
82
|
+
text,
|
|
83
|
+
probability: options.getProbability(item),
|
|
84
|
+
isMerged: false
|
|
85
|
+
});
|
|
86
|
+
i++;
|
|
87
|
+
continue;
|
|
88
|
+
}
|
|
89
|
+
const runStart = i;
|
|
90
|
+
let probability = 1;
|
|
91
|
+
while (i < items.length) {
|
|
92
|
+
const runItem = items[i];
|
|
93
|
+
if (!runItem) break;
|
|
94
|
+
const runText = options.getText(runItem);
|
|
95
|
+
const shouldStop = options.shouldSkipInAnchor(runItem) || !hasUtf8Corruption(runText);
|
|
96
|
+
if (shouldStop) break;
|
|
97
|
+
probability *= options.getProbability(runItem);
|
|
98
|
+
i++;
|
|
99
|
+
}
|
|
100
|
+
const first = items[runStart];
|
|
101
|
+
const last = items[i - 1];
|
|
102
|
+
if (!first || !last) continue;
|
|
103
|
+
const anchor = buildAnchor(items, i, options);
|
|
104
|
+
const anchorIdx = anchor.length > 0 ? segmentText.indexOf(anchor, segPos) : -1;
|
|
105
|
+
const runEndSegPos = anchorIdx >= 0 ? anchorIdx : segmentText.length;
|
|
106
|
+
const mergedText = segmentText.slice(segPos, runEndSegPos);
|
|
107
|
+
segPos = runEndSegPos;
|
|
108
|
+
emit({
|
|
109
|
+
first,
|
|
110
|
+
last,
|
|
111
|
+
text: mergedText,
|
|
112
|
+
probability,
|
|
113
|
+
isMerged: true
|
|
114
|
+
});
|
|
115
|
+
}
|
|
116
|
+
}
|
|
45
117
|
function parseWhisperCppOutput(transcription) {
|
|
46
118
|
return transcription.map((segment) => {
|
|
47
|
-
var _a, _b;
|
|
48
119
|
const words = [];
|
|
49
120
|
let lastTokenEndMs = 0;
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
121
|
+
forEachMergedUtf8Run(
|
|
122
|
+
segment.tokens,
|
|
123
|
+
segment.text,
|
|
124
|
+
{
|
|
125
|
+
getText: (token) => token.text,
|
|
126
|
+
getProbability: (token) => token.p,
|
|
127
|
+
shouldSkipInAnchor: (token) => isSpecialToken(token.text)
|
|
128
|
+
},
|
|
129
|
+
(run) => {
|
|
130
|
+
var _a, _b;
|
|
131
|
+
const cleanedText = run.text.replace(specialTokenPattern, "");
|
|
132
|
+
if (cleanedText.trim().length === 0) return;
|
|
133
|
+
const fallbackOffset = run.isMerged ? 0 : lastTokenEndMs;
|
|
134
|
+
const offsetFrom = ((_a = run.first.offsets) == null ? void 0 : _a.from) ?? fallbackOffset;
|
|
135
|
+
const offsetTo = ((_b = run.last.offsets) == null ? void 0 : _b.to) ?? fallbackOffset;
|
|
136
|
+
if (run.isMerged || run.last.offsets) {
|
|
137
|
+
lastTokenEndMs = offsetTo;
|
|
138
|
+
}
|
|
139
|
+
words.push({
|
|
140
|
+
text: cleanedText,
|
|
141
|
+
start: offsetFrom / 1e3,
|
|
142
|
+
end: offsetTo / 1e3,
|
|
143
|
+
confidence: run.probability
|
|
144
|
+
});
|
|
57
145
|
}
|
|
58
|
-
|
|
59
|
-
text: cleanedText,
|
|
60
|
-
start: offsetFrom / 1e3,
|
|
61
|
-
end: offsetTo / 1e3,
|
|
62
|
-
confidence: token.p
|
|
63
|
-
});
|
|
64
|
-
}
|
|
146
|
+
);
|
|
65
147
|
return {
|
|
66
148
|
text: segment.text,
|
|
67
149
|
segmentStart: segment.offsets.from / 1e3,
|
|
@@ -72,12 +154,25 @@ function parseWhisperCppOutput(transcription) {
|
|
|
72
154
|
}
|
|
73
155
|
function parseWhisperServerOutput(segments) {
|
|
74
156
|
return segments.map((segment) => {
|
|
75
|
-
const words =
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
157
|
+
const words = [];
|
|
158
|
+
forEachMergedUtf8Run(
|
|
159
|
+
segment.words ?? [],
|
|
160
|
+
segment.text,
|
|
161
|
+
{
|
|
162
|
+
getText: (word) => word.word,
|
|
163
|
+
getProbability: (word) => word.probability ?? 1,
|
|
164
|
+
shouldSkipInAnchor: () => false
|
|
165
|
+
},
|
|
166
|
+
(run) => {
|
|
167
|
+
const confidence = run.isMerged ? run.probability : run.first.probability ?? 0;
|
|
168
|
+
words.push({
|
|
169
|
+
text: run.text,
|
|
170
|
+
start: run.first.start,
|
|
171
|
+
end: run.last.end,
|
|
172
|
+
confidence
|
|
173
|
+
});
|
|
174
|
+
}
|
|
175
|
+
);
|
|
81
176
|
return {
|
|
82
177
|
text: segment.text,
|
|
83
178
|
segmentStart: segment.start,
|
|
@@ -256,7 +351,8 @@ function extractCorrectedTimeline(segments, options = {}) {
|
|
|
256
351
|
end: segmentEnd
|
|
257
352
|
});
|
|
258
353
|
const lastEntry = timeline[timeline.length - 1];
|
|
259
|
-
|
|
354
|
+
const isSubwordContinuation = !word.text.startsWith(" ") && !(0, import_SpacelessScripts.startsWithSpacelessScript)(trimmedText);
|
|
355
|
+
if (lastEntry && isSubwordContinuation) {
|
|
260
356
|
lastEntry.text += trimmedText;
|
|
261
357
|
if (lastEntry.confidence !== void 0) {
|
|
262
358
|
lastEntry.confidence = Math.min(lastEntry.confidence, word.confidence);
|
package/dist/utilities/WhisperTimeline.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { startsWithSpacelessScript } from "./SpacelessScripts.js";
|
|
1
2
|
const WHISPER_SAMPLE_RATE = 16e3;
|
|
2
3
|
function calculateWhisperSplits(durationSeconds, numProcessors, sampleRate = WHISPER_SAMPLE_RATE) {
|
|
3
4
|
if (numProcessors <= 1) return [];
|
|
@@ -12,26 +13,107 @@ function calculateWhisperSplits(durationSeconds, numProcessors, sampleRate = WHI
|
|
|
12
13
|
return splits;
|
|
13
14
|
}
|
|
14
15
|
const specialTokenPattern = /\[_.+\]|<\|[a-z_]+\|>/g;
|
|
16
|
+
const REPLACEMENT_CHAR = "\uFFFD";
|
|
17
|
+
function isSpecialToken(text) {
|
|
18
|
+
return text.startsWith("[_") || text.startsWith("<|");
|
|
19
|
+
}
|
|
20
|
+
function hasUtf8Corruption(text) {
|
|
21
|
+
return text.includes(REPLACEMENT_CHAR);
|
|
22
|
+
}
|
|
23
|
+
function buildAnchor(items, startIdx, options, maxItems = 5) {
|
|
24
|
+
let anchor = "";
|
|
25
|
+
let count = 0;
|
|
26
|
+
for (let j = startIdx; j < items.length && count < maxItems; j++) {
|
|
27
|
+
const item = items[j];
|
|
28
|
+
if (!item) break;
|
|
29
|
+
const text = options.getText(item);
|
|
30
|
+
if (options.shouldSkipInAnchor(item)) continue;
|
|
31
|
+
if (hasUtf8Corruption(text)) break;
|
|
32
|
+
anchor += text;
|
|
33
|
+
count++;
|
|
34
|
+
}
|
|
35
|
+
return anchor;
|
|
36
|
+
}
|
|
37
|
+
function forEachMergedUtf8Run(items, segmentText, options, emit) {
|
|
38
|
+
let segPos = 0;
|
|
39
|
+
let i = 0;
|
|
40
|
+
while (i < items.length) {
|
|
41
|
+
const item = items[i];
|
|
42
|
+
if (!item) break;
|
|
43
|
+
const text = options.getText(item);
|
|
44
|
+
const isSkippable = options.shouldSkipInAnchor(item);
|
|
45
|
+
if (isSkippable || !hasUtf8Corruption(text)) {
|
|
46
|
+
if (!isSkippable) {
|
|
47
|
+
segPos += text.length;
|
|
48
|
+
}
|
|
49
|
+
emit({
|
|
50
|
+
first: item,
|
|
51
|
+
last: item,
|
|
52
|
+
text,
|
|
53
|
+
probability: options.getProbability(item),
|
|
54
|
+
isMerged: false
|
|
55
|
+
});
|
|
56
|
+
i++;
|
|
57
|
+
continue;
|
|
58
|
+
}
|
|
59
|
+
const runStart = i;
|
|
60
|
+
let probability = 1;
|
|
61
|
+
while (i < items.length) {
|
|
62
|
+
const runItem = items[i];
|
|
63
|
+
if (!runItem) break;
|
|
64
|
+
const runText = options.getText(runItem);
|
|
65
|
+
const shouldStop = options.shouldSkipInAnchor(runItem) || !hasUtf8Corruption(runText);
|
|
66
|
+
if (shouldStop) break;
|
|
67
|
+
probability *= options.getProbability(runItem);
|
|
68
|
+
i++;
|
|
69
|
+
}
|
|
70
|
+
const first = items[runStart];
|
|
71
|
+
const last = items[i - 1];
|
|
72
|
+
if (!first || !last) continue;
|
|
73
|
+
const anchor = buildAnchor(items, i, options);
|
|
74
|
+
const anchorIdx = anchor.length > 0 ? segmentText.indexOf(anchor, segPos) : -1;
|
|
75
|
+
const runEndSegPos = anchorIdx >= 0 ? anchorIdx : segmentText.length;
|
|
76
|
+
const mergedText = segmentText.slice(segPos, runEndSegPos);
|
|
77
|
+
segPos = runEndSegPos;
|
|
78
|
+
emit({
|
|
79
|
+
first,
|
|
80
|
+
last,
|
|
81
|
+
text: mergedText,
|
|
82
|
+
probability,
|
|
83
|
+
isMerged: true
|
|
84
|
+
});
|
|
85
|
+
}
|
|
86
|
+
}
|
|
15
87
|
function parseWhisperCppOutput(transcription) {
|
|
16
88
|
return transcription.map((segment) => {
|
|
17
|
-
var _a, _b;
|
|
18
89
|
const words = [];
|
|
19
90
|
let lastTokenEndMs = 0;
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
91
|
+
forEachMergedUtf8Run(
|
|
92
|
+
segment.tokens,
|
|
93
|
+
segment.text,
|
|
94
|
+
{
|
|
95
|
+
getText: (token) => token.text,
|
|
96
|
+
getProbability: (token) => token.p,
|
|
97
|
+
shouldSkipInAnchor: (token) => isSpecialToken(token.text)
|
|
98
|
+
},
|
|
99
|
+
(run) => {
|
|
100
|
+
var _a, _b;
|
|
101
|
+
const cleanedText = run.text.replace(specialTokenPattern, "");
|
|
102
|
+
if (cleanedText.trim().length === 0) return;
|
|
103
|
+
const fallbackOffset = run.isMerged ? 0 : lastTokenEndMs;
|
|
104
|
+
const offsetFrom = ((_a = run.first.offsets) == null ? void 0 : _a.from) ?? fallbackOffset;
|
|
105
|
+
const offsetTo = ((_b = run.last.offsets) == null ? void 0 : _b.to) ?? fallbackOffset;
|
|
106
|
+
if (run.isMerged || run.last.offsets) {
|
|
107
|
+
lastTokenEndMs = offsetTo;
|
|
108
|
+
}
|
|
109
|
+
words.push({
|
|
110
|
+
text: cleanedText,
|
|
111
|
+
start: offsetFrom / 1e3,
|
|
112
|
+
end: offsetTo / 1e3,
|
|
113
|
+
confidence: run.probability
|
|
114
|
+
});
|
|
27
115
|
}
|
|
28
|
-
|
|
29
|
-
text: cleanedText,
|
|
30
|
-
start: offsetFrom / 1e3,
|
|
31
|
-
end: offsetTo / 1e3,
|
|
32
|
-
confidence: token.p
|
|
33
|
-
});
|
|
34
|
-
}
|
|
116
|
+
);
|
|
35
117
|
return {
|
|
36
118
|
text: segment.text,
|
|
37
119
|
segmentStart: segment.offsets.from / 1e3,
|
|
@@ -42,12 +124,25 @@ function parseWhisperCppOutput(transcription) {
|
|
|
42
124
|
}
|
|
43
125
|
function parseWhisperServerOutput(segments) {
|
|
44
126
|
return segments.map((segment) => {
|
|
45
|
-
const words =
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
127
|
+
const words = [];
|
|
128
|
+
forEachMergedUtf8Run(
|
|
129
|
+
segment.words ?? [],
|
|
130
|
+
segment.text,
|
|
131
|
+
{
|
|
132
|
+
getText: (word) => word.word,
|
|
133
|
+
getProbability: (word) => word.probability ?? 1,
|
|
134
|
+
shouldSkipInAnchor: () => false
|
|
135
|
+
},
|
|
136
|
+
(run) => {
|
|
137
|
+
const confidence = run.isMerged ? run.probability : run.first.probability ?? 0;
|
|
138
|
+
words.push({
|
|
139
|
+
text: run.text,
|
|
140
|
+
start: run.first.start,
|
|
141
|
+
end: run.last.end,
|
|
142
|
+
confidence
|
|
143
|
+
});
|
|
144
|
+
}
|
|
145
|
+
);
|
|
51
146
|
return {
|
|
52
147
|
text: segment.text,
|
|
53
148
|
segmentStart: segment.start,
|
|
@@ -226,7 +321,8 @@ function extractCorrectedTimeline(segments, options = {}) {
|
|
|
226
321
|
end: segmentEnd
|
|
227
322
|
});
|
|
228
323
|
const lastEntry = timeline[timeline.length - 1];
|
|
229
|
-
|
|
324
|
+
const isSubwordContinuation = !word.text.startsWith(" ") && !startsWithSpacelessScript(trimmedText);
|
|
325
|
+
if (lastEntry && isSubwordContinuation) {
|
|
230
326
|
lastEntry.text += trimmedText;
|
|
231
327
|
if (lastEntry.confidence !== void 0) {
|
|
232
328
|
lastEntry.confidence = Math.min(lastEntry.confidence, word.confidence);
|
package/package.json
CHANGED