vidistill 0.6.4 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +103 -7
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -2087,13 +2087,74 @@ async function runLinkConsensus(params) {
|
|
|
2087
2087
|
// src/core/transcript-consensus.ts
|
|
2088
2088
|
var ALIGN_WINDOW_S = 3;
|
|
2089
2089
|
// Largest timestamp gap at which isNearDuplicate still compares two entries;
// a bigger gap short-circuits to "not a duplicate".
// NOTE(review): unit is presumably seconds (per the _S suffix) — confirm against parseTimestamp.
var DEDUP_WINDOW_S = 10;
|
|
2090
|
+
// Largest timestamp gap between two consecutive entries for which
// trimBoundaryOverlap still looks for repeated words; entries further apart are kept as-is.
// NOTE(review): unit is presumably seconds (per the _S suffix) — confirm against parseTimestamp.
var BOUNDARY_WINDOW_S = 30;
|
|
2091
|
+
// Shortest run of identical words (end of one entry == start of the next)
// that findSuffixPrefixOverlap / trimBoundaryOverlap treat as a real overlap.
var MIN_OVERLAP_WORDS = 5;
|
|
2092
|
+
/**
 * Splits text into word tokens and records where each one ends.
 *
 * A word is a maximal run of letters, combining marks, numbers and
 * underscores. Combining marks (\p{M}) are part of the word class so that
 * decomposed accents ("e" + U+0301) and scripts that attach vowel signs to
 * consonants stay inside a single token; without them a word would be split
 * in the middle and `endPos` could point just before a combining mark.
 *
 * @param {string} text4 - Text to tokenize (coerced to a string).
 * @returns {{ lower: string, endPos: number }[]} One item per word, in order:
 *   `lower` is the lower-cased word, `endPos` is the UTF-16 index in the
 *   input immediately after the word's last code unit.
 */
function extractWordsWithPositions(text4) {
  const wordPattern = /[\p{L}\p{M}\p{N}_]+/gu;
  const words = [];
  for (const found of String(text4).matchAll(wordPattern)) {
    const [word] = found;
    words.push({ lower: word.toLowerCase(), endPos: found.index + word.length });
  }
  return words;
}
|
|
2101
|
+
/**
 * Finds the longest run of words that ends `prevWords` and also starts
 * `currWords`.
 *
 * Candidate lengths are tried from longest to shortest, so the first hit is
 * the longest overlap. Lengths below `minOverlap` are never considered.
 *
 * @param {string[]} prevWords - Words of the earlier text, compared with `===`.
 * @param {string[]} currWords - Words of the later text.
 * @param {number} [minOverlap=MIN_OVERLAP_WORDS] - Shortest overlap worth
 *   reporting; defaults to the module-level MIN_OVERLAP_WORDS constant.
 * @returns {number} Number of overlapping words, or 0 when no overlap of at
 *   least `minOverlap` words exists.
 */
function findSuffixPrefixOverlap(prevWords, currWords, minOverlap = MIN_OVERLAP_WORDS) {
  const longestPossible = Math.min(prevWords.length, currWords.length);
  for (let len = longestPossible; len >= minOverlap; len--) {
    // Index in prevWords where a suffix of `len` words begins.
    const suffixStart = prevWords.length - len;
    let matched = 0;
    while (matched < len && prevWords[suffixStart + matched] === currWords[matched]) {
      matched++;
    }
    if (matched === len) return len;
  }
  return 0;
}
|
|
2116
|
+
/**
 * Removes words repeated across the boundary between consecutive entries.
 *
 * Entries are processed in timestamp order (a sorted copy; the input array is
 * not reordered). For each entry, if it falls within BOUNDARY_WINDOW_S of the
 * previously kept entry and its text starts with at least MIN_OVERLAP_WORDS
 * words that also end the previous entry's text, that repeated prefix (plus
 * any leading whitespace/punctuation left behind) is cut off. An entry whose
 * text is empty after trimming is dropped.
 *
 * @param {{ timestamp: *, text: string }[]} entries - Entries to clean up.
 * @returns {Array} The input itself when it has at most one entry; otherwise
 *   a new array of kept entries, with trimmed entries replaced by shallow
 *   copies carrying the shortened `text`.
 */
function trimBoundaryOverlap(entries) {
  if (entries.length <= 1) return entries;

  const byTimestamp = (a, b) => parseTimestamp(a.timestamp) - parseTimestamp(b.timestamp);
  const [earliest, ...later] = [...entries].sort(byTimestamp);
  const kept = [earliest];

  for (const entry of later) {
    // Always compare against the last entry actually kept (possibly already trimmed).
    const previous = kept[kept.length - 1];
    const gap = parseTimestamp(entry.timestamp) - parseTimestamp(previous.timestamp);
    if (gap > BOUNDARY_WINDOW_S || gap < 0) {
      kept.push(entry);
      continue;
    }

    const previousWords = extractWordsWithPositions(previous.text);
    const entryWords = extractWordsWithPositions(entry.text);
    const sharedCount = findSuffixPrefixOverlap(
      previousWords.map((word) => word.lower),
      entryWords.map((word) => word.lower)
    );
    if (sharedCount < MIN_OVERLAP_WORDS) {
      kept.push(entry);
      continue;
    }

    // Cut right after the last repeated word, then drop the separators left at the front.
    const afterOverlap = entry.text.slice(entryWords[sharedCount - 1].endPos);
    const leftover = afterOverlap.replace(/^[\s,;.!?:—–\-]+/, "").trim();
    if (leftover.length > 0) {
      kept.push({ ...entry, text: leftover });
    }
  }

  return kept;
}
|
|
2090
2149
|
function isNearDuplicate(a, b) {
|
|
2091
2150
|
const delta = Math.abs(parseTimestamp(a.timestamp) - parseTimestamp(b.timestamp));
|
|
2092
2151
|
if (delta > DEDUP_WINDOW_S) return false;
|
|
2093
2152
|
if (a.text === b.text) return true;
|
|
2094
|
-
const
|
|
2095
|
-
const
|
|
2096
|
-
const
|
|
2153
|
+
const aLower = a.text.toLowerCase();
|
|
2154
|
+
const bLower = b.text.toLowerCase();
|
|
2155
|
+
const shared = tokenOverlap(aLower, bLower);
|
|
2156
|
+
const aTokens = new Set(aLower.match(/[\p{L}\p{N}_]+/gu) ?? []).size;
|
|
2157
|
+
const bTokens = new Set(bLower.match(/[\p{L}\p{N}_]+/gu) ?? []).size;
|
|
2097
2158
|
const maxTokens = Math.max(aTokens, bTokens);
|
|
2098
2159
|
const minTokens = Math.min(aTokens, bTokens);
|
|
2099
2160
|
if (maxTokens > 0 && shared / maxTokens >= 0.8) return true;
|
|
@@ -2139,7 +2200,7 @@ function mergeTranscriptRuns(runs) {
|
|
|
2139
2200
|
if (runs.length === 1) {
|
|
2140
2201
|
return {
|
|
2141
2202
|
...runs[0],
|
|
2142
|
-
transcript_entries: deduplicateEntries(runs[0].transcript_entries)
|
|
2203
|
+
transcript_entries: trimBoundaryOverlap(deduplicateEntries(runs[0].transcript_entries))
|
|
2143
2204
|
};
|
|
2144
2205
|
}
|
|
2145
2206
|
const referenceRun = runs.reduce(
|
|
@@ -2188,7 +2249,7 @@ function mergeTranscriptRuns(runs) {
|
|
|
2188
2249
|
return {
|
|
2189
2250
|
segment_index: referenceRun.segment_index,
|
|
2190
2251
|
time_range: referenceRun.time_range,
|
|
2191
|
-
transcript_entries: deduplicateEntries(mergedEntries)
|
|
2252
|
+
transcript_entries: trimBoundaryOverlap(deduplicateEntries(mergedEntries))
|
|
2192
2253
|
};
|
|
2193
2254
|
}
|
|
2194
2255
|
function mergeSpeakerSummaries(summaries) {
|
|
@@ -2288,7 +2349,8 @@ async function runTranscriptionConsensus(params) {
|
|
|
2288
2349
|
return { result: null, runsCompleted: 0, runsAttempted: runs };
|
|
2289
2350
|
}
|
|
2290
2351
|
if (runs === 1 && runsCompleted === 1) {
|
|
2291
|
-
|
|
2352
|
+
const result = mergeTranscriptRuns(successfulRuns);
|
|
2353
|
+
return { result, runsCompleted: 1, runsAttempted: 1 };
|
|
2292
2354
|
}
|
|
2293
2355
|
const merged = mergeTranscriptRuns(successfulRuns);
|
|
2294
2356
|
return { result: merged, runsCompleted, runsAttempted: runs };
|
|
@@ -3699,6 +3761,18 @@ function writeTranscript(params) {
|
|
|
3699
3761
|
curr.transcript_entries = curr.transcript_entries.filter(
|
|
3700
3762
|
(entry) => !tail.some((prevEntry) => isNearDuplicate(entry, prevEntry))
|
|
3701
3763
|
);
|
|
3764
|
+
if (curr.transcript_entries.length > 0 && prev.transcript_entries.length > 0) {
|
|
3765
|
+
const boundaryRegion = [
|
|
3766
|
+
prev.transcript_entries[prev.transcript_entries.length - 1],
|
|
3767
|
+
curr.transcript_entries[0]
|
|
3768
|
+
];
|
|
3769
|
+
const trimmed = trimBoundaryOverlap(boundaryRegion);
|
|
3770
|
+
if (trimmed.length < 2) {
|
|
3771
|
+
curr.transcript_entries.shift();
|
|
3772
|
+
} else if (trimmed[1].text !== curr.transcript_entries[0].text) {
|
|
3773
|
+
curr.transcript_entries[0] = { ...curr.transcript_entries[0], text: trimmed[1].text };
|
|
3774
|
+
}
|
|
3775
|
+
}
|
|
3702
3776
|
}
|
|
3703
3777
|
for (const seg of segmentsWithPass1) {
|
|
3704
3778
|
if (seg.pass1 != null) {
|
|
@@ -3760,6 +3834,28 @@ function writeCombined(params) {
|
|
|
3760
3834
|
const { pipelineResult, speakerMapping } = params;
|
|
3761
3835
|
const { segments } = pipelineResult;
|
|
3762
3836
|
const sections = ["# Combined View", "", "_Chronological interleaving of speech, code, and visuals._", ""];
|
|
3837
|
+
const segmentsWithPass1 = segments.filter((s) => s.pass1 != null);
|
|
3838
|
+
for (let i = 1; i < segmentsWithPass1.length; i++) {
|
|
3839
|
+
const prev = segmentsWithPass1[i - 1].pass1;
|
|
3840
|
+
const curr = segmentsWithPass1[i].pass1;
|
|
3841
|
+
if (prev == null || curr == null) continue;
|
|
3842
|
+
const tail = prev.transcript_entries.slice(-5);
|
|
3843
|
+
curr.transcript_entries = curr.transcript_entries.filter(
|
|
3844
|
+
(entry) => !tail.some((prevEntry) => isNearDuplicate(entry, prevEntry))
|
|
3845
|
+
);
|
|
3846
|
+
if (curr.transcript_entries.length > 0 && prev.transcript_entries.length > 0) {
|
|
3847
|
+
const boundaryRegion = [
|
|
3848
|
+
prev.transcript_entries[prev.transcript_entries.length - 1],
|
|
3849
|
+
curr.transcript_entries[0]
|
|
3850
|
+
];
|
|
3851
|
+
const trimmed = trimBoundaryOverlap(boundaryRegion);
|
|
3852
|
+
if (trimmed.length < 2) {
|
|
3853
|
+
curr.transcript_entries.shift();
|
|
3854
|
+
} else if (trimmed[1].text !== curr.transcript_entries[0].text) {
|
|
3855
|
+
curr.transcript_entries[0] = { ...curr.transcript_entries[0], text: trimmed[1].text };
|
|
3856
|
+
}
|
|
3857
|
+
}
|
|
3858
|
+
}
|
|
3763
3859
|
for (const seg of segments) {
|
|
3764
3860
|
const { pass1, pass2 } = seg;
|
|
3765
3861
|
const timeRange = pass1?.time_range ?? pass2?.time_range ?? `Segment ${seg.index + 1}`;
|
|
@@ -5621,7 +5717,7 @@ async function run2(args) {
|
|
|
5621
5717
|
}
|
|
5622
5718
|
|
|
5623
5719
|
// src/cli/index.ts
|
|
5624
|
-
var version = "0.6.4";
|
|
5720
|
+
var version = "0.7.0";
|
|
5625
5721
|
var DEFAULT_OUTPUT2 = "./vidistill-output/";
|
|
5626
5722
|
var SUBCOMMANDS = {
|
|
5627
5723
|
mcp: run,
|
package/package.json
CHANGED