vidistill 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +82 -29
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1597,6 +1597,10 @@ function applySpeakerMapping(label, mapping) {
|
|
|
1597
1597
|
for (const [key, value] of Object.entries(mapping)) {
|
|
1598
1598
|
if (key.toLowerCase() === lower) return value;
|
|
1599
1599
|
}
|
|
1600
|
+
for (const [key, value] of Object.entries(mapping)) {
|
|
1601
|
+
const keyParen = key.match(/\(([^)]+)\)/);
|
|
1602
|
+
if (keyParen && keyParen[1].trim().toLowerCase() === lower) return value;
|
|
1603
|
+
}
|
|
1600
1604
|
return label;
|
|
1601
1605
|
}
|
|
1602
1606
|
function replaceNamesInText(text4, mapping) {
|
|
@@ -1604,14 +1608,22 @@ function replaceNamesInText(text4, mapping) {
|
|
|
1604
1608
|
const entries = Object.entries(mapping).filter(([key, value]) => key !== value && !/^SPEAKER_\d+$/.test(key)).sort((a, b) => b[0].length - a[0].length);
|
|
1605
1609
|
if (entries.length === 0) return text4;
|
|
1606
1610
|
let result = text4;
|
|
1611
|
+
const placeholders = /* @__PURE__ */ new Map();
|
|
1612
|
+
let idx = 0;
|
|
1607
1613
|
for (const [key, value] of entries) {
|
|
1614
|
+
const placeholder = `\0PH${idx}\0`;
|
|
1615
|
+
placeholders.set(placeholder, value);
|
|
1608
1616
|
const escaped = key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
1609
1617
|
const re = new RegExp(`\\b${escaped}\\b`, "g");
|
|
1610
|
-
result = result.replace(re, value);
|
|
1618
|
+
result = result.replace(re, placeholder);
|
|
1619
|
+
idx++;
|
|
1620
|
+
}
|
|
1621
|
+
for (const [placeholder, value] of placeholders) {
|
|
1622
|
+
result = result.replaceAll(placeholder, value);
|
|
1611
1623
|
}
|
|
1612
1624
|
return result;
|
|
1613
1625
|
}
|
|
1614
|
-
function buildExpandedMapping(segments, speakerMapping) {
|
|
1626
|
+
function buildExpandedMapping(segments, speakerMapping, peopleExtraction) {
|
|
1615
1627
|
const expanded = { ...speakerMapping };
|
|
1616
1628
|
for (const seg of segments) {
|
|
1617
1629
|
if (seg.pass1 == null) continue;
|
|
@@ -1640,6 +1652,19 @@ function buildExpandedMapping(segments, speakerMapping) {
|
|
|
1640
1652
|
}
|
|
1641
1653
|
}
|
|
1642
1654
|
}
|
|
1655
|
+
if (peopleExtraction?.participants != null) {
|
|
1656
|
+
for (const p of peopleExtraction.participants) {
|
|
1657
|
+
if (!p.name || expanded[p.name] != null) continue;
|
|
1658
|
+
for (const [key, value] of Object.entries(expanded)) {
|
|
1659
|
+
if (/^SPEAKER_\d+$/.test(key)) continue;
|
|
1660
|
+
if (key === value) continue;
|
|
1661
|
+
if (p.name !== key && p.name.includes(key)) {
|
|
1662
|
+
expanded[p.name] = value;
|
|
1663
|
+
break;
|
|
1664
|
+
}
|
|
1665
|
+
}
|
|
1666
|
+
}
|
|
1667
|
+
}
|
|
1643
1668
|
return expanded;
|
|
1644
1669
|
}
|
|
1645
1670
|
async function readJsonFile(filePath) {
|
|
@@ -2009,6 +2034,36 @@ async function runLinkConsensus(params) {
|
|
|
2009
2034
|
|
|
2010
2035
|
// src/core/transcript-consensus.ts
|
|
2011
2036
|
var ALIGN_WINDOW_S = 3;
|
|
2037
|
+
var DEDUP_WINDOW_S = 10;
|
|
2038
|
+
function isNearDuplicate(a, b) {
|
|
2039
|
+
const delta = Math.abs(parseTimestamp(a.timestamp) - parseTimestamp(b.timestamp));
|
|
2040
|
+
if (delta > DEDUP_WINDOW_S) return false;
|
|
2041
|
+
if (a.text === b.text) return true;
|
|
2042
|
+
const shared = tokenOverlap(a.text, b.text);
|
|
2043
|
+
const maxTokens = Math.max(a.text.split(/\s+/).length, b.text.split(/\s+/).length);
|
|
2044
|
+
return maxTokens > 0 && shared / maxTokens >= 0.8;
|
|
2045
|
+
}
|
|
2046
|
+
function deduplicateEntries(entries) {
|
|
2047
|
+
if (entries.length <= 1) return entries;
|
|
2048
|
+
const result = [entries[0]];
|
|
2049
|
+
for (let i = 1; i < entries.length; i++) {
|
|
2050
|
+
const curr = entries[i];
|
|
2051
|
+
let isDup = false;
|
|
2052
|
+
for (let j = result.length - 1; j >= Math.max(0, result.length - 3); j--) {
|
|
2053
|
+
if (isNearDuplicate(curr, result[j])) {
|
|
2054
|
+
if (curr.text.length > result[j].text.length) {
|
|
2055
|
+
result[j] = curr;
|
|
2056
|
+
}
|
|
2057
|
+
isDup = true;
|
|
2058
|
+
break;
|
|
2059
|
+
}
|
|
2060
|
+
}
|
|
2061
|
+
if (!isDup) {
|
|
2062
|
+
result.push(curr);
|
|
2063
|
+
}
|
|
2064
|
+
}
|
|
2065
|
+
return result;
|
|
2066
|
+
}
|
|
2012
2067
|
function selectBestText(texts) {
|
|
2013
2068
|
if (texts.length === 1) return texts[0];
|
|
2014
2069
|
const referenceText = texts.join(" ");
|
|
@@ -2024,7 +2079,12 @@ function selectBestText(texts) {
|
|
|
2024
2079
|
return bestText;
|
|
2025
2080
|
}
|
|
2026
2081
|
function mergeTranscriptRuns(runs) {
|
|
2027
|
-
if (runs.length === 1) return runs[0];
|
|
2082
|
+
if (runs.length === 1) {
|
|
2083
|
+
return {
|
|
2084
|
+
...runs[0],
|
|
2085
|
+
transcript_entries: deduplicateEntries(runs[0].transcript_entries)
|
|
2086
|
+
};
|
|
2087
|
+
}
|
|
2028
2088
|
const referenceRun = runs.reduce(
|
|
2029
2089
|
(best, run3) => run3.transcript_entries.length > best.transcript_entries.length ? run3 : best
|
|
2030
2090
|
);
|
|
@@ -2071,7 +2131,7 @@ function mergeTranscriptRuns(runs) {
|
|
|
2071
2131
|
return {
|
|
2072
2132
|
segment_index: referenceRun.segment_index,
|
|
2073
2133
|
time_range: referenceRun.time_range,
|
|
2074
|
-
transcript_entries: mergedEntries
|
|
2134
|
+
transcript_entries: deduplicateEntries(mergedEntries)
|
|
2075
2135
|
};
|
|
2076
2136
|
}
|
|
2077
2137
|
function mergeSpeakerSummaries(summaries) {
|
|
@@ -3482,37 +3542,30 @@ function writeGuide(params) {
|
|
|
3482
3542
|
}
|
|
3483
3543
|
|
|
3484
3544
|
// src/output/transcript.ts
|
|
3485
|
-
var PAUSE_THRESHOLD_SECONDS = 1.5;
|
|
3486
|
-
function applyEmphasis(text4, emphasisWords) {
|
|
3487
|
-
if (emphasisWords == null || emphasisWords.length === 0) return text4;
|
|
3488
|
-
let result = text4;
|
|
3489
|
-
for (const word of emphasisWords) {
|
|
3490
|
-
const escaped = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
3491
|
-
const re = new RegExp(`(?<![\\w*])${escaped}(?![\\w*])`, "gi");
|
|
3492
|
-
result = result.replace(re, `**$&**`);
|
|
3493
|
-
}
|
|
3494
|
-
return result;
|
|
3495
|
-
}
|
|
3496
3545
|
function renderEntry(entry, speakerMapping) {
|
|
3497
|
-
const emphasized = applyEmphasis(entry.text, entry.emphasis_words);
|
|
3498
|
-
const pause = entry.pause_after_seconds != null && entry.pause_after_seconds >= PAUSE_THRESHOLD_SECONDS ? ` _(pause ${entry.pause_after_seconds.toFixed(1)}s)_` : "";
|
|
3499
3546
|
const speaker = applySpeakerMapping(entry.speaker, speakerMapping);
|
|
3500
|
-
return `**[${entry.timestamp}]** **${speaker}:** ${emphasized}${pause}`;
|
|
3547
|
+
return `**[${entry.timestamp}]** **${speaker}:** ${entry.text}`;
|
|
3548
|
+
}
|
|
3549
|
+
function parseEndTime(timeRange) {
|
|
3550
|
+
const parts = timeRange.split("-");
|
|
3551
|
+
if (parts.length < 2) return Infinity;
|
|
3552
|
+
const endStr = parts[parts.length - 1].trim();
|
|
3553
|
+
const seconds = parseTimestamp(endStr);
|
|
3554
|
+
return seconds > 0 ? seconds : Infinity;
|
|
3501
3555
|
}
|
|
3502
3556
|
function renderPass1(pass1, speakerMapping) {
|
|
3503
3557
|
const lines = [];
|
|
3504
3558
|
lines.push(`### Segment ${pass1.segment_index + 1} \u2014 ${pass1.time_range}`);
|
|
3505
3559
|
lines.push("");
|
|
3506
|
-
|
|
3507
|
-
|
|
3508
|
-
|
|
3509
|
-
|
|
3510
|
-
|
|
3511
|
-
|
|
3512
|
-
if (pass1.transcript_entries.length === 0) {
|
|
3560
|
+
const endTime = parseEndTime(pass1.time_range);
|
|
3561
|
+
const validEntries = pass1.transcript_entries.filter((entry) => {
|
|
3562
|
+
const entryTime = parseTimestamp(entry.timestamp);
|
|
3563
|
+
return entryTime <= endTime;
|
|
3564
|
+
});
|
|
3565
|
+
if (validEntries.length === 0) {
|
|
3513
3566
|
lines.push("_No transcript entries for this segment._");
|
|
3514
3567
|
} else {
|
|
3515
|
-
for (const entry of pass1.transcript_entries) {
|
|
3568
|
+
for (const entry of validEntries) {
|
|
3516
3569
|
lines.push(renderEntry(entry, speakerMapping));
|
|
3517
3570
|
}
|
|
3518
3571
|
}
|
|
@@ -4681,7 +4734,7 @@ async function generateOutput(params) {
|
|
|
4681
4734
|
const filesGenerated = [];
|
|
4682
4735
|
const errors = [];
|
|
4683
4736
|
const filesToGenerate = resolveFilesToGenerate(params);
|
|
4684
|
-
const expandedMapping = speakerMapping ? buildExpandedMapping(pipelineResult.segments, speakerMapping) : void 0;
|
|
4737
|
+
const expandedMapping = speakerMapping ? buildExpandedMapping(pipelineResult.segments, speakerMapping, pipelineResult.peopleExtraction) : void 0;
|
|
4685
4738
|
async function writeOutputFile(filename, content) {
|
|
4686
4739
|
const fullPath = join3(finalOutputDir, filename);
|
|
4687
4740
|
const dir = dirname(fullPath);
|
|
@@ -4875,7 +4928,7 @@ async function reRenderWithSpeakerMapping(params) {
|
|
|
4875
4928
|
await writeFile(fullPath, content, "utf8");
|
|
4876
4929
|
filesWritten.push(filename);
|
|
4877
4930
|
}
|
|
4878
|
-
const expandedMapping = buildExpandedMapping(pipelineResult.segments, speakerMapping);
|
|
4931
|
+
const expandedMapping = buildExpandedMapping(pipelineResult.segments, speakerMapping, pipelineResult.peopleExtraction);
|
|
4879
4932
|
const filesToReRender = new Set(filesGenerated.filter((f) => !f.startsWith("raw/")));
|
|
4880
4933
|
if (filesToReRender.has("transcript.md")) {
|
|
4881
4934
|
try {
|
|
@@ -5638,7 +5691,7 @@ async function run2(args) {
|
|
|
5638
5691
|
}
|
|
5639
5692
|
|
|
5640
5693
|
// src/cli/index.ts
|
|
5641
|
-
var version = "0.5.2";
|
|
5694
|
+
var version = "0.5.4";
|
|
5642
5695
|
var DEFAULT_OUTPUT = "./vidistill-output/";
|
|
5643
5696
|
var SUBCOMMANDS = {
|
|
5644
5697
|
mcp: run,
|
package/package.json
CHANGED