vidistill 0.5.2 → 0.5.4

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions exactly as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +82 -29
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -1597,6 +1597,10 @@ function applySpeakerMapping(label, mapping) {
1597
1597
  for (const [key, value] of Object.entries(mapping)) {
1598
1598
  if (key.toLowerCase() === lower) return value;
1599
1599
  }
1600
+ for (const [key, value] of Object.entries(mapping)) {
1601
+ const keyParen = key.match(/\(([^)]+)\)/);
1602
+ if (keyParen && keyParen[1].trim().toLowerCase() === lower) return value;
1603
+ }
1600
1604
  return label;
1601
1605
  }
1602
1606
  function replaceNamesInText(text4, mapping) {
@@ -1604,14 +1608,22 @@ function replaceNamesInText(text4, mapping) {
1604
1608
  const entries = Object.entries(mapping).filter(([key, value]) => key !== value && !/^SPEAKER_\d+$/.test(key)).sort((a, b) => b[0].length - a[0].length);
1605
1609
  if (entries.length === 0) return text4;
1606
1610
  let result = text4;
1611
+ const placeholders = /* @__PURE__ */ new Map();
1612
+ let idx = 0;
1607
1613
  for (const [key, value] of entries) {
1614
+ const placeholder = `\0PH${idx}\0`;
1615
+ placeholders.set(placeholder, value);
1608
1616
  const escaped = key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
1609
1617
  const re = new RegExp(`\\b${escaped}\\b`, "g");
1610
- result = result.replace(re, value);
1618
+ result = result.replace(re, placeholder);
1619
+ idx++;
1620
+ }
1621
+ for (const [placeholder, value] of placeholders) {
1622
+ result = result.replaceAll(placeholder, value);
1611
1623
  }
1612
1624
  return result;
1613
1625
  }
1614
- function buildExpandedMapping(segments, speakerMapping) {
1626
+ function buildExpandedMapping(segments, speakerMapping, peopleExtraction) {
1615
1627
  const expanded = { ...speakerMapping };
1616
1628
  for (const seg of segments) {
1617
1629
  if (seg.pass1 == null) continue;
@@ -1640,6 +1652,19 @@ function buildExpandedMapping(segments, speakerMapping) {
1640
1652
  }
1641
1653
  }
1642
1654
  }
1655
+ if (peopleExtraction?.participants != null) {
1656
+ for (const p of peopleExtraction.participants) {
1657
+ if (!p.name || expanded[p.name] != null) continue;
1658
+ for (const [key, value] of Object.entries(expanded)) {
1659
+ if (/^SPEAKER_\d+$/.test(key)) continue;
1660
+ if (key === value) continue;
1661
+ if (p.name !== key && p.name.includes(key)) {
1662
+ expanded[p.name] = value;
1663
+ break;
1664
+ }
1665
+ }
1666
+ }
1667
+ }
1643
1668
  return expanded;
1644
1669
  }
1645
1670
  async function readJsonFile(filePath) {
@@ -2009,6 +2034,36 @@ async function runLinkConsensus(params) {
2009
2034
 
2010
2035
  // src/core/transcript-consensus.ts
2011
2036
  var ALIGN_WINDOW_S = 3;
2037
+ var DEDUP_WINDOW_S = 10;
2038
+ function isNearDuplicate(a, b) {
2039
+ const delta = Math.abs(parseTimestamp(a.timestamp) - parseTimestamp(b.timestamp));
2040
+ if (delta > DEDUP_WINDOW_S) return false;
2041
+ if (a.text === b.text) return true;
2042
+ const shared = tokenOverlap(a.text, b.text);
2043
+ const maxTokens = Math.max(a.text.split(/\s+/).length, b.text.split(/\s+/).length);
2044
+ return maxTokens > 0 && shared / maxTokens >= 0.8;
2045
+ }
2046
+ function deduplicateEntries(entries) {
2047
+ if (entries.length <= 1) return entries;
2048
+ const result = [entries[0]];
2049
+ for (let i = 1; i < entries.length; i++) {
2050
+ const curr = entries[i];
2051
+ let isDup = false;
2052
+ for (let j = result.length - 1; j >= Math.max(0, result.length - 3); j--) {
2053
+ if (isNearDuplicate(curr, result[j])) {
2054
+ if (curr.text.length > result[j].text.length) {
2055
+ result[j] = curr;
2056
+ }
2057
+ isDup = true;
2058
+ break;
2059
+ }
2060
+ }
2061
+ if (!isDup) {
2062
+ result.push(curr);
2063
+ }
2064
+ }
2065
+ return result;
2066
+ }
2012
2067
  function selectBestText(texts) {
2013
2068
  if (texts.length === 1) return texts[0];
2014
2069
  const referenceText = texts.join(" ");
@@ -2024,7 +2079,12 @@ function selectBestText(texts) {
2024
2079
  return bestText;
2025
2080
  }
2026
2081
  function mergeTranscriptRuns(runs) {
2027
- if (runs.length === 1) return runs[0];
2082
+ if (runs.length === 1) {
2083
+ return {
2084
+ ...runs[0],
2085
+ transcript_entries: deduplicateEntries(runs[0].transcript_entries)
2086
+ };
2087
+ }
2028
2088
  const referenceRun = runs.reduce(
2029
2089
  (best, run3) => run3.transcript_entries.length > best.transcript_entries.length ? run3 : best
2030
2090
  );
@@ -2071,7 +2131,7 @@ function mergeTranscriptRuns(runs) {
2071
2131
  return {
2072
2132
  segment_index: referenceRun.segment_index,
2073
2133
  time_range: referenceRun.time_range,
2074
- transcript_entries: mergedEntries
2134
+ transcript_entries: deduplicateEntries(mergedEntries)
2075
2135
  };
2076
2136
  }
2077
2137
  function mergeSpeakerSummaries(summaries) {
@@ -3482,37 +3542,30 @@ function writeGuide(params) {
3482
3542
  }
3483
3543
 
3484
3544
  // src/output/transcript.ts
3485
- var PAUSE_THRESHOLD_SECONDS = 1.5;
3486
- function applyEmphasis(text4, emphasisWords) {
3487
- if (emphasisWords == null || emphasisWords.length === 0) return text4;
3488
- let result = text4;
3489
- for (const word of emphasisWords) {
3490
- const escaped = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
3491
- const re = new RegExp(`(?<![\\w*])${escaped}(?![\\w*])`, "gi");
3492
- result = result.replace(re, `**$&**`);
3493
- }
3494
- return result;
3495
- }
3496
3545
  function renderEntry(entry, speakerMapping) {
3497
- const emphasized = applyEmphasis(entry.text, entry.emphasis_words);
3498
- const pause = entry.pause_after_seconds != null && entry.pause_after_seconds >= PAUSE_THRESHOLD_SECONDS ? ` _(pause ${entry.pause_after_seconds.toFixed(1)}s)_` : "";
3499
3546
  const speaker = applySpeakerMapping(entry.speaker, speakerMapping);
3500
- return `**[${entry.timestamp}]** **${speaker}:** ${emphasized}${pause}`;
3547
+ return `**[${entry.timestamp}]** **${speaker}:** ${entry.text}`;
3548
+ }
3549
+ function parseEndTime(timeRange) {
3550
+ const parts = timeRange.split("-");
3551
+ if (parts.length < 2) return Infinity;
3552
+ const endStr = parts[parts.length - 1].trim();
3553
+ const seconds = parseTimestamp(endStr);
3554
+ return seconds > 0 ? seconds : Infinity;
3501
3555
  }
3502
3556
  function renderPass1(pass1, speakerMapping) {
3503
3557
  const lines = [];
3504
3558
  lines.push(`### Segment ${pass1.segment_index + 1} \u2014 ${pass1.time_range}`);
3505
3559
  lines.push("");
3506
- if (pass1.speaker_summary.length > 0) {
3507
- lines.push(
3508
- "_Speakers: " + pass1.speaker_summary.map((s) => `${applySpeakerMapping(s.speaker_id, speakerMapping)} (${s.description})`).join(", ") + "_"
3509
- );
3510
- lines.push("");
3511
- }
3512
- if (pass1.transcript_entries.length === 0) {
3560
+ const endTime = parseEndTime(pass1.time_range);
3561
+ const validEntries = pass1.transcript_entries.filter((entry) => {
3562
+ const entryTime = parseTimestamp(entry.timestamp);
3563
+ return entryTime <= endTime;
3564
+ });
3565
+ if (validEntries.length === 0) {
3513
3566
  lines.push("_No transcript entries for this segment._");
3514
3567
  } else {
3515
- for (const entry of pass1.transcript_entries) {
3568
+ for (const entry of validEntries) {
3516
3569
  lines.push(renderEntry(entry, speakerMapping));
3517
3570
  }
3518
3571
  }
@@ -4681,7 +4734,7 @@ async function generateOutput(params) {
4681
4734
  const filesGenerated = [];
4682
4735
  const errors = [];
4683
4736
  const filesToGenerate = resolveFilesToGenerate(params);
4684
- const expandedMapping = speakerMapping ? buildExpandedMapping(pipelineResult.segments, speakerMapping) : void 0;
4737
+ const expandedMapping = speakerMapping ? buildExpandedMapping(pipelineResult.segments, speakerMapping, pipelineResult.peopleExtraction) : void 0;
4685
4738
  async function writeOutputFile(filename, content) {
4686
4739
  const fullPath = join3(finalOutputDir, filename);
4687
4740
  const dir = dirname(fullPath);
@@ -4875,7 +4928,7 @@ async function reRenderWithSpeakerMapping(params) {
4875
4928
  await writeFile(fullPath, content, "utf8");
4876
4929
  filesWritten.push(filename);
4877
4930
  }
4878
- const expandedMapping = buildExpandedMapping(pipelineResult.segments, speakerMapping);
4931
+ const expandedMapping = buildExpandedMapping(pipelineResult.segments, speakerMapping, pipelineResult.peopleExtraction);
4879
4932
  const filesToReRender = new Set(filesGenerated.filter((f) => !f.startsWith("raw/")));
4880
4933
  if (filesToReRender.has("transcript.md")) {
4881
4934
  try {
@@ -5638,7 +5691,7 @@ async function run2(args) {
5638
5691
  }
5639
5692
 
5640
5693
  // src/cli/index.ts
5641
- var version = "0.5.2";
5694
+ var version = "0.5.4";
5642
5695
  var DEFAULT_OUTPUT = "./vidistill-output/";
5643
5696
  var SUBCOMMANDS = {
5644
5697
  mcp: run,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vidistill",
3
- "version": "0.5.2",
3
+ "version": "0.5.4",
4
4
  "description": "Video intelligence distiller — extract structured notes, transcripts, and insights from any video using Gemini",
5
5
  "type": "module",
6
6
  "license": "MIT",