vidistill 0.6.4 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +103 -7
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -2087,13 +2087,74 @@ async function runLinkConsensus(params) {
2087
2087
  // src/core/transcript-consensus.ts
2088
2088
  // NOTE(review): no use of ALIGN_WINDOW_S is visible in this excerpt; the
  // `_S` suffix suggests a tolerance in seconds — confirm against its callers.
  var ALIGN_WINDOW_S = 3;
2089
2089
  // isNearDuplicate() returns false outright when the two entries' parsed
  // timestamps differ by more than this (presumably seconds — parseTimestamp
  // is not visible here; confirm).
  var DEDUP_WINDOW_S = 10;
2090
  // trimBoundaryOverlap() only looks for repeated text between neighbouring
  // entries whose parsed timestamps are at most this far apart.
+ var BOUNDARY_WINDOW_S = 30;
2091
  // Shortest run of identical words (end of one entry == start of the next)
  // that trimBoundaryOverlap() treats as a genuine overlap worth trimming.
+ var MIN_OVERLAP_WORDS = 5;
2092
/**
 * Splits text into lowercase words and records where each word ends.
 *
 * A word is a maximal run of Unicode letters, Unicode digits, or underscores;
 * every other character acts as a separator.
 *
 * @param {string|null|undefined} text4 - Text to tokenize. `null` and
 *   `undefined` are treated as empty text; any other non-string value is
 *   coerced with `String()`.
 * @returns {{ lower: string, endPos: number }[]} The words in order of
 *   appearance. `lower` is the lowercased word; `endPos` is the index just
 *   past the word in the original text, suitable for `text.slice(endPos)`.
 */
function extractWordsWithPositions(text4) {
  // A regex would coerce a missing value to the literal string "undefined" /
  // "null" and report it as a word, so map missing input to "" first.
  const source = text4 == null ? "" : String(text4);
  // matchAll works on its own iterator, so no stateful lastIndex bookkeeping
  // is needed on the /g regex.
  return Array.from(source.matchAll(/[\p{L}\p{N}_]+/gu), (match) => ({
    lower: match[0].toLowerCase(),
    endPos: match.index + match[0].length
  }));
}
2101
/**
 * Finds the longest run of words that both ends `prevWords` and starts
 * `currWords`.
 *
 * Candidate lengths are tried from longest to shortest, so the first hit is
 * the longest overlap. Words are compared with `===`, so callers should
 * normalise case beforehand.
 *
 * @param {string[]} prevWords - Words of the earlier text.
 * @param {string[]} currWords - Words of the later text.
 * @param {number} [minOverlap=MIN_OVERLAP_WORDS] - Shortest overlap worth
 *   reporting; shorter matches are ignored.
 * @returns {number} Number of overlapping words, or 0 when no overlap of at
 *   least `minOverlap` words exists.
 */
function findSuffixPrefixOverlap(prevWords, currWords, minOverlap = MIN_OVERLAP_WORDS) {
  const longestPossible = Math.min(prevWords.length, currWords.length);
  for (let len = longestPossible; len >= minOverlap; len--) {
    // Index in prevWords where a suffix of `len` words begins.
    const offset = prevWords.length - len;
    let isMatch = true;
    for (let k = 0; k < len; k++) {
      if (prevWords[offset + k] !== currWords[k]) {
        isMatch = false;
        break;
      }
    }
    if (isMatch) return len;
  }
  return 0;
}
2116
/**
 * Removes text that is repeated across neighbouring transcript entries.
 *
 * Entries are ordered by parsed timestamp. Each entry is then compared with
 * the last entry kept so far: when the end of the kept entry's text and the
 * start of the current entry's text share at least MIN_OVERLAP_WORDS words
 * (case-insensitive), the shared words and any punctuation or whitespace
 * right after them are cut from the current entry. An entry with nothing
 * left after the cut is dropped.
 *
 * The input array and its entries are not modified; trimmed entries are
 * shallow copies with a new `text`.
 *
 * @param {Array<{ timestamp: *, text: string }>} entries - Transcript
 *   entries; `timestamp` must be accepted by parseTimestamp.
 * @returns {Array} Entries ordered by timestamp with boundary overlaps
 *   trimmed. With zero or one entry the input array itself is returned.
 */
function trimBoundaryOverlap(entries) {
  if (entries.length <= 1) return entries;
  // Parse each timestamp once rather than on every comparator call and again
  // inside the loop.
  const timed = entries
    .map((entry) => ({ entry, seconds: parseTimestamp(entry.timestamp) }))
    .sort((a, b) => a.seconds - b.seconds);
  const result = [timed[0].entry];
  // Timestamp of the last entry kept in `result`.
  let prevSeconds = timed[0].seconds;
  for (let i = 1; i < timed.length; i++) {
    const { entry: curr, seconds: currSeconds } = timed[i];
    const prev = result[result.length - 1];
    const delta = currSeconds - prevSeconds;
    // Written as a positive range test so that a NaN delta (unparseable
    // timestamp) is kept untouched instead of slipping through to trimming.
    const withinWindow = delta >= 0 && delta <= BOUNDARY_WINDOW_S;
    if (!withinWindow) {
      result.push(curr);
      prevSeconds = currSeconds;
      continue;
    }
    const prevWords = extractWordsWithPositions(prev.text);
    const currWords = extractWordsWithPositions(curr.text);
    const overlapLen = findSuffixPrefixOverlap(
      prevWords.map((w) => w.lower),
      currWords.map((w) => w.lower)
    );
    if (overlapLen < MIN_OVERLAP_WORDS) {
      result.push(curr);
      prevSeconds = currSeconds;
      continue;
    }
    // Cut just past the last overlapping word, then drop the punctuation and
    // whitespace that trailed the repeated phrase.
    const cutPos = currWords[overlapLen - 1].endPos;
    const remaining = curr.text.slice(cutPos).replace(/^[\s,;.!?:—–\-]+/, "").trim();
    if (remaining.length === 0) {
      // The whole entry repeated the previous one; keep comparing later
      // entries against the same kept entry.
      continue;
    }
    result.push({ ...curr, text: remaining });
    prevSeconds = currSeconds;
  }
  return result;
}
2090
2149
  function isNearDuplicate(a, b) {
2091
2150
  const delta = Math.abs(parseTimestamp(a.timestamp) - parseTimestamp(b.timestamp));
2092
2151
  if (delta > DEDUP_WINDOW_S) return false;
2093
2152
  if (a.text === b.text) return true;
2094
- const shared = tokenOverlap(a.text, b.text);
2095
- const aTokens = a.text.split(/\s+/).length;
2096
- const bTokens = b.text.split(/\s+/).length;
2153
+ const aLower = a.text.toLowerCase();
2154
+ const bLower = b.text.toLowerCase();
2155
+ const shared = tokenOverlap(aLower, bLower);
2156
+ const aTokens = new Set(aLower.match(/[\p{L}\p{N}_]+/gu) ?? []).size;
2157
+ const bTokens = new Set(bLower.match(/[\p{L}\p{N}_]+/gu) ?? []).size;
2097
2158
  const maxTokens = Math.max(aTokens, bTokens);
2098
2159
  const minTokens = Math.min(aTokens, bTokens);
2099
2160
  if (maxTokens > 0 && shared / maxTokens >= 0.8) return true;
@@ -2139,7 +2200,7 @@ function mergeTranscriptRuns(runs) {
2139
2200
  if (runs.length === 1) {
2140
2201
  return {
2141
2202
  ...runs[0],
2142
- transcript_entries: deduplicateEntries(runs[0].transcript_entries)
2203
+ transcript_entries: trimBoundaryOverlap(deduplicateEntries(runs[0].transcript_entries))
2143
2204
  };
2144
2205
  }
2145
2206
  const referenceRun = runs.reduce(
@@ -2188,7 +2249,7 @@ function mergeTranscriptRuns(runs) {
2188
2249
  return {
2189
2250
  segment_index: referenceRun.segment_index,
2190
2251
  time_range: referenceRun.time_range,
2191
- transcript_entries: deduplicateEntries(mergedEntries)
2252
+ transcript_entries: trimBoundaryOverlap(deduplicateEntries(mergedEntries))
2192
2253
  };
2193
2254
  }
2194
2255
  function mergeSpeakerSummaries(summaries) {
@@ -2288,7 +2349,8 @@ async function runTranscriptionConsensus(params) {
2288
2349
  return { result: null, runsCompleted: 0, runsAttempted: runs };
2289
2350
  }
2290
2351
  if (runs === 1 && runsCompleted === 1) {
2291
- return { result: successfulRuns[0], runsCompleted: 1, runsAttempted: 1 };
2352
+ const result = mergeTranscriptRuns(successfulRuns);
2353
+ return { result, runsCompleted: 1, runsAttempted: 1 };
2292
2354
  }
2293
2355
  const merged = mergeTranscriptRuns(successfulRuns);
2294
2356
  return { result: merged, runsCompleted, runsAttempted: runs };
@@ -3699,6 +3761,18 @@ function writeTranscript(params) {
3699
3761
  curr.transcript_entries = curr.transcript_entries.filter(
3700
3762
  (entry) => !tail.some((prevEntry) => isNearDuplicate(entry, prevEntry))
3701
3763
  );
3764
+ if (curr.transcript_entries.length > 0 && prev.transcript_entries.length > 0) {
3765
+ const boundaryRegion = [
3766
+ prev.transcript_entries[prev.transcript_entries.length - 1],
3767
+ curr.transcript_entries[0]
3768
+ ];
3769
+ const trimmed = trimBoundaryOverlap(boundaryRegion);
3770
+ if (trimmed.length < 2) {
3771
+ curr.transcript_entries.shift();
3772
+ } else if (trimmed[1].text !== curr.transcript_entries[0].text) {
3773
+ curr.transcript_entries[0] = { ...curr.transcript_entries[0], text: trimmed[1].text };
3774
+ }
3775
+ }
3702
3776
  }
3703
3777
  for (const seg of segmentsWithPass1) {
3704
3778
  if (seg.pass1 != null) {
@@ -3760,6 +3834,28 @@ function writeCombined(params) {
3760
3834
  const { pipelineResult, speakerMapping } = params;
3761
3835
  const { segments } = pipelineResult;
3762
3836
  const sections = ["# Combined View", "", "_Chronological interleaving of speech, code, and visuals._", ""];
3837
+ const segmentsWithPass1 = segments.filter((s) => s.pass1 != null);
3838
+ for (let i = 1; i < segmentsWithPass1.length; i++) {
3839
+ const prev = segmentsWithPass1[i - 1].pass1;
3840
+ const curr = segmentsWithPass1[i].pass1;
3841
+ if (prev == null || curr == null) continue;
3842
+ const tail = prev.transcript_entries.slice(-5);
3843
+ curr.transcript_entries = curr.transcript_entries.filter(
3844
+ (entry) => !tail.some((prevEntry) => isNearDuplicate(entry, prevEntry))
3845
+ );
3846
+ if (curr.transcript_entries.length > 0 && prev.transcript_entries.length > 0) {
3847
+ const boundaryRegion = [
3848
+ prev.transcript_entries[prev.transcript_entries.length - 1],
3849
+ curr.transcript_entries[0]
3850
+ ];
3851
+ const trimmed = trimBoundaryOverlap(boundaryRegion);
3852
+ if (trimmed.length < 2) {
3853
+ curr.transcript_entries.shift();
3854
+ } else if (trimmed[1].text !== curr.transcript_entries[0].text) {
3855
+ curr.transcript_entries[0] = { ...curr.transcript_entries[0], text: trimmed[1].text };
3856
+ }
3857
+ }
3858
+ }
3763
3859
  for (const seg of segments) {
3764
3860
  const { pass1, pass2 } = seg;
3765
3861
  const timeRange = pass1?.time_range ?? pass2?.time_range ?? `Segment ${seg.index + 1}`;
@@ -5621,7 +5717,7 @@ async function run2(args) {
5621
5717
  }
5622
5718
 
5623
5719
  // src/cli/index.ts
5624
- var version = "0.6.4";
5720
+ var version = "0.7.0";
5625
5721
  var DEFAULT_OUTPUT2 = "./vidistill-output/";
5626
5722
  var SUBCOMMANDS = {
5627
5723
  mcp: run,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vidistill",
3
- "version": "0.6.4",
3
+ "version": "0.7.0",
4
4
  "description": "Video intelligence distiller — extract structured notes, transcripts, and insights from any video using Gemini",
5
5
  "type": "module",
6
6
  "license": "MIT",