vidistill 0.6.2 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +87 -1
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -242,6 +242,23 @@ var LANGUAGE_NAMES = {
242
242
  ar: "Arabic",
243
243
  hi: "Hindi"
244
244
  };
245
+ var SYSTEM_INSTRUCTION_DEDUP = `
246
+ You are a transcript deduplication reviewer. You will receive a list of numbered transcript entries. Your task is to identify entries that are SEMANTIC DUPLICATES of an earlier entry.
247
+
248
+ A duplicate means:
249
+ - The entry says essentially the same thing as an earlier entry (same meaning, possibly different wording)
250
+ - The entry is a subset of an earlier entry (the earlier one already covers this content)
251
+ - The entry repeats the same quote or statement that already appeared earlier
252
+
253
+ NOT a duplicate:
254
+ - Entries where the speaker genuinely repeats themselves for emphasis (common in lectures/presentations)
255
+ - Similar topics discussed at different points with different details
256
+ - Call-and-response patterns (e.g. Q&A)
257
+
258
+ Return ONLY the indices of entries to REMOVE (the later duplicate, not the original).
259
+ If no duplicates are found, return an empty array.
260
+ Be conservative \u2014 only flag clear duplicates. When in doubt, keep the entry.
261
+ `;
245
262
  function withLanguage(prompt, lang) {
246
263
  if (!lang || lang === "en") return prompt;
247
264
  const languageName = LANGUAGE_NAMES[lang] ?? lang;
@@ -1545,6 +1562,17 @@ var SCHEMA_SYNTHESIS = {
1545
1562
  },
1546
1563
  required: ["overview", "key_decisions", "key_concepts", "action_items", "questions_raised", "suggestions", "topics", "files_to_generate", "prerequisites"]
1547
1564
  };
1565
+ var SCHEMA_DEDUP_REVIEW = {
1566
+ type: Type.OBJECT,
1567
+ properties: {
1568
+ duplicate_indices: {
1569
+ type: Type.ARRAY,
1570
+ items: { type: Type.INTEGER },
1571
+ description: "Zero-based indices of transcript entries that are semantic duplicates of an earlier entry and should be removed"
1572
+ }
1573
+ },
1574
+ required: ["duplicate_indices"]
1575
+ };
1548
1576
 
1549
1577
  // src/lib/utils.ts
1550
1578
  import { readFile } from "fs/promises";
@@ -3286,6 +3314,55 @@ async function runPipeline(config) {
3286
3314
  const msg = e instanceof Error ? e.message : String(e);
3287
3315
  log5.warn(`speaker reconciliation failed, continuing with original labels: ${msg}`);
3288
3316
  }
3317
+ const allEntries = [];
3318
+ for (let segIdx = 0; segIdx < results.length; segIdx++) {
3319
+ const p1 = results[segIdx].pass1;
3320
+ if (p1 == null) continue;
3321
+ for (let entryIdx = 0; entryIdx < p1.transcript_entries.length; entryIdx++) {
3322
+ allEntries.push({ segIdx, entryIdx, entry: p1.transcript_entries[entryIdx] });
3323
+ }
3324
+ }
3325
+ if (allEntries.length > 20) {
3326
+ try {
3327
+ const numbered = allEntries.map(
3328
+ (e, i) => `[${i}] ${e.entry.timestamp} ${e.entry.speaker}: ${e.entry.text}`
3329
+ ).join("\n");
3330
+ const dedupResult = await rateLimiter.execute(
3331
+ () => client.generate({
3332
+ model: MODELS.flash,
3333
+ contents: [{ role: "user", parts: [{ text: numbered }] }],
3334
+ config: {
3335
+ systemInstruction: SYSTEM_INSTRUCTION_DEDUP,
3336
+ responseMimeType: "application/json",
3337
+ responseSchema: SCHEMA_DEDUP_REVIEW,
3338
+ temperature: 0
3339
+ }
3340
+ }),
3341
+ { onWait }
3342
+ );
3343
+ const parsed = dedupResult;
3344
+ if (parsed != null && Array.isArray(parsed.duplicate_indices)) {
3345
+ const indices = parsed.duplicate_indices;
3346
+ const toRemove = new Set(
3347
+ indices.filter((v) => typeof v === "number" && v >= 0 && v < allEntries.length)
3348
+ );
3349
+ if (toRemove.size > 0) {
3350
+ const segRemovals = /* @__PURE__ */ new Map();
3351
+ for (const globalIdx of toRemove) {
3352
+ const { segIdx, entryIdx } = allEntries[globalIdx];
3353
+ if (!segRemovals.has(segIdx)) segRemovals.set(segIdx, /* @__PURE__ */ new Set());
3354
+ segRemovals.get(segIdx).add(entryIdx);
3355
+ }
3356
+ for (const [segIdx, entryIndices] of segRemovals) {
3357
+ const p1 = results[segIdx].pass1;
3358
+ if (p1 == null) continue;
3359
+ p1.transcript_entries = p1.transcript_entries.filter((_, i) => !entryIndices.has(i));
3360
+ }
3361
+ }
3362
+ }
3363
+ } catch {
3364
+ }
3365
+ }
3289
3366
  let peopleExtraction = null;
3290
3367
  if (strategy.passes.includes("people")) {
3291
3368
  onProgress?.({ phase: "pass3b", segment: 0, totalSegments: 1, status: "running", totalSteps });
@@ -3580,6 +3657,15 @@ function writeTranscript(params) {
3580
3657
  sections.push("_No transcript data available._");
3581
3658
  return sections.join("\n");
3582
3659
  }
3660
+ for (let i = 1; i < segmentsWithPass1.length; i++) {
3661
+ const prev = segmentsWithPass1[i - 1].pass1;
3662
+ const curr = segmentsWithPass1[i].pass1;
3663
+ if (prev == null || curr == null) continue;
3664
+ const tail = prev.transcript_entries.slice(-5);
3665
+ curr.transcript_entries = curr.transcript_entries.filter(
3666
+ (entry) => !tail.some((prevEntry) => isNearDuplicate(entry, prevEntry))
3667
+ );
3668
+ }
3583
3669
  for (const seg of segmentsWithPass1) {
3584
3670
  if (seg.pass1 != null) {
3585
3671
  sections.push(renderPass1(seg.pass1, speakerMapping));
@@ -5902,7 +5988,7 @@ async function run2(args) {
5902
5988
  }
5903
5989
 
5904
5990
  // src/cli/index.ts
5905
- var version = "0.6.2";
5991
+ var version = "0.6.3";
5906
5992
  var DEFAULT_OUTPUT2 = "./vidistill-output/";
5907
5993
  var SUBCOMMANDS = {
5908
5994
  mcp: run,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vidistill",
3
- "version": "0.6.2",
3
+ "version": "0.6.3",
4
4
  "description": "Video intelligence distiller — extract structured notes, transcripts, and insights from any video using Gemini",
5
5
  "type": "module",
6
6
  "license": "MIT",