@clipform/mcp-server 1.10.1 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23232,6 +23232,65 @@ If neither native web search nor this tool is available and the topic is post-cu
23232
23232
  );
23233
23233
  }
23234
23234
 
23235
+ // src/tools/youtube-transcript.ts
23236
/**
 * Registers the `clipform_youtube_transcript` tool on the given server.
 *
 * The tool forwards `url`, `lang` and `max_chars` to the internal
 * `/internal/youtube-transcript` endpoint and renders the response as a
 * Markdown document: a metadata header followed by the transcript text.
 *
 * @param {object} server - Object exposing `registerTool(name, config, handler)`.
 * @returns {void}
 */
function registerYouTubeTranscriptTool(server) {
  // Renders a value for the header, substituting "unknown" for missing fields
  // so a partial API payload never prints "undefined".
  const orUnknown = (value) => (value == null || value === "" ? "unknown" : value);

  // Formats a duration given in seconds. Short clips (the tool accepts
  // shorts/ URLs) are reported in seconds instead of rounding to "~0 minutes".
  const formatDuration = (rawSeconds) => {
    const seconds = Number(rawSeconds);
    if (rawSeconds == null || !Number.isFinite(seconds) || seconds < 0) {
      return "unknown";
    }
    if (seconds < 60) {
      const whole = Math.round(seconds);
      return `${whole} second${whole === 1 ? "" : "s"}`;
    }
    const minutes = Math.round(seconds / 60);
    return `~${minutes} minute${minutes === 1 ? "" : "s"}`;
  };

  server.registerTool(
    "clipform_youtube_transcript",
    {
      title: "Get YouTube Transcript",
      description: `Extract the transcript, title, and channel info from a YouTube video.

Use this as the first step when creating a comprehension quiz about a YouTube video. Returns the full transcript text plus metadata (title, channel, duration). Feed this into your quiz-writing process to create questions that test whether someone actually watched and understood the video.

WHEN TO USE:
- User provides a YouTube URL and wants a quiz, summary, or content based on the video
- You need to understand what a YouTube video covers before creating content about it

DOES NOT WORK FOR:
- Private or age-restricted videos
- Videos with captions disabled
- Non-YouTube URLs`,
      inputSchema: {
        url: external_exports.string().url().describe(
          "YouTube video URL (any format: watch?v=, youtu.be/, shorts/)"
        ),
        // The schema default means an omitted `lang` is always sent as "en",
        // so the description must not promise auto-detection on omission.
        lang: external_exports.string().optional().default("en").describe(
          "Transcript language code such as 'en', 'es' or 'de'. Defaults to 'en' when omitted."
        ),
        max_chars: external_exports.number().optional().default(15e3).describe(
          "Maximum transcript characters to return (default: 15000). Increase for longer videos where full context matters."
        )
      },
      annotations: {
        readOnlyHint: true,
        destructiveHint: false,
        idempotentHint: true,
        openWorldHint: true
      }
    },
    async ({ url, lang, max_chars }) => {
      const result = await callInternalApi("/internal/youtube-transcript", {
        body: { url, lang, max_chars }
      });
      if (!result.ok) return errorResult(result.error);

      // Tolerate a partial payload instead of throwing on a missing field.
      const data = result.data || {};
      const meta = data.metadata || {};
      const transcript = typeof data.transcript === "string" ? data.transcript : "";

      // Hedged hint only: reaching the limit suggests, but does not prove,
      // that the transcript was cut short.
      const limitReached = typeof max_chars === "number" && max_chars > 0 && transcript.length >= max_chars;
      const truncationNote = limitReached
        ? ` (max_chars limit of ${max_chars} reached - transcript may be truncated; increase max_chars for full context)`
        : "";

      const lines = [
        `## Video: ${orUnknown(meta.title)}`,
        `Channel: ${orUnknown(meta.author)}`,
        `Duration: ${formatDuration(data.durationSeconds)}`,
        `Language: ${orUnknown(data.language)}`,
        `Transcript length: ${transcript.length} characters${truncationNote}`,
        ``,
        `## Transcript`,
        ``,
        transcript
      ];
      return textResult(lines.join("\n"));
    }
  );
}
23293
+
23235
23294
  // src/tools/generate-tts.ts
23236
23295
  var TtsItemSchema = external_exports.object({
23237
23296
  text: external_exports.string().min(1).max(5e3).describe("Narration text"),
@@ -23592,17 +23651,16 @@ function registerRenderCompositionTool(server) {
23592
23651
  "clipform_render_composition",
23593
23652
  {
23594
23653
  title: "Render Composition",
23595
- description: `Render a video composition to MP4, PNG, or GIF.
23654
+ description: `Render a video composition to MP4 or PNG.
23596
23655
 
23597
23656
  Output formats:
23598
- - mp4: Video file (H.264 codec, best for social media)
23657
+ - mp4: Video file (H.264 codec, correct BT.709 colors, best for social media)
23599
23658
  - png: Still image (single frame)
23600
- - gif: Animated GIF (looping)
23601
23659
 
23602
23660
  For narrated quiz slideshows, prefer clipform_generate_slideshow which handles the full workflow (focal point detection, audio sync, storage upload). Use this tool for custom compositions like ScorecardQuiz, ShortFormQuiz, or PresenterDirected.`,
23603
23661
  inputSchema: {
23604
23662
  compositionId: external_exports.string().describe("The composition ID (e.g. 'ScorecardQuiz', 'ShortFormQuiz', 'PresenterDirected')"),
23605
- outputFormat: external_exports.enum(["mp4", "png", "gif"]).default("mp4").describe("Output format (default: mp4)"),
23663
+ outputFormat: external_exports.enum(["mp4", "png"]).default("mp4").describe("Output format (default: mp4)"),
23606
23664
  inputProps: external_exports.record(external_exports.unknown()).optional().describe("Props object matching the composition's expected schema")
23607
23665
  },
23608
23666
  annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: false, openWorldHint: false }
@@ -24333,6 +24391,94 @@ There are NO correct answers. Each option maps to one or more outcome categories
24333
24391
  };
24334
24392
  }
24335
24393
  );
24394
+ server.registerPrompt(
24395
+ "create-comprehension-quiz",
24396
+ {
24397
+ title: "Create a YouTube Comprehension Quiz",
24398
+ description: "Build a comprehension quiz from a YouTube video - tests whether the viewer actually watched and understood the content"
24399
+ },
24400
+ async () => {
24401
+ const sessionContext = await getSessionContext();
24402
+ return {
24403
+ messages: [
24404
+ {
24405
+ role: "user",
24406
+ content: {
24407
+ type: "text",
24408
+ text: "I want to create a comprehension quiz based on a YouTube video. What's the best approach?"
24409
+ }
24410
+ },
24411
+ {
24412
+ role: "assistant",
24413
+ content: {
24414
+ type: "text",
24415
+ text: `${sessionContext ? sessionContext + "\n\n" : ""}Here's how to build a comprehension quiz from a YouTube video. Read the quiz guide (clipform://guides/quiz) for general craft, and the comprehension guide (clipform://guides/comprehension-quiz) for video-specific techniques.
24416
+
24417
+ ## Comprehension Quiz Workflow
24418
+
24419
+ 1. **Extract the transcript** with clipform_youtube_transcript - pass the YouTube URL. Returns transcript, title, channel, and duration.
24420
+ 2. **Analyse the content** - identify:
24421
+ - Key claims, facts, or arguments made in the video
24422
+ - Specific details a casual viewer might miss
24423
+ - The video's main thesis or conclusion
24424
+ - Any surprising or counterintuitive points
24425
+ 3. **Write comprehension questions** - these test whether someone WATCHED the video, not general knowledge:
24426
+ - "According to the video, what is the main reason...?"
24427
+ - "What example does the video use to illustrate...?"
24428
+ - Include 1-2 inference questions: "Based on the video, why does the presenter believe...?"
24429
+ - Avoid questions answerable without watching (e.g., common knowledge about the topic)
24430
+ 4. **Adapt to the audience** - if specified (e.g., "for a 5-year-old"), simplify language, reduce option count, focus on concrete/visual details rather than abstract arguments
24431
+ 5. **Create the form** with clipform_create_form:
24432
+ - show_step_counter: true
24433
+ - disable_back_navigation: true
24434
+ 6. **Add questions** with clipform_add_node (type: "choice"):
24435
+ - config: { choice: { show_answer_feedback: true } }
24436
+ - randomise_options: true in config
24437
+ - score: 1 on correct option, score: 0 on wrong
24438
+ - 3-4 wrong answers per question - make distractors plausible (things someone might guess without watching)
24439
+ 7. **Generate narration** with clipform_generate_tts - reference the video naturally: "If you watched closely, you'll know this one..." Keep each narration 5-10 seconds.
24440
+ 8. **Build video** for each question:
24441
+ - clipform_search_media (kind: "image") - 3 images per question
24442
+ - clipform_generate_video - Ken Burns video synced to audio
24443
+ 9. **Attach media** with clipform_upload_node_media. Include captions, set show_captions: true.
24444
+ 10. **Update end screen** with clipform_update_node:
24445
+ - show_score: true, icon: "trophy"
24446
+ - show_share_button: true
24447
+ - cta_type: "restart", cta_text: "Rewatch and try again?"
24448
+ - score_ranges with messages that reference the video:
24449
+ \`\`\`json
24450
+ { "min": 0, "max": 2, "title": "Were you even watching?", "message": "Time for a rewatch - this video is worth it." },
24451
+ { "min": 3, "max": 5, "title": "Casual Viewer", "message": "You caught the highlights but missed some details." },
24452
+ { "min": 6, "max": 8, "title": "Focused Student", "message": "You were paying attention - impressive." }
24453
+ \`\`\`
24454
+ 11. **Publish** with clipform_update_form
24455
+ 12. **Tag** - tags: ["quiz", "comprehension", "youtube"] + 2-3 topic words from the video
24456
+ 13. **Log** with clipform_log_generation - include the YouTube URL, video title, and channel as sources
24457
+
24458
+ ## Question Types for Comprehension
24459
+
24460
+ | Type | Example | Tests |
24461
+ |------|---------|-------|
24462
+ | Detail recall | "What specific number did the presenter mention?" | Active listening |
24463
+ | Sequence | "What was discussed BEFORE the section about...?" | Following the structure |
24464
+ | Inference | "Based on the video, why does the presenter believe...?" | Understanding arguments |
24465
+ | Contrast | "The video compares X and Y. What was the key difference?" | Comprehension depth |
24466
+ | Conclusion | "What was the presenter's final point?" | Watched to the end |
24467
+
24468
+ Wrong answers should sound right to someone who didn't watch but googled the topic. The quiz should be unfair to non-watchers and fair to watchers.
24469
+
24470
+ ## Before building, ask
24471
+
24472
+ 1. What's the YouTube URL?
24473
+ 2. How many questions? (default: 8)
24474
+ 3. Who's the audience? (age, knowledge level)
24475
+ 4. Media style: text only, still images, or slideshow video with narration?`
24476
+ }
24477
+ }
24478
+ ]
24479
+ };
24480
+ }
24481
+ );
24336
24482
  server.registerPrompt(
24337
24483
  "create-funnel",
24338
24484
  {
@@ -24456,6 +24602,7 @@ Each question is a micro variable-reward event - the same dopamine loop that kee
24456
24602
 
24457
24603
  ## Question Design
24458
24604
 
24605
+ - **Randomize correct answer position** - never put the correct answer in the same slot for every question. Vary it across A/B/C/D so there's no pattern to guess.
24459
24606
  - **Myth-busters**: "Sushi means raw fish - True or False?" (False - it means seasoned rice)
24460
24607
  - **Sounds fake but true**: counterintuitive correct answers make people rewatch
24461
24608
  - **Common misconceptions**: "Capital of Australia?" (not Sydney - Canberra)
@@ -24552,6 +24699,77 @@ Do NOT say "let's see if you get this right" - there is no right answer.
24552
24699
 
24553
24700
  ${WRITING_PRINCIPLES}
24554
24701
 
24702
+ ${MEDIA_WORKFLOW}`
24703
+ }
24704
+ ]
24705
+ })
24706
+ );
24707
+ server.registerResource(
24708
+ "guide-comprehension-quiz",
24709
+ "clipform://guides/comprehension-quiz",
24710
+ {
24711
+ description: "Craft knowledge for YouTube comprehension quizzes - extracting questions from transcripts, distractor design, audience adaptation",
24712
+ mimeType: "text/markdown"
24713
+ },
24714
+ async () => ({
24715
+ contents: [
24716
+ {
24717
+ uri: "clipform://guides/comprehension-quiz",
24718
+ mimeType: "text/markdown",
24719
+ text: `# Comprehension Quiz Guide
24720
+
24721
+ ## How it differs from a trivia quiz
24722
+
24723
+ A trivia quiz tests general knowledge. A comprehension quiz tests whether someone watched a specific piece of content. The questions should be **unfair to non-watchers and fair to watchers**.
24724
+
24725
+ | | Trivia Quiz | Comprehension Quiz |
24726
+ |---|---|---|
24727
+ | Source | Research + your knowledge | The video transcript |
24728
+ | Questions | General facts | Specific claims from the video |
24729
+ | Wrong answers | Common misconceptions | Things you'd guess without watching |
24730
+ | Goal | Entertainment + learning | Proof of watching + retention |
24731
+
24732
+ ## Extracting questions from transcripts
24733
+
24734
+ Read the transcript looking for:
24735
+
24736
+ 1. **Specific numbers or data** - "The presenter says it takes X days to..." (detail recall)
24737
+ 2. **Causal claims** - "According to the video, this happens because..." (comprehension)
24738
+ 3. **Examples used** - "What example does the presenter use to explain...?" (attention)
24739
+ 4. **Sequence of topics** - "What does the presenter discuss right after...?" (structure following)
24740
+ 5. **The main argument** - "What is the presenter's main point about...?" (thesis comprehension)
24741
+ 6. **Counterintuitive points** - anything the presenter says is surprising or commonly misunderstood
24742
+
24743
+ ## Distractor design (wrong answers)
24744
+
24745
+ Make wrong answers plausible to someone who **didn't watch**:
24746
+
24747
+ - Use correct facts from other sources about the same topic (tests whether they watched THIS video)
24748
+ - Include things that sound likely based on the title alone
24749
+ - For number questions, use nearby values that seem reasonable
24750
+ - Never include obviously joke answers - every option should feel possible
24751
+
24752
+ ## Audience adaptation
24753
+
24754
+ | Audience | Question style | Language | Count |
24755
+ |----------|---------------|----------|-------|
24756
+ | Young children (5-8) | Concrete details, visual moments | Simple, short sentences | 4-6 |
24757
+ | Older children (9-12) | Details + basic inference | Clear, direct | 6-8 |
24758
+ | Teens (13-17) | Inference + sequence + argument | Natural, conversational | 6-10 |
24759
+ | Adults | Full range including critical analysis | Match the video's register | 6-10 |
24760
+
24761
+ For young children: focus on "What did you SEE?" and "Who did what?" rather than abstract arguments.
24762
+
24763
+ ## Narration style
24764
+
24765
+ Reference the video naturally but don't spoil:
24766
+
24767
+ - "If you were paying attention during the bit about..."
24768
+ - "This is one of those details most people miss..."
24769
+ - "The presenter made a really specific claim here..."
24770
+
24771
+ ${WRITING_PRINCIPLES}
24772
+
24555
24773
  ${MEDIA_WORKFLOW}`
24556
24774
  }
24557
24775
  ]
@@ -24775,6 +24993,7 @@ function createServer() {
24775
24993
  registerAttachNodeAudioTool(server);
24776
24994
  registerLogGenerationTool(server);
24777
24995
  registerSearchNewsTool(server);
24996
+ registerYouTubeTranscriptTool(server);
24778
24997
  registerGenerateTtsTool(server);
24779
24998
  registerGenerateSlideshowTool(server);
24780
24999
  registerGenerateVideoTool(server);
@@ -24795,4 +25014,4 @@ export {
24795
25014
  setApiKey,
24796
25015
  createServer
24797
25016
  };
24798
- //# sourceMappingURL=chunk-MWNHJLHD.js.map
25017
+ //# sourceMappingURL=chunk-MV3ZI5ZT.js.map