@clipform/mcp-server 1.10.1 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22277,7 +22277,9 @@ async function callInternalApi(path, options = {}) {
22277
22277
  const headers = {
22278
22278
  "Content-Type": "application/json"
22279
22279
  };
22280
- if (INTERNAL_SECRET) {
22280
+ if (_apiKey) {
22281
+ headers["Authorization"] = `Bearer ${_apiKey}`;
22282
+ } else if (INTERNAL_SECRET) {
22281
22283
  headers["Authorization"] = `Bearer ${INTERNAL_SECRET}`;
22282
22284
  }
22283
22285
  const fetchOptions = { method, headers };
@@ -23232,6 +23234,65 @@ If neither native web search nor this tool is available and the topic is post-cu
23232
23234
  );
23233
23235
  }
23234
23236
 
23237
+ // src/tools/youtube-transcript.ts
23238
/**
 * Registers the `clipform_youtube_transcript` tool on the given server.
 *
 * The tool forwards `url`, `lang` and `max_chars` to the
 * `/internal/youtube-transcript` endpoint through `callInternalApi` and
 * renders the response as a markdown document: a short video header
 * followed by the transcript text.
 *
 * @param {object} server - Object exposing `registerTool(name, config, handler)`.
 * @returns {void}
 */
function registerYouTubeTranscriptTool(server) {
  // Substitute a readable placeholder when the API omits a header field, so a
  // partial payload never prints "undefined" into the model-facing output.
  const valueOr = (value, fallback) => value == null || value === "" ? fallback : value;

  // Builds the markdown document from a payload that is known to carry a
  // non-empty transcript string. Metadata and duration are treated as optional.
  const formatTranscript = (data) => {
    const meta = data.metadata || {};
    const lines = [
      `## Video: ${valueOr(meta.title, "Unknown title")}`,
      `Channel: ${valueOr(meta.author, "Unknown channel")}`
    ];
    // Skip the duration line entirely rather than rendering "~NaN minutes".
    if (Number.isFinite(data.durationSeconds)) {
      lines.push(`Duration: ~${Math.round(data.durationSeconds / 60)} minutes`);
    }
    lines.push(
      `Language: ${valueOr(data.language, "unknown")}`,
      `Transcript length: ${data.transcript.length} characters`,
      ``,
      `## Transcript`,
      ``,
      data.transcript
    );
    return lines.join("\n");
  };

  server.registerTool(
    "clipform_youtube_transcript",
    {
      title: "Get YouTube Transcript",
      description: `Extract the transcript, title, and channel info from a YouTube video.

Use this as the first step when creating a comprehension quiz about a YouTube video. Returns the full transcript text plus metadata (title, channel, duration). Feed this into your quiz-writing process to create questions that test whether someone actually watched and understood the video.

WHEN TO USE:
- User provides a YouTube URL and wants a quiz, summary, or content based on the video
- You need to understand what a YouTube video covers before creating content about it

DOES NOT WORK FOR:
- Private or age-restricted videos
- Videos with captions disabled
- Non-YouTube URLs`,
      inputSchema: {
        url: external_exports.string().url().describe(
          "YouTube video URL (any format: watch?v=, youtu.be/, shorts/)"
        ),
        // The schema default means an omitted `lang` is always sent as "en",
        // so the description must not promise auto-detection on omission.
        lang: external_exports.string().optional().default("en").describe(
          "Transcript language code (default: 'en'). If no English transcript is available, pass the video's own language code explicitly (e.g. 'es', 'de')."
        ),
        max_chars: external_exports.number().optional().default(15e3).describe(
          "Maximum transcript characters to return (default: 15000). Increase for longer videos where full context matters."
        )
      },
      annotations: {
        readOnlyHint: true,
        destructiveHint: false,
        idempotentHint: true,
        openWorldHint: true
      }
    },
    async ({ url, lang, max_chars }) => {
      const result = await callInternalApi("/internal/youtube-transcript", {
        body: { url, lang, max_chars }
      });
      if (!result.ok) return errorResult(result.error);
      const data = result.data;
      // A "successful" response without transcript text is unusable; report it
      // as a tool error instead of throwing a TypeError out of the handler.
      if (!data || typeof data.transcript !== "string" || data.transcript.length === 0) {
        return errorResult("The transcript service returned no transcript text for this video.");
      }
      return textResult(formatTranscript(data));
    }
  );
}
23295
+
23235
23296
  // src/tools/generate-tts.ts
23236
23297
  var TtsItemSchema = external_exports.object({
23237
23298
  text: external_exports.string().min(1).max(5e3).describe("Narration text"),
@@ -23592,17 +23653,16 @@ function registerRenderCompositionTool(server) {
23592
23653
  "clipform_render_composition",
23593
23654
  {
23594
23655
  title: "Render Composition",
23595
- description: `Render a video composition to MP4, PNG, or GIF.
23656
+ description: `Render a video composition to MP4 or PNG.
23596
23657
 
23597
23658
  Output formats:
23598
- - mp4: Video file (H.264 codec, best for social media)
23659
+ - mp4: Video file (H.264 codec, correct BT.709 colors, best for social media)
23599
23660
  - png: Still image (single frame)
23600
- - gif: Animated GIF (looping)
23601
23661
 
23602
23662
  For narrated quiz slideshows, prefer clipform_generate_slideshow which handles the full workflow (focal point detection, audio sync, storage upload). Use this tool for custom compositions like ScorecardQuiz, ShortFormQuiz, or PresenterDirected.`,
23603
23663
  inputSchema: {
23604
23664
  compositionId: external_exports.string().describe("The composition ID (e.g. 'ScorecardQuiz', 'ShortFormQuiz', 'PresenterDirected')"),
23605
- outputFormat: external_exports.enum(["mp4", "png", "gif"]).default("mp4").describe("Output format (default: mp4)"),
23665
+ outputFormat: external_exports.enum(["mp4", "png"]).default("mp4").describe("Output format (default: mp4)"),
23606
23666
  inputProps: external_exports.record(external_exports.unknown()).optional().describe("Props object matching the composition's expected schema")
23607
23667
  },
23608
23668
  annotations: { readOnlyHint: false, destructiveHint: false, idempotentHint: false, openWorldHint: false }
@@ -24333,6 +24393,94 @@ There are NO correct answers. Each option maps to one or more outcome categories
24333
24393
  };
24334
24394
  }
24335
24395
  );
24396
+ server.registerPrompt(
24397
+ "create-comprehension-quiz",
24398
+ {
24399
+ title: "Create a YouTube Comprehension Quiz",
24400
+ description: "Build a comprehension quiz from a YouTube video - tests whether the viewer actually watched and understood the content"
24401
+ },
24402
+ async () => {
24403
+ const sessionContext = await getSessionContext();
24404
+ return {
24405
+ messages: [
24406
+ {
24407
+ role: "user",
24408
+ content: {
24409
+ type: "text",
24410
+ text: "I want to create a comprehension quiz based on a YouTube video. What's the best approach?"
24411
+ }
24412
+ },
24413
+ {
24414
+ role: "assistant",
24415
+ content: {
24416
+ type: "text",
24417
+ text: `${sessionContext ? sessionContext + "\n\n" : ""}Here's how to build a comprehension quiz from a YouTube video. Read the quiz guide (clipform://guides/quiz) for general craft, and the comprehension guide (clipform://guides/comprehension-quiz) for video-specific techniques.
24418
+
24419
+ ## Comprehension Quiz Workflow
24420
+
24421
+ 1. **Extract the transcript** with clipform_youtube_transcript - pass the YouTube URL. Returns transcript, title, channel, and duration.
24422
+ 2. **Analyse the content** - identify:
24423
+ - Key claims, facts, or arguments made in the video
24424
+ - Specific details a casual viewer might miss
24425
+ - The video's main thesis or conclusion
24426
+ - Any surprising or counterintuitive points
24427
+ 3. **Write comprehension questions** - these test whether someone WATCHED the video, not general knowledge:
24428
+ - "According to the video, what is the main reason...?"
24429
+ - "What example does the video use to illustrate...?"
24430
+ - Include 1-2 inference questions: "Based on the video, why does the presenter believe...?"
24431
+ - Avoid questions answerable without watching (e.g., common knowledge about the topic)
24432
+ 4. **Adapt to the audience** - if specified (e.g., "for a 5-year-old"), simplify language, reduce option count, focus on concrete/visual details rather than abstract arguments
24433
+ 5. **Create the form** with clipform_create_form:
24434
+ - show_step_counter: true
24435
+ - disable_back_navigation: true
24436
+ 6. **Add questions** with clipform_add_node (type: "choice"):
24437
+ - config: { choice: { show_answer_feedback: true } }
24438
+ - randomise_options: true in config
24439
+ - score: 1 on correct option, score: 0 on wrong
24440
+ - 3-4 wrong answers per question - make distractors plausible (things someone might guess without watching)
24441
+ 7. **Generate narration** with clipform_generate_tts - reference the video naturally: "If you watched closely, you'll know this one..." Keep each narration 5-10 seconds.
24442
+ 8. **Build video** for each question:
24443
+ - clipform_search_media (kind: "image") - 3 images per question
24444
+ - clipform_generate_video - Ken Burns video synced to audio
24445
+ 9. **Attach media** with clipform_upload_node_media. Include captions, set show_captions: true.
24446
+ 10. **Update end screen** with clipform_update_node:
24447
+ - show_score: true, icon: "trophy"
24448
+ - show_share_button: true
24449
+ - cta_type: "restart", cta_text: "Rewatch and try again?"
24450
+ - score_ranges with messages that reference the video:
24451
+ \`\`\`json
24452
+ { "min": 0, "max": 2, "title": "Were you even watching?", "message": "Time for a rewatch - this video is worth it." },
24453
+ { "min": 3, "max": 5, "title": "Casual Viewer", "message": "You caught the highlights but missed some details." },
24454
+ { "min": 6, "max": 8, "title": "Focused Student", "message": "You were paying attention - impressive." }
24455
+ \`\`\`
24456
+ 11. **Publish** with clipform_update_form
24457
+ 12. **Tag** - tags: ["quiz", "comprehension", "youtube"] + 2-3 topic words from the video
24458
+ 13. **Log** with clipform_log_generation - include the YouTube URL, video title, and channel as sources
24459
+
24460
+ ## Question Types for Comprehension
24461
+
24462
+ | Type | Example | Tests |
24463
+ |------|---------|-------|
24464
+ | Detail recall | "What specific number did the presenter mention?" | Active listening |
24465
+ | Sequence | "What was discussed BEFORE the section about...?" | Following the structure |
24466
+ | Inference | "Based on the video, why does the presenter believe...?" | Understanding arguments |
24467
+ | Contrast | "The video compares X and Y. What was the key difference?" | Comprehension depth |
24468
+ | Conclusion | "What was the presenter's final point?" | Watched to the end |
24469
+
24470
+ Wrong answers should sound right to someone who didn't watch but googled the topic. The quiz should be unfair to non-watchers and fair to watchers.
24471
+
24472
+ ## Before building, ask
24473
+
24474
+ 1. What's the YouTube URL?
24475
+ 2. How many questions? (default: 8)
24476
+ 3. Who's the audience? (age, knowledge level)
24477
+ 4. Media style: text only, still images, or slideshow video with narration?`
24478
+ }
24479
+ }
24480
+ ]
24481
+ };
24482
+ }
24483
+ );
24336
24484
  server.registerPrompt(
24337
24485
  "create-funnel",
24338
24486
  {
@@ -24456,6 +24604,7 @@ Each question is a micro variable-reward event - the same dopamine loop that kee
24456
24604
 
24457
24605
  ## Question Design
24458
24606
 
24607
+ - **Randomize correct answer position** - never put the correct answer in the same slot for every question. Vary it across A/B/C/D so there's no pattern to guess.
24459
24608
  - **Myth-busters**: "Sushi means raw fish - True or False?" (False - it means seasoned rice)
24460
24609
  - **Sounds fake but true**: counterintuitive correct answers make people rewatch
24461
24610
  - **Common misconceptions**: "Capital of Australia?" (not Sydney - Canberra)
@@ -24466,6 +24615,29 @@ Each question is a micro variable-reward event - the same dopamine loop that kee
24466
24615
 
24467
24616
  For numeric questions (population, speed, weight), scale the real answer by random multipliers (0.3x to 3x) rounded to the same magnitude. Makes wrong answers plausible but clearly different.
24468
24617
 
24618
+ ## Color Brain Questions (ColorSwatch composition)
24619
+
24620
+ Inspired by the Color Brain board game - every answer is identified by its colours. Show flat colour chips, ask "what has these colours?". Use the \`ColorSwatch\` composition for the question card.
24621
+
24622
+ **Colour palette constraint:** Swatches are solid flat chips. Only use clearly distinguishable basic colours: red, blue, green, yellow, white, black, orange, purple, pink, brown, grey. No navy vs blue, no teal vs cyan - they look the same as flat chips. The skill is picking subjects where a combo of basic colours is unique enough to identify.
24623
+
24624
+ **Question categories:**
24625
+ - Flags: "Which country's flag has these colours?" (pair with FlagReveal for answer)
24626
+ - Brand logos: red + yellow = McDonald's, red + white = Coca-Cola
24627
+ - Sports teams: red + white = Arsenal, red + blue + white = Barcelona
24628
+ - Superheroes/characters: red + blue = Spider-Man, yellow + black = Batman
24629
+ - Food: red + green = watermelon, yellow + brown = banana
24630
+
24631
+ **Difficulty scaling:**
24632
+ - Easy: iconic subjects with unique colour combos (Japan flag: red + white)
24633
+ - Medium: common subjects but colours shared with others (Italy vs Ireland: both green + white + one more)
24634
+ - Hard: obscure subjects or very common colour combos that fit many answers
24635
+
24636
+ **Design rules:**
24637
+ - 2-4 colours per question works best. 5+ gets messy and hard to distinguish.
24638
+ - If two answer options would produce identical swatches, don't use that question.
24639
+ - Pair with a reveal composition (FlagReveal, image, or text) for the answer.
24640
+
24469
24641
  ## Narration Style
24470
24642
 
24471
24643
  You're a quiz master, not a question reader. Each question's narration should:
@@ -24552,6 +24724,77 @@ Do NOT say "let's see if you get this right" - there is no right answer.
24552
24724
 
24553
24725
  ${WRITING_PRINCIPLES}
24554
24726
 
24727
+ ${MEDIA_WORKFLOW}`
24728
+ }
24729
+ ]
24730
+ })
24731
+ );
24732
+ server.registerResource(
24733
+ "guide-comprehension-quiz",
24734
+ "clipform://guides/comprehension-quiz",
24735
+ {
24736
+ description: "Craft knowledge for YouTube comprehension quizzes - extracting questions from transcripts, distractor design, audience adaptation",
24737
+ mimeType: "text/markdown"
24738
+ },
24739
+ async () => ({
24740
+ contents: [
24741
+ {
24742
+ uri: "clipform://guides/comprehension-quiz",
24743
+ mimeType: "text/markdown",
24744
+ text: `# Comprehension Quiz Guide
24745
+
24746
+ ## How it differs from a trivia quiz
24747
+
24748
+ A trivia quiz tests general knowledge. A comprehension quiz tests whether someone watched a specific piece of content. The questions should be **unfair to non-watchers and fair to watchers**.
24749
+
24750
+ | | Trivia Quiz | Comprehension Quiz |
24751
+ |---|---|---|
24752
+ | Source | Research + your knowledge | The video transcript |
24753
+ | Questions | General facts | Specific claims from the video |
24754
+ | Wrong answers | Common misconceptions | Things you'd guess without watching |
24755
+ | Goal | Entertainment + learning | Proof of watching + retention |
24756
+
24757
+ ## Extracting questions from transcripts
24758
+
24759
+ Read the transcript looking for:
24760
+
24761
+ 1. **Specific numbers or data** - "The presenter says it takes X days to..." (detail recall)
24762
+ 2. **Causal claims** - "According to the video, this happens because..." (comprehension)
24763
+ 3. **Examples used** - "What example does the presenter use to explain...?" (attention)
24764
+ 4. **Sequence of topics** - "What does the presenter discuss right after...?" (structure following)
24765
+ 5. **The main argument** - "What is the presenter's main point about...?" (thesis comprehension)
24766
+ 6. **Counterintuitive points** - anything the presenter says is surprising or commonly misunderstood
24767
+
24768
+ ## Distractor design (wrong answers)
24769
+
24770
+ Make wrong answers plausible to someone who **didn't watch**:
24771
+
24772
+ - Use correct facts from other sources about the same topic (tests whether they watched THIS video)
24773
+ - Include things that sound likely based on the title alone
24774
+ - For number questions, use nearby values that seem reasonable
24775
+ - Never include obviously joke answers - every option should feel possible
24776
+
24777
+ ## Audience adaptation
24778
+
24779
+ | Audience | Question style | Language | Count |
24780
+ |----------|---------------|----------|-------|
24781
+ | Young children (5-8) | Concrete details, visual moments | Simple, short sentences | 4-6 |
24782
+ | Older children (9-12) | Details + basic inference | Clear, direct | 6-8 |
24783
+ | Teens (13-17) | Inference + sequence + argument | Natural, conversational | 6-10 |
24784
+ | Adults | Full range including critical analysis | Match the video's register | 6-10 |
24785
+
24786
+ For young children: focus on "What did you SEE?" and "Who did what?" rather than abstract arguments.
24787
+
24788
+ ## Narration style
24789
+
24790
+ Reference the video naturally but don't spoil:
24791
+
24792
+ - "If you were paying attention during the bit about..."
24793
+ - "This is one of those details most people miss..."
24794
+ - "The presenter made a really specific claim here..."
24795
+
24796
+ ${WRITING_PRINCIPLES}
24797
+
24555
24798
  ${MEDIA_WORKFLOW}`
24556
24799
  }
24557
24800
  ]
@@ -24775,6 +25018,7 @@ function createServer() {
24775
25018
  registerAttachNodeAudioTool(server);
24776
25019
  registerLogGenerationTool(server);
24777
25020
  registerSearchNewsTool(server);
25021
+ registerYouTubeTranscriptTool(server);
24778
25022
  registerGenerateTtsTool(server);
24779
25023
  registerGenerateSlideshowTool(server);
24780
25024
  registerGenerateVideoTool(server);
@@ -24795,4 +25039,4 @@ export {
24795
25039
  setApiKey,
24796
25040
  createServer
24797
25041
  };
24798
- //# sourceMappingURL=chunk-MWNHJLHD.js.map
25042
+ //# sourceMappingURL=chunk-P6TIRFDL.js.map