npm - headless-youtube-captions - Versions diffs - 1.0.2 → 1.2.0 - Mend

headless-youtube-captions 1.0.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/CHANGELOG.md +13 -0
package/LICENSE +21 -0
package/README.md +213 -10
package/package.json +7 -3
package/src/channel.js +243 -0
package/src/comments.js +156 -0
package/src/index.d.ts +145 -1
package/src/index.js +11 -38
package/src/utils/browser.js +53 -0
package/src/utils/extract.js +101 -0
package/src/utils/scroll.js +48 -0
package/.claude/settings.local.json +0 -25
package/test/index.test.js +0 -23

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,18 @@
 # Changelog
+## [1.2.0] - 2025-07-06
+### Added
+- Docker support with `PUPPETEER_EXECUTABLE_PATH` environment variable
+- Ability to specify custom Chrome/Chromium executable path
+- Comprehensive Docker usage documentation with example Dockerfile
+- npm badges in README (version, license, Node.js version)
+- Features section highlighting key capabilities
+### Changed
+- Enhanced README with better organization and documentation
+- Updated Node.js requirement documentation to correctly show v18+
 ## [1.0.1] - 2025-06-10
 ### Added

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024 andrewlwn77
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md CHANGED Viewed

@@ -1,6 +1,21 @@
 # Headless YouTube Captions
-> Extract YouTube video transcripts by interacting with YouTube's UI using Puppeteer
+[![npm version](https://badge.fury.io/js/headless-youtube-captions.svg)](https://www.npmjs.com/package/headless-youtube-captions)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+[![Node.js Version](https://img.shields.io/node/v/headless-youtube-captions.svg)](https://nodejs.org)
+> Extract YouTube video transcripts, channel videos, and comments by interacting with YouTube's UI using Puppeteer
+## Features
+- 🎯 Extract video transcripts/captions in multiple languages
+- 📺 Get channel videos with pagination support
+- 🔍 Search videos within a specific channel
+- 💬 Extract video comments with sorting options
+- 🐳 Docker support with configurable Chrome executable path
+- 📦 Zero build dependencies - runs directly from source
+- 🚀 Modern ES modules with async/await
+- 🛡️ Handles cookie consent and ad skipping automatically
 ## Installation
@@ -12,7 +27,7 @@ yarn add headless-youtube-captions
 ## Usage
-### ES6 / TypeScript
+### Extract Video Transcripts
 ```js
 import { getSubtitles } from 'headless-youtube-captions';
@@ -24,16 +39,42 @@ const captions = await getSubtitles({
 console.log(captions);
 ```
-### ES5 / CommonJS
+### Get Channel Videos
 ```js
-const { getSubtitles } = require('headless-youtube-captions');
+import { getChannelVideos } from 'headless-youtube-captions';
-getSubtitles({
-  videoID: 'JueUvj6X3DA', // YouTube video ID
-  lang: 'en' // Optional, default: 'en'
-}).then(captions => {
-  console.log(captions);
+const result = await getChannelVideos({
+  channelURL: '@mkbhd',  // or full URL like 'https://youtube.com/@mkbhd'
+  limit: 30              // Optional, default: 30
+});
+console.log(result.videos);
+```
+### Search Channel Videos
+```js
+import { searchChannelVideos } from 'headless-youtube-captions';
+const result = await searchChannelVideos({
+  channelURL: '@mkbhd',
+  query: 'iphone review',
+  limit: 20              // Optional, default: 30
+});
+console.log(result.results);
+```
+### Get Video Comments
+```js
+import { getVideoComments } from 'headless-youtube-captions';
+const result = await getVideoComments({
+  videoID: 'JueUvj6X3DA',
+  limit: 50,             // Optional, default: 50
+  sortBy: 'top'          // Optional, 'top' or 'newest', default: 'top'
 });
+console.log(result.comments);
 ```
 ## API
@@ -100,9 +141,58 @@ This library uses Puppeteer to:
 ## Requirements
-- Node.js 12 or higher
+- Node.js 18 or higher (ES modules support required)
 - Puppeteer (installed as a dependency)
+## Docker Usage
+When running in Docker containers, you may need to specify the Chrome executable path using the `PUPPETEER_EXECUTABLE_PATH` environment variable:
+```bash
+# Set the environment variable
+export PUPPETEER_EXECUTABLE_PATH=/usr/bin/google-chrome-stable
+# Or run directly
+PUPPETEER_EXECUTABLE_PATH=/usr/bin/google-chrome-stable node your-script.js
+```
+Example Dockerfile configuration:
+```dockerfile
+# Install Chrome dependencies
+RUN apt-get update && apt-get install -y \
+    wget \
+    gnupg \
+    ca-certificates \
+    fonts-liberation \
+    libasound2 \
+    libatk-bridge2.0-0 \
+    libatk1.0-0 \
+    libatspi2.0-0 \
+    libcups2 \
+    libdbus-1-3 \
+    libdrm2 \
+    libgbm1 \
+    libgtk-3-0 \
+    libnspr4 \
+    libnss3 \
+    libxcomposite1 \
+    libxdamage1 \
+    libxfixes3 \
+    libxkbcommon0 \
+    libxrandr2 \
+    xdg-utils
+# Install Chrome
+RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
+    && echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list \
+    && apt-get update \
+    && apt-get install -y google-chrome-stable \
+    && rm -rf /var/lib/apt/lists/*
+# Set the Chrome executable path
+ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/google-chrome-stable
+```
 ## Error Handling
 The function will throw an error if:
@@ -129,6 +219,119 @@ try {
 - The library respects YouTube's UI structure as of the last update
 - Some videos may not have transcripts available
+### `getChannelVideos(options)`
+Extracts videos from a YouTube channel with pagination support.
+#### Parameters
+- `options` (Object):
+  - `channelURL` (String, required): Channel identifier (@handle, channel ID, or full URL)
+  - `limit` (Number, optional): Maximum videos to return. Default: `30`
+  - `pageToken` (String, optional): Reserved for future pagination; the current implementation accepts it but does not use it
+#### Returns
+```js
+{
+  channel: {
+    name: "Channel Name",
+    subscribers: "1.2M subscribers",
+    videoCount: "500 videos"
+  },
+  videos: [
+    {
+      id: "videoId123",
+      title: "Video Title",
+      views: "1.2M views",
+      uploadTime: "2 days ago",
+      duration: "10:45",
+      thumbnail: "https://...",
+      url: "https://youtube.com/watch?v=videoId123"
+    }
+    // ... more videos
+  ],
+  totalLoaded: 30,
+  hasMore: true
+}
+```
+### `searchChannelVideos(options)`
+Search for videos within a specific YouTube channel.
+#### Parameters
+- `options` (Object):
+  - `channelURL` (String, required): Channel identifier (@handle, channel ID, or full URL)
+  - `query` (String, required): Search query
+  - `limit` (Number, optional): Maximum results. Default: `30`
+#### Returns
+```js
+{
+  query: "iphone review",
+  results: [
+    {
+      id: "videoId123",
+      title: "iPhone 15 Review",
+      views: "2.5M views",
+      uploadTime: "1 week ago",
+      duration: "15:23",
+      thumbnail: "https://...",
+      url: "https://youtube.com/watch?v=videoId123"
+    }
+    // ... more results
+  ],
+  totalFound: 25
+}
+```
+### `getVideoComments(options)`
+Extract comments from a YouTube video with pagination support.
+#### Parameters
+- `options` (Object):
+  - `videoID` (String, required): YouTube video ID
+  - `limit` (Number, optional): Maximum comments to return. Default: `50`
+  - `sortBy` (String, optional): Sort order - `'top'` or `'newest'`. Default: `'top'`
+  - `pageToken` (String, optional): Reserved for future pagination; the current implementation accepts it but does not use it
+#### Returns
+```js
+{
+  video: {
+    id: "JueUvj6X3DA",
+    title: "Video Title",
+    channel: {
+      name: "Channel Name",
+      url: "https://youtube.com/@channel"
+    },
+    views: "1.5M views"
+  },
+  comments: [
+    {
+      author: "Username",
+      authorUrl: "https://youtube.com/@username",
+      authorAvatar: "https://...",
+      text: "Great video! Thanks for sharing...",
+      time: "2 days ago",
+      likes: "245",
+      replyCount: "12"
+    }
+    // ... more comments
+  ],
+  totalComments: 1566,
+  totalLoaded: 50,
+  hasMore: true,
+  sortBy: "top"
+}
+```
 ## License
 MIT

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "headless-youtube-captions",
-  "version": "1.0.2",
-  "description": "Extract YouTube video transcripts using headless browser automation",
+  "version": "1.2.0",
+  "description": "Extract YouTube video transcripts, channel videos, and comments using headless browser automation",
   "main": "src/index.js",
   "types": "src/index.d.ts",
   "type": "module",
@@ -35,8 +35,12 @@
     "captions",
     "subtitles",
     "transcript",
+    "comments",
+    "channel",
+    "videos",
     "puppeteer",
     "headless",
-    "scraper"
+    "scraper",
+    "api"
   ]
 }

package/src/channel.js ADDED Viewed

@@ -0,0 +1,243 @@
+import { createBrowser, createPage, handleCookieConsent } from './utils/browser.js';
+import { scrollAndWaitForMore } from './utils/scroll.js';
+import { extractVideoData, extractChannelInfo } from './utils/extract.js';
+export async function getChannelVideos({ channelURL, limit = 30, pageToken = null }) {
+  const browser = await createBrowser();
+  try {
+    const page = await createPage(browser);
+    // Construct the full URL
+    let fullURL;
+    if (channelURL.startsWith('http')) {
+      // Ensure we're on the videos tab
+      fullURL = channelURL.includes('/videos') ? channelURL : channelURL.replace(/\/?$/, '/videos');
+    } else if (channelURL.startsWith('@')) {
+      fullURL = `https://youtube.com/${channelURL}/videos`;
+    } else if (channelURL.startsWith('UC')) {
+      fullURL = `https://youtube.com/channel/${channelURL}/videos`;
+    } else {
+      fullURL = `https://youtube.com/c/${channelURL}/videos`;
+    }
+    console.error(`Navigating to ${fullURL}`);
+    await page.goto(fullURL, {
+      waitUntil: 'networkidle2',
+      timeout: 60000
+    });
+    // Handle cookie consent
+    await handleCookieConsent(page);
+    // Wait a bit for dynamic content
+    await new Promise(resolve => setTimeout(resolve, 3000));
+    // Wait for initial videos to load
+    await page.waitForSelector('ytd-rich-item-renderer', { timeout: 30000 });
+    console.error('Initial videos loaded');
+    // Extract channel info
+    const channelInfo = await extractChannelInfo(page);
+    let allVideos = [];
+    let currentCount = 0;
+    // Load videos up to the limit
+    while (allVideos.length < limit) {
+      const videos = await extractVideoData(page);
+      allVideos = videos;
+      if (videos.length === currentCount) {
+        // No more videos to load
+        break;
+      }
+      currentCount = videos.length;
+      if (currentCount < limit) {
+        // Try to load more videos
+        const newCount = await scrollAndWaitForMore(page, 'ytd-rich-item-renderer', currentCount);
+        if (newCount === currentCount) {
+          break; // No new videos loaded
+        }
+      }
+    }
+    // Trim to requested limit
+    const resultVideos = allVideos.slice(0, limit);
+    console.error(`Successfully extracted ${resultVideos.length} videos`);
+    return {
+      channel: channelInfo,
+      videos: resultVideos,
+      totalLoaded: allVideos.length,
+      hasMore: allVideos.length > limit
+    };
+  } catch (error) {
+    console.error('Error extracting channel videos:', error);
+    throw error;
+  } finally {
+    await browser.close();
+  }
+}
+export async function searchChannelVideos({ channelURL, query, limit = 30 }) {
+  const browser = await createBrowser();
+  try {
+    const page = await createPage(browser);
+    // Navigate to channel page
+    let fullURL;
+    if (channelURL.startsWith('http')) {
+      // Remove /videos if present to get to main channel page
+      fullURL = channelURL.replace(/\/videos\/?$/, '');
+    } else if (channelURL.startsWith('@')) {
+      fullURL = `https://youtube.com/${channelURL}`;
+    } else if (channelURL.startsWith('UC')) {
+      fullURL = `https://youtube.com/channel/${channelURL}`;
+    } else {
+      fullURL = `https://youtube.com/c/${channelURL}`;
+    }
+    console.error(`Navigating to ${fullURL}`);
+    await page.goto(fullURL, {
+      waitUntil: 'networkidle2',
+      timeout: 60000
+    });
+    // Handle cookie consent
+    await handleCookieConsent(page);
+    // Wait for page to load
+    await new Promise(resolve => setTimeout(resolve, 3000));
+    // Look for search icon in channel header
+    const searchButtonSelectors = [
+      'ytd-channel-header-renderer yt-icon-button[aria-label*="Search"]',
+      'ytd-channel-header-renderer button[aria-label*="Search"]',
+      '#channel-header yt-icon-button[aria-label*="Search"]',
+      'yt-icon[icon="yt-icons:search"]'
+    ];
+    let searchClicked = false;
+    for (const selector of searchButtonSelectors) {
+      try {
+        const searchButton = await page.$(selector);
+        if (searchButton) {
+          const isVisible = await searchButton.evaluate(el => {
+            const rect = el.getBoundingClientRect();
+            return rect.width > 0 && rect.height > 0;
+          });
+          if (isVisible) {
+            await searchButton.click();
+            console.error('Clicked search button');
+            searchClicked = true;
+            break;
+          }
+        }
+      } catch (e) {
+        // Try next selector
+      }
+    }
+    if (!searchClicked) {
+      // Try clicking on the search icon itself
+      const clicked = await page.evaluate(() => {
+        const icons = document.querySelectorAll('yt-icon');
+        for (const icon of icons) {
+          if (icon.getAttribute('icon') === 'yt-icons:search') {
+            const button = icon.closest('button') || icon.closest('yt-icon-button');
+            if (button) {
+              button.click();
+              return true;
+            }
+          }
+        }
+        return false;
+      });
+      if (clicked) {
+        console.error('Clicked search icon');
+        searchClicked = true;
+      }
+    }
+    if (!searchClicked) {
+      throw new Error('Could not find channel search button');
+    }
+    // Wait for search input to appear
+    await page.waitForSelector('input[placeholder*="Search"]', { timeout: 5000 });
+    // Type search query
+    await page.type('input[placeholder*="Search"]', query);
+    await page.keyboard.press('Enter');
+    // Wait for search results
+    await new Promise(resolve => setTimeout(resolve, 3000));
+    await page.waitForSelector('ytd-video-renderer, ytd-rich-item-renderer', { timeout: 10000 });
+    // Extract search results
+    const searchResults = await page.evaluate(() => {
+      // Try different selectors for search results
+      let videos = document.querySelectorAll('ytd-video-renderer');
+      if (videos.length === 0) {
+        videos = document.querySelectorAll('ytd-rich-item-renderer');
+      }
+      return Array.from(videos).map(video => {
+        // Extract video ID
+        const link = video.querySelector('a#video-title, a#video-title-link');
+        const href = link ? link.href : '';
+        const videoId = href.match(/watch\?v=([^&]+)/)?.[1] || '';
+        // Extract title
+        const titleElement = video.querySelector('#video-title');
+        const title = titleElement ? titleElement.textContent.trim() : '';
+        // Extract metadata
+        const viewsElement = video.querySelector('#metadata-line span:first-child, .view-count');
+        const views = viewsElement ? viewsElement.textContent : '';
+        const timeElement = video.querySelector('#metadata-line span:last-child, .published-time');
+        const uploadTime = timeElement ? timeElement.textContent : '';
+        // Extract duration
+        const durationElement = video.querySelector('ytd-thumbnail-overlay-time-status-renderer span, .video-time');
+        const duration = durationElement ? durationElement.textContent.trim() : '';
+        // Extract thumbnail
+        const thumbnail = video.querySelector('img#img')?.src || '';
+        return {
+          id: videoId,
+          title,
+          views,
+          uploadTime,
+          duration,
+          thumbnail,
+          url: `https://youtube.com/watch?v=${videoId}`
+        };
+      }).filter(video => video.id && video.title);
+    });
+    console.error(`Found ${searchResults.length} videos matching "${query}"`);
+    return {
+      query,
+      results: searchResults.slice(0, limit),
+      totalFound: searchResults.length
+    };
+  } catch (error) {
+    console.error('Error searching channel videos:', error);
+    throw error;
+  } finally {
+    await browser.close();
+  }
+}

package/src/comments.js ADDED Viewed

@@ -0,0 +1,156 @@
+import { createBrowser, createPage, handleCookieConsent, skipAds } from './utils/browser.js';
+import { scrollToLoadComments, scrollAndWaitForMore } from './utils/scroll.js';
+import { extractCommentData } from './utils/extract.js';
+export async function getVideoComments({ videoID, limit = 50, sortBy = 'top', pageToken = null }) {
+  const browser = await createBrowser();
+  try {
+    const page = await createPage(browser);
+    // Navigate to the YouTube video page
+    console.error(`Navigating to https://youtube.com/watch?v=${videoID}`);
+    await page.goto(`https://youtube.com/watch?v=${videoID}`, {
+      waitUntil: 'networkidle2',
+      timeout: 60000
+    });
+    // Wait for video player to load
+    await page.waitForSelector('#movie_player, video', { timeout: 30000 });
+    console.error('Video player loaded');
+    // Handle cookie consent
+    await handleCookieConsent(page);
+    // Skip ads if present
+    await skipAds(page);
+    // Wait for page to stabilize
+    await new Promise(resolve => setTimeout(resolve, 3000));
+    // Scroll to load comments
+    const commentsLoaded = await scrollToLoadComments(page);
+    if (!commentsLoaded) {
+      throw new Error('Could not load comments section');
+    }
+    // Wait for comment threads to load
+    await page.waitForSelector('ytd-comment-thread-renderer', {
+      timeout: 10000,
+      visible: true
+    });
+    console.error('Comments section loaded');
+    // Extract total comment count
+    const commentCount = await page.evaluate(() => {
+      const countElement = document.querySelector('ytd-comments-header-renderer h2 yt-formatted-string');
+      if (countElement) {
+        const text = countElement.textContent;
+        const match = text.match(/[\d,]+/);
+        return match ? match[0].replace(/,/g, '') : '0';
+      }
+      return '0';
+    });
+    // Check if we need to change sort order
+    if (sortBy === 'newest') {
+      // Click on sort menu
+      const sortMenuClicked = await page.evaluate(() => {
+        const sortButton = document.querySelector('ytd-comments-header-renderer tp-yt-paper-dropdown-menu-light');
+        if (sortButton) {
+          sortButton.click();
+          return true;
+        }
+        return false;
+      });
+      if (sortMenuClicked) {
+        await new Promise(resolve => setTimeout(resolve, 1000));
+        // Click on "Newest first" option
+        await page.evaluate(() => {
+          const menuItems = document.querySelectorAll('tp-yt-paper-listbox tp-yt-paper-item');
+          for (const item of menuItems) {
+            if (item.textContent.includes('Newest') || item.textContent.includes('newest')) {
+              item.click();
+              break;
+            }
+          }
+        });
+        // Wait for comments to reload
+        await new Promise(resolve => setTimeout(resolve, 3000));
+      }
+    }
+    let allComments = [];
+    let currentCount = 0;
+    // Load comments up to the limit
+    while (allComments.length < limit) {
+      const comments = await extractCommentData(page);
+      allComments = comments;
+      if (comments.length === currentCount) {
+        // No more comments to load
+        break;
+      }
+      currentCount = comments.length;
+      if (currentCount < limit) {
+        // Try to load more comments
+        const newCount = await scrollAndWaitForMore(page, 'ytd-comment-thread-renderer', currentCount, 3000);
+        if (newCount === currentCount) {
+          break; // No new comments loaded
+        }
+      }
+    }
+    // Trim to requested limit
+    const resultComments = allComments.slice(0, limit);
+    console.error(`Successfully extracted ${resultComments.length} comments`);
+    // Extract video info
+    const videoInfo = await page.evaluate(() => {
+      const titleElement = document.querySelector('h1.ytd-video-primary-info-renderer');
+      const title = titleElement ? titleElement.textContent.trim() : '';
+      const channelElement = document.querySelector('ytd-channel-name a');
+      const channelName = channelElement ? channelElement.textContent.trim() : '';
+      const channelUrl = channelElement ? channelElement.href : '';
+      const viewsElement = document.querySelector('.view-count');
+      const views = viewsElement ? viewsElement.textContent : '';
+      return {
+        title,
+        channel: {
+          name: channelName,
+          url: channelUrl
+        },
+        views
+      };
+    });
+    return {
+      video: {
+        id: videoID,
+        ...videoInfo
+      },
+      comments: resultComments,
+      totalComments: parseInt(commentCount),
+      totalLoaded: allComments.length,
+      hasMore: allComments.length > limit,
+      sortBy
+    };
+  } catch (error) {
+    console.error('Error extracting comments:', error);
+    throw error;
+  } finally {
+    await browser.close();
+  }
+}

package/src/index.d.ts CHANGED Viewed

@@ -19,4 +19,148 @@ export interface GetSubtitlesOptions {
  * @param options - Configuration options
  * @returns Promise that resolves to an array of subtitle segments
  */
-export function getSubtitles(options: GetSubtitlesOptions): Promise<SubtitleSegment[]>;
+export function getSubtitles(options: GetSubtitlesOptions): Promise<SubtitleSegment[]>;
+// New types for channel videos
+export interface VideoInfo {
+  /** YouTube video ID */
+  id: string;
+  /** Video title */
+  title: string;
+  /** View count text */
+  views: string;
+  /** Upload time text (e.g., "2 days ago") */
+  uploadTime: string;
+  /** Video duration text (e.g., "10:45") */
+  duration: string;
+  /** Thumbnail URL */
+  thumbnail: string;
+  /** Full YouTube video URL */
+  url: string;
+}
+export interface ChannelInfo {
+  /** Channel name */
+  name: string;
+  /** Subscriber count text */
+  subscribers: string;
+  /** Total video count text */
+  videoCount: string;
+}
+/** Options accepted by getChannelVideos. */
+export interface GetChannelVideosOptions {
+  /**
+   * Channel identifier: a full URL (starting with "http"), an "@handle",
+   * a channel ID starting with "UC", or a custom name resolved under "/c/".
+   */
+  channelURL: string;
+  /** Maximum number of videos to return (default: 30) */
+  limit?: number;
+  /** Page token reserved for pagination; getChannelVideos accepts it but does not read it */
+  pageToken?: string | null;
+}
+/** Value resolved by getChannelVideos. */
+export interface ChannelVideosResult {
+  /** Channel information */
+  channel: ChannelInfo;
+  /** Extracted videos, trimmed to at most `limit` entries */
+  videos: VideoInfo[];
+  /** Number of videos extracted from the page before trimming to `limit` */
+  totalLoaded: number;
+  /** True only when more videos were extracted than `limit` (totalLoaded > limit) */
+  hasMore: boolean;
+}
+/**
+ * Get videos from a YouTube channel with pagination support
+ * @param options - Configuration options
+ * @returns Promise that resolves to channel videos result
+ */
+export function getChannelVideos(options: GetChannelVideosOptions): Promise<ChannelVideosResult>;
+/** Options accepted by searchChannelVideos. */
+export interface SearchChannelVideosOptions {
+  /**
+   * Channel identifier: a full URL (starting with "http"), an "@handle",
+   * a channel ID starting with "UC", or a custom name resolved under "/c/".
+   */
+  channelURL: string;
+  /** Text typed into the channel page's search box */
+  query: string;
+  /** Maximum number of videos to return (default: 30) */
+  limit?: number;
+}
+/** Value resolved by searchChannelVideos. */
+export interface SearchChannelVideosResult {
+  /** Search query used (echoed back unchanged) */
+  query: string;
+  /** Matching videos, trimmed to at most `limit` entries */
+  results: VideoInfo[];
+  /** Number of matching videos extracted from the results page before trimming to `limit` */
+  totalFound: number;
+}
+/**
+ * Search for videos within a YouTube channel
+ * @param options - Configuration options
+ * @returns Promise that resolves to search results
+ */
+export function searchChannelVideos(options: SearchChannelVideosOptions): Promise<SearchChannelVideosResult>;
+// Types for comments
+export interface Comment {
+  /** Comment author name */
+  author: string;
+  /** Author channel URL */
+  authorUrl: string;
+  /** Author avatar URL */
+  authorAvatar: string;
+  /** Comment text */
+  text: string;
+  /** Time ago text (e.g., "2 days ago") */
+  time: string;
+  /** Like count */
+  likes: string;
+  /** Number of replies */
+  replyCount: string;
+}
+export interface VideoDetails {
+  /** Video ID */
+  id: string;
+  /** Video title */
+  title: string;
+  /** Channel information */
+  channel: {
+    name: string;
+    url: string;
+  };
+  /** View count text */
+  views: string;
+}
+/** Options accepted by getVideoComments. */
+export interface GetVideoCommentsOptions {
+  /** YouTube video ID */
+  videoID: string;
+  /** Maximum number of comments to return (default: 50) */
+  limit?: number;
+  /** Sort order: 'top' or 'newest' (default: 'top'); only 'newest' makes the implementation open the sort menu */
+  sortBy?: 'top' | 'newest';
+  /** Page token reserved for pagination; getVideoComments accepts it but does not read it */
+  pageToken?: string | null;
+}
+/** Value resolved by getVideoComments. */
+export interface VideoCommentsResult {
+  /** Video information */
+  video: VideoDetails;
+  /** Extracted comments, trimmed to at most `limit` entries */
+  comments: Comment[];
+  /** Comment count parsed from the comments header text; 0 when the header element is not found */
+  totalComments: number;
+  /** Number of comment threads extracted from the page before trimming to `limit` */
+  totalLoaded: number;
+  /** True only when more comments were extracted than `limit` (totalLoaded > limit) */
+  hasMore: boolean;
+  /** The `sortBy` option value, echoed back */
+  sortBy: 'top' | 'newest';
+}
+/**
+ * Get comments from a YouTube video with pagination support
+ * @param options - Configuration options
+ * @returns Promise that resolves to video comments result
+ */
+export function getVideoComments(options: GetVideoCommentsOptions): Promise<VideoCommentsResult>;

package/src/index.js CHANGED Viewed

@@ -1,24 +1,11 @@
-import he from 'he';
-import lodash from 'lodash';
-import striptags from 'striptags';
-import puppeteer from 'puppeteer';
-const { find } = lodash;
+import { createBrowser, createPage, handleCookieConsent, skipAds } from './utils/browser.js';
+// Export existing function
 export async function getSubtitles({ videoID, lang = 'en' }) {
-  const browser = await puppeteer.launch({
-    headless: true,
-    args: ['--no-sandbox', '--disable-setuid-sandbox', '--window-size=1920,1080', '--disable-dev-shm-usage']
-  });
+  const browser = await createBrowser();
   try {
-    const page = await browser.newPage();
-    // Set viewport to a standard desktop size
-    await page.setViewport({ width: 1920, height: 1080 });
-    // Set a realistic user agent
-    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
+    const page = await createPage(browser);
     // Navigate to the YouTube video page
     console.error(`Navigating to https://youtube.com/watch?v=${videoID}`);
@@ -35,28 +22,10 @@ export async function getSubtitles({ videoID, lang = 'en' }) {
     await new Promise(resolve => setTimeout(resolve, 5000));
     // Handle cookie consent if present
-    try {
-      const consentButton = await page.$('[aria-label*="Accept all"], [aria-label*="Accept cookies"], button:has-text("Accept all")');
-      if (consentButton) {
-        await consentButton.click();
-        console.error('Accepted cookies');
-        await new Promise(resolve => setTimeout(resolve, 1000));
-      }
-    } catch (e) {
-      // Cookie consent not present or already accepted
-    }
+    await handleCookieConsent(page);
     // Skip ads if present
-    try {
-      const skipButton = await page.$('.ytp-ad-skip-button, .ytp-skip-ad-button');
-      if (skipButton) {
-        await skipButton.click();
-        console.error('Skipped ad');
-        await new Promise(resolve => setTimeout(resolve, 2000));
-      }
-    } catch (e) {
-      // No skip button
-    }
+    await skipAds(page);
     // Scroll down to load more content
     await page.evaluate(() => window.scrollBy(0, 800));
@@ -285,4 +254,8 @@ export async function getSubtitles({ videoID, lang = 'en' }) {
   } finally {
     await browser.close();
   }
-}
+}
+// Export new functions
+export { getChannelVideos, searchChannelVideos } from './channel.js';
+export { getVideoComments } from './comments.js';

package/src/utils/browser.js ADDED Viewed

@@ -0,0 +1,53 @@
+import puppeteer from 'puppeteer';
+export async function createBrowser() {
+  const options = {
+    headless: true,
+    args: ['--no-sandbox', '--disable-setuid-sandbox', '--window-size=1920,1080', '--disable-dev-shm-usage']
+  };
+  // Add executablePath if environment variable is set
+  if (process.env.PUPPETEER_EXECUTABLE_PATH) {
+    options.executablePath = process.env.PUPPETEER_EXECUTABLE_PATH;
+  }
+  return await puppeteer.launch(options);
+}
+export async function createPage(browser) {
+  const page = await browser.newPage();
+  // Set viewport to a standard desktop size
+  await page.setViewport({ width: 1920, height: 1080 });
+  // Set a realistic user agent
+  await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
+  return page;
+}
+export async function handleCookieConsent(page) {
+  try {
+    const consentButton = await page.$('[aria-label*="Accept all"], [aria-label*="Accept cookies"], button:has-text("Accept all")');
+    if (consentButton) {
+      await consentButton.click();
+      console.error('Accepted cookies');
+      await new Promise(resolve => setTimeout(resolve, 1000));
+    }
+  } catch (e) {
+    // Cookie consent not present or already accepted
+  }
+}
+export async function skipAds(page) {
+  try {
+    const skipButton = await page.$('.ytp-ad-skip-button, .ytp-skip-ad-button');
+    if (skipButton) {
+      await skipButton.click();
+      console.error('Skipped ad');
+      await new Promise(resolve => setTimeout(resolve, 2000));
+    }
+  } catch (e) {
+    // No skip button
+  }
+}

package/src/utils/extract.js ADDED Viewed

@@ -0,0 +1,101 @@
+/**
+ * Collect one record per `ytd-rich-item-renderer` element on the page.
+ *
+ * Each record has `id`, `title`, `views`, `uploadTime`, `duration`,
+ * `thumbnail` and `url`. Fields whose element is missing are empty strings.
+ * Records lacking an id or a title are dropped.
+ *
+ * @param {object} page - Object exposing `evaluate` (presumably a Puppeteer Page).
+ * @returns {Promise<Array<object>>} The extracted video records.
+ */
+export async function extractVideoData(page) {
+  const videos = await page.evaluate(() => {
+    // Helpers are declared inside the callback because it executes in the page.
+    const trimmedText = (node) => (node ? node.textContent.trim() : '');
+    const rawText = (node) => (node && node.textContent) || '';
+    const collected = [];
+    for (const card of document.querySelectorAll('ytd-rich-item-renderer')) {
+      const anchor = card.querySelector('a#video-title-link');
+      const idMatch = (anchor ? anchor.href : '').match(/watch\?v=([^&]+)/);
+      const videoId = idMatch ? idMatch[1] : '';
+      const metadata = card.querySelector('#metadata-line');
+      const spans = metadata ? Array.from(metadata.querySelectorAll('span')) : [];
+      const thumbImage = card.querySelector('img#img');
+      const entry = {
+        id: videoId,
+        title: trimmedText(card.querySelector('#video-title')),
+        // First metadata span is used for views, the last one for upload time.
+        views: rawText(spans[0]),
+        uploadTime: rawText(spans[spans.length - 1]),
+        duration: trimmedText(card.querySelector('ytd-thumbnail-overlay-time-status-renderer span')),
+        thumbnail: (thumbImage && thumbImage.src) || '',
+        url: `https://youtube.com/watch?v=${videoId}`
+      };
+      if (entry.id && entry.title) {
+        collected.push(entry);
+      }
+    }
+    return collected;
+  });
+  return videos;
+}
+/**
+ * Collect one record per `ytd-comment-thread-renderer` element on the page.
+ *
+ * Each record has `author`, `authorUrl`, `authorAvatar`, `text`, `time`,
+ * `likes` and `replyCount`. `likes` defaults to '0' when its element is
+ * missing; `replyCount` is the first run of digits in the "#more-replies"
+ * text, or '0'. Records lacking text or an author are dropped.
+ *
+ * @param {object} page - Object exposing `evaluate` (presumably a Puppeteer Page).
+ * @returns {Promise<Array<object>>} The extracted comment records.
+ */
+export async function extractCommentData(page) {
+  const comments = await page.evaluate(() => {
+    // Declared inside the callback because it executes in the page.
+    const trimmedOr = (node, fallback) => (node ? node.textContent.trim() : fallback);
+    const collected = [];
+    for (const thread of document.querySelectorAll('ytd-comment-thread-renderer')) {
+      const authorNode = thread.querySelector('#author-text');
+      const repliesNode = thread.querySelector('#more-replies');
+      const avatarNode = thread.querySelector('#author-thumbnail img');
+      const digits = (repliesNode ? repliesNode.textContent : '').match(/\d+/);
+      const entry = {
+        author: trimmedOr(authorNode, ''),
+        authorUrl: authorNode ? authorNode.href : '',
+        authorAvatar: avatarNode ? avatarNode.src : '',
+        text: trimmedOr(thread.querySelector('#content-text'), ''),
+        time: trimmedOr(thread.querySelector('#published-time-text'), ''),
+        likes: trimmedOr(thread.querySelector('#vote-count-middle'), '0'),
+        replyCount: (digits && digits[0]) || '0'
+      };
+      if (entry.text && entry.author) {
+        collected.push(entry);
+      }
+    }
+    return collected;
+  });
+  return comments;
+}
+/**
+ * Read the channel name, subscriber count and video count from the page.
+ *
+ * The name is taken from the first selector in a fallback list whose match has
+ * non-blank text. Counts come from `#subscriber-count` and `#videos-count`.
+ * Any value that cannot be found is an empty string.
+ *
+ * @param {object} page - Object exposing `evaluate` (presumably a Puppeteer Page).
+ * @returns {Promise<{name: string, subscribers: string, videoCount: string}>}
+ */
+export async function extractChannelInfo(page) {
+  return await page.evaluate(() => {
+    // Try multiple selectors for channel name
+    const nameSelectors = [
+      'ytd-channel-name yt-formatted-string',
+      '#channel-name yt-formatted-string',
+      '.ytd-channel-name',
+      '#text.ytd-channel-name',
+      'yt-formatted-string.ytd-channel-name'
+    ];
+    let channelName = '';
+    for (const selector of nameSelectors) {
+      // Trim before testing: a whitespace-only match must not end the search
+      // with an empty name while later selectors could still succeed.
+      const candidate = document.querySelector(selector)?.textContent?.trim();
+      if (candidate) {
+        channelName = candidate;
+        break;
+      }
+    }
+    const subscriberCount = document.querySelector('#subscriber-count')?.textContent?.trim() || '';
+    const videoCount = document.querySelector('#videos-count')?.textContent?.trim() || '';
+    return {
+      name: channelName,
+      subscribers: subscriberCount,
+      videoCount: videoCount
+    };
+  });
+}

package/src/utils/scroll.js ADDED Viewed

@@ -0,0 +1,48 @@
+/**
+ * Scroll the window to the current full height of the document.
+ *
+ * @param {object} page - Object exposing `evaluate` (presumably a Puppeteer Page).
+ * @returns {Promise<void>}
+ */
+export async function scrollToBottom(page) {
+  // Self-contained on purpose: the function is executed in the page context.
+  const jumpToPageEnd = () => {
+    const { scrollHeight } = document.documentElement;
+    window.scrollTo(0, scrollHeight);
+  };
+  await page.evaluate(jumpToPageEnd);
+}
+/**
+ * Smooth-scroll the first element matching `selector` to the vertical center
+ * of the viewport. Does nothing when no element matches.
+ *
+ * @param {object} page - Object exposing `evaluate` (presumably a Puppeteer Page).
+ * @param {string} selector - CSS selector of the element to bring into view.
+ * @returns {Promise<void>}
+ */
+export async function scrollToElement(page, selector) {
+  await page.evaluate((targetSelector) => {
+    const target = document.querySelector(targetSelector);
+    if (!target) {
+      return;
+    }
+    target.scrollIntoView({ behavior: 'smooth', block: 'center' });
+  }, selector);
+}
+/**
+ * Scroll to the bottom of the page, then poll once per second until more
+ * elements match `itemSelector` than `currentCount`, or until `maxWaitTime`
+ * milliseconds have elapsed.
+ *
+ * @param {object} page - Object exposing `evaluate` (presumably a Puppeteer Page).
+ * @param {string} itemSelector - CSS selector of the items being counted.
+ * @param {number} currentCount - Number of items already known.
+ * @param {number} [maxWaitTime=5000] - Polling budget in milliseconds.
+ * @returns {Promise<number>} The larger count once observed, otherwise `currentCount`.
+ */
+export async function scrollAndWaitForMore(page, itemSelector, currentCount, maxWaitTime = 5000) {
+  const pause = (ms) => new Promise(done => setTimeout(done, ms));
+  const countItems = () => page.evaluate(
+    (selector) => document.querySelectorAll(selector).length,
+    itemSelector
+  );
+
+  await scrollToBottom(page);
+  const startedAt = Date.now();
+  while (Date.now() - startedAt < maxWaitTime) {
+    // Give the page a moment to load more items before counting again.
+    await pause(1000);
+    const observed = await countItems();
+    if (observed > currentCount) {
+      return observed;
+    }
+  }
+  return currentCount;
+}
+/**
+ * Scroll down by 800 pixels, wait two seconds, then wait up to ten seconds
+ * for a `ytd-comments` element to appear.
+ *
+ * @param {object} page - Object exposing `evaluate` and `waitForSelector`
+ *   (presumably a Puppeteer Page).
+ * @returns {Promise<boolean>} `true` if the element appeared, `false` if the
+ *   wait failed (in which case a message is logged to stderr).
+ */
+export async function scrollToLoadComments(page) {
+  // Nudge the page down, then pause before waiting on the comments element
+  await page.evaluate(() => window.scrollBy(0, 800));
+  await new Promise(done => setTimeout(done, 2000));
+
+  let commentsFound = true;
+  try {
+    await page.waitForSelector('ytd-comments', { timeout: 10000 });
+  } catch (e) {
+    commentsFound = false;
+    console.error('Comments section not found');
+  }
+  return commentsFound;
+}

package/.claude/settings.local.json DELETED Viewed

@@ -1,25 +0,0 @@
-{
-  "permissions": {
-    "allow": [
-      "Bash(find:*)",
-      "Bash(npm test)",
-      "Bash(node:*)",
-      "Bash(npm run build:*)",
-      "WebFetch(domain:github.com)",
-      "Bash(npm install:*)",
-      "Bash(npm run test:*)",
-      "mcp__server-sequential-thinking__sequentialthinking",
-      "Bash(ls:*)",
-      "mcp__puppeteer__puppeteer_navigate",
-      "mcp__puppeteer__puppeteer_screenshot",
-      "mcp__puppeteer__puppeteer_evaluate",
-      "mcp__puppeteer__puppeteer_click",
-      "Bash(rm:*)",
-      "Bash(npm audit:*)",
-      "Bash(npm whoami:*)",
-      "Bash(npm publish:*)",
-      "Bash(npm view:*)"
-    ],
-    "deny": []
-  }
-}

package/test/index.test.js DELETED Viewed

@@ -1,23 +0,0 @@
-import { test } from 'node:test';
-import assert from 'node:assert';
-import { getSubtitles } from '../src/index.js';
-test('Extract passive income video captions', async () => {
-  const captions = await getSubtitles({ videoID: 'JueUvj6X3DA' });
-  // Check that captions were extracted
-  assert(Array.isArray(captions), 'Captions should be an array');
-  assert(captions.length > 0, 'Should extract at least one caption');
-  // Check structure of first caption
-  const firstCaption = captions[0];
-  assert(typeof firstCaption.start === 'string', 'Start time should be a string');
-  assert(typeof firstCaption.dur === 'string', 'Duration should be a string');
-  assert(typeof firstCaption.text === 'string', 'Text should be a string');
-  // Check that the first caption contains expected content
-  assert(
-    firstCaption.text.toLowerCase().includes('creating passive income'),
-    `First caption should contain "creating passive income", got: "${firstCaption.text}"`
-  );
-});