npm - @mintlify/scraping - Versions diffs - 3.0.14 → 3.0.16 - Mend

@mintlify/scraping 3.0.14 → 3.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87)

package/src/scraping/scrapePage.ts CHANGED Viewed

@@ -1,5 +1,6 @@
-import path from "path";
-import { createPage, getOrigin } from "../util.js";
+import path from 'path';
+import { createPage, getOrigin } from '../util.js';
 export async function scrapePage(
   scrapeFunc: ScrapePageFn,
@@ -10,7 +11,7 @@ export async function scrapePage(
 ) {
   const origin = getOrigin(href);
   const cwd = process.cwd();
-  const imageBaseDir = path.join(cwd, "images");
+  const imageBaseDir = path.join(cwd, 'images');
   const { title, description, markdown } = await scrapeFunc(
     html,

package/src/scraping/scrapePageCommands.ts CHANGED Viewed

@@ -1,23 +1,22 @@
-import axios from "axios";
-import { scrapePage } from "./scrapePage.js";
-import { scrapeDocusaurusPage } from "./site-scrapers/scrapeDocusaurusPage.js";
-import { scrapeGitBookPage } from "./site-scrapers/scrapeGitBookPage.js";
-import { scrapeReadMePage } from "./site-scrapers/scrapeReadMePage.js";
-import { detectFramework, Frameworks } from "./detectFramework.js";
-import { getHrefFromArgs } from "../util.js";
-import { getHtmlWithPuppeteer } from "../browser.js";
-import { ArgumentsCamelCase } from "yargs";
-import { scrapeIntercomPage } from "./site-scrapers/Intercom/scrapeIntercomPage.js";
+import axios from 'axios';
+import { ArgumentsCamelCase } from 'yargs';
+import { getHtmlWithPuppeteer } from '../browser.js';
+import { getHrefFromArgs } from '../util.js';
+import { detectFramework, Frameworks } from './detectFramework.js';
+import { scrapePage } from './scrapePage.js';
+import { scrapeIntercomPage } from './site-scrapers/Intercom/scrapeIntercomPage.js';
+import { scrapeDocusaurusPage } from './site-scrapers/scrapeDocusaurusPage.js';
+import { scrapeGitBookPage } from './site-scrapers/scrapeGitBookPage.js';
+import { scrapeReadMePage } from './site-scrapers/scrapeReadMePage.js';
 function validateFramework(framework) {
   if (!framework) {
-    console.log(
-      "Could not detect the framework automatically. Please use one of:"
-    );
-    console.log("scrape-page-docusaurus");
-    console.log("scrape-page-gitbook");
-    console.log("scrape-page-readme");
-    console.log("scrape-page-intercom");
+    console.log('Could not detect the framework automatically. Please use one of:');
+    console.log('scrape-page-docusaurus');
+    console.log('scrape-page-gitbook');
+    console.log('scrape-page-readme');
+    console.log('scrape-page-intercom');
     return process.exit(1);
   }
 }
@@ -47,7 +46,7 @@ export async function scrapePageAutomatically(argv: any) {
   validateFramework(framework);
-  console.log("Detected framework: " + framework);
+  console.log('Detected framework: ' + framework);
   switch (framework) {
     case Frameworks.DOCUSAURUS:

package/src/scraping/scrapeSection.ts CHANGED Viewed

@@ -1,5 +1,6 @@
-import path from "path";
-import { objToReadableString } from "../util.js";
+import path from 'path';
+import { objToReadableString } from '../util.js';
 export async function scrapeSection(
   scrapeFunc: ScrapeSectionFn,
@@ -8,21 +9,12 @@ export async function scrapeSection(
   overwrite: boolean,
   version: string | undefined
 ) {
-  console.log(
-    `Started scraping${overwrite ? ", overwrite mode is on" : ""}...`
-  );
+  console.log(`Started scraping${overwrite ? ', overwrite mode is on' : ''}...`);
   const cwd = process.cwd();
-  const imageBaseDir = path.join(cwd, "images");
+  const imageBaseDir = path.join(cwd, 'images');
-  const groupsConfig = await scrapeFunc(
-    html,
-    origin,
-    cwd,
-    imageBaseDir,
-    overwrite,
-    version
-  );
-  console.log("Finished scraping.");
-  console.log("Add the following to your navigation in mint.json:");
+  const groupsConfig = await scrapeFunc(html, origin, cwd, imageBaseDir, overwrite, version);
+  console.log('Finished scraping.');
+  console.log('Add the following to your navigation in mint.json:');
   console.log(objToReadableString(groupsConfig));
 }

package/src/scraping/scrapeSectionCommands.ts CHANGED Viewed

@@ -1,15 +1,16 @@
-import axios from "axios";
-import { detectFramework, Frameworks } from "./detectFramework.js";
-import { getHrefFromArgs, getOrigin } from "../util.js";
-import { scrapeSection } from "./scrapeSection.js";
-import { scrapeDocusaurusSection } from "./site-scrapers/scrapeDocusaurusSection.js";
-import openNestedDocusaurusMenus from "./site-scrapers/openNestedDocusaurusMenus.js";
-import { scrapeGitBookSection } from "./site-scrapers/scrapeGitBookSection.js";
-import openNestedGitbookMenus from "./site-scrapers/openNestedGitbookMenus.js";
-import { scrapeReadMeSection } from "./site-scrapers/scrapeReadMeSection.js";
-import { startBrowser } from "../browser.js";
-import { ArgumentsCamelCase } from "yargs";
-import { scrapeIntercomSection } from "./site-scrapers/Intercom/scrapeIntercomSection.js";
+import axios from 'axios';
+import { ArgumentsCamelCase } from 'yargs';
+import { startBrowser } from '../browser.js';
+import { getHrefFromArgs, getOrigin } from '../util.js';
+import { detectFramework, Frameworks } from './detectFramework.js';
+import { scrapeSection } from './scrapeSection.js';
+import { scrapeIntercomSection } from './site-scrapers/Intercom/scrapeIntercomSection.js';
+import openNestedDocusaurusMenus from './site-scrapers/openNestedDocusaurusMenus.js';
+import openNestedGitbookMenus from './site-scrapers/openNestedGitbookMenus.js';
+import { scrapeDocusaurusSection } from './site-scrapers/scrapeDocusaurusSection.js';
+import { scrapeGitBookSection } from './site-scrapers/scrapeGitBookSection.js';
+import { scrapeReadMeSection } from './site-scrapers/scrapeReadMeSection.js';
 export async function scrapeSectionAxiosWrapper(
   argv: ArgumentsCamelCase,
@@ -18,13 +19,7 @@ export async function scrapeSectionAxiosWrapper(
   const href = getHrefFromArgs(argv);
   const res = await axios.get(href);
   const html = res.data;
-  await scrapeSection(
-    scrapeFunc,
-    html,
-    getOrigin(href),
-    !!argv.overwrite,
-    undefined
-  );
+  await scrapeSection(scrapeFunc, html, getOrigin(href), !!argv.overwrite, undefined);
   process.exit(0);
 }
@@ -41,11 +36,7 @@ export async function scrapeDocusaurusSectionCommand(
 }
 export async function scrapeGitbookSectionCommand(argv: any) {
-  await scrapeSectionOpeningAllNested(
-    argv,
-    openNestedGitbookMenus,
-    scrapeGitBookSection
-  );
+  await scrapeSectionOpeningAllNested(argv, openNestedGitbookMenus, scrapeGitBookSection);
 }
 async function scrapeSectionOpeningAllNested(
@@ -59,18 +50,12 @@ async function scrapeSectionOpeningAllNested(
   const browser = await startBrowser();
   const page = await browser.newPage();
   await page.goto(href, {
-    waitUntil: "networkidle2",
+    waitUntil: 'networkidle2',
   });
   const html = await openLinks(page);
   browser.close();
-  await scrapeSection(
-    scrapeFunc,
-    html,
-    getOrigin(href),
-    !!argv.overwrite,
-    version
-  );
+  await scrapeSection(scrapeFunc, html, getOrigin(href), !!argv.overwrite, version);
   process.exit(0);
 }
@@ -81,7 +66,7 @@ export async function scrapeSectionAutomatically(argv: any) {
   const { framework, version } = detectFramework(html);
   validateFramework(framework);
-  console.log("Detected framework: " + framework);
+  console.log('Detected framework: ' + framework);
   switch (framework) {
     case Frameworks.DOCUSAURUS:
@@ -102,7 +87,7 @@ export async function scrapeSectionAutomatically(argv: any) {
 function validateFramework(framework: Frameworks | undefined) {
   if (!framework) {
     console.log(
-      "Could not detect the framework automatically. We only support Docusaurus (V2 and V3), GitBook, and ReadMe."
+      'Could not detect the framework automatically. We only support Docusaurus (V2 and V3), GitBook, and ReadMe.'
     );
     process.exit();
   }

package/src/scraping/site-scrapers/Intercom/scrapeIntercomPage.ts CHANGED Viewed

@@ -1,7 +1,8 @@
-import cheerio from "cheerio";
-import { NodeHtmlMarkdown } from "node-html-markdown";
-import downloadAllImages from "../../downloadAllImages.js";
-import replaceImagePaths from "../../replaceImagePaths.js";
+import cheerio from 'cheerio';
+import { NodeHtmlMarkdown } from 'node-html-markdown';
+import downloadAllImages from '../../downloadAllImages.js';
+import replaceImagePaths from '../../replaceImagePaths.js';
 export async function scrapeIntercomPage(
   html: string,
@@ -13,11 +14,11 @@ export async function scrapeIntercomPage(
 ) {
   const $ = cheerio.load(html);
-  const titleComponent = $(".t__h1").first();
+  const titleComponent = $('.t__h1').first();
   const title = titleComponent.text().trim();
-  const description = $(".article__desc", titleComponent.parent()).text().trim();
+  const description = $('.article__desc', titleComponent.parent()).text().trim();
-  const content = $("article").first();
+  const content = $('article').first();
   const contentHtml = $.html(content);
   const origToWritePath = await downloadAllImages(
@@ -33,16 +34,16 @@ export async function scrapeIntercomPage(
   let markdown = nhm.translate(contentHtml);
   // Keep headers on one line
-  markdown = markdown.replace(/# \n\n/g, "# ");
+  markdown = markdown.replace(/# \n\n/g, '# ');
   // Remove unnecessary nonwidth blank space characters
-  markdown = markdown.replace(/\u200b/g, "");
+  markdown = markdown.replace(/\u200b/g, '');
   // Reduce unnecessary blank lines
-  markdown = markdown.replace(/\n\n\n/g, "\n\n");
+  markdown = markdown.replace(/\n\n\n/g, '\n\n');
   // Mintlify doesn't support bolded headers, remove the asterisks
-  markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, "$1 $2\n");
+  markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, '$1 $2\n');
   if (origToWritePath) {
     markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
   }

package/src/scraping/site-scrapers/Intercom/scrapeIntercomSection.ts CHANGED Viewed

@@ -1,8 +1,9 @@
-import cheerio from "cheerio";
-import { scrapeIntercomPage } from "./scrapeIntercomPage.js";
-import { scrapeGettingFileNameFromUrl } from "../../scrapeGettingFileNameFromUrl.js";
-import downloadLogoImage from "../../downloadLogoImage.js";
-import axios from "axios";
+import axios from 'axios';
+import cheerio from 'cheerio';
+import downloadLogoImage from '../../downloadLogoImage.js';
+import { scrapeGettingFileNameFromUrl } from '../../scrapeGettingFileNameFromUrl.js';
+import { scrapeIntercomPage } from './scrapeIntercomPage.js';
 export async function scrapeIntercomSection(
   html: string,
@@ -14,27 +15,25 @@ export async function scrapeIntercomSection(
 ): Promise<MintNavigationEntry[]> {
   let $ = cheerio.load(html);
-  const logoSrc = $(".header__logo img").first().attr("src");
+  const logoSrc = $('.header__logo img').first().attr('src');
   downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite);
-  const collectionsLink = $(".section .g__space a");
-  const collectionsMap = collectionsLink
-    .toArray()
-    .map(async (s: cheerio.Element) => {
-      const href = $(s).attr("href");
-      const res = await axios.get(`${origin}${href}`);
-      const html = res.data;
-      $ = cheerio.load(html);
-      const sectionTitle = $(".collection h1").first().text().trim();
-      const sectionPages = $(".section .g__space a")
-        .toArray()
-        .map((s: cheerio.Element) => $(s).attr("href"))
-        .filter((page) => page !== undefined) as string[];
-      return {
-        group: sectionTitle,
-        pages: sectionPages,
-      };
-    });
+  const collectionsLink = $('.section .g__space a');
+  const collectionsMap = collectionsLink.toArray().map(async (s: cheerio.Element) => {
+    const href = $(s).attr('href');
+    const res = await axios.get(`${origin}${href}`);
+    const html = res.data;
+    $ = cheerio.load(html);
+    const sectionTitle = $('.collection h1').first().text().trim();
+    const sectionPages = $('.section .g__space a')
+      .toArray()
+      .map((s: cheerio.Element) => $(s).attr('href'))
+      .filter((page) => page !== undefined) as string[];
+    return {
+      group: sectionTitle,
+      pages: sectionPages,
+    };
+  });
   const collections: MintNavigation[] = await Promise.all(collectionsMap);

package/src/scraping/site-scrapers/alternateGroupTitle.ts CHANGED Viewed

@@ -4,5 +4,5 @@ export default function alternateGroupTitle(firstLink: cheerio.Cheerio, pages) {
   if (pages.length > 0) {
     return firstLink?.text();
   }
-  return "";
+  return '';
 }

package/src/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.ts CHANGED Viewed

@@ -1,12 +1,12 @@
-import alternateGroupTitle from "../alternateGroupTitle.js";
-import getLinksRecursively from "./getLinksRecursively.js";
+import alternateGroupTitle from '../alternateGroupTitle.js';
+import getLinksRecursively from './getLinksRecursively.js';
 export function getDocusaurusLinksPerGroup(
   navigationSections: any,
   $: any,
   version: string | undefined
 ) {
-  if (version === "3" || version === "2") {
+  if (version === '3' || version === '2') {
     return getDocusaurusLinksPerGroupLoop(navigationSections, $);
   }
   return [];
@@ -17,21 +17,18 @@ function getDocusaurusLinksPerGroupLoop(navigationSections: any, $: any) {
     const section = $(s);
     // Links without a group
-    if (section.hasClass("theme-doc-sidebar-item-link")) {
-      const linkHref = section.find("a[href]").first().attr("href");
+    if (section.hasClass('theme-doc-sidebar-item-link')) {
+      const linkHref = section.find('a[href]').first().attr('href');
       return {
-        group: "",
+        group: '',
         pages: [linkHref],
       };
     }
-    const firstLink = section
-      .find(".menu__list-item-collapsible")
-      .first()
-      .find("a[href]");
+    const firstLink = section.find('.menu__list-item-collapsible').first().find('a[href]');
     const sectionTitle = firstLink.text();
-    const firstHref = firstLink.attr("href");
+    const firstHref = firstLink.attr('href');
     const linkSections = section.children().eq(1).children();
     const pages = getLinksRecursively(linkSections, $);

package/src/scraping/site-scrapers/links-per-group/getLinksRecursively.ts CHANGED Viewed

@@ -9,20 +9,20 @@ export default function getLinksRecursively(linkSections: any, $: any) {
       const subsection = $(s);
       let link = subsection.children().first();
-      if (!link.attr("href")) {
+      if (!link.attr('href')) {
         // Docusaurus nests the <a> inside a <div>
-        link = link.find("a[href]").first();
+        link = link.find('a[href]').first();
       }
-      const linkHref = link.attr("href");
+      const linkHref = link.attr('href');
       // Skip missing links. For example, GitBook uses
       // empty divs are used for styling a line beside the nav.
       // Skip external links until Mintlify supports them
       if (
         !linkHref ||
-        linkHref === "#" ||
-        linkHref.startsWith("https://") ||
-        linkHref.startsWith("http://")
+        linkHref === '#' ||
+        linkHref.startsWith('https://') ||
+        linkHref.startsWith('http://')
       ) {
         return undefined;
       }

package/src/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.ts CHANGED Viewed

@@ -14,16 +14,16 @@ export default function getLinksRecursivelyGitBook(linkSections: any, $: any) {
       }
       const link = subsection.children().first();
-      const linkHref = link.attr("href");
+      const linkHref = link.attr('href');
       // Skip missing links. For example, GitBook uses
       // empty divs are used for styling a line beside the nav.
       // Skip external links until Mintlify supports them
       if (
         !linkHref ||
-        linkHref === "#" ||
-        linkHref.startsWith("https://") ||
-        linkHref.startsWith("http://")
+        linkHref === '#' ||
+        linkHref.startsWith('https://') ||
+        linkHref.startsWith('http://')
       ) {
         return undefined;
       }

package/src/scraping/site-scrapers/openNestedDocusaurusMenus.ts CHANGED Viewed

@@ -1,8 +1,8 @@
-import { Page } from "puppeteer";
+import { Page } from 'puppeteer';
 export default async function openNestedDocusaurusMenus(page: Page) {
   let prevEncountered: string[] = [];
-  let encounteredHref = ["fake-href-to-make-loop-run-at-least-once"];
+  let encounteredHref = ['fake-href-to-make-loop-run-at-least-once'];
   // Loop until we've encountered every link
   while (!encounteredHref.every((href) => prevEncountered.includes(href))) {
@@ -10,15 +10,15 @@ export default async function openNestedDocusaurusMenus(page: Page) {
     encounteredHref = await page.evaluate(
       (encounteredHref) => {
         const collapsible: HTMLElement[] = Array.from(
-          document.querySelectorAll(".menu__link.menu__link--sublist")
+          document.querySelectorAll('.menu__link.menu__link--sublist')
         );
         const linksFound: string[] = [];
         collapsible.forEach(async (collapsibleItem: HTMLElement) => {
-          const href = collapsibleItem?.getAttribute("href");
+          const href = collapsibleItem?.getAttribute('href');
           // Should never occur but we keep it as a fail-safe
-          if (href?.startsWith("https://") || href?.startsWith("http://")) {
+          if (href?.startsWith('https://') || href?.startsWith('http://')) {
             return;
           }

package/src/scraping/site-scrapers/openNestedGitbookMenus.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { Page } from "puppeteer";
+import { Page } from 'puppeteer';
 export default async function openNestedGitbookMenus(page: Page) {
   let clickedAny = true;
@@ -7,9 +7,7 @@ export default async function openNestedGitbookMenus(page: Page) {
   while (clickedAny) {
     clickedAny = await page.evaluate(() => {
       // Right pointing arrow. Only closed menus have this icon
-      const icons: HTMLElement[] = Array.from(
-        document.querySelectorAll('path[d="M9 18l6-6-6-6"]')
-      );
+      const icons: HTMLElement[] = Array.from(document.querySelectorAll('path[d="M9 18l6-6-6-6"]'));
       icons.forEach(async (icon: HTMLElement) => {
         const toClick = icon?.parentElement?.parentElement;

package/src/scraping/site-scrapers/scrapeDocusaurusPage.ts CHANGED Viewed

@@ -1,7 +1,8 @@
-import cheerio from "cheerio";
-import { NodeHtmlMarkdown } from "node-html-markdown";
-import downloadAllImages from "../downloadAllImages.js";
-import replaceImagePaths from "../replaceImagePaths.js";
+import cheerio from 'cheerio';
+import { NodeHtmlMarkdown } from 'node-html-markdown';
+import downloadAllImages from '../downloadAllImages.js';
+import replaceImagePaths from '../replaceImagePaths.js';
 export async function scrapeDocusaurusPage(
   html: string,
@@ -17,24 +18,22 @@ export async function scrapeDocusaurusPage(
 }> {
   const $ = cheerio.load(html);
-  const article =
-    version === "3" ? $(".theme-doc-markdown").first() : $("article").first();
+  const article = version === '3' ? $('.theme-doc-markdown').first() : $('article').first();
   if (article.length === 0) {
     // Index pages with no additional text don't have the markdown class
     return {
-      title: ''
+      title: '',
     };
   }
-  const titleComponent = article.find("h1");
+  const titleComponent = article.find('h1');
   const title = titleComponent.text().trim();
   // Do not include title in the content when we insert it in our metadata
   titleComponent.remove();
-  const markdownContent =
-    version === "3" ? article : article.find(".markdown").first();
+  const markdownContent = version === '3' ? article : article.find('.markdown').first();
   const origToWritePath = await downloadAllImages(
     $,
@@ -50,10 +49,8 @@ export async function scrapeDocusaurusPage(
   let markdown = markdownHtml ? nhm.translate(markdownHtml) : null;
   if (markdown == null) {
-    console.error(
-      "We do not support scraping this page. Content will be empty"
-    );
-    return { title, description: undefined, markdown: "" };
+    console.error('We do not support scraping this page. Content will be empty');
+    return { title, description: undefined, markdown: '' };
   }
   // Description only exists in meta tags. The code is commented out because its prone to incorrectly
@@ -70,16 +67,16 @@ export async function scrapeDocusaurusPage(
   // When we parse their HTML the parser adds things like:
   // [](#setup "Direct link to heading")
   // to the end of each header.
-  markdown = markdown.replace(/\[\]\(#.+ ".+"\)\n/g, "\n");
+  markdown = markdown.replace(/\[\]\(#.+ ".+"\)\n/g, '\n');
   // Remove unnecessary nonwidth blank space characters
-  markdown = markdown.replace(/\u200b/g, "");
+  markdown = markdown.replace(/\u200b/g, '');
   // Reduce unnecessary blank lines
-  markdown = markdown.replace(/\n\n\n/g, "\n\n");
+  markdown = markdown.replace(/\n\n\n/g, '\n\n');
   // Mintlify doesn't support bolded headers, remove the asterisks
-  markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, "$1 $2\n");
+  markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, '$1 $2\n');
   if (origToWritePath) {
     markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
   }

package/src/scraping/site-scrapers/scrapeDocusaurusSection.ts CHANGED Viewed

@@ -1,9 +1,10 @@
-import cheerio from "cheerio";
-import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js";
-import combineNavWithEmptyGroupTitles from "../combineNavWithEmptyGroupTitles.js";
-import { scrapeDocusaurusPage } from "./scrapeDocusaurusPage.js";
-import { getDocusaurusLinksPerGroup } from "./links-per-group/getDocusaurusLinksPerGroup.js";
-import downloadLogoImage from "../downloadLogoImage.js";
+import cheerio from 'cheerio';
+import combineNavWithEmptyGroupTitles from '../combineNavWithEmptyGroupTitles.js';
+import downloadLogoImage from '../downloadLogoImage.js';
+import { scrapeGettingFileNameFromUrl } from '../scrapeGettingFileNameFromUrl.js';
+import { getDocusaurusLinksPerGroup } from './links-per-group/getDocusaurusLinksPerGroup.js';
+import { scrapeDocusaurusPage } from './scrapeDocusaurusPage.js';
 export async function scrapeDocusaurusSection(
   html: string,
@@ -16,18 +17,14 @@ export async function scrapeDocusaurusSection(
   const $ = cheerio.load(html);
   // Download the logo
-  const logoSrc = $(".navbar__logo img").attr("src");
+  const logoSrc = $('.navbar__logo img').attr('src');
   downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite);
   // Get all the navigation sections
-  const navigationSections = $(".theme-doc-sidebar-menu").first().children();
+  const navigationSections = $('.theme-doc-sidebar-menu').first().children();
   // Get all links per group
-  const groupsConfig: MintNavigation[] = getDocusaurusLinksPerGroup(
-    navigationSections,
-    $,
-    version
-  );
+  const groupsConfig: MintNavigation[] = getDocusaurusLinksPerGroup(navigationSections, $, version);
   // Merge groups with empty titles together
   const reducedGroupsConfig = combineNavWithEmptyGroupTitles(groupsConfig);
@@ -48,7 +45,7 @@ export async function scrapeDocusaurusSection(
               scrapeDocusaurusPage,
               false,
               version,
-              "/docs"
+              '/docs'
             )
           )
         )

package/src/scraping/site-scrapers/scrapeGitBookPage.ts CHANGED Viewed

@@ -1,7 +1,8 @@
-import cheerio from "cheerio";
-import { NodeHtmlMarkdown } from "node-html-markdown";
-import downloadAllImages from "../downloadAllImages.js";
-import replaceImagePaths from "../replaceImagePaths.js";
+import cheerio from 'cheerio';
+import { NodeHtmlMarkdown } from 'node-html-markdown';
+import downloadAllImages from '../downloadAllImages.js';
+import replaceImagePaths from '../replaceImagePaths.js';
 export async function scrapeGitBookPage(
   html: string,
@@ -16,9 +17,7 @@ export async function scrapeGitBookPage(
   const titleComponent = $('[data-testid="page.title"]').first();
   const titleAndDescription = titleComponent.parent().parent().parent().text();
-  const description = titleAndDescription
-    .replace(titleComponent.text(), "")
-    .trim();
+  const description = titleAndDescription.replace(titleComponent.text(), '').trim();
   const title = titleComponent.text().trim();
   const content = $('[data-testid="page.contentEditor"]').first();
@@ -32,8 +31,8 @@ export async function scrapeGitBookPage(
       .children()
       .toArray()
       .map((d) => $(d).text())
-      .filter((text) => text !== "")
-      .join("\n");
+      .filter((text) => text !== '')
+      .join('\n');
     code.replaceWith(`<pre><code>${codeContent}</code></pre>`);
   });
@@ -42,7 +41,7 @@ export async function scrapeGitBookPage(
   const modifyFileName = (fileName: string) =>
     // Remove GitBook metadata from the start
     // The first four %2F split metadata fields. Remaining ones are part of the file name.
-    fileName.split("%2F").slice(4).join("%2F");
+    fileName.split('%2F').slice(4).join('%2F');
   const origToWritePath = await downloadAllImages(
     $,
@@ -57,16 +56,16 @@ export async function scrapeGitBookPage(
   let markdown = nhm.translate(contentHtml);
   // Keep headers on one line
-  markdown = markdown.replace(/# \n\n/g, "# ");
+  markdown = markdown.replace(/# \n\n/g, '# ');
   // Remove unnecessary nonwidth blank space characters
-  markdown = markdown.replace(/\u200b/g, "");
+  markdown = markdown.replace(/\u200b/g, '');
   // Reduce unnecessary blank lines
-  markdown = markdown.replace(/\n\n\n/g, "\n\n");
+  markdown = markdown.replace(/\n\n\n/g, '\n\n');
   // Mintlify doesn't support bolded headers, remove the asterisks
-  markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, "$1 $2\n");
+  markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, '$1 $2\n');
   if (origToWritePath) {
     markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
   }