npm - @mintlify/scraping - Versions diffs - 3.0.14 → 3.0.16 - Mend

@mintlify/scraping 3.0.14 → 3.0.16

This diff shows the changes between publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (87)

package/src/scraping/site-scrapers/scrapeGitBookSection.ts CHANGED Viewed

@@ -1,10 +1,11 @@
-import cheerio from "cheerio";
-import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js";
-import { scrapeGitBookPage } from "./scrapeGitBookPage.js";
-import combineNavWithEmptyGroupTitles from "../combineNavWithEmptyGroupTitles.js";
-import getLinksRecursivelyGitBook from "./links-per-group/getLinksRecursivelyGitBook.js";
-import alternateGroupTitle from "./alternateGroupTitle.js";
-import downloadLogoImage from "../downloadLogoImage.js";
+import cheerio from 'cheerio';
+import combineNavWithEmptyGroupTitles from '../combineNavWithEmptyGroupTitles.js';
+import downloadLogoImage from '../downloadLogoImage.js';
+import { scrapeGettingFileNameFromUrl } from '../scrapeGettingFileNameFromUrl.js';
+import alternateGroupTitle from './alternateGroupTitle.js';
+import getLinksRecursivelyGitBook from './links-per-group/getLinksRecursivelyGitBook.js';
+import { scrapeGitBookPage } from './scrapeGitBookPage.js';
 export async function scrapeGitBookSection(
   html: string,
@@ -17,9 +18,7 @@ export async function scrapeGitBookSection(
   const $ = cheerio.load(html);
   // Download the logo
-  const logoSrc = $('a[data-testid="public.headerHomeLink"] img')
-    .first()
-    .attr("src");
+  const logoSrc = $('a[data-testid="public.headerHomeLink"] img').first().attr('src');
   downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite).catch(console.error);
   // Get all the navigation sections
@@ -39,14 +38,11 @@ export async function scrapeGitBookSection(
     .toArray()
     .map((s: cheerio.Element) => {
       const section = $(s);
-      const sectionTitle = $(section)
-        .find('div > div[dir="auto"]')
-        .first()
-        .text();
+      const sectionTitle = $(section).find('div > div[dir="auto"]').first().text();
       // Only present if the nested navigation is not in a group
       const firstLink = section.children().eq(0);
-      const firstHref = firstLink.attr("href");
+      const firstHref = firstLink.attr('href');
       const linkSections: cheerio.Cheerio = section.children().eq(1).children();
       const pages = getLinksRecursivelyGitBook(linkSections, $);

package/src/scraping/site-scrapers/scrapeReadMePage.ts CHANGED Viewed

@@ -1,7 +1,8 @@
-import cheerio from "cheerio";
-import { NodeHtmlMarkdown } from "node-html-markdown";
-import downloadAllImages from "../downloadAllImages.js";
-import replaceImagePaths from "../replaceImagePaths.js";
+import cheerio from 'cheerio';
+import { NodeHtmlMarkdown } from 'node-html-markdown';
+import downloadAllImages from '../downloadAllImages.js';
+import replaceImagePaths from '../replaceImagePaths.js';
 export async function scrapeReadMePage(
   html: string,
@@ -13,46 +14,40 @@ export async function scrapeReadMePage(
 ) {
   const $ = cheerio.load(html);
-  const titleComponent = $("h1").first();
+  const titleComponent = $('h1').first();
   const title = titleComponent.text().trim();
-  let description = $(".markdown-body", titleComponent.parent()).text().trim();
+  let description = $('.markdown-body', titleComponent.parent()).text().trim();
   if (!description) {
-    description = $(".rm-Article > header p").text().trim();
+    description = $('.rm-Article > header p').text().trim();
   }
-  let content = $(".content-body .markdown-body").first();
+  let content = $('.content-body .markdown-body').first();
   if (content.length === 0) {
-    content = $(".rm-Article > .markdown-body");
+    content = $('.rm-Article > .markdown-body');
   }
   // API Pages don't have a markdown body in the same position so there's no HTML
-  const contentHtml = content.html() || "";
+  const contentHtml = content.html() || '';
-  const origToWritePath = await downloadAllImages(
-    $,
-    content,
-    origin,
-    imageBaseDir,
-    overwrite
-  );
+  const origToWritePath = await downloadAllImages($, content, origin, imageBaseDir, overwrite);
   const nhm = new NodeHtmlMarkdown({ useInlineLinks: false });
   let markdown = nhm.translate(contentHtml);
   // Keep headers on one line and increase their depth by one
-  markdown = markdown.replace(/# \n\n/g, "## ");
+  markdown = markdown.replace(/# \n\n/g, '## ');
   // Remove unnecessary nonwidth blank space characters
-  markdown = markdown.replace(/\u200b/g, "");
+  markdown = markdown.replace(/\u200b/g, '');
   // Remove ReadMe anchor links
-  markdown = markdown.replace(/\n\[\]\(#.+\)\n/g, "\n");
+  markdown = markdown.replace(/\n\[\]\(#.+\)\n/g, '\n');
   // Reduce unnecessary blank lines
-  markdown = markdown.replace(/\n\n\n/g, "\n\n");
+  markdown = markdown.replace(/\n\n\n/g, '\n\n');
   // Mintlify doesn't support bolded headers, remove the asterisks
-  markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, "$1 $2\n");
+  markdown = markdown.replace(/(\n#+) \*\*(.*)\*\*\n/g, '$1 $2\n');
   if (origToWritePath) {
     markdown = replaceImagePaths(origToWritePath, cliDir, markdown);
   }

package/src/scraping/site-scrapers/scrapeReadMeSection.ts CHANGED Viewed

@@ -1,8 +1,9 @@
-import cheerio from "cheerio";
-import { scrapeReadMePage } from "./scrapeReadMePage.js";
-import { scrapeGettingFileNameFromUrl } from "../scrapeGettingFileNameFromUrl.js";
-import getLinksRecursively from "./links-per-group/getLinksRecursively.js";
-import downloadLogoImage from "../downloadLogoImage.js";
+import cheerio from 'cheerio';
+import downloadLogoImage from '../downloadLogoImage.js';
+import { scrapeGettingFileNameFromUrl } from '../scrapeGettingFileNameFromUrl.js';
+import getLinksRecursively from './links-per-group/getLinksRecursively.js';
+import { scrapeReadMePage } from './scrapeReadMePage.js';
 export async function scrapeReadMeSection(
   html: string,
@@ -15,39 +16,34 @@ export async function scrapeReadMeSection(
   const $ = cheerio.load(html);
   // Download the logo
-  const logoSrc = $(".rm-Logo-img").first().attr("src");
+  const logoSrc = $('.rm-Logo-img').first().attr('src');
   downloadLogoImage(logoSrc, imageBaseDir, origin, overwrite).catch(console.error);
   // Get all the navigation sections, but only from the first
   // sidebar found. There are multiple in the HTML for mobile
   // responsiveness but they all have the same links.
-  const navigationSections = $(".rm-Sidebar")
-    .first()
-    .find(".rm-Sidebar-section");
+  const navigationSections = $('.rm-Sidebar').first().find('.rm-Sidebar-section');
-  const groupsConfig: MintNavigation[] = navigationSections
-    .toArray()
-    .map((s: cheerio.Element) => {
-      const section = $(s);
-      const sectionTitle = section.find("h3").first().text();
+  const groupsConfig: MintNavigation[] = navigationSections.toArray().map((s: cheerio.Element) => {
+    const section = $(s);
+    const sectionTitle = section.find('h3').first().text();
-      // Get all links, then use filter to remove duplicates.
-      // There are duplicates because of nested navigation, eg:
-      // subgroupTitle -> /first-page
-      // -- First Page -> /first-page   ** DUPLICATE **
-      // -- Second Page -> /second-page
-      const linkSections = section.find(".rm-Sidebar-list").first().children();
-      const pages = getLinksRecursively(linkSections, $).filter(
-        (value: string, index: number, self: any) =>
-          self.indexOf(value) === index
-      );
+    // Get all links, then use filter to remove duplicates.
+    // There are duplicates because of nested navigation, eg:
+    // subgroupTitle -> /first-page
+    // -- First Page -> /first-page   ** DUPLICATE **
+    // -- Second Page -> /second-page
+    const linkSections = section.find('.rm-Sidebar-list').first().children();
+    const pages = getLinksRecursively(linkSections, $).filter(
+      (value: string, index: number, self: any) => self.indexOf(value) === index
+    );
-      // Follows the same structure as mint.json
-      return {
-        group: sectionTitle,
-        pages: pages,
-      };
-    });
+    // Follows the same structure as mint.json
+    return {
+      group: sectionTitle,
+      pages: pages,
+    };
+  });
   // Scrape each link in the navigation.
   return Promise.all(
@@ -62,7 +58,7 @@ export async function scrapeReadMeSection(
         scrapeReadMePage,
         false,
         version,
-        "/docs"
+        '/docs'
       );
     })
   );

package/src/util.ts CHANGED Viewed

@@ -1,8 +1,9 @@
-import { mkdirSync, writeFileSync } from "fs";
-import Ora, { Ora as OraType } from "ora";
-import path from "path";
-import shell from "shelljs";
-import stopIfInvalidLink from "./validation/stopIfInvalidLink.js";
+import { mkdirSync, writeFileSync } from 'fs';
+import Ora, { Ora as OraType } from 'ora';
+import path from 'path';
+import shell from 'shelljs';
+import stopIfInvalidLink from './validation/stopIfInvalidLink.js';
 export const MintConfig = (
   name: string,
@@ -13,8 +14,8 @@ export const MintConfig = (
 ) => {
   return {
     name,
-    logo: "",
-    favicon: "",
+    logo: '',
+    favicon: '',
     colors: {
       primary: color,
     },
@@ -26,7 +27,7 @@ export const MintConfig = (
     anchors: [],
     navigation: [
       {
-        group: "Home",
+        group: 'Home',
         pages: [filename],
       },
     ],
@@ -34,11 +35,7 @@ export const MintConfig = (
   };
 };
-export const Page = (
-  title: string,
-  description?: string,
-  markdown?: string
-) => {
+export const Page = (title: string, description?: string, markdown?: string) => {
   // If we are an empty String we want to add two quotes,
   // if we added as we went we would detect the first quote
   // as the closing quote.
@@ -51,9 +48,7 @@ export const Page = (
     title = title + '"';
   }
-  const optionalDescription = description
-    ? `\ndescription: "${description}"`
-    : "";
+  const optionalDescription = description ? `\ndescription: "${description}"` : '';
   return `---\ntitle: ${title}${optionalDescription}\n---\n\n${markdown}`;
 };
@@ -65,24 +60,24 @@ export function getOrigin(url: string) {
 export function objToReadableString(objs: MintNavigationEntry[]) {
   // Two spaces as indentation
-  return objs.map((obj) => JSON.stringify(obj, null, 2)).join(",\n");
+  return objs.map((obj) => JSON.stringify(obj, null, 2)).join(',\n');
 }
 export const toFilename = (title: string) => {
   // Gets rid of special characters at the start and end
   // of the name by converting to spaces then using trim.
   return title
-    .replace(/[^a-z0-9]/gi, " ")
+    .replace(/[^a-z0-9]/gi, ' ')
     .trim()
-    .replace(/ /g, "-")
+    .replace(/ /g, '-')
     .toLowerCase();
 };
 export const addMdx = (fileName: string) => {
-  if (fileName.endsWith(".mdx")) {
+  if (fileName.endsWith('.mdx')) {
     return fileName;
   }
-  return fileName + ".mdx";
+  return fileName + '.mdx';
 };
 export const createPage = (
@@ -90,7 +85,7 @@ export const createPage = (
   description?: string,
   markdown?: string,
   overwrite = false,
-  rootDir = "",
+  rootDir = '',
   fileName?: string
 ) => {
   const writePath = path.join(rootDir, addMdx(fileName || toFilename(title)));
@@ -101,17 +96,17 @@ export const createPage = (
   // Write the page to memory
   if (overwrite) {
     writeFileSync(writePath, Page(title, description, markdown));
-    console.log("✏️ - " + writePath);
+    console.log('✏️ - ' + writePath);
   } else {
     try {
       writeFileSync(writePath, Page(title, description, markdown), {
-        flag: "wx",
+        flag: 'wx',
       });
-      console.log("✏️ - " + writePath);
+      console.log('✏️ - ' + writePath);
     } catch (e) {
       // We do a try-catch instead of an if-statement to avoid a race condition
       // of the file being created after we started writing.
-      if ((e as {  code: string })?.code === "EEXIST") {
+      if ((e as { code: string })?.code === 'EEXIST') {
         console.log(`❌ Skipping existing file ${writePath}`);
       } else {
         console.error(e);
@@ -126,30 +121,24 @@ export function getHrefFromArgs(argv: any) {
   return href;
 }
-export const buildLogger = (startText = ""): OraType => {
+export const buildLogger = (startText = ''): OraType => {
   const logger = Ora().start(startText);
   return logger;
 };
 export const getFileExtension = (filename: string) => {
-  const ext = filename.substring(
-    filename.lastIndexOf(".") + 1,
-    filename.length
-  );
+  const ext = filename.substring(filename.lastIndexOf('.') + 1, filename.length);
   if (filename === ext) return undefined;
   return ext.toLowerCase();
 };
 export const fileBelongsInPagesFolder = (filename: string) => {
   const extension = getFileExtension(filename);
-  return (
-    extension &&
-    (extension === "mdx" || extension === "md" || extension === "tsx")
-  );
+  return extension && (extension === 'mdx' || extension === 'md' || extension === 'tsx');
 };
 export const ensureYarn = (logger: OraType) => {
-  const yarnInstalled = shell.which("yarn");
+  const yarnInstalled = shell.which('yarn');
   if (!yarnInstalled) {
     logger.fail(`yarn must be installed, run

package/src/validation/stopIfInvalidLink.ts CHANGED Viewed

@@ -1,9 +1,9 @@
-import isValidLink from "./isValidLink.js";
+import isValidLink from './isValidLink.js';
 export default function stopIfInvalidLink(href: string) {
   if (!isValidLink(href)) {
-    console.log("Invalid link: " + href);
-    console.log("Make sure the link starts with http:// or https://");
+    console.log('Invalid link: ' + href);
+    console.log('Make sure the link starts with http:// or https://');
     process.exit(1);
   }
 }