npm - markdown_link_checker_sc - Versions diffs - 0.0.13 → 0.0.116 - Mend

markdown_link_checker_sc 0.0.13 → 0.0.116

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

package/src/process_markdown.js ADDED Viewed

@@ -0,0 +1,400 @@
+import { Link } from "./links.js";
+import { sharedData } from "./shared_data.js";
+// Returns slug for a string (markdown heading) using Vuepress algorithm.
+// Algorithm from chatgpt - needs testing.
+const processMarkdown = (contents, page) => {
+  sharedData.options.log.includes("functions")
+    ? console.log(`Function: processMarkdown(): page: ${page}`)
+    : null;
+  const headings = [];
+  //const anchors = [];
+  const htmlAnchors = []; //{};
+  const relativeLinks = [];
+  const urlLinks = [];
+  const urlLocalLinks = [];
+  const urlImageLinks = [];
+  const relativeImageLinks = [];
+  const unHandledLinkTypes = [];
+  let redirectTo; //Pages that contain <Redirect to="string"/> links
+  //console.log("SHARED_DATA");
+  //console.log(sharedData);
+  // Check if page is a redirect.
+  // If it is, add to list then return.
+  // Otherwise do other file processing.
+  const regex = /<Redirect to="(.+?)" \/>/;
+  const matches = contents.match(regex);
+  matches ? (redirectTo = matches[1]) : (redirectTo = null);
+  if (redirectTo) {
+    //console.log(`REDIRECT: ${file}`)
+  } else {
+    // Don't do anything else for redirects pages
+    const lines = contents.split(/\r?\n/);
+    for (let i = 0; i < lines.length; i++) {
+      const line = lines[i];
+      // match headings
+      const matches = line.match(/^#+\s+(.+)$/);
+      if (matches) {
+        headings.push(matches[1]);
+      }
+      // TODO - have to slugify later.
+      const links = processLineMarkdownLinks(
+        line,
+        relativeLinks,
+        relativeImageLinks,
+        urlLinks,
+        urlLocalLinks,
+        urlImageLinks,
+        unHandledLinkTypes,
+        page
+      );
+    }
+    // Match html tags that have an id element
+    // (another way an anchor can be created)
+    const htmlTagsWithIdsMatches = contents.match(
+      /<([a-z]+)(?:\s+[^>]*?\bid=(["'])(.*?)\2[^>]*?)?>/gi
+    );
+    if (htmlTagsWithIdsMatches) {
+      htmlTagsWithIdsMatches.forEach((match) => {
+        const tagMatches = match.match(/^<([a-z]+)/i);
+        const idMatches = match.match(/id=(["'])(.*?)\1/);
+        if (tagMatches && idMatches) {
+          const tag = tagMatches[1].toLowerCase();
+          const id = idMatches[2];
+          if (tag && id) {
+            htmlAnchors.push(id);
+          }
+        }
+      });
+    }
+  }
+  return {
+    //page_file: file,
+    headings: headings,
+    //anchors_auto_headings: anchors,
+    anchors_tag_ids: htmlAnchors,
+    relativeLinks,
+    urlLinks,
+    urlLocalLinks,
+    urlImageLinks,
+    relativeImageLinks,
+    unHandledLinkTypes,
+    redirectTo,
+  };
+};
+// Processes line, taking arrays of different link types.
+// Update the incoming values and return
+// Note, assumption is all links are on one line, not split across lines.
+// This is generally true, but does not have to be.
+const processLineMarkdownLinks = (
+  line,
+  relativeLinks,
+  relativeImageLinks,
+  urlLinks,
+  urlLocalLinks,
+  urlImageLinks,
+  unHandledLinkTypes,
+  page
+) => {
+  sharedData.options.log.includes("functions")
+    ? console.log(`Function: processLineMarkdownLinks(): page: ${page}`)
+    : null;
+  //const regex = /(?<prefix>[!@]?)\[(?<text>[^\]]+)\]\((?<url>\S+?)(?:\s+"(?<title>[^"]+)")?\)/g;
+  const regex =
+    /(?<prefix>[!@]?)\[(?<text>[^\]]*)\]\((?<url>\S+?)(?:\s+"(?<title>[^"]+)")?\)/g;
+  const matches = line.matchAll(regex);
+  // TODO - THIS matches @[youtube](gjHj6YsxcZk) valid link which is used for vuepress plugin URLs. We probably want to exclude it and deal with it separately
+  // Maybe a backwards lookup on @
+  // Not sure if we can generalize
+  for (const match of matches) {
+    const { prefix, text, url, title } = match.groups;
+    const isMarkdownImageLink = prefix == "!" ? true : false;
+    const isVuepressYouTubeLink = prefix == "@" ? true : false;
+    const linkText = text;
+    const linkUrl = url;
+    const linkTitle = title ? title : "";
+    // Work out Link type
+    let linkType = "";
+    if (isVuepressYouTubeLink) {
+      if (linkUrl.startsWith("http")) {
+        linkType = "urlLink";
+      } else {
+        // Not going to handle this (yet)
+        // TODO - prepend the standard URL
+      }
+    } else if (
+      sharedData.options.site_url &&
+      (linkUrl.startsWith(`http://${sharedData.options.site_url}`) ||
+        linkUrl.startsWith(`https://${sharedData.options.site_url}`))
+    ) {
+      //console.log(link);
+      linkType = "urlLocalLink";
+    }
+    if (!linkUrl) {
+      // We should never get to this logging
+      console.log(
+        `WWregexMarkdownLinkAndImage: page: ${page}, linkUrl: ${linkUrl}, linkText: ${linkText}, linkTitle: ${linkTitle}, linkType: ${linkType}`
+      );
+    }
+    //Create link
+    const link = new Link({
+      page: page,
+      url: linkUrl,
+      text: linkText,
+      title: linkTitle,
+      type: linkType,
+    });
+    //console.log(`XXLINKTESTnewLink: ${JSON.stringify(link, null, 2)}`);
+    // For now, dump in different arrays. Might just add to one array eventually
+    switch (link.type) {
+      case "urlLink": {
+        urlLinks.push(link);
+        //console.log("This is a URL link");
+        break;
+      }
+      case "urlLocalLink": {
+        urlLocalLinks.push(link);
+        //console.log("This is a URL local link");
+        break;
+      }
+      case "urlImageLink": {
+        urlImageLinks.push(link);
+        //console.log("This is a URL image link");
+        break;
+      }
+      case "relativeImageLink": {
+        relativeImageLinks.push(link);
+        //console.log("This is a relative image link");
+        break;
+      }
+      case "relativeLink": {
+        relativeLinks.push(link);
+        //console.log("This is a relative link");
+        break;
+      }
+      case "relativeAnchorLink": {
+        relativeLinks.push(link); // This is an anchor link - but currently handled in the same code.
+        //console.log("This is a relative link");
+        break;
+      }
+      case "relativeHTMLLink": {
+        relativeLinks.push(link); // This is HTML link handled in same code.
+        //console.log("This is a relative link");
+        break;
+      }
+      default: {
+        unHandledLinkTypes.push(link);
+        sharedData.options.log.includes("todo") ? console.log(`TODO: 3Unhandled link.type: ${link.type}`) : null;
+        break;
+      }
+    }
+  }
+  //Match for html a - append to the lists
+  const regexHTMLLinkTotal = /<a\s+(?<attributes>.*?)>(?<linktext>.*?)<\/a>/gi;
+  const regexHTMLTitle =
+    /title\s*[=]\s*(?<quote>['"])(?<title>.*?)(?<!\\)\k<quote>/i;
+  //title\s*[=]\s*(?<title>['"]?)([^'"\s>]+)\k<title>/i;
+  const regexHTMLhref =
+    /href\s*[=]\s*(?<quote>['"])(?<href>.*?)(?<!\\)\k<quote>/i;
+  const regexHTMLid = /id\s*[=]\s*(?<quote>['"])(?<id>.*?)(?<!\\)\k<quote>/i;
+  for (const match of line.matchAll(regexHTMLLinkTotal)) {
+    const attributes = match.groups.attributes;
+    //console.log(`XXXXXattributes_s: ${attributes}`)
+    const linkText =
+      match && match.groups.linktext ? match.groups.linktext : "";
+    //console.log(`XXXXXlinktext: ${linktext}`)
+    let linkTitle = "";
+    let linkUrl = "";
+    let linkId = "";
+    if (attributes) {
+      const titlematch = attributes.match(regexHTMLTitle);
+      linkTitle = titlematch && titlematch.groups.title ? titlematch.groups.title : "";
+      const hrefmatch = attributes.match(regexHTMLhref);
+      linkUrl = hrefmatch && hrefmatch.groups.href ? hrefmatch.groups.href : "";
+      const idMatch = attributes.match(regexHTMLid);
+      linkId = idMatch && idMatch.groups.id ? idMatch.groups.id : "";
+    }
+    // If not linkUrl then this is probably and anchor link.
+    //
+    if (!linkUrl && linkId) {
+      // This is an anchor-only link. Skip to next found link
+      continue;
+    }
+    let linkType = "";
+    if (
+      sharedData.options.site_url &&
+      (linkUrl.startsWith(`http://${sharedData.options.site_url}`) ||
+        linkUrl.startsWith(`https://${sharedData.options.site_url}`))
+    ) {
+      //console.log(link);
+      linkType = "urlLocalLink";
+    }
+    //const link = new Link(linkUrl, linkText, linkTitle);
+    if (!linkUrl) {
+      //We should only get here for empty links.
+      console.log(         `WWregexHTMLmatchAtag: page: ${page}, linkUrl: ${linkUrl}, linkText: ${linkText}, linkTitle: ${linkTitle}, linkType: ${linkType}`      );
+    }
+    const link = new Link({
+      page: page,
+      url: linkUrl,
+      type: linkType,
+      text: linkText,
+      title: linkTitle /* type: linkType */,
+    });
+    // For now, dump in different arrays. Might just add to one array eventually
+    switch (link.type) {
+      case "urlLink": {
+        urlLinks.push(link);
+        //console.log("This is a URL link");
+        break;
+      }
+      case "urlLocalLink": {
+        urlLocalLinks.push(link);
+        //console.log("This is a URL local link");
+        break;
+      }
+      case "urlImageLink": {
+        urlImageLinks.push(link);
+        //console.log("This is a URL image link");
+        break;
+      }
+      case "relativeImageLink": {
+        relativeImageLinks.push(link);
+        //console.log("This is a relative image link");
+        break;
+      }
+      case "relativeLink": {
+        relativeLinks.push(link);
+        //console.log("This is a relative link");
+        break;
+      }
+      case "relativeAnchorLink": {
+        relativeLinks.push(link); // This is an anchor link - but currently handled in the same code.
+        //console.log("This is a relative link");
+        break;
+      }
+      case "relativeHTMLLink": {
+        relativeLinks.push(link); // This is an anchor link - but currently handled in the same code.
+        //console.log("This is a relative link");
+        break;
+      }
+      default: {
+        unHandledLinkTypes.push(link);
+        sharedData.options.log.includes("todo") ? console.log(`TODO: 2Unhandled link.type: ${link.type}`) : null;
+        break;
+      }
+    }
+  }
+  //Might further parse this to catch img in anchor.
+  //Match for html img - append to the lists
+  const regexHTMLImgTotal = /<img\s+(?<attributes>.*?)\/>/gi;
+  const regex_htmlattr_src =
+    /src\s*[=]\s*(?<quote>['"])(?<src>.*?)(?<!\\)\k<quote>/i;
+  for (const match of line.matchAll(regexHTMLImgTotal)) {
+    const attributes = match.groups.attributes;
+    //console.log(`XXXXXImageattributes_s: ${attributes}`)
+    const linkText = "";
+    let linkTitle = "";
+    let linkUrl = "";
+    if (attributes) {
+      const titlematch = attributes.match(regexHTMLTitle);
+      linkTitle =
+        titlematch && titlematch.groups.title ? titlematch.groups.title : "";
+      const srcmatch = attributes.match(regex_htmlattr_src);
+      linkUrl = srcmatch && srcmatch.groups.src ? srcmatch.groups.src : "";
+    }
+    //const link = new Link(linkUrl, linkText, linkTitle);
+    //console.log(`WWregexHTML_matchImage: page: ${page}, linkUrl: ${linkUrl}, linkText: ${linkText}, linkTitle: ${linkTitle},`);
+    const link = new Link({
+      page: page,
+      url: linkUrl,
+      text: linkText,
+      title: linkTitle /* type: linkType */,
+    });
+    /*
+    if (linkUrl) {
+      linkUrl.startsWith("http")
+        ? urlImageLinks.push(link)
+        : relativeImageLinks.push(link);
+    }
+*/
+    // For now, dump in different arrays. Might just add to one array eventually
+    switch (link.type) {
+      case "urlLink": {
+        urlLinks.push(link);
+        //console.log("This is a URL link");
+        break;
+      }
+      case "urlLocalLink": {
+        urlLocalLinks.push(link);
+        //console.log("This is a URL local link");
+        break;
+      }
+      case "urlImageLink": {
+        urlImageLinks.push(link);
+        //console.log("This is a URL image link");
+        break;
+      }
+      case "relativeImageLink": {
+        relativeImageLinks.push(link);
+        //console.log("This is a relative image link");
+        break;
+      }
+      case "relativeLink": {
+        relativeLinks.push(link);
+        //console.log("This is a relative link");
+        break;
+      }
+      case "relativeAnchorLink": {
+        relativeLinks.push(link); // This is an anchor link - but currently handled in the same code.
+        //console.log("This is a relative link");
+        break;
+      }
+      case "relativeHTMLLink": {
+        relativeLinks.push(link); // This is an HTML link.
+        break;
+      }
+      default: {
+        unHandledLinkTypes.push(link);
+        sharedData.options.log.includes("todo") ? console.log(`TODO: 1Unhandled link.type: ${link.type}`) : null;
+        break;
+      }
+    }
+    //console.log(link);
+  }
+  return {
+    relativeLinks,
+    urlLinks,
+    urlImageLinks,
+    relativeImageLinks,
+  };
+};
+export { processMarkdown };

package/src/process_orphans.js ADDED Viewed

@@ -0,0 +1,145 @@
+import { logToFile } from "./helpers.js";
+import path from "path";
+import { sharedData } from "./shared_data.js";
+import { PageNotInTOCError, PageNotLinkedInternallyError } from "./errors.js";
+// Gets page with most links. Supposed to be used on the allResults object that is an array of objects about each page.
+// Will use to get the summary.
+function getPageWithMostLinks(pages) {
+  if (sharedData.options.log.includes("functions")) {
+    console.log("Function: getPageWithMostLinks");
+  }
+  return pages.reduce(
+    (maxLinksPage, currentPage) => {
+      if (
+        currentPage.relativeLinks.length > maxLinksPage.relativeLinks.length
+      ) {
+        return currentPage;
+      } else {
+        return maxLinksPage;
+      }
+    },
+    { relativeLinks: [] }
+  ).page_file;
+}
+// Get any orphans (no links from summary and no links at all)
+//
+function checkPageOrphans(results) {
+  const resultObj = {};
+  const allInternalAbsLinks = [];
+  //Create result object that has page as property
+  // And value is an array of links in/from that page converted to absolute.
+  results.forEach((obj) => {
+    const filePath = obj.page_file;
+    const relativeLinks = obj.relativeLinks;
+    const absLinks = [];
+    relativeLinks.forEach((linkObj) => {
+      const linkUrl = linkObj.url;
+      const absLink = path.resolve(path.dirname(filePath), linkUrl);
+      absLinks.push(absLink);
+      allInternalAbsLinks.push(absLink);
+    });
+    resultObj[filePath] = absLinks;
+  });
+  // Invert resultObj to get all objects to link to page.
+  // Add the links to to the big results object we process later.
+  const pagesObj = {};
+  for (const [page, links] of Object.entries(resultObj)) {
+    for (const link of links) {
+      if (!pagesObj[link]) {
+        pagesObj[link] = [];
+      }
+      pagesObj[link].push(page);
+    }
+  }
+  results.forEach((obj) => {
+    obj["linkedFrom"] = pagesObj[obj.page_file];
+  });
+  // Check that every filepath has at least one object in some absLink that matches it
+  let allFilesReferenced = true;
+  let allFilesSummaryReferenced = true;
+  const allFilesNoReference = [];
+  const allFilesNoSummaryReference = [];
+  results.forEach((obj) => {
+    const filePath = obj.page_file;
+    if (!allInternalAbsLinks.some((absLink) => absLink === filePath)) {
+      if (obj.redirectTo) {
+        //do nothing
+      } else if (obj.page_file === sharedData.options.toc) {
+        //do nothing
+      } else {
+        //if it a redirect file then it shouldn't be linked.
+        allFilesNoReference.push(filePath);
+        //console.log(`File "${filePath}" not referenced by any absolute link`);
+        const error = new PageNotLinkedInternallyError({file: obj.page_file});
+        results.allErrors.push(error);
+        allFilesReferenced = false;
+      }
+    }
+    const summaryFileLinks = resultObj[sharedData.options.toc];
+    if (summaryFileLinks && !summaryFileLinks.some((absLink) => absLink === filePath)) {
+      if (obj.redirectTo) {
+        // do nothing /-if it a redirect file then it shouldn't be linked.
+		//console.log(`EXECUTED: ${obj.page_file} in redirect`)
+      } else if (obj.page_file === sharedData.options.toc) {
+        //do nothing - summary shouldt be error for summary.
+      } else {
+        allFilesNoSummaryReference.push(filePath);
+        const error = new PageNotInTOCError({file: obj.page_file});
+        if (!results.allErrors) {
+          results["allErrors"] = [];
+        }
+        results.allErrors.push(error);
+        allFilesSummaryReferenced = false;
+      }
+    }
+  });
+  if (!allFilesReferenced) {
+    const jsonAllFilesNotReferenced = JSON.stringify(
+      allFilesNoReference,
+      null,
+      2
+    );
+    logToFile("./logs/allFilesNoReference.json", jsonAllFilesNotReferenced);
+  } else {
+    //console.log("All files referenced at least once");
+  }
+  if (!allFilesSummaryReferenced) {
+    const jsonAllFilesNotSummaryReferenced = JSON.stringify(
+      allFilesNoSummaryReference,
+      null,
+      2
+    );
+    logToFile(
+      "./logs/allFilesNoSummaryReference.json",
+      jsonAllFilesNotSummaryReferenced
+    );
+  } else {
+    //console.log("All files referenced at least once");
+  }
+  if (sharedData.options.log.includes("quick")) {
+    //console.log(resultObj);
+    const jsonFilesWithAbsoluteLinks = JSON.stringify(resultObj, null, 2);
+    logToFile(
+      "./logs/pagesResolvedAbsoluteLinks.json",
+      jsonFilesWithAbsoluteLinks
+    );
+  }
+}
+export { checkPageOrphans, getPageWithMostLinks };

package/src/process_relative_links.js ADDED Viewed

@@ -0,0 +1,116 @@
+import path from "path";
+import {
+  /*LinkError,*/ CurrentFileMissingAnchorError,
+  LinkedFileMissingAnchorError,
+  LinkedInternalPageMissingError,
+  InternalLinkToHTMLError,
+  UrlToLocalSiteError,
+} from "./errors.js";
+import { sharedData } from "./shared_data.js";
+// An array of errors given a results object that contains our array of objects containing relativeLinks (and other information).
+function processRelativeLinks(results) {
+  sharedData.options.log.includes("functions")
+    ? console.log("Function: processRelativeLinks")
+    : null;
+  const errors = [];
+  //console.log(sharedData);
+  results.forEach((page, index, array) => {
+    //console.log(`PAGE:${JSON.stringify(page, null, 2)}`);
+    page.relativeLinks.forEach((link, index, array) => {
+      //console.log(`LINK: ${JSON.stringify(link, null, 2)}`);
+      if (link.address === "") {
+        // This is a page-local link
+        // Verify the link goes to either heading or id defined in page.
+        if (
+          !(
+            page.anchors_auto_headings.includes(link.anchor) ||
+            page.anchors_tag_ids.includes(link.anchor)
+          )
+        ) {
+          // There is no heading link to specified anchor in current page
+          const error = new CurrentFileMissingAnchorError({ link: link });
+          //console.log(`XXX_LMA_Error: ${JSON.stringify(error, null, 2)}`);
+          errors.push(error);
+        }
+      } else {
+        // This is a link to another page
+        // See if that page is in our results
+        // Report error if not. Otherwise check if anchor is in page.
+        //find the path of the linked page.
+        //console.log(`LINK: ${JSON.stringify(link, null, 2)}`);
+        //console.log(`LINKADDRESS: ${link.address}`);
+        const linkAbsoluteFilePath = link.getAbsolutePath();
+        //console.log(link);
+        // Get the matching file matching our link, if it exists
+        let linkedFile =
+          results.find(
+            (linkedFile) =>
+              linkedFile.hasOwnProperty("page_file") &&
+              path.normalize(linkedFile.page_file) === linkAbsoluteFilePath
+          ) || null;
+        if (!linkedFile) {
+          if (sharedData.options.tryMarkdownforHTML && link.isHTML) {
+            // The file was HTML so it might be a file extension mistake (linking to html instead of md)
+            // In this case we'll try find it.
+            const markdownAbsoluteFilePath = `${
+              linkAbsoluteFilePath.split(".html")[0]
+            }.md`;
+            const linkedHTMLFile =
+              results.find(
+                (linkedHTMLFile) =>
+                  linkedHTMLFile.hasOwnProperty("page_file") &&
+                  path.normalize(linkedHTMLFile.page_file) ===
+                    markdownAbsoluteFilePath
+              ) || null;
+            if (linkedHTMLFile) {
+              const error = new InternalLinkToHTMLError({ link: link });
+              //console.log(error);
+              errors.push(error);
+              linkedFile = linkedHTMLFile;
+            }
+          }
+        }
+        if (!linkedFile) {
+          //File not found as .html or md
+          const error = new LinkedInternalPageMissingError({ link: link });
+          //console.log(error);
+          errors.push(error);
+        } else {
+          // There is a linked file, so now see if there are anchors, and whether they work
+          if (!link.anchor) {
+            // No anchors, so go to next step
+            //null
+          } else if (
+            //List of anchors in linked file includes the anchor
+            linkedFile.anchors_auto_headings.includes(link.anchor) ||
+            linkedFile.anchors_tag_ids.includes(link.anchor)
+          ) {
+            //
+            //do nothing - the linked page includes the anchor from this link
+          } else {
+            // File exists but does not contain matching anchor
+            const error = new LinkedFileMissingAnchorError({ link: link });
+            errors.push(error);
+          }
+        }
+      }
+    });
+  });
+  return errors;
+}
+export { processRelativeLinks };

package/src/shared_data.js ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ export const sharedData = { // Starts out empty. The other modules of this package read run settings from sharedData.options (log, site_url, toc, tryMarkdownforHTML); presumably filled in at startup by code not shown here - confirm.
2	+ };

package/src/slugify.js ADDED Viewed

@@ -0,0 +1,17 @@
+// Returns slug for a string (markdown heading) using Vuepress algorithm.
+// Algorithm from chatgpt - needs testing.
+function slugifyVuepress(str) {
+  const slug = str
+    .toLowerCase()
+    .replace(/\/+/g, "-") // replace / with hyphens
+    .replace(/[^A-Za-z0-9/]+/g, "-") // replace non-word characters except / with hyphens
+    .replace(/[\s_-]+/g, "-") // Replace spaces and underscores with hyphens
+    .replace(/^-+|-+$/g, ""); // Remove extra hyphens from the beginning or end of the string
+  if (str.includes("/")) {
+    //console.log(`DEBUG: SLUG: str: ${str} slug: ${slug}`);
+  }
+  return `${slug}`;
+}
+export { slugifyVuepress };

package/tests/errortype/current_file_missing_anchor/heading_present_for_anchor.md ADDED Viewed

@@ -0,0 +1,13 @@
+# Tests if a heading present for anchor link
+Run like: `node .\index.js -d tests/errortype/current_file_missing_anchor`
+This is URL to anchor that should be present: [Url to anchor with matching heading - show no error](#heading-to-match) yeah!
+No error should show up
+## Heading to Match
+Yeah baby!

package/tests/errortype/current_file_missing_anchor/missing_heading.md ADDED Viewed

@@ -0,0 +1,5 @@
+# Tests if a heading present for anchor link
+Run like: `node .\index.js -d tests/errortype/current_file_missing_anchor`
+This is URL to anchor that should NOT be present: [Url to anchor no matching heading or id - show Error](#there_should_be_no_match) yeah!