@mintlify/scraping 4.0.39 → 4.0.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ import type { Element } from 'hast';
2
+ export declare function unifiedRemoveBreadCrumbs(): (node: Element) => undefined;
3
+ /**
4
+ * Docusaurus-specific function since their breadcrumbs
5
+ * are within the content in the `article` itself instead
6
+ * of outside of the `article` element
7
+ */
8
+ export declare function removeBreadCrumbs(node: Element): undefined;
@@ -0,0 +1,26 @@
1
+ import { visit } from 'unist-util-visit';
2
+ import { framework } from './detectFramework.js';
3
/**
 * Plugin factory: builds a transformer that hands the tree it receives
 * to `removeBreadCrumbs` and returns that call's result.
 */
export function unifiedRemoveBreadCrumbs() {
    return (node) => removeBreadCrumbs(node);
}
8
/**
 * Docusaurus-specific function since their breadcrumbs
 * are within the content in the `article` itself instead
 * of outside of the `article` element.
 *
 * Removes, in place, every `nav` element whose class list contains
 * `theme-doc-breadcrumbs`. Does nothing unless `framework.vendor` is
 * `'docusaurus'`.
 *
 * @param node Root of the hast tree to clean; its descendants are mutated.
 * @returns `undefined`.
 */
export function removeBreadCrumbs(node) {
    // The vendor check does not depend on the visited node, so run it once
    // instead of once per element.
    if (framework.vendor !== 'docusaurus') {
        return undefined;
    }
    return visit(node, 'element', function (subNode, index, parent) {
        if (subNode.tagName !== 'nav' || !parent || typeof index !== 'number') {
            return undefined;
        }
        const classNames = subNode.properties.className;
        if (!Array.isArray(classNames) || !classNames.includes('theme-doc-breadcrumbs')) {
            return undefined;
        }
        parent.children.splice(index, 1);
        // Resume at the same index: the following sibling has shifted into
        // this slot and would be skipped if traversal moved on to index + 1.
        return index;
    });
}
26
+ //# sourceMappingURL=breadcrumbs.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"breadcrumbs.js","sourceRoot":"","sources":["../../src/utils/breadcrumbs.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAEzC,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAEjD,MAAM,UAAU,wBAAwB;IACtC,OAAO,UAAU,IAAa;QAC5B,OAAO,iBAAiB,CAAC,IAAI,CAAC,CAAC;IACjC,CAAC,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAa;IAC7C,OAAO,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,OAAO,EAAE,KAAK,EAAE,MAAM;QAC5D,IACE,SAAS,CAAC,MAAM,KAAK,YAAY;YACjC,OAAO,CAAC,OAAO,KAAK,KAAK;YACzB,OAAO,CAAC,UAAU,CAAC,SAAS;YAC5B,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC;YAC3C,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,uBAAuB,CAAC;YAC9D,MAAM;YACN,OAAO,KAAK,KAAK,QAAQ,EACzB,CAAC;YACD,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QACnC,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -1,3 +1,8 @@
1
1
  import type { Element } from 'hast';
2
2
  export declare function unifiedRemoveBreaks(): (node: Element) => undefined;
3
+ /**
4
+ * ReadMe-specific function since they use breaks in between
5
+ * every element, but either way our parser adds whitespace
6
+ * automatically
7
+ */
3
8
  export declare function removeBreaks(node: Element): undefined;
@@ -4,9 +4,11 @@ export function unifiedRemoveBreaks() {
4
4
  return removeBreaks(node);
5
5
  };
6
6
  }
7
- // ReadMe-specific function since they use breaks in between
8
- // every element, but either way our parser adds whitespace
9
- // automatically
7
+ /**
8
+ * ReadMe-specific function since they use breaks in between
9
+ * every element, but either way our parser adds whitespace
10
+ * automatically
11
+ */
10
12
  export function removeBreaks(node) {
11
13
  return visit(node, 'element', function (subNode, index, parent) {
12
14
  if (subNode.tagName === 'br' && parent && typeof index === 'number') {
@@ -1 +1 @@
1
- {"version":3,"file":"breaks.js","sourceRoot":"","sources":["../../src/utils/breaks.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAEzC,MAAM,UAAU,mBAAmB;IACjC,OAAO,UAAU,IAAa;QAC5B,OAAO,YAAY,CAAC,IAAI,CAAC,CAAC;IAC5B,CAAC,CAAC;AACJ,CAAC;AAED,4DAA4D;AAC5D,2DAA2D;AAC3D,gBAAgB;AAChB,MAAM,UAAU,YAAY,CAAC,IAAa;IACxC,OAAO,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,OAAO,EAAE,KAAK,EAAE,MAAM;QAC5D,IAAI,OAAO,CAAC,OAAO,KAAK,IAAI,IAAI,MAAM,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YACpE,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QACnC,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC"}
1
+ {"version":3,"file":"breaks.js","sourceRoot":"","sources":["../../src/utils/breaks.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAEzC,MAAM,UAAU,mBAAmB;IACjC,OAAO,UAAU,IAAa;QAC5B,OAAO,YAAY,CAAC,IAAI,CAAC,CAAC;IAC5B,CAAC,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,YAAY,CAAC,IAAa;IACxC,OAAO,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,OAAO,EAAE,KAAK,EAAE,MAAM;QAC5D,IAAI,OAAO,CAAC,OAAO,KAAK,IAAI,IAAI,MAAM,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YACpE,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QACnC,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,8 @@
1
+ import type { Element } from 'hast';
2
+ export declare function unifiedRemoveTableOfContents(): (node: Element) => undefined;
3
+ /**
4
+ * Docusaurus-specific function since their mobile ToC
5
+ * is within the content in the `article` itself instead
6
+ * of outside of the `article` element
7
+ */
8
+ export declare function removeTableOfContents(node: Element): undefined;
@@ -0,0 +1,26 @@
1
+ import { visit } from 'unist-util-visit';
2
+ import { framework } from './detectFramework.js';
3
/**
 * Plugin factory: builds a transformer that hands the tree it receives
 * to `removeTableOfContents` and returns that call's result.
 */
export function unifiedRemoveTableOfContents() {
    return (node) => removeTableOfContents(node);
}
8
/**
 * Docusaurus-specific function since their mobile ToC
 * is within the content in the `article` itself instead
 * of outside of the `article` element.
 *
 * Removes, in place, every `div` element whose class list contains
 * `theme-doc-toc-mobile`. Does nothing unless `framework.vendor` is
 * `'docusaurus'`.
 *
 * @param node Root of the hast tree to clean; its descendants are mutated.
 * @returns `undefined`.
 */
export function removeTableOfContents(node) {
    // The vendor check does not depend on the visited node, so run it once
    // instead of once per element.
    if (framework.vendor !== 'docusaurus') {
        return undefined;
    }
    return visit(node, 'element', function (subNode, index, parent) {
        if (subNode.tagName !== 'div' || !parent || typeof index !== 'number') {
            return undefined;
        }
        const classNames = subNode.properties.className;
        if (!Array.isArray(classNames) || !classNames.includes('theme-doc-toc-mobile')) {
            return undefined;
        }
        parent.children.splice(index, 1);
        // Resume at the same index: the following sibling has shifted into
        // this slot and would be skipped if traversal moved on to index + 1.
        return index;
    });
}
26
+ //# sourceMappingURL=toc.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"toc.js","sourceRoot":"","sources":["../../src/utils/toc.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAEzC,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAEjD,MAAM,UAAU,4BAA4B;IAC1C,OAAO,UAAU,IAAa;QAC5B,OAAO,qBAAqB,CAAC,IAAI,CAAC,CAAC;IACrC,CAAC,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,qBAAqB,CAAC,IAAa;IACjD,OAAO,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,OAAO,EAAE,KAAK,EAAE,MAAM;QAC5D,IACE,SAAS,CAAC,MAAM,KAAK,YAAY;YACjC,OAAO,CAAC,OAAO,KAAK,KAAK;YACzB,OAAO,CAAC,UAAU,CAAC,SAAS;YAC5B,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC;YAC3C,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,sBAAsB,CAAC;YAC7D,MAAM;YACN,OAAO,KAAK,KAAK,QAAQ,EACzB,CAAC;YACD,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QACnC,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mintlify/scraping",
3
- "version": "4.0.39",
3
+ "version": "4.0.40",
4
4
  "description": "Scrape documentation frameworks to Mintlify docs",
5
5
  "engines": {
6
6
  "node": ">=18.0.0"
@@ -78,5 +78,5 @@
78
78
  "typescript": "^5.5.3",
79
79
  "vitest": "^2.0.4"
80
80
  },
81
- "gitHead": "bfcb32c90db7521bbfa54a7c31d8fe77594425e7"
81
+ "gitHead": "f1d4cc0ef3018924aff0a5c69f4664df24824c37"
82
82
  }
@@ -37,10 +37,7 @@ export async function downloadImagesFromFile(
37
37
  const localRootPath = filename ? dirname(filename) : rootPath;
38
38
 
39
39
  const imageResults = await Promise.all(
40
- imageUrls.map(async (imageUrl) => {
41
- const result = await downloadImage(imageUrl, localRootPath);
42
- return result;
43
- })
40
+ imageUrls.map(async (imageUrl) => await downloadImage(imageUrl, localRootPath))
44
41
  );
45
42
 
46
43
  const imagePathsMap = new Map<string, string>(
@@ -49,7 +46,11 @@ export async function downloadImagesFromFile(
49
46
 
50
47
  visit(root, function (node, index, parent) {
51
48
  if (node.type === 'image') {
52
- node.url = imagePathsMap.get(node.url) ?? node.url;
49
+ if (node.url.startsWith('/')) {
50
+ node.url = imagePathsMap.get(new URL(node.url, url.origin).toString()) ?? node.url;
51
+ } else {
52
+ node.url = imagePathsMap.get(node.url) ?? node.url;
53
+ }
53
54
  if (parent && typeof index === 'number') parent.children[index] = node;
54
55
  } else if (node.type === 'mdxJsxFlowElement') {
55
56
  const urlAttr = (node.attributes as Array<MdxJsxAttribute>).find(
@@ -21,6 +21,7 @@ import { rehypeToRemarkCustomComponents } from '../customComponents/plugin.js';
21
21
  import { selectiveRehypeRemark } from '../customComponents/selective.js';
22
22
  import { retrieveRootContent } from '../root/retrieve.js';
23
23
  import type { Result } from '../types/result.js';
24
+ import { unifiedRemoveBreadCrumbs } from '../utils/breadcrumbs.js';
24
25
  import { unifiedRemoveBreaks } from '../utils/breaks.js';
25
26
  import { unifiedRemoveClassNames } from '../utils/className.js';
26
27
  import { unifiedRemoveCopyButtons } from '../utils/copyButton.js';
@@ -39,6 +40,7 @@ import { unifiedRemovePositions } from '../utils/position.js';
39
40
  import { removeLeadingSlash, removeTrailingSlash } from '../utils/strings.js';
40
41
  import { remarkRemoveCodeBlocksInCells } from '../utils/tableCells.js';
41
42
  import { getDescriptionFromRoot, getTitleFromHeading } from '../utils/title.js';
43
+ import { unifiedRemoveTableOfContents } from '../utils/toc.js';
42
44
  import { remarkRemoveUpdatedAt } from '../utils/updatedAt.js';
43
45
  import { downloadImagesFromFile } from './images.js';
44
46
  import { htmlToHast } from './root.js';
@@ -78,6 +80,8 @@ export async function scrapePage(
78
80
 
79
81
  const mdastTree: MdastRoot = unified()
80
82
  .use(unifiedRemoveBreaks)
83
+ .use(unifiedRemoveBreadCrumbs)
84
+ .use(unifiedRemoveTableOfContents)
81
85
  .use(unifiedRemoveCopyButtons)
82
86
  .use(createCard)
83
87
  .use(createAccordion)
@@ -0,0 +1,31 @@
1
+ import type { Element } from 'hast';
2
+ import { visit } from 'unist-util-visit';
3
+
4
+ import { framework } from './detectFramework.js';
5
+
6
/**
 * Plugin factory: builds a transformer that hands the tree it receives
 * to `removeBreadCrumbs` and returns that call's result.
 */
export function unifiedRemoveBreadCrumbs() {
  return (node: Element) => removeBreadCrumbs(node);
}
11
+
12
/**
 * Docusaurus-specific function since their breadcrumbs
 * are within the content in the `article` itself instead
 * of outside of the `article` element.
 *
 * Removes, in place, every `nav` element whose class list contains
 * `theme-doc-breadcrumbs`. Does nothing unless `framework.vendor` is
 * `'docusaurus'`.
 *
 * @param node - Root of the hast tree to clean; its descendants are mutated.
 * @returns `undefined`.
 */
export function removeBreadCrumbs(node: Element) {
  // The vendor check does not depend on the visited node, so run it once
  // instead of once per element.
  if (framework.vendor !== 'docusaurus') {
    return undefined;
  }

  return visit(node, 'element', function (subNode, index, parent) {
    if (subNode.tagName !== 'nav' || !parent || typeof index !== 'number') {
      return undefined;
    }

    const classNames = subNode.properties.className;
    if (!Array.isArray(classNames) || !classNames.includes('theme-doc-breadcrumbs')) {
      return undefined;
    }

    parent.children.splice(index, 1);
    // Resume at the same index: the following sibling has shifted into
    // this slot and would be skipped if traversal moved on to index + 1.
    return index;
  });
}
@@ -7,9 +7,11 @@ export function unifiedRemoveBreaks() {
7
7
  };
8
8
  }
9
9
 
10
- // ReadMe-specific function since they use breaks in between
11
- // every element, but either way our parser adds whitespace
12
- // automatically
10
+ /**
11
+ * ReadMe-specific function since they use breaks in between
12
+ * every element, but either way our parser adds whitespace
13
+ * automatically
14
+ */
13
15
  export function removeBreaks(node: Element) {
14
16
  return visit(node, 'element', function (subNode, index, parent) {
15
17
  if (subNode.tagName === 'br' && parent && typeof index === 'number') {
@@ -0,0 +1,31 @@
1
+ import type { Element } from 'hast';
2
+ import { visit } from 'unist-util-visit';
3
+
4
+ import { framework } from './detectFramework.js';
5
+
6
/**
 * Plugin factory: builds a transformer that hands the tree it receives
 * to `removeTableOfContents` and returns that call's result.
 */
export function unifiedRemoveTableOfContents() {
  return (node: Element) => removeTableOfContents(node);
}
11
+
12
/**
 * Docusaurus-specific function since their mobile ToC
 * is within the content in the `article` itself instead
 * of outside of the `article` element.
 *
 * Removes, in place, every `div` element whose class list contains
 * `theme-doc-toc-mobile`. Does nothing unless `framework.vendor` is
 * `'docusaurus'`.
 *
 * @param node - Root of the hast tree to clean; its descendants are mutated.
 * @returns `undefined`.
 */
export function removeTableOfContents(node: Element) {
  // The vendor check does not depend on the visited node, so run it once
  // instead of once per element.
  if (framework.vendor !== 'docusaurus') {
    return undefined;
  }

  return visit(node, 'element', function (subNode, index, parent) {
    if (subNode.tagName !== 'div' || !parent || typeof index !== 'number') {
      return undefined;
    }

    const classNames = subNode.properties.className;
    if (!Array.isArray(classNames) || !classNames.includes('theme-doc-toc-mobile')) {
      return undefined;
    }

    parent.children.splice(index, 1);
    // Resume at the same index: the following sibling has shifted into
    // this slot and would be skipped if traversal moved on to index + 1.
    return index;
  });
}