@mintlify/scraping 4.0.5 → 4.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. package/bin/components/AccordionGroup.d.ts +3 -3
  2. package/bin/components/AccordionGroup.js +54 -27
  3. package/bin/components/AccordionGroup.js.map +1 -1
  4. package/bin/components/Card.js +3 -2
  5. package/bin/components/Card.js.map +1 -1
  6. package/bin/components/CardGroup.js +3 -6
  7. package/bin/components/CardGroup.js.map +1 -1
  8. package/bin/components/CodeGroup.d.ts +1 -1
  9. package/bin/components/CodeGroup.js +107 -79
  10. package/bin/components/CodeGroup.js.map +1 -1
  11. package/bin/components/Tabs.d.ts +1 -1
  12. package/bin/components/Tabs.js +50 -23
  13. package/bin/components/Tabs.js.map +1 -1
  14. package/bin/constants.js +3 -3
  15. package/bin/constants.js.map +1 -1
  16. package/bin/nav/listItems.js +0 -1
  17. package/bin/nav/listItems.js.map +1 -1
  18. package/bin/scrapingPipeline/color.d.ts +8 -0
  19. package/bin/scrapingPipeline/color.js +91 -0
  20. package/bin/scrapingPipeline/color.js.map +1 -0
  21. package/bin/scrapingPipeline/group.js +1 -3
  22. package/bin/scrapingPipeline/group.js.map +1 -1
  23. package/bin/scrapingPipeline/icon.d.ts +1 -1
  24. package/bin/scrapingPipeline/icon.js +7 -6
  25. package/bin/scrapingPipeline/icon.js.map +1 -1
  26. package/bin/scrapingPipeline/logo.js +13 -9
  27. package/bin/scrapingPipeline/logo.js.map +1 -1
  28. package/bin/scrapingPipeline/page.js +28 -9
  29. package/bin/scrapingPipeline/page.js.map +1 -1
  30. package/bin/scrapingPipeline/site.js +64 -7
  31. package/bin/scrapingPipeline/site.js.map +1 -1
  32. package/bin/scrapingPipeline/tabs.js +15 -10
  33. package/bin/scrapingPipeline/tabs.js.map +1 -1
  34. package/bin/scrapingPipeline/title.d.ts +2 -0
  35. package/bin/scrapingPipeline/title.js +34 -0
  36. package/bin/scrapingPipeline/title.js.map +1 -0
  37. package/bin/tabs/retrieveReadme.js +0 -1
  38. package/bin/tabs/retrieveReadme.js.map +1 -1
  39. package/bin/tsconfig.build.tsbuildinfo +1 -1
  40. package/bin/types/result.d.ts +1 -0
  41. package/bin/utils/breaks.d.ts +3 -0
  42. package/bin/utils/breaks.js +17 -0
  43. package/bin/utils/breaks.js.map +1 -0
  44. package/bin/utils/children.js +9 -3
  45. package/bin/utils/children.js.map +1 -1
  46. package/bin/utils/className.d.ts +0 -1
  47. package/bin/utils/className.js +1 -1
  48. package/bin/utils/className.js.map +1 -1
  49. package/bin/utils/copyButton.d.ts +3 -0
  50. package/bin/utils/copyButton.js +30 -0
  51. package/bin/utils/copyButton.js.map +1 -0
  52. package/bin/utils/emptyEmphasis.d.ts +2 -0
  53. package/bin/utils/emptyEmphasis.js +18 -0
  54. package/bin/utils/emptyEmphasis.js.map +1 -0
  55. package/bin/utils/emptyParagraphs.d.ts +0 -1
  56. package/bin/utils/emptyParagraphs.js +1 -1
  57. package/bin/utils/emptyParagraphs.js.map +1 -1
  58. package/bin/utils/formatEmphasis.d.ts +2 -0
  59. package/bin/utils/formatEmphasis.js +32 -0
  60. package/bin/utils/formatEmphasis.js.map +1 -0
  61. package/bin/utils/images.js +9 -1
  62. package/bin/utils/images.js.map +1 -1
  63. package/bin/utils/lists.d.ts +2 -0
  64. package/bin/utils/lists.js +21 -0
  65. package/bin/utils/lists.js.map +1 -0
  66. package/bin/utils/log.d.ts +17 -0
  67. package/bin/utils/log.js +15 -5
  68. package/bin/utils/log.js.map +1 -1
  69. package/bin/utils/metadata.d.ts +2 -0
  70. package/bin/utils/metadata.js +23 -0
  71. package/bin/utils/metadata.js.map +1 -0
  72. package/bin/utils/nestedRoots.d.ts +0 -1
  73. package/bin/utils/nestedRoots.js +1 -1
  74. package/bin/utils/nestedRoots.js.map +1 -1
  75. package/bin/utils/position.d.ts +0 -1
  76. package/bin/utils/position.js +1 -1
  77. package/bin/utils/position.js.map +1 -1
  78. package/bin/utils/tableCells.d.ts +2 -0
  79. package/bin/utils/tableCells.js +22 -0
  80. package/bin/utils/tableCells.js.map +1 -0
  81. package/bin/utils/title.d.ts +1 -0
  82. package/bin/utils/title.js +9 -3
  83. package/bin/utils/title.js.map +1 -1
  84. package/bin/utils/updatedAt.d.ts +2 -0
  85. package/bin/utils/updatedAt.js +21 -0
  86. package/bin/utils/updatedAt.js.map +1 -0
  87. package/package.json +2 -2
  88. package/src/components/AccordionGroup.ts +55 -25
  89. package/src/components/Card.ts +3 -2
  90. package/src/components/CardGroup.ts +3 -6
  91. package/src/components/CodeGroup.ts +127 -83
  92. package/src/components/Tabs.ts +57 -24
  93. package/src/constants.ts +3 -3
  94. package/src/nav/listItems.ts +1 -2
  95. package/src/scrapingPipeline/color.ts +107 -0
  96. package/src/scrapingPipeline/group.ts +1 -4
  97. package/src/scrapingPipeline/icon.ts +8 -6
  98. package/src/scrapingPipeline/logo.ts +14 -9
  99. package/src/scrapingPipeline/page.ts +30 -9
  100. package/src/scrapingPipeline/site.ts +83 -7
  101. package/src/scrapingPipeline/tabs.ts +15 -13
  102. package/src/scrapingPipeline/title.ts +38 -0
  103. package/src/tabs/retrieveReadme.ts +1 -2
  104. package/src/types/result.ts +1 -1
  105. package/src/utils/breaks.ts +19 -0
  106. package/src/utils/children.ts +10 -3
  107. package/src/utils/className.ts +1 -1
  108. package/src/utils/copyButton.ts +35 -0
  109. package/src/utils/emptyEmphasis.ts +18 -0
  110. package/src/utils/emptyParagraphs.ts +1 -1
  111. package/src/utils/formatEmphasis.ts +37 -0
  112. package/src/utils/images.ts +13 -2
  113. package/src/utils/lists.ts +22 -0
  114. package/src/utils/log.ts +18 -5
  115. package/src/utils/metadata.ts +26 -0
  116. package/src/utils/nestedRoots.ts +1 -1
  117. package/src/utils/position.ts +1 -1
  118. package/src/utils/tableCells.ts +23 -0
  119. package/src/utils/title.ts +10 -4
  120. package/src/utils/updatedAt.ts +25 -0
  121. package/bin/utils/escape.d.ts +0 -2
  122. package/bin/utils/escape.js +0 -25
  123. package/bin/utils/escape.js.map +0 -1
  124. package/src/utils/escape.ts +0 -30
@@ -7,7 +7,7 @@ export function unifiedRemoveEmptyParagraphs() {
7
7
  };
8
8
  }
9
9
 
10
- export function removeEmptyParagraphs(node: MdxJsxFlowElement) {
10
+ function removeEmptyParagraphs(node: MdxJsxFlowElement) {
11
11
  return visit(node, 'paragraph', function (subNode, index, parent) {
12
12
  let emptyChildrenCount = 0;
13
13
  for (const child of subNode.children) {
@@ -0,0 +1,37 @@
1
+ import type { Root as MdastRoot } from 'mdast';
2
+ import { CONTINUE, visit } from 'unist-util-visit';
3
+
4
/**
 * Remark plugin factory.
 *
 * @returns a transformer that hands the mdast root it receives to
 *   `properlyFormatEmphasis` and returns that call's result.
 */
export function remarkProperlyFormatEmphasis() {
  return (root: MdastRoot) => properlyFormatEmphasis(root);
}
9
+
10
/**
 * Builds a new single-space text node.
 *
 * A fresh object is created for every insertion so that no two positions in
 * the tree share (and can accidentally co-mutate) the same node instance.
 */
function createSpaceNode() {
  return {
    type: 'text' as const,
    value: ' ',
  };
}

/**
 * Moves leading/trailing spaces out of `emphasis` and `strong` nodes.
 *
 * Only nodes whose single child is a text node are handled. When that text
 * starts or ends with a space, a standalone space text node is placed next to
 * the emphasis node on the corresponding side (unless the emphasis node is the
 * first/last child of its parent, where no sibling space is added) and the
 * inner text is trimmed.
 *
 * @param root mdast tree, mutated in place.
 */
function properlyFormatEmphasis(root: MdastRoot) {
  visit(root, ['emphasis', 'strong'], function (node, index, parent) {
    if (node.type !== 'emphasis' && node.type !== 'strong') return CONTINUE;
    if (!parent || typeof index !== 'number') return CONTINUE;

    const child = node.children[0];
    if (node.children.length !== 1 || !child || child.type !== 'text') return CONTINUE;

    const hasLeadingSpace = child.value.startsWith(' ');
    const hasTrailingSpace = child.value.endsWith(' ');
    if (!hasLeadingSpace && !hasTrailingSpace) return CONTINUE;

    // Capture the node's position before any insertion changes the array.
    const isFirstChild = index === 0;
    const isLastChild = index === parent.children.length - 1;

    // Insert the trailing space first: it goes after the node, so `index`
    // still points at the node when the leading space is inserted next.
    if (hasTrailingSpace && !isLastChild) {
      parent.children.splice(index + 1, 0, createSpaceNode());
    }
    if (hasLeadingSpace && !isFirstChild) {
      parent.children.splice(index, 0, createSpaceNode());
    }

    child.value = child.value.trim();
  });
}
@@ -84,7 +84,7 @@ export function getFilenameBeforeMetadata(src: string, ext: string): string {
84
84
  return src.slice(0, lengthUntilMetadata);
85
85
  }
86
86
 
87
- export function removeMetadataFromImageSrc(src: string) {
87
+ export function removeMetadataFromImageSrc(src: string): string {
88
88
  let filename = '';
89
89
  if (src.includes('gitbook/image')) {
90
90
  for (const ext of SUPPORTED_MEDIA_EXTENSIONS) {
@@ -93,8 +93,19 @@ export function removeMetadataFromImageSrc(src: string) {
93
93
  }
94
94
  }
95
95
  }
96
+
96
97
  if (!filename) {
97
- return src.split('#')[0]!.split('?')[0]!;
98
+ if (src.startsWith('http')) {
99
+ src = new URL(src).pathname;
100
+ }
101
+ filename =
102
+ decodeURIComponent(
103
+ src
104
+ .split('#')[0]!
105
+ .split('?')[0]!
106
+ .replace(/[\/]{2,}/g, '/')
107
+ ).replace(/(?:_{2,}|[\s%#&{}\\<>*?$!'":@+`|=])/g, '-') || 'image';
108
+ return filename;
98
109
  }
99
110
 
100
111
  return filename.split('%2F').slice(4).join('%2F');
@@ -0,0 +1,22 @@
1
+ import type { Root as MdastRoot } from 'mdast';
2
+ import { CONTINUE, visit } from 'unist-util-visit';
3
+
4
/**
 * Remark plugin factory.
 *
 * @returns a transformer that passes the mdast root to `spaceListsOut`
 *   and returns that call's result.
 */
export function remarkSpaceListsOut() {
  return (root: MdastRoot) => spaceListsOut(root);
}
9
+
10
// ReadMe-specific: their pages sometimes place `<ol>` elements directly
// after one another, so adjacent lists are separated here.
/**
 * Inserts a paragraph holding a single empty text node between any list and
 * an immediately preceding sibling list.
 *
 * @param root mdast tree, mutated in place.
 */
function spaceListsOut(root: MdastRoot) {
  return visit(root, 'list', (_, index, parent) => {
    if (!parent || typeof index !== 'number') return CONTINUE;

    const followsAnotherList = index > 0 && parent.children[index - 1]?.type === 'list';
    if (followsAnotherList) {
      parent.children.splice(index, 0, {
        type: 'paragraph',
        children: [{ type: 'text', value: '' }],
      });
    }
  });
}
package/src/utils/log.ts CHANGED
@@ -7,8 +7,21 @@ export const colors = {
7
7
  cyan: '\x1b[36m',
8
8
  default: '\x1b[0m',
9
9
  } as const;
10
+
11
+ const noColors = {
12
+ red: '',
13
+ green: '',
14
+ yellow: '',
15
+ blue: '',
16
+ magenta: '',
17
+ cyan: '',
18
+ default: '',
19
+ } as const;
20
+
10
21
  const statuses = ['success', 'failure', 'error', 'warn', 'warning', 'info'] as const;
11
22
 
23
+ export const activeColors = process.stdout.isTTY ? colors : noColors;
24
+
12
25
  export type Status = (typeof statuses)[number];
13
26
  export type Color = keyof typeof colors;
14
27
 
@@ -18,7 +31,7 @@ export const infoIcon = 'ⓘ ' as const;
18
31
  export const warningIcon = '⚠ ' as const;
19
32
 
20
33
  export function log(message: string | unknown, statusOrColor?: Status): void {
21
- let color: (typeof colors)[Color] = colors.blue;
34
+ let color: (typeof activeColors)[Color] = activeColors.blue;
22
35
  let statusMsg: string = 'INFO ';
23
36
  let icon: string = infoIcon;
24
37
 
@@ -49,28 +62,28 @@ export function log(message: string | unknown, statusOrColor?: Status): void {
49
62
  break;
50
63
 
51
64
  case 'success':
52
- color = colors.green;
65
+ color = activeColors.green;
53
66
  statusMsg = 'SUCCESS';
54
67
  icon = checkIcon;
55
68
  break;
56
69
 
57
70
  case 'warn':
58
71
  case 'warning':
59
- color = colors.yellow;
72
+ color = activeColors.yellow;
60
73
  statusMsg = 'WARNING';
61
74
  icon = warningIcon;
62
75
  break;
63
76
 
64
77
  case 'failure':
65
78
  case 'error':
66
- color = colors.red;
79
+ color = activeColors.red;
67
80
  statusMsg = 'ERROR ';
68
81
  icon = xIcon;
69
82
  break;
70
83
  }
71
84
 
72
85
  console.log(
73
- `${color}${icon} ${statusMsg}${colors.default} - ${
86
+ `${color}${icon} ${statusMsg}${activeColors.default} - ${
74
87
  typeof message === 'string' ||
75
88
  typeof message === 'bigint' ||
76
89
  typeof message === 'number' ||
@@ -0,0 +1,26 @@
1
+ import type { Root as MdastRoot } from 'mdast';
2
+ import { visit } from 'unist-util-visit';
3
+
4
/**
 * Remark plugin factory.
 *
 * @returns a transformer that passes the mdast root to
 *   `removeBottomMetadata` and returns that call's result.
 */
export function remarkRemoveBottomMetadata() {
  return (root: MdastRoot) => removeBottomMetadata(root);
}
9
+
10
/**
 * Strips trailing "updated … ago" metadata from the bottom of a document.
 *
 * First drops a trailing thematic break, if any. Then, if the (new) last
 * child is a paragraph containing a text node that starts with `Updated` or
 * `Last updated` and ends with `ago`, that paragraph is removed.
 *
 * Only the last paragraph is inspected, so matching text elsewhere in the
 * document never causes an unrelated closing paragraph to be deleted.
 *
 * @param root mdast tree, mutated in place.
 */
function removeBottomMetadata(root: MdastRoot) {
  if (root.children.at(-1)?.type === 'thematicBreak') {
    root.children.pop();
  }

  const lastChild = root.children.at(-1);
  if (lastChild?.type !== 'paragraph') return;

  // Widened on purpose: the flag is assigned inside the visitor closure,
  // which control-flow analysis does not follow.
  let shouldDelete = false as boolean;
  visit(lastChild, 'text', function (node) {
    if (
      (node.value.startsWith('Updated') || node.value.startsWith('Last updated')) &&
      node.value.endsWith('ago')
    ) {
      shouldDelete = true;
    }
  });

  if (shouldDelete) root.children.pop();
}
+ }
@@ -11,7 +11,7 @@ export function unifiedRemoveNestedRoots() {
11
11
  };
12
12
  }
13
13
 
14
- export function removeNestedRoots(root: MdastRoot) {
14
+ function removeNestedRoots(root: MdastRoot) {
15
15
  visit(root, 'root', function (node, _, parent) {
16
16
  // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
17
17
  if (!parent) return CONTINUE;
@@ -7,7 +7,7 @@ export function unifiedRemovePositions() {
7
7
  };
8
8
  }
9
9
 
10
- export function removePositions(node: Element) {
10
+ function removePositions(node: Element) {
11
11
  return visit(node, function (subNode) {
12
12
  delete subNode.position;
13
13
  });
@@ -0,0 +1,23 @@
1
+ import type { Code, Parent, InlineCode, Root as MdastRoot } from 'mdast';
2
+ import { CONTINUE, visit } from 'unist-util-visit';
3
+
4
/**
 * Remark plugin factory.
 *
 * @returns a transformer that passes the mdast root to
 *   `removeCodeBlocksInCells` and returns that call's result.
 */
export function remarkRemoveCodeBlocksInCells() {
  return (root: MdastRoot) => removeCodeBlocksInCells(root);
}
9
+
10
// ReadMe-specific: they allow `<pre>` blocks inside table cells. Other
// sources could do the same, but it has not been observed so far.
/**
 * Replaces every `code` node found inside a `tableCell` with an `inlineCode`
 * node carrying the same value.
 *
 * @param root mdast tree, mutated in place.
 */
function removeCodeBlocksInCells(root: MdastRoot) {
  visit(root, 'tableCell', function (cell) {
    visit(cell, 'code', function (codeNode: Code, index, parent: Parent | undefined) {
      if (!parent || typeof index !== 'number') return CONTINUE;

      const inlineReplacement: InlineCode = { type: 'inlineCode', value: codeNode.value };
      parent.children[index] = inlineReplacement;
    });
  });
}
+ }
@@ -4,7 +4,7 @@ import { visit, CONTINUE, EXIT } from 'unist-util-visit';
4
4
 
5
5
  export function findTitle(
6
6
  node: Element | ElementContent | BlockContent | MdastRoot | HastRoot | undefined,
7
- opts: { delete: boolean; nodeType?: string; tagName?: string } = {
7
+ opts: { delete: boolean; nodeType?: string; tagName?: string; escaped?: boolean } = {
8
8
  delete: true,
9
9
  nodeType: undefined,
10
10
  tagName: undefined,
@@ -25,7 +25,13 @@ export function findTitle(
25
25
  }
26
26
  });
27
27
  });
28
- return title.trim();
28
+
29
+ title = title.trim();
30
+ if (opts.escaped) {
31
+ return title.replace(/"/g, '\\"');
32
+ } else {
33
+ return title;
34
+ }
29
35
  }
30
36
 
31
37
  export function getTitleFromHeading(root: MdastRoot): string {
@@ -37,7 +43,7 @@ export function getTitleFromHeading(root: MdastRoot): string {
37
43
  }
38
44
  return EXIT;
39
45
  });
40
- return findTitle(headingElement);
46
+ return findTitle(headingElement, { delete: true, escaped: true });
41
47
  }
42
48
 
43
49
  export function getDescriptionFromRoot(root: MdastRoot): string {
@@ -51,7 +57,7 @@ export function getDescriptionFromRoot(root: MdastRoot): string {
51
57
  }
52
58
  return EXIT;
53
59
  });
54
- return findTitle(descriptionElement);
60
+ return findTitle(descriptionElement, { delete: true, escaped: true });
55
61
  }
56
62
 
57
63
  export function getTitleFromLink(url: string): string {
@@ -0,0 +1,25 @@
1
+ import type { Root as MdastRoot } from 'mdast';
2
+ import { CONTINUE, visit } from 'unist-util-visit';
3
+
4
/**
 * Remark plugin factory.
 *
 * @returns a transformer that passes the mdast root to `removeUpdatedAt`
 *   and returns that call's result.
 */
export function remarkRemoveUpdatedAt() {
  return (root: MdastRoot) => removeUpdatedAt(root);
}
9
+
10
/**
 * Removes "Updated … ago" / "Last updated … ago" text nodes from paragraphs.
 *
 * The text is trimmed once and that trimmed value is used for both the prefix
 * and the `ago` suffix check, so surrounding whitespace does not prevent a
 * match.
 *
 * @param root mdast tree, mutated in place.
 */
function removeUpdatedAt(root: MdastRoot) {
  visit(root, 'paragraph', function (node) {
    visit(node, 'text', function (subNode, index, parent) {
      const text = subNode.value.trim();
      if (
        (text.startsWith('Updated') || text.startsWith('Last updated')) &&
        text.endsWith('ago')
      ) {
        if (parent && typeof index === 'number') {
          parent.children.splice(index, 1);
          // Resume at the same index: the next sibling has shifted into it.
          return [CONTINUE, index];
        }
      }
    });
  });
}
@@ -1,2 +0,0 @@
1
- import type { Root } from 'hast';
2
- export declare function escapeCharactersOutsideCodeBlocks(): (tree: Root) => void;
@@ -1,25 +0,0 @@
1
- import { CONTINUE, SKIP, visit } from 'unist-util-visit';
2
- export function escapeCharactersOutsideCodeBlocks() {
3
- const charsToEscape = ['<', '{', '`'];
4
- const escapeRegex = new RegExp(`[${charsToEscape.join('')}]`, 'g');
5
- return function (tree) {
6
- visit(tree, function (node) {
7
- if (node.type === 'element') {
8
- if (node.tagName === 'pre' || node.tagName === 'code') {
9
- return SKIP;
10
- }
11
- }
12
- if (node.type === 'text') {
13
- const escapedText = node.value.replace(escapeRegex, (match) => {
14
- return `\\${match}`;
15
- });
16
- if (escapedText !== node.value) {
17
- node.value = escapedText;
18
- return CONTINUE;
19
- }
20
- }
21
- return CONTINUE;
22
- });
23
- };
24
- }
25
- //# sourceMappingURL=escape.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"escape.js","sourceRoot":"","sources":["../../src/utils/escape.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAEzD,MAAM,UAAU,iCAAiC;IAC/C,MAAM,aAAa,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;IACtC,MAAM,WAAW,GAAG,IAAI,MAAM,CAAC,IAAI,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IAEnE,OAAO,UAAU,IAAU;QACzB,KAAK,CAAC,IAAI,EAAE,UAAU,IAAI;YACxB,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;gBAC5B,IAAI,IAAI,CAAC,OAAO,KAAK,KAAK,IAAI,IAAI,CAAC,OAAO,KAAK,MAAM,EAAE,CAAC;oBACtD,OAAO,IAAI,CAAC;gBACd,CAAC;YACH,CAAC;YAED,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBACzB,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,KAAK,EAAE,EAAE;oBAC5D,OAAO,KAAK,KAAK,EAAE,CAAC;gBACtB,CAAC,CAAC,CAAC;gBAEH,IAAI,WAAW,KAAK,IAAI,CAAC,KAAK,EAAE,CAAC;oBAC/B,IAAI,CAAC,KAAK,GAAG,WAAW,CAAC;oBACzB,OAAO,QAAQ,CAAC;gBAClB,CAAC;YACH,CAAC;YAED,OAAO,QAAQ,CAAC;QAClB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC;AACJ,CAAC"}
@@ -1,30 +0,0 @@
1
- import type { Root } from 'hast';
2
- import { CONTINUE, SKIP, visit } from 'unist-util-visit';
3
-
4
- export function escapeCharactersOutsideCodeBlocks() {
5
- const charsToEscape = ['<', '{', '`'];
6
- const escapeRegex = new RegExp(`[${charsToEscape.join('')}]`, 'g');
7
-
8
- return function (tree: Root) {
9
- visit(tree, function (node) {
10
- if (node.type === 'element') {
11
- if (node.tagName === 'pre' || node.tagName === 'code') {
12
- return SKIP;
13
- }
14
- }
15
-
16
- if (node.type === 'text') {
17
- const escapedText = node.value.replace(escapeRegex, (match) => {
18
- return `\\${match}`;
19
- });
20
-
21
- if (escapedText !== node.value) {
22
- node.value = escapedText;
23
- return CONTINUE;
24
- }
25
- }
26
-
27
- return CONTINUE;
28
- });
29
- };
30
- }