@mintlify/scraping 4.0.5 → 4.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/bin/components/AccordionGroup.d.ts +3 -3
  2. package/bin/components/AccordionGroup.js +54 -27
  3. package/bin/components/AccordionGroup.js.map +1 -1
  4. package/bin/components/Card.js +3 -2
  5. package/bin/components/Card.js.map +1 -1
  6. package/bin/components/CardGroup.js +3 -6
  7. package/bin/components/CardGroup.js.map +1 -1
  8. package/bin/components/CodeGroup.d.ts +1 -1
  9. package/bin/components/CodeGroup.js +107 -79
  10. package/bin/components/CodeGroup.js.map +1 -1
  11. package/bin/components/Tabs.d.ts +1 -1
  12. package/bin/components/Tabs.js +50 -23
  13. package/bin/components/Tabs.js.map +1 -1
  14. package/bin/constants.js +3 -3
  15. package/bin/constants.js.map +1 -1
  16. package/bin/nav/listItems.js +0 -1
  17. package/bin/nav/listItems.js.map +1 -1
  18. package/bin/scrapingPipeline/color.d.ts +8 -0
  19. package/bin/scrapingPipeline/color.js +91 -0
  20. package/bin/scrapingPipeline/color.js.map +1 -0
  21. package/bin/scrapingPipeline/group.js +1 -3
  22. package/bin/scrapingPipeline/group.js.map +1 -1
  23. package/bin/scrapingPipeline/icon.d.ts +1 -1
  24. package/bin/scrapingPipeline/icon.js +7 -6
  25. package/bin/scrapingPipeline/icon.js.map +1 -1
  26. package/bin/scrapingPipeline/logo.js +13 -9
  27. package/bin/scrapingPipeline/logo.js.map +1 -1
  28. package/bin/scrapingPipeline/page.js +28 -9
  29. package/bin/scrapingPipeline/page.js.map +1 -1
  30. package/bin/scrapingPipeline/site.js +64 -7
  31. package/bin/scrapingPipeline/site.js.map +1 -1
  32. package/bin/scrapingPipeline/tabs.js +15 -10
  33. package/bin/scrapingPipeline/tabs.js.map +1 -1
  34. package/bin/scrapingPipeline/title.d.ts +2 -0
  35. package/bin/scrapingPipeline/title.js +34 -0
  36. package/bin/scrapingPipeline/title.js.map +1 -0
  37. package/bin/tabs/retrieveReadme.js +0 -1
  38. package/bin/tabs/retrieveReadme.js.map +1 -1
  39. package/bin/tsconfig.build.tsbuildinfo +1 -1
  40. package/bin/types/result.d.ts +1 -0
  41. package/bin/utils/breaks.d.ts +3 -0
  42. package/bin/utils/breaks.js +17 -0
  43. package/bin/utils/breaks.js.map +1 -0
  44. package/bin/utils/children.js +9 -3
  45. package/bin/utils/children.js.map +1 -1
  46. package/bin/utils/className.d.ts +0 -1
  47. package/bin/utils/className.js +1 -1
  48. package/bin/utils/className.js.map +1 -1
  49. package/bin/utils/copyButton.d.ts +3 -0
  50. package/bin/utils/copyButton.js +30 -0
  51. package/bin/utils/copyButton.js.map +1 -0
  52. package/bin/utils/emptyEmphasis.d.ts +2 -0
  53. package/bin/utils/emptyEmphasis.js +18 -0
  54. package/bin/utils/emptyEmphasis.js.map +1 -0
  55. package/bin/utils/emptyParagraphs.d.ts +0 -1
  56. package/bin/utils/emptyParagraphs.js +1 -1
  57. package/bin/utils/emptyParagraphs.js.map +1 -1
  58. package/bin/utils/formatEmphasis.d.ts +2 -0
  59. package/bin/utils/formatEmphasis.js +32 -0
  60. package/bin/utils/formatEmphasis.js.map +1 -0
  61. package/bin/utils/images.js +9 -1
  62. package/bin/utils/images.js.map +1 -1
  63. package/bin/utils/lists.d.ts +2 -0
  64. package/bin/utils/lists.js +21 -0
  65. package/bin/utils/lists.js.map +1 -0
  66. package/bin/utils/log.d.ts +17 -0
  67. package/bin/utils/log.js +15 -5
  68. package/bin/utils/log.js.map +1 -1
  69. package/bin/utils/metadata.d.ts +2 -0
  70. package/bin/utils/metadata.js +23 -0
  71. package/bin/utils/metadata.js.map +1 -0
  72. package/bin/utils/nestedRoots.d.ts +0 -1
  73. package/bin/utils/nestedRoots.js +1 -1
  74. package/bin/utils/nestedRoots.js.map +1 -1
  75. package/bin/utils/position.d.ts +0 -1
  76. package/bin/utils/position.js +1 -1
  77. package/bin/utils/position.js.map +1 -1
  78. package/bin/utils/tableCells.d.ts +2 -0
  79. package/bin/utils/tableCells.js +22 -0
  80. package/bin/utils/tableCells.js.map +1 -0
  81. package/bin/utils/title.d.ts +1 -0
  82. package/bin/utils/title.js +9 -3
  83. package/bin/utils/title.js.map +1 -1
  84. package/bin/utils/updatedAt.d.ts +2 -0
  85. package/bin/utils/updatedAt.js +21 -0
  86. package/bin/utils/updatedAt.js.map +1 -0
  87. package/package.json +2 -2
  88. package/src/components/AccordionGroup.ts +55 -25
  89. package/src/components/Card.ts +3 -2
  90. package/src/components/CardGroup.ts +3 -6
  91. package/src/components/CodeGroup.ts +127 -83
  92. package/src/components/Tabs.ts +57 -24
  93. package/src/constants.ts +3 -3
  94. package/src/nav/listItems.ts +1 -2
  95. package/src/scrapingPipeline/color.ts +107 -0
  96. package/src/scrapingPipeline/group.ts +1 -4
  97. package/src/scrapingPipeline/icon.ts +8 -6
  98. package/src/scrapingPipeline/logo.ts +14 -9
  99. package/src/scrapingPipeline/page.ts +30 -9
  100. package/src/scrapingPipeline/site.ts +83 -7
  101. package/src/scrapingPipeline/tabs.ts +15 -13
  102. package/src/scrapingPipeline/title.ts +38 -0
  103. package/src/tabs/retrieveReadme.ts +1 -2
  104. package/src/types/result.ts +1 -1
  105. package/src/utils/breaks.ts +19 -0
  106. package/src/utils/children.ts +10 -3
  107. package/src/utils/className.ts +1 -1
  108. package/src/utils/copyButton.ts +35 -0
  109. package/src/utils/emptyEmphasis.ts +18 -0
  110. package/src/utils/emptyParagraphs.ts +1 -1
  111. package/src/utils/formatEmphasis.ts +37 -0
  112. package/src/utils/images.ts +13 -2
  113. package/src/utils/lists.ts +22 -0
  114. package/src/utils/log.ts +18 -5
  115. package/src/utils/metadata.ts +26 -0
  116. package/src/utils/nestedRoots.ts +1 -1
  117. package/src/utils/position.ts +1 -1
  118. package/src/utils/tableCells.ts +23 -0
  119. package/src/utils/title.ts +10 -4
  120. package/src/utils/updatedAt.ts +25 -0
  121. package/bin/utils/escape.d.ts +0 -2
  122. package/bin/utils/escape.js +0 -25
  123. package/bin/utils/escape.js.map +0 -1
  124. package/src/utils/escape.ts +0 -30
@@ -0,0 +1,107 @@
1
+ import { Colors } from '@mintlify/models';
2
+ import type { Root as HastRoot } from 'hast';
3
+ import { CONTINUE, visit } from 'unist-util-visit';
4
+
5
+ import { framework } from '../utils/detectFramework.js';
6
+
7
/**
 * Converts a single colour channel to a two-character lowercase hex string.
 *
 * The value is rounded to the nearest integer and left-padded with `0`, so
 * channels below 16 still produce two characters (e.g. `13` -> `"0d"`).
 *
 * @param value - Channel value; callers in this file pass numbers that have
 *   already been bounds-checked to 0–255.
 * @returns The channel rendered in base 16.
 */
function toHex(value: number): string {
  // The original body computed this expression but never returned it.
  return Math.round(value).toString(16).padStart(2, '0');
}
10
+
11
/**
 * Tells whether `str` is a `#`-prefixed hex colour with exactly three or six
 * hexadecimal digits.
 *
 * @param str - Candidate colour string; `undefined` and `''` are rejected.
 * @returns `true` only for `#RGB` or `#RRGGBB` (either letter case).
 */
function checkValidHex(str: string | undefined): boolean {
  const hexPattern = /^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})$/;
  return str ? hexPattern.test(str) : false;
}
15
+
16
/**
 * Verifies that none of the given channel values falls outside 0–255.
 *
 * @param numbers - Channel values to check; an empty list is accepted.
 * @returns `false` as soon as any value is below 0 or above 255, else `true`.
 */
function checkRgbBounds(...numbers: Array<number>): boolean {
  const isOutOfRange = (channel: number): boolean => channel < 0 || channel > 255;
  return !numbers.some(isOutOfRange);
}
22
+
23
/**
 * Normalises a CSS colour value to a `#RRGGBB` hex string.
 *
 * Accepted inputs:
 * - a value that already passes `checkValidHex` (returned as-is after trimming),
 * - space-separated channels with an optional fourth number: `13 147 115`,
 * - `rgb(r,g,b)` / `rgba(r,g,b,a)`, with or without whitespace around the
 *   commas and parentheses content.
 *
 * Any alpha component is ignored.
 *
 * @param color - Raw colour value, e.g. as read from a stylesheet.
 * @returns The upper-cased hex colour, or `undefined` when the value is not in
 *   one of the accepted forms or a channel is outside 0–255.
 */
function rgbToHex(color: string): string | undefined {
  // Trim before the hex check so surrounding whitespace does not reject a
  // value that is otherwise already a valid hex colour.
  const trimmed = color.trim();
  if (checkValidHex(trimmed)) return trimmed;

  const normalized = trimmed.toLowerCase();
  let channels: Array<number>;

  if (/^\d+\s+\d+\s+\d+(\s+[0-9.]+)?$/.test(normalized)) {
    // Space-separated form; drop the optional fourth (alpha) component.
    channels = normalized.split(/\s+/).slice(0, 3).map(Number);
  } else {
    // Functional form; `\s*` tolerates the conventional "rgb(13, 147, 115)" spacing.
    const values = normalized.match(
      /^rgba?\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*(?:,\s*([0-9.]+)\s*)?\)$/
    );
    if (!values) return undefined;
    channels = values.slice(1, 4).map(Number);
  }

  const [r, g, b] = channels;
  // Compare against undefined explicitly: 0 is a legitimate channel value and
  // must not be treated as "missing".
  if (r === undefined || g === undefined || b === undefined) return undefined;

  if (!checkRgbBounds(r, g, b)) return undefined;

  return `#${toHex(r)}${toHex(g)}${toHex(b)}`.toUpperCase();
}
47
+
48
/**
 * Looks up the first value that follows `key` in a CSS string.
 *
 * The key is matched case-insensitively and must be followed by `:`, `|` or
 * `,`; the value runs up to (not including) the next `;` or `)`.
 *
 * @param cssString - Stylesheet text to search.
 * @param key - Property name to look for; it is inserted into a regular
 *   expression verbatim.
 * @returns The trimmed value, or `undefined` when the key is not found.
 */
function getCssValue(cssString: string, key: string): string | undefined {
  const found = new RegExp(`${key}\\s*[:|,]\\s*([^;)]+)`, 'i').exec(cssString);
  if (found === null) return undefined;

  const captured = found[1];
  if (!captured) return undefined;

  return captured.trim();
}
53
+
54
+ export const defaultColors = {
55
+ primary: '#0D9373',
56
+ light: '#55D799',
57
+ dark: '#0D9373',
58
+ };
59
+
60
/**
 * Extracts the site's theme colours from inline `<style>` elements of a page.
 *
 * For `docusaurus` the defaults are returned without inspecting the tree. For
 * `readme` only the `<style title="rm-custom-css">` element is considered and
 * both colours are read from `--color-link-primary`; for `gitbook` only
 * `<style>` elements without any properties are considered. For every vendor
 * other than `readme` the keys `--primary-color-600` (primary) and
 * `--primary-color-400` (light) are used.
 *
 * When several stylesheets qualify, the last one whose primary colour can be
 * converted wins. A stylesheet whose primary colour cannot be converted is
 * ignored instead of wiping out an earlier successful extraction.
 *
 * @param hast - HAST root of the scraped page.
 * @returns `{ primary, light, dark }` when both colours are valid,
 *   `{ primary, dark }` when only the primary is, and the shared
 *   `defaultColors` object otherwise.
 */
export async function downloadColors(hast: HastRoot): Promise<Colors> {
  if (framework.vendor === 'docusaurus') return defaultColors;

  const isReadme = framework.vendor === 'readme';
  const primaryColorKey = isReadme ? '--color-link-primary' : '--primary-color-600';
  const lightColorKey = isReadme ? '--color-link-primary' : '--primary-color-400';

  // Held in an object so assignments made inside the visitor callback remain
  // visible to the type checker after `visit` returns.
  const extracted: { primary: string | undefined; light: string | undefined } = {
    primary: undefined,
    light: undefined,
  };

  visit(hast, 'element', function (node) {
    if (node.tagName !== 'style') return CONTINUE;
    if (
      (framework.vendor === 'gitbook' && !!Object.keys(node.properties).length) ||
      (framework.vendor === 'readme' && node.properties.title !== 'rm-custom-css')
    )
      return CONTINUE;

    // Only plain stylesheets consisting of a single text node are parsed.
    if (node.children.length !== 1 || !node.children[0] || node.children[0].type !== 'text')
      return CONTINUE;

    const cssStr = node.children[0].value;
    const primaryCssColorValue = getCssValue(cssStr, primaryColorKey);
    const lightCssColorValue = getCssValue(cssStr, lightColorKey);
    if (!primaryCssColorValue || !lightCssColorValue) return CONTINUE;

    const primaryHex = rgbToHex(primaryCssColorValue);
    // Do not let an unconvertible value clobber colours found earlier.
    if (primaryHex === undefined) return CONTINUE;

    // Primary and light are always taken from the same stylesheet.
    extracted.primary = primaryHex;
    extracted.light = rgbToHex(lightCssColorValue);
    return CONTINUE;
  });

  const { primary, light } = extracted;

  if (primary === undefined || !checkValidHex(primary)) return defaultColors;

  if (light !== undefined && checkValidHex(light)) {
    return { primary, light, dark: primary };
  }

  return { primary, dark: primary };
}
@@ -52,11 +52,8 @@ export async function scrapePageGroup(
52
52
  navGroup[0]?.origin ?? 'the URL provided'
53
53
  }${errorMessage}`
54
54
  );
55
- console.error(error);
56
55
  throw error;
57
56
  } finally {
58
- if (browser) {
59
- await browser.close();
60
- }
57
+ if (browser) await browser.close();
61
58
  }
62
59
  }
@@ -3,24 +3,26 @@ import { EXIT, visit } from 'unist-util-visit';
3
3
 
4
4
  import { downloadImage } from '../utils/images.js';
5
5
 
6
- export async function downloadFavicon(hast: HastRoot): Promise<string | undefined> {
6
+ export async function downloadFavicon(hast: HastRoot): Promise<string> {
7
7
  let src: string = '';
8
8
  visit(hast, 'element', function (node) {
9
9
  if (
10
10
  node.tagName === 'link' &&
11
- typeof node.properties.rel === 'string' &&
12
- (node.properties.rel === 'icon' || node.properties.rel === 'shortcut icon')
11
+ Array.isArray(node.properties.rel) &&
12
+ node.properties.rel.includes('icon')
13
13
  ) {
14
14
  src = node.properties.href as string;
15
15
  return EXIT;
16
16
  }
17
17
  });
18
18
 
19
- if (!src) return undefined;
19
+ if (!src) {
20
+ return '/favicon.svg';
21
+ }
20
22
 
21
23
  const res = await downloadImage(src, process.cwd());
22
- if (!res.success) return undefined;
23
- if (!res.data) return undefined;
24
+ if (!res.success) return '/favicon.svg';
25
+ if (!res.data) return '/favicon.svg';
24
26
 
25
27
  return res.data[1];
26
28
  }
@@ -88,17 +88,24 @@ export async function downloadLogos(
88
88
  ): Promise<string | { light: string; dark: string } | undefined> {
89
89
  url = new URL(url);
90
90
  const filepaths: Array<string> = [];
91
+
91
92
  if (browser) {
92
93
  const htmls: Array<string> = [];
93
94
 
94
- const page = await browser.newPage();
95
- await page.goto(url.toString(), {
96
- waitUntil: 'networkidle2',
97
- });
95
+ try {
96
+ const page = await browser.newPage();
97
+ await page.goto(url.toString(), {
98
+ waitUntil: 'networkidle2',
99
+ });
100
+
101
+ htmls.push(await page.content());
102
+ await page.click('.rm-ThemeToggle');
103
+ htmls.push(await page.content());
104
+ } catch {
105
+ // do nothing, it just means there's no theme toggle
106
+ }
98
107
 
99
- htmls.push(await page.content());
100
- await page.click('.rm-ThemeToggle');
101
- htmls.push(await page.content());
108
+ await browser.close();
102
109
 
103
110
  await Promise.all(
104
111
  htmls.map(async (html) => {
@@ -119,8 +126,6 @@ export async function downloadLogos(
119
126
  }
120
127
  }
121
128
 
122
- if (browser) await browser.close();
123
-
124
129
  const uniqueFilepaths = [...new Set(filepaths).values()];
125
130
 
126
131
  return uniqueFilepaths.length === 1
@@ -21,18 +21,25 @@ import { rehypeToRemarkCustomComponents } from '../customComponents/plugin.js';
21
21
  import { selectiveRehypeRemark } from '../customComponents/selective.js';
22
22
  import { retrieveRootContent } from '../root/retrieve.js';
23
23
  import type { Result } from '../types/result.js';
24
+ import { unifiedRemoveBreaks } from '../utils/breaks.js';
24
25
  import { unifiedRemoveClassNames } from '../utils/className.js';
26
+ import { unifiedRemoveCopyButtons } from '../utils/copyButton.js';
25
27
  import { detectFramework, framework } from '../utils/detectFramework.js';
28
+ import { remarkRemoveEmptyEmphases } from '../utils/emptyEmphasis.js';
26
29
  import { unifiedRemoveEmptyParagraphs } from '../utils/emptyParagraphs.js';
27
30
  import { getErrorMessage, logErrorResults } from '../utils/errors.js';
28
- import { escapeCharactersOutsideCodeBlocks } from '../utils/escape.js';
29
- import { write, writePage } from '../utils/file.js';
31
+ import { writePage } from '../utils/file.js';
32
+ import { remarkProperlyFormatEmphasis } from '../utils/formatEmphasis.js';
30
33
  import { removeHastComments } from '../utils/hastComments.js';
34
+ import { remarkSpaceListsOut } from '../utils/lists.js';
31
35
  import { log } from '../utils/log.js';
36
+ import { remarkRemoveBottomMetadata } from '../utils/metadata.js';
32
37
  import { unifiedRemoveNestedRoots } from '../utils/nestedRoots.js';
33
38
  import { unifiedRemovePositions } from '../utils/position.js';
34
39
  import { removeLeadingSlash, removeTrailingSlash } from '../utils/strings.js';
40
+ import { remarkRemoveCodeBlocksInCells } from '../utils/tableCells.js';
35
41
  import { getDescriptionFromRoot, getTitleFromHeading } from '../utils/title.js';
42
+ import { remarkRemoveUpdatedAt } from '../utils/updatedAt.js';
36
43
  import { downloadImagesFromFile } from './images.js';
37
44
  import { htmlToHast } from './root.js';
38
45
 
@@ -61,7 +68,8 @@ export async function scrapePage(
61
68
 
62
69
  const urlStr = url.toString();
63
70
  const content = retrieveRootContent(hast);
64
- if (!content) return { success: false, message: `${urlStr}: ${CONTENT_FAILURE_MSG}` };
71
+ if (!content)
72
+ return { success: false, message: `${urlStr}: ${CONTENT_FAILURE_MSG}`, data: [urlStr, ''] };
65
73
 
66
74
  const contentAsRoot: HastRoot = {
67
75
  type: 'root',
@@ -69,18 +77,19 @@ export async function scrapePage(
69
77
  };
70
78
 
71
79
  const mdastTree: MdastRoot = unified()
80
+ .use(unifiedRemoveBreaks)
81
+ .use(unifiedRemoveCopyButtons)
72
82
  .use(createCard)
73
83
  .use(createAccordion)
74
84
  .use(createFrame)
75
- .use(createTabs)
76
85
  .use(createCallout)
77
86
  .use(createCardGroup)
78
87
  .use(createAccordionGroup)
79
88
  .use(createCodeGroup)
89
+ .use(createTabs)
80
90
  .use(unifiedRemoveClassNames)
81
- .use(unifiedRemovePositions)
82
91
  .use(unifiedRemoveEmptyParagraphs)
83
- .use(escapeCharactersOutsideCodeBlocks)
92
+ .use(unifiedRemovePositions)
84
93
  .use(selectiveRehypeRemark)
85
94
 
86
95
  // Cleans up any nested components left untouched
@@ -89,6 +98,13 @@ export async function scrapePage(
89
98
  .use(rehypeToRemarkCustomComponents)
90
99
  .use(convertHeaderLinksToText)
91
100
  .use(unifiedRemoveNestedRoots)
101
+ .use(remarkSpaceListsOut)
102
+ .use(remarkRemoveBottomMetadata)
103
+ .use(remarkRemoveUpdatedAt)
104
+ .use(remarkRemoveEmptyEmphases)
105
+ .use(remarkProperlyFormatEmphasis)
106
+ .use(remarkRemoveCodeBlocksInCells)
107
+ // @ts-expect-error moving some of the pipeline around results in contentAsRoot being treated differently than its type which is Root Element
92
108
  .runSync(contentAsRoot) as MdastRoot;
93
109
 
94
110
  try {
@@ -111,13 +127,15 @@ export async function scrapePage(
111
127
  .use(remarkStringify)
112
128
  .stringify(mdastTree);
113
129
 
130
+ const resultStr = String(result).replace(/\n{3,}/g, '\n\n');
131
+
114
132
  if (opts.rootPath) {
115
133
  url = new URL(opts.rootPath, url.origin);
116
134
  } else if (url.origin === removeTrailingSlash(url.toString())) {
117
135
  url = new URL('home', new URL(url).origin);
118
136
  }
119
137
 
120
- writePage(url, opts.isOverviewPage ? 'Overview' : title, description, String(result));
138
+ writePage(url, opts.isOverviewPage ? 'Overview' : title, description, resultStr);
121
139
  return {
122
140
  success: true,
123
141
  data: opts.rootPath
@@ -125,8 +143,11 @@ export async function scrapePage(
125
143
  : undefined,
126
144
  };
127
145
  } catch (error) {
128
- write('error.json', JSON.stringify(mdastTree, undefined, 2));
129
146
  const errorMessage = getErrorMessage(error);
130
- return { success: false, message: `${urlStr}: ${MDAST_FAILURE_MSG}${errorMessage}` };
147
+ return {
148
+ success: false,
149
+ message: `${urlStr}: ${MDAST_FAILURE_MSG}${errorMessage}`,
150
+ data: [urlStr, ''],
151
+ };
131
152
  }
132
153
  }
@@ -11,11 +11,13 @@ import { detectFramework, framework } from '../utils/detectFramework.js';
11
11
  import { logErrorResults } from '../utils/errors.js';
12
12
  import { startPuppeteer } from '../utils/network.js';
13
13
  import { INDEX_NAMES, iterateThroughReservedNames } from '../utils/reservedNames.js';
14
- import { removeTrailingSlash } from '../utils/strings.js';
14
+ import { removeTrailingSlash, removeLeadingSlash } from '../utils/strings.js';
15
+ import { downloadColors } from './color.js';
15
16
  import { scrapePageGroup } from './group.js';
16
17
  import { downloadFavicon } from './icon.js';
17
18
  import { downloadLogos } from './logo.js';
18
19
  import { htmlToHast } from './root.js';
20
+ import { downloadTitle } from './title.js';
19
21
 
20
22
  export async function scrapeSite(
21
23
  html: string,
@@ -127,24 +129,98 @@ export async function scrapeSite(
127
129
  };
128
130
  });
129
131
 
132
+ const allErrors = [
133
+ ...externalResults.filter((result) => !result.success),
134
+ ...internalResults.filter((result) => !result.success),
135
+ ...rootResults.filter((result) => !result.success),
136
+ ];
137
+
138
+ const allErroredPaths = allErrors
139
+ .map((result) => {
140
+ if (result.data) {
141
+ const url = new URL(result.data[0]);
142
+ const pathname = url.pathname;
143
+ const normalizedPathname = removeLeadingSlash(removeTrailingSlash(pathname));
144
+ return normalizedPathname;
145
+ } else {
146
+ return '';
147
+ }
148
+ })
149
+ .filter(Boolean);
150
+
151
+ traverse(navItems).forEach(function (value) {
152
+ if (typeof value === 'string' && allErroredPaths.includes(value)) {
153
+ this.remove();
154
+ } else if (Array.isArray(value)) {
155
+ this.update(
156
+ value
157
+ .filter((item) =>
158
+ typeof item === 'string' && allErroredPaths.includes(item) ? undefined : item
159
+ )
160
+ .filter(Boolean)
161
+ );
162
+ }
163
+ });
164
+
165
+ let count = 1;
166
+ while (count > 0) {
167
+ count = 0;
168
+ traverse(navItems).forEach(function (value) {
169
+ if (Array.isArray(value) && value.filter(Boolean).length === 0) {
170
+ count++;
171
+ if (this.parent) {
172
+ this.parent.remove();
173
+ } else {
174
+ this.remove();
175
+ }
176
+ }
177
+ });
178
+ }
179
+
180
+ traverse(navItems).forEach(function (value) {
181
+ if (
182
+ typeof value === 'string' &&
183
+ (value.startsWith('https://') || value.startsWith('http://'))
184
+ ) {
185
+ this.remove();
186
+ } else if (
187
+ Array.isArray(value) &&
188
+ value.find(
189
+ (val) =>
190
+ typeof val === 'string' && (val.startsWith('https://') || val.startsWith('http://'))
191
+ )
192
+ ) {
193
+ this.update(
194
+ value.filter(
195
+ (val) =>
196
+ !(
197
+ typeof val === 'string' &&
198
+ (val.startsWith('https://') || val.startsWith('http://'))
199
+ )
200
+ )
201
+ );
202
+ }
203
+ });
204
+
130
205
  logErrorResults('linking to external pages', externalResults);
131
206
  logErrorResults('scraping your docs', [...internalResults, ...rootResults]);
132
207
 
133
- const browser = needsBrowser ? await startPuppeteer() : undefined;
208
+ const needsBrowserForLogos = framework.vendor === 'readme';
209
+ const browser = needsBrowserForLogos ? await startPuppeteer() : undefined;
134
210
 
135
211
  const favicon = await downloadFavicon(hast);
212
+ const colors = await downloadColors(hast);
136
213
  const logo = await downloadLogos(url, browser);
214
+ const name = await downloadTitle(hast);
137
215
 
138
216
  return {
139
217
  success: true,
140
218
  data: {
141
219
  $schema: 'https://mintlify.com/schema.json',
142
- name: '',
220
+ name,
143
221
  logo,
144
- colors: {
145
- primary: '',
146
- },
147
- favicon: favicon ?? '',
222
+ colors,
223
+ favicon,
148
224
  navigation: navItems as Navigation,
149
225
  tabs: opts.tabs,
150
226
  },
@@ -1,4 +1,4 @@
1
- import type { Navigation, NavigationEntry } from '@mintlify/models';
1
+ import type { Colors, Navigation, NavigationEntry } from '@mintlify/models';
2
2
  import { MintConfig, Tab } from '@mintlify/models';
3
3
 
4
4
  import { retrieveTabLinks } from '../tabs/retrieveReadme.js';
@@ -8,10 +8,11 @@ import { getErrorMessage } from '../utils/errors.js';
8
8
  import { log } from '../utils/log.js';
9
9
  import { fetchPageHtml, startPuppeteer } from '../utils/network.js';
10
10
  import { getTitleFromLink } from '../utils/title.js';
11
- import { downloadFavicon } from './icon.js';
11
+ import { defaultColors } from './color.js';
12
12
  import { downloadLogos } from './logo.js';
13
13
  import { htmlToHast } from './root.js';
14
14
  import { scrapeSite } from './site.js';
15
+ import { downloadTitle } from './title.js';
15
16
 
16
17
  export async function scrapeAllSiteTabs(
17
18
  html: string,
@@ -22,12 +23,6 @@ export async function scrapeAllSiteTabs(
22
23
 
23
24
  detectFramework(hast);
24
25
 
25
- const needsBrowser = framework.vendor === 'gitbook';
26
- const browser = needsBrowser ? await startPuppeteer() : undefined;
27
-
28
- const favicon = await downloadFavicon(hast);
29
- const logo = await downloadLogos(url, browser);
30
-
31
26
  if (framework.vendor === 'readme' || framework.vendor === 'docusaurus') {
32
27
  const links = retrieveTabLinks(hast);
33
28
  if (
@@ -59,12 +54,16 @@ export async function scrapeAllSiteTabs(
59
54
 
60
55
  const navigations: Array<NavigationEntry> = [];
61
56
  const tabs: Array<Tab> = [];
57
+ let favicon = '/favicon.svg';
58
+ let colors: Colors = defaultColors;
62
59
 
63
60
  const successes = results.filter((result) => result.success);
64
61
  successes.forEach((result) => {
65
62
  if (!result.data) return;
66
63
  navigations.push(...result.data.navigation);
67
64
  if (result.data.tabs) tabs.push(...result.data.tabs);
65
+ if (result.data.favicon !== '/favicon.svg') favicon = result.data.favicon;
66
+ if (result.data.colors !== defaultColors) colors = result.data.colors;
68
67
  });
69
68
 
70
69
  const failures = results.filter((result) => !result.success);
@@ -72,16 +71,19 @@ export async function scrapeAllSiteTabs(
72
71
  log('Failed to scrape tab' + result.message);
73
72
  });
74
73
 
74
+ const needsBrowser = framework.vendor === 'readme';
75
+ const browser = needsBrowser ? await startPuppeteer() : undefined;
76
+ const logo = await downloadLogos(url, browser);
77
+ const name = await downloadTitle(hast);
78
+
75
79
  return {
76
80
  success: true,
77
81
  data: {
78
82
  $schema: 'https://mintlify.com/schema.json',
79
- name: '',
83
+ name,
80
84
  logo,
81
- colors: {
82
- primary: '',
83
- },
84
- favicon: favicon ?? '',
85
+ colors,
86
+ favicon,
85
87
  navigation: navigations as Navigation,
86
88
  tabs,
87
89
  },
@@ -0,0 +1,38 @@
1
+ import type { Root as HastRoot } from 'hast';
2
+ import { CONTINUE, EXIT, visit } from 'unist-util-visit';
3
+
4
+ const defaultTitle = 'Enter name here';
5
+
6
/**
 * Derives a site name from the page's `<title>` element.
 *
 * The first `<title>` that contains non-empty text is used. If that text
 * contains `|`, `–` or `-` (checked in this order), only the part after the
 * last occurrence of the first separator found is kept; otherwise the whole
 * text is used. The result is trimmed.
 *
 * @param hast - HAST root of the scraped page.
 * @returns The derived name, or the default placeholder title when no title
 *   text exists or the derived name is empty.
 */
export async function downloadTitle(hast: HastRoot): Promise<string> {
  const found: { text: string | undefined } = { text: undefined };

  visit(hast, 'element', function (node) {
    if (node.tagName !== 'title') return CONTINUE;

    // Grab the first text node inside this <title>.
    visit(node, 'text', function (textNode) {
      found.text = textNode.value;
      return EXIT;
    });

    // Stop at the first title that produced non-empty text.
    return found.text ? EXIT : CONTINUE;
  });

  const title = found.text;
  if (!title) return defaultTitle;

  const separator = ['|', '–', '-'].find((candidate) => title.includes(candidate));
  const siteGroupTitle =
    separator === undefined ? title.trim() : (title.split(separator).at(-1) ?? '').trim();

  return siteGroupTitle || defaultTitle;
}
@@ -8,7 +8,7 @@ import { findTitle, getTitleFromLink } from '../utils/title.js';
8
8
  export function retrieveTabLinks(rootNode: HastRoot): Array<Tab> | undefined {
9
9
  if (framework.vendor !== 'readme' && framework.vendor !== 'docusaurus') return undefined;
10
10
 
11
- let element: Element | undefined = undefined;
11
+ let element: Element | undefined = undefined as Element | undefined;
12
12
  visit(rootNode, 'element', function (node) {
13
13
  if (framework.vendor === 'readme') {
14
14
  if (
@@ -35,7 +35,6 @@ export function retrieveTabLinks(rootNode: HastRoot): Array<Tab> | undefined {
35
35
  }
36
36
  });
37
37
 
38
- // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
39
38
  if (!element) return undefined;
40
39
 
41
40
  const links: Array<Tab> = [];
@@ -1 +1 @@
1
- export type Result<T> = { success: true; data?: T } | { success: false; message: string };
1
+ export type Result<T> = { success: true; data?: T } | { success: false; message: string; data?: T };
@@ -0,0 +1,19 @@
1
+ import type { Element } from 'hast';
2
+ import { visit } from 'unist-util-visit';
3
+
4
// unified plugin wrapper: yields a transformer that hands the tree it
// receives to `removeBreaks` and returns that call's result.
export function unifiedRemoveBreaks() {
  const transformer = (node: Element) => removeBreaks(node);
  return transformer;
}
9
+
10
// ReadMe-specific function since they use breaks in between
// every element, but either way our parser adds whitespace
// automatically.
//
// Removes every `<br>` element found anywhere below `node`, mutating the
// tree in place.
export function removeBreaks(node: Element) {
  return visit(node, 'element', function (subNode, index, parent) {
    if (subNode.tagName === 'br' && parent && typeof index === 'number') {
      parent.children.splice(index, 1);
      // After the splice the next sibling sits at `index`; returning it makes
      // the traversal resume there instead of skipping that sibling (which
      // would leave the second of two consecutive breaks in place).
      return index;
    }
    return undefined;
  });
}
@@ -11,6 +11,7 @@ import type { State, Handle } from 'hast-util-to-mdast';
11
11
  import type { RootContent as MdastRootContent, Root as MdastRoot } from 'mdast';
12
12
  import { unified } from 'unified';
13
13
 
14
+ import { ESCAPED_COMPONENTS } from '../constants.js';
14
15
  import { mdxJsxFlowElementHandler } from '../customComponents/selective.js';
15
16
 
16
17
  export function turnChildrenIntoMdx(
@@ -19,19 +20,25 @@ export function turnChildrenIntoMdx(
19
20
  ): Array<MdastRootContent> {
20
21
  const hast: HastRoot = {
21
22
  type: 'root',
22
- children: children,
23
+ children,
23
24
  };
24
25
 
25
26
  const handlers: Record<string, Handle> = { ...defaultHandlers };
26
27
  if (opts.jsxImages) {
27
- handlers['img'] = function (_: State, node: Element) {
28
+ handlers['img'] = function (h: State, node: Element) {
28
29
  Object.keys(node.properties).forEach((key) => {
29
30
  if (key !== 'src') delete node.properties[key];
30
31
  });
31
- return mdxJsxFlowElementHandler(_, node);
32
+ return mdxJsxFlowElementHandler(h, node);
32
33
  };
33
34
  }
34
35
 
36
+ ESCAPED_COMPONENTS.forEach((component) => {
37
+ handlers[component] = function (h: State, node: Element) {
38
+ return mdxJsxFlowElementHandler(h, node);
39
+ };
40
+ });
41
+
35
42
  const mdxAst = unified()
36
43
  .use(function () {
37
44
  return function (tree: HastRoot): MdastRoot {
@@ -7,7 +7,7 @@ export function unifiedRemoveClassNames() {
7
7
  };
8
8
  }
9
9
 
10
- export function removeClassNames(node: Element) {
10
+ function removeClassNames(node: Element) {
11
11
  return visit(node, 'element', function (subNode) {
12
12
  if ('properties' in subNode) delete subNode.properties.className;
13
13
  });
@@ -0,0 +1,35 @@
1
+ import type { Root as HastRoot } from 'hast';
2
+ import { CONTINUE, EXIT, visit } from 'unist-util-visit';
3
+
4
// unified plugin wrapper: yields a transformer that hands the HAST root it
// receives to `removeCopyButtons` and returns that call's result.
export function unifiedRemoveCopyButtons() {
  const transformer = (root: HastRoot) => removeCopyButtons(root);
  return transformer;
}
9
+
10
// GitBook specific: every code block contains a 'Copy' button that is part
// of the HTML output, so it always ends up in the scraped tree and has to
// be removed here.
//
// A button is removed when its class list contains
// 'group-hover/codeblock:opacity-[1]' and one of its text nodes is exactly
// 'Copy' or 'copy'. The tree is mutated in place.
export function removeCopyButtons(root: HastRoot) {
  visit(root, 'element', function (node, index, parent) {
    if (
      node.tagName !== 'button' ||
      !Array.isArray(node.properties.className) ||
      !node.properties.className.includes('group-hover/codeblock:opacity-[1]')
    )
      return CONTINUE;

    // The cast keeps the type as `boolean`; assignments made inside the
    // nested callback are not tracked by control-flow narrowing.
    let isCopyButton = false as boolean;
    visit(node, 'text', function (textNode) {
      if (textNode.value === 'Copy' || textNode.value === 'copy') {
        isCopyButton = true;
        return EXIT;
      }
      return CONTINUE;
    });

    if (!isCopyButton || !parent || typeof index !== 'number') return CONTINUE;

    parent.children.splice(index, 1);
    // Resume at the same index: the following sibling has shifted into this
    // slot and would otherwise be skipped by the traversal.
    return index;
  });
}
@@ -0,0 +1,18 @@
1
+ import type { Root as MdastRoot } from 'mdast';
2
+ import { CONTINUE, visit } from 'unist-util-visit';
3
+
4
// remark plugin wrapper: yields a transformer that hands the MDAST root it
// receives to `removeEmptyEmphases` and returns that call's result.
export function remarkRemoveEmptyEmphases() {
  const transformer = (root: MdastRoot) => removeEmptyEmphases(root);
  return transformer;
}
9
+
10
// Removes every `emphasis` and `strong` node that has no children, mutating
// the tree in place.
function removeEmptyEmphases(root: MdastRoot) {
  visit(root, function (node, index, parent) {
    if (node.type !== 'emphasis' && node.type !== 'strong') return CONTINUE;
    if (node.children.length !== 0) return CONTINUE;
    if (!parent || typeof index !== 'number') return CONTINUE;

    parent.children.splice(index, 1);
    // Resume at the same index so that a directly following empty
    // emphasis/strong node (now shifted into this slot) is not skipped.
    return index;
  });
}