@mintlify/scraping 4.0.5 → 4.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/components/AccordionGroup.d.ts +3 -3
- package/bin/components/AccordionGroup.js +54 -27
- package/bin/components/AccordionGroup.js.map +1 -1
- package/bin/components/Card.js +3 -2
- package/bin/components/Card.js.map +1 -1
- package/bin/components/CardGroup.js +3 -6
- package/bin/components/CardGroup.js.map +1 -1
- package/bin/components/CodeGroup.d.ts +1 -1
- package/bin/components/CodeGroup.js +107 -79
- package/bin/components/CodeGroup.js.map +1 -1
- package/bin/components/Tabs.d.ts +1 -1
- package/bin/components/Tabs.js +50 -23
- package/bin/components/Tabs.js.map +1 -1
- package/bin/constants.js +3 -3
- package/bin/constants.js.map +1 -1
- package/bin/nav/listItems.js +0 -1
- package/bin/nav/listItems.js.map +1 -1
- package/bin/scrapingPipeline/color.d.ts +8 -0
- package/bin/scrapingPipeline/color.js +91 -0
- package/bin/scrapingPipeline/color.js.map +1 -0
- package/bin/scrapingPipeline/group.js +1 -3
- package/bin/scrapingPipeline/group.js.map +1 -1
- package/bin/scrapingPipeline/icon.d.ts +1 -1
- package/bin/scrapingPipeline/icon.js +7 -6
- package/bin/scrapingPipeline/icon.js.map +1 -1
- package/bin/scrapingPipeline/logo.js +13 -9
- package/bin/scrapingPipeline/logo.js.map +1 -1
- package/bin/scrapingPipeline/page.js +28 -9
- package/bin/scrapingPipeline/page.js.map +1 -1
- package/bin/scrapingPipeline/site.js +64 -7
- package/bin/scrapingPipeline/site.js.map +1 -1
- package/bin/scrapingPipeline/tabs.js +15 -10
- package/bin/scrapingPipeline/tabs.js.map +1 -1
- package/bin/scrapingPipeline/title.d.ts +2 -0
- package/bin/scrapingPipeline/title.js +34 -0
- package/bin/scrapingPipeline/title.js.map +1 -0
- package/bin/tabs/retrieveReadme.js +0 -1
- package/bin/tabs/retrieveReadme.js.map +1 -1
- package/bin/tsconfig.build.tsbuildinfo +1 -1
- package/bin/types/result.d.ts +1 -0
- package/bin/utils/breaks.d.ts +3 -0
- package/bin/utils/breaks.js +17 -0
- package/bin/utils/breaks.js.map +1 -0
- package/bin/utils/children.js +9 -3
- package/bin/utils/children.js.map +1 -1
- package/bin/utils/className.d.ts +0 -1
- package/bin/utils/className.js +1 -1
- package/bin/utils/className.js.map +1 -1
- package/bin/utils/copyButton.d.ts +3 -0
- package/bin/utils/copyButton.js +30 -0
- package/bin/utils/copyButton.js.map +1 -0
- package/bin/utils/emptyEmphasis.d.ts +2 -0
- package/bin/utils/emptyEmphasis.js +18 -0
- package/bin/utils/emptyEmphasis.js.map +1 -0
- package/bin/utils/emptyParagraphs.d.ts +0 -1
- package/bin/utils/emptyParagraphs.js +1 -1
- package/bin/utils/emptyParagraphs.js.map +1 -1
- package/bin/utils/formatEmphasis.d.ts +2 -0
- package/bin/utils/formatEmphasis.js +32 -0
- package/bin/utils/formatEmphasis.js.map +1 -0
- package/bin/utils/images.js +9 -1
- package/bin/utils/images.js.map +1 -1
- package/bin/utils/lists.d.ts +2 -0
- package/bin/utils/lists.js +21 -0
- package/bin/utils/lists.js.map +1 -0
- package/bin/utils/log.d.ts +17 -0
- package/bin/utils/log.js +15 -5
- package/bin/utils/log.js.map +1 -1
- package/bin/utils/metadata.d.ts +2 -0
- package/bin/utils/metadata.js +23 -0
- package/bin/utils/metadata.js.map +1 -0
- package/bin/utils/nestedRoots.d.ts +0 -1
- package/bin/utils/nestedRoots.js +1 -1
- package/bin/utils/nestedRoots.js.map +1 -1
- package/bin/utils/position.d.ts +0 -1
- package/bin/utils/position.js +1 -1
- package/bin/utils/position.js.map +1 -1
- package/bin/utils/tableCells.d.ts +2 -0
- package/bin/utils/tableCells.js +22 -0
- package/bin/utils/tableCells.js.map +1 -0
- package/bin/utils/title.d.ts +1 -0
- package/bin/utils/title.js +9 -3
- package/bin/utils/title.js.map +1 -1
- package/bin/utils/updatedAt.d.ts +2 -0
- package/bin/utils/updatedAt.js +21 -0
- package/bin/utils/updatedAt.js.map +1 -0
- package/package.json +2 -2
- package/src/components/AccordionGroup.ts +55 -25
- package/src/components/Card.ts +3 -2
- package/src/components/CardGroup.ts +3 -6
- package/src/components/CodeGroup.ts +127 -83
- package/src/components/Tabs.ts +57 -24
- package/src/constants.ts +3 -3
- package/src/nav/listItems.ts +1 -2
- package/src/scrapingPipeline/color.ts +107 -0
- package/src/scrapingPipeline/group.ts +1 -4
- package/src/scrapingPipeline/icon.ts +8 -6
- package/src/scrapingPipeline/logo.ts +14 -9
- package/src/scrapingPipeline/page.ts +30 -9
- package/src/scrapingPipeline/site.ts +83 -7
- package/src/scrapingPipeline/tabs.ts +15 -13
- package/src/scrapingPipeline/title.ts +38 -0
- package/src/tabs/retrieveReadme.ts +1 -2
- package/src/types/result.ts +1 -1
- package/src/utils/breaks.ts +19 -0
- package/src/utils/children.ts +10 -3
- package/src/utils/className.ts +1 -1
- package/src/utils/copyButton.ts +35 -0
- package/src/utils/emptyEmphasis.ts +18 -0
- package/src/utils/emptyParagraphs.ts +1 -1
- package/src/utils/formatEmphasis.ts +37 -0
- package/src/utils/images.ts +13 -2
- package/src/utils/lists.ts +22 -0
- package/src/utils/log.ts +18 -5
- package/src/utils/metadata.ts +26 -0
- package/src/utils/nestedRoots.ts +1 -1
- package/src/utils/position.ts +1 -1
- package/src/utils/tableCells.ts +23 -0
- package/src/utils/title.ts +10 -4
- package/src/utils/updatedAt.ts +25 -0
- package/bin/utils/escape.d.ts +0 -2
- package/bin/utils/escape.js +0 -25
- package/bin/utils/escape.js.map +0 -1
- package/src/utils/escape.ts +0 -30
package/bin/components/Tabs.js
CHANGED
|
@@ -39,40 +39,67 @@ export function gitBookScrapeTabs(node, _, parent) {
|
|
|
39
39
|
};
|
|
40
40
|
return newNode;
|
|
41
41
|
}
|
|
42
|
-
export function readmeScrapeTabs(node, _,
|
|
42
|
+
export function readmeScrapeTabs(node, _, __) {
|
|
43
43
|
if ((node.tagName !== 'div' && node.tagName !== 'a') ||
|
|
44
44
|
!node.properties.className ||
|
|
45
45
|
!Array.isArray(node.properties.className) ||
|
|
46
|
-
!node.properties.className.includes('tabbed-component')
|
|
46
|
+
(!node.properties.className.includes('tabbed-component') &&
|
|
47
|
+
!node.properties.className.includes('tabs') &&
|
|
48
|
+
!node.properties.className.includes('Tabs'))) {
|
|
47
49
|
return undefined;
|
|
48
50
|
}
|
|
49
|
-
if (
|
|
51
|
+
if (!node.children[0] || !node.children[1])
|
|
50
52
|
return undefined;
|
|
51
|
-
const tabTitles = node.children[0];
|
|
52
53
|
const titles = [];
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
54
|
+
const tabContents = [];
|
|
55
|
+
if (node.children.length !== 2) {
|
|
56
|
+
visit(node, 'element', function (subNode) {
|
|
57
|
+
if (subNode.tagName !== 'label' && subNode.tagName !== 'button')
|
|
58
|
+
return CONTINUE;
|
|
59
|
+
let title = '';
|
|
60
|
+
visit(subNode, 'text', function (textNode) {
|
|
61
|
+
title += textNode.value;
|
|
62
|
+
});
|
|
63
|
+
titles.push(title.trim().replace('\n', ''));
|
|
57
64
|
});
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
65
|
+
tabContents.push(...node.children.filter((subNode) => {
|
|
66
|
+
if (subNode.type === 'element' &&
|
|
67
|
+
Array.isArray(subNode.properties.className) &&
|
|
68
|
+
(subNode.properties.className.includes('tab') ||
|
|
69
|
+
subNode.properties.className.includes('Tab') ||
|
|
70
|
+
subNode.properties.className.includes('tabbed-content') ||
|
|
71
|
+
subNode.properties.className.includes('tab-content')))
|
|
72
|
+
return true;
|
|
73
|
+
return false;
|
|
74
|
+
}));
|
|
75
|
+
}
|
|
76
|
+
else {
|
|
77
|
+
const tabTitles = node.children[0];
|
|
78
|
+
visit(tabTitles, 'element', function (subNode) {
|
|
79
|
+
visit(subNode, 'text', function (textNode) {
|
|
80
|
+
titles.push(textNode.value);
|
|
81
|
+
return EXIT;
|
|
73
82
|
});
|
|
83
|
+
});
|
|
84
|
+
node.children.shift();
|
|
85
|
+
if (node.children[0].type === 'element') {
|
|
86
|
+
tabContents.push(...node.children[0].children);
|
|
74
87
|
}
|
|
75
88
|
}
|
|
89
|
+
const tabChildren = [];
|
|
90
|
+
tabContents.forEach((tab, index) => {
|
|
91
|
+
if (!titles[index])
|
|
92
|
+
return;
|
|
93
|
+
const children = turnChildrenIntoMdx([tab]);
|
|
94
|
+
tabChildren.push({
|
|
95
|
+
type: 'element',
|
|
96
|
+
tagName: 'Tab',
|
|
97
|
+
properties: {
|
|
98
|
+
title: titles[index],
|
|
99
|
+
},
|
|
100
|
+
children,
|
|
101
|
+
});
|
|
102
|
+
});
|
|
76
103
|
const newNode = {
|
|
77
104
|
type: 'element',
|
|
78
105
|
tagName: 'Tabs',
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"Tabs.js","sourceRoot":"","sources":["../../src/components/Tabs.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AAEzD,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAE/C,OAAO,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAE3D,MAAM,UAAU,iBAAiB,CAC/B,IAAc,EACd,CAAgB,EAChB,MAAsB;IAEtB,IAAI,IAAI,CAAC,OAAO,KAAK,KAAK,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QAC1F,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,MAAM,MAAM,GAAkB,EAAE,CAAC;IACjC,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,OAAO;QACtC,IAAI,OAAO,CAAC,OAAO,KAAK,QAAQ;YAAE,OAAO,QAAQ,CAAC;QAClD,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,UAAU,QAAQ;YACvC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YAC5B,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,eAAe,CAAC,MAAM,CAAC,CAAC;IACxB,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC;IAExB,MAAM,QAAQ,GAAG,mBAAmB,CAAC,MAAM,CAAC,QAAQ,CAA0B,CAAC;IAC/E,MAAM,WAAW,GAA0B,EAAE,CAAC;IAC9C,KAAK,IAAI,UAAU,GAAG,CAAC,EAAE,UAAU,GAAG,QAAQ,CAAC,MAAM,EAAE,UAAU,EAAE,EAAE,CAAC;QACpE,MAAM,KAAK,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC;QACnC,IAAI,KAAK,EAAE,CAAC;YACV,WAAW,CAAC,IAAI,CAAC;gBACf,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,KAAK;gBACd,UAAU,EAAE;oBACV,KAAK,EAAE,MAAM,CAAC,UAAU,CAAC;iBAC1B;gBACD,QAAQ,EAAE,CAAC,KAAK,CAAC;aAClB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAY;QACvB,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,MAAM;QACf,UAAU,EAAE,EAAE;QACd,QAAQ,EAAE,WAAoC;KAC/C,CAAC;IAEF,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,IAAc,EACd,CAAgB,EAChB,
|
|
1
|
+
{"version":3,"file":"Tabs.js","sourceRoot":"","sources":["../../src/components/Tabs.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AAEzD,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAE/C,OAAO,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAE3D,MAAM,UAAU,iBAAiB,CAC/B,IAAc,EACd,CAAgB,EAChB,MAAsB;IAEtB,IAAI,IAAI,CAAC,OAAO,KAAK,KAAK,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QAC1F,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,MAAM,MAAM,GAAkB,EAAE,CAAC;IACjC,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,OAAO;QACtC,IAAI,OAAO,CAAC,OAAO,KAAK,QAAQ;YAAE,OAAO,QAAQ,CAAC;QAClD,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,UAAU,QAAQ;YACvC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YAC5B,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,eAAe,CAAC,MAAM,CAAC,CAAC;IACxB,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC;IAExB,MAAM,QAAQ,GAAG,mBAAmB,CAAC,MAAM,CAAC,QAAQ,CAA0B,CAAC;IAC/E,MAAM,WAAW,GAA0B,EAAE,CAAC;IAC9C,KAAK,IAAI,UAAU,GAAG,CAAC,EAAE,UAAU,GAAG,QAAQ,CAAC,MAAM,EAAE,UAAU,EAAE,EAAE,CAAC;QACpE,MAAM,KAAK,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC;QACnC,IAAI,KAAK,EAAE,CAAC;YACV,WAAW,CAAC,IAAI,CAAC;gBACf,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,KAAK;gBACd,UAAU,EAAE;oBACV,KAAK,EAAE,MAAM,CAAC,UAAU,CAAC;iBAC1B;gBACD,QAAQ,EAAE,CAAC,KAAK,CAAC;aAClB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAY;QACvB,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,MAAM;QACf,UAAU,EAAE,EAAE;QACd,QAAQ,EAAE,WAAoC;KAC/C,CAAC;IAEF,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,IAAc,EACd,CAAgB,EAChB,EAAkB;IAElB,IACE,CAAC,IAAI,CAAC,OAAO,KAAK,KAAK,IAAI,IAAI,CAAC,OAAO,KAAK,GAAG,CAAC;QAChD,CAAC,IAAI,CAAC,UAAU,CAAC,SAAS;QAC1B,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;QACzC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,kBAAkB,CAAC;YACtD,CAAC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC;YAC3C,CAAC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,EAC9C,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;QAAE,OAAO,SAAS,CAAC;IAE7D,MAAM,
MAAM,GAAkB,EAAE,CAAC;IACjC,MAAM,WAAW,GAAmB,EAAE,CAAC;IAEvC,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,OAAO;YACtC,IAAI,OAAO,CAAC,OAAO,KAAK,OAAO,IAAI,OAAO,CAAC,OAAO,KAAK,QAAQ;gBAAE,OAAO,QAAQ,CAAC;YAEjF,IAAI,KAAK,GAAG,EAAE,CAAC;YACf,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,UAAU,QAAQ;gBACvC,KAAK,IAAI,QAAQ,CAAC,KAAK,CAAC;YAC1B,CAAC,CAAC,CAAC;YAEH,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC;QAC9C,CAAC,CAAC,CAAC;QAEH,WAAW,CAAC,IAAI,CACd,GAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,OAAO,EAAE,EAAE;YACnC,IACE,OAAO,CAAC,IAAI,KAAK,SAAS;gBAC1B,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC;gBAC3C,CAAC,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC;oBAC3C,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC;oBAC5C,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,gBAAgB,CAAC;oBACvD,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;gBAEvD,OAAO,IAAI,CAAC;YACd,OAAO,KAAK,CAAC;QACf,CAAC,CAAoB,CACtB,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;QAEnC,KAAK,CAAC,SAAS,EAAE,SAAS,EAAE,UAAU,OAAO;YAC3C,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,UAAU,QAAQ;gBACvC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;gBAC5B,OAAO,IAAI,CAAC;YACd,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC;QACtB,IAAI,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;YACxC,WAAW,CAAC,IAAI,CAAC,GAAI,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAA2B,CAAC,CAAC;QACrE,CAAC;IACH,CAAC;IAED,MAAM,WAAW,GAA0B,EAAE,CAAC;IAC9C,WAAW,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE;QACjC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC;YAAE,OAAO;QAC3B,MAAM,QAAQ,GAAG,mBAAmB,CAAC,CAAC,GAAG,CAAC,CAA0B,CAAC;QACrE,WAAW,CAAC,IAAI,CAAC;YACf,IAAI,EAAE,SAAS;YACf,OAAO,EAAE,KAAK;YACd,UAAU,EAAE;gBACV,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC;aACrB;YACD,QAAQ;SACT,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,MAAM,OAAO,GAAY;QACvB,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,MAAM;QACf,UAAU,EAAE,EAAE;QACd,QAAQ,EAAE,WAAoC;KAC/C,CAAC;IAEF,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,IAAc
,EACd,CAAgB,EAChB,MAAsB;IAEtB,IAAI,IAAI,CAAC,OAAO,KAAK,KAAK,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QAC1F,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,MAAM,MAAM,GAAkB,EAAE,CAAC;IACjC,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,OAAO;QACtC,IAAI,OAAO,CAAC,OAAO,KAAK,IAAI;YAAE,OAAO,QAAQ,CAAC;QAC9C,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,UAAU,QAAQ;YACvC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YAC5B,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,eAAe,CAAC,MAAM,CAAC,CAAC;IACxB,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC;IAExB,MAAM,QAAQ,GAAG,mBAAmB,CAAC,MAAM,CAAC,QAAQ,CAA0B,CAAC;IAC/E,MAAM,WAAW,GAA0B,EAAE,CAAC;IAC9C,KAAK,IAAI,UAAU,GAAG,CAAC,EAAE,UAAU,GAAG,QAAQ,CAAC,MAAM,EAAE,UAAU,EAAE,EAAE,CAAC;QACpE,MAAM,KAAK,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC;QACnC,IAAI,KAAK,EAAE,CAAC;YACV,WAAW,CAAC,IAAI,CAAC;gBACf,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,KAAK;gBACd,UAAU,EAAE;oBACV,KAAK,EAAE,MAAM,CAAC,UAAU,CAAC;iBAC1B;gBACD,QAAQ,EAAE,CAAC,KAAK,CAAC;aAClB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAY;QACvB,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,MAAM;QACf,UAAU,EAAE,EAAE;QACd,QAAQ,EAAE,WAAoC;KAC/C,CAAC;IAEF,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
package/bin/constants.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { activeColors } from './utils/log.js';
|
|
2
2
|
export const SUPPORTED_MEDIA_EXTENSIONS = [
|
|
3
3
|
'png',
|
|
4
4
|
'jpeg',
|
|
@@ -44,6 +44,6 @@ ${SPACES}Please double check your documentation provider and ensure they are sup
|
|
|
44
44
|
${SPACES}We currently support: ReadMe, GitBook, and Docusaurus`;
|
|
45
45
|
export const MDAST_FAILURE_MSG = 'failed to convert MDAST to Markdown string';
|
|
46
46
|
export const FINAL_SUCCESS_MESSAGE = `We've successfully scraped your docs site.
|
|
47
|
-
${SPACES}We've downloaded the ${
|
|
48
|
-
${SPACES}into ${
|
|
47
|
+
${SPACES}We've downloaded the ${activeColors.cyan}\`navigation\`${activeColors.default} array (and if necessary, the ${activeColors.cyan}\`tabs\`${activeColors.default} array)
|
|
48
|
+
${SPACES}into ${activeColors.blue}\`mint.json\`${activeColors.default}.`;
|
|
49
49
|
//# sourceMappingURL=constants.js.map
|
package/bin/constants.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"constants.js","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"constants.js","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAE9C,MAAM,CAAC,MAAM,0BAA0B,GAAG;IACxC,KAAK;IACL,MAAM;IACN,KAAK;IACL,MAAM;IACN,MAAM;IACN,KAAK;IACL,KAAK;IACL,KAAK;IACL,KAAK;IACL,MAAM;IACN,OAAO;IACP,KAAK;IACL,MAAM;IACN,MAAM;IACN,KAAK;CACN,CAAC;AAEF,MAAM,CAAC,MAAM,kBAAkB,GAAG;IAChC,WAAW;IACX,gBAAgB;IAChB,MAAM;IACN,SAAS;IACT,MAAM;IACN,KAAK;IACL,OAAO;IACP,MAAM;IACN,WAAW;IACX,WAAW;IACX,OAAO;IACP,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,KAAK;IACL,SAAS;CACD,CAAC;AAEX,MAAM,CAAC,MAAM,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;AAErC,MAAM,CAAC,MAAM,mBAAmB,GAAG;EACjC,MAAM;EACN,MAAM,uDAAuD,CAAC;AAEhE,MAAM,CAAC,MAAM,eAAe,GAAG;EAC7B,MAAM;EACN,MAAM,uDAAuD,CAAC;AAEhE,MAAM,CAAC,MAAM,iBAAiB,GAAG,4CAA4C,CAAC;AAE9E,MAAM,CAAC,MAAM,qBAAqB,GAAG;EACnC,MAAM,wBAAwB,YAAY,CAAC,IAAI,iBAAiB,YAAY,CAAC,OAAO,iCAAiC,YAAY,CAAC,IAAI,WAAW,YAAY,CAAC,OAAO;EACrK,MAAM,QAAQ,YAAY,CAAC,IAAI,gBAAgB,YAAY,CAAC,OAAO,GAAG,CAAC"}
|
package/bin/nav/listItems.js
CHANGED
package/bin/nav/listItems.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"listItems.js","sourceRoot":"","sources":["../../src/nav/listItems.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAE/C,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,MAAM,6BAA6B,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AACxD,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAC1D,OAAO,EAAE,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAC3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAQjD,MAAM,UAAU,eAAe,CAC7B,IAAa,EACb,OAAwB;IACtB,cAAc,EAAE,KAAK;IACrB,gBAAgB,EAAE,IAAI;IACtB,KAAK,EAAE,SAAS;CACjB;IAED,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IACvC,IAAI,CAAC,IAAI;QAAE,OAAO,SAAS,CAAC;IAE5B,IAAI,QAAQ,GAAuB,SAAS,CAAC;IAC7C,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,IAA0B,CAAC;IAEtD,IAAI,QAAQ,KAAK,SAAS,IAAI,QAAQ,KAAK,GAAG,EAAE,CAAC;QAC/C,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,IAAI,kBAAkB,GAAG,
|
|
1
|
+
{"version":3,"file":"listItems.js","sourceRoot":"","sources":["../../src/nav/listItems.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAE/C,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,MAAM,6BAA6B,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AACxD,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAC1D,OAAO,EAAE,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAC3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAQjD,MAAM,UAAU,eAAe,CAC7B,IAAa,EACb,OAAwB;IACtB,cAAc,EAAE,KAAK;IACrB,gBAAgB,EAAE,IAAI;IACtB,KAAK,EAAE,SAAS;CACjB;IAED,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IACvC,IAAI,CAAC,IAAI;QAAE,OAAO,SAAS,CAAC;IAE5B,IAAI,QAAQ,GAAuB,SAAS,CAAC;IAC7C,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,IAA0B,CAAC;IAEtD,IAAI,QAAQ,KAAK,SAAS,IAAI,QAAQ,KAAK,GAAG,EAAE,CAAC;QAC/C,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,IAAI,kBAAkB,GAAG,KAAgB,CAAC;IAC1C,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,OAAO;QACtC,IACE,OAAO,CAAC,OAAO,KAAK,MAAM;YAC1B,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC;YAC3C,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,cAAc,CAAC,EACrD,CAAC;YACD,kBAAkB,GAAG,IAAI,CAAC;YAC1B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC,CAAC,CAAC;IACH,IAAI,kBAAkB;QAAE,OAAO,SAAS,CAAC;IAEzC,IAAI,QAAQ,CAAC,UAAU,CAAC,GAAG,CAAC;QAAE,QAAQ,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;IAE/D,MAAM,aAAa,GAAG,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;IAChE,MAAM,SAAS,GAAG,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,gBAAgB,CAAC,CAAC;IAC9D,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;IACvB,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,OAAO,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC;QACtD,IAAI,SAAS,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;YAClC,KAAK,GAAG,OAAO,CAAC,aAAa,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QACxD,CAAC;IACH,CAAC;IAED,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,IAAI,YAAY,GAAG,gBAAgB,CAAC,SAAS,CAAC,CAAC;IAC/C,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAC/B,CAAC,KAAK,EAAE,EAAE,CAAC,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,UAAU,CAAC,QAAQ,CAAC,CACnE;QACC,CAAC,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,kBAAkB;QACpD,CAAC,CAAC,QAAQ,CAAC;IAEb,IAAI,YAAY,CA
AC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACpC,YAAY,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;YACpC,IAAI,KAAK,KAAK,QAAQ;gBAAE,YAAY,CAAC,KAAK,CAAC,GAAG,OAAO,CAAC;QACxD,CAAC,CAAC,CAAC;IACL,CAAC;SAAM,CAAC;QACN,YAAY,GAAG,aAAa,CAAC,OAAO,EAAE,YAAY,EAAE,IAAI,CAAC,CAAC;IAC5D,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,aAAa,CAAC,OAAO,EAAE,YAAY,EAAE,IAAI,CAAC,EAAE,CAAC;AAC7E,CAAC"}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import { CONTINUE, visit } from 'unist-util-visit';
|
|
2
|
+
import { framework } from '../utils/detectFramework.js';
|
|
3
|
+
/**
 * Converts one colour channel to a two-character hexadecimal string.
 *
 * The value is rounded to the nearest integer, rendered in base 16 and
 * left-padded with '0' so that single-digit results still occupy two
 * characters (e.g. 13 -> '0d'). No clamping is performed here; range
 * validation is the caller's responsibility.
 *
 * @param {number} value - Channel value to convert.
 * @returns {string} Lower-case hexadecimal representation, at least two characters long.
 */
function toHex(value) {
    // The original body evaluated this expression but never returned it,
    // so every call produced `undefined`.
    return Math.round(value).toString(16).padStart(2, '0');
}
|
|
6
|
+
/**
 * Tells whether `str` is a `#`-prefixed hex colour with exactly three or
 * six hexadecimal digits.
 *
 * @param {string | undefined} str - Candidate colour string.
 * @returns {boolean} `true` for values such as `#abc` or `#0D9373`; `false`
 *   for empty / missing input and anything else.
 */
function checkValidHex(str) {
    const hexColorPattern = /^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})$/;
    return Boolean(str) && hexColorPattern.test(str);
}
|
|
11
|
+
/**
 * Checks that every supplied channel value is a finite number in the
 * inclusive range 0..255.
 *
 * Non-finite values (`NaN`, `Infinity`) and non-numbers are rejected
 * explicitly: plain `<` / `>` comparisons are both false for `NaN`, so a
 * comparison-only check would let it through.
 *
 * @param {...number} numbers - Channel values to validate.
 * @returns {boolean} `true` when all values are within bounds (vacuously
 *   `true` when called with no arguments), otherwise `false`.
 */
function checkRgbBounds(...numbers) {
    return numbers.every((num) => Number.isFinite(num) && num >= 0 && num <= 255);
}
|
|
18
|
+
/**
 * Normalises a CSS colour value to a `#RRGGBB` hex string.
 *
 * Accepted inputs:
 *  - a 3- or 6-digit hex colour, returned as given (surrounding
 *    whitespace removed);
 *  - space-separated channels, optionally followed by an alpha value,
 *    e.g. `13 147 115` or `13 147 115 0.5`;
 *  - `rgb(r, g, b)` / `rgba(r, g, b, a)`, with or without whitespace
 *    around the commas.
 * Any alpha component is ignored.
 *
 * @param {string} color - Raw colour value.
 * @returns {string | undefined} Upper-case `#RRGGBB` (or the original hex
 *   string), or `undefined` when the value cannot be parsed or a channel
 *   falls outside 0..255.
 */
function rgbToHex(color) {
    if (typeof color !== 'string')
        return undefined;
    const trimmed = color.trim();
    if (checkValidHex(trimmed))
        return trimmed;
    const normalized = trimmed.toLowerCase();
    let channels;
    if (/^\d+\s+\d+\s+\d+(\s+[0-9.]+)?$/.test(normalized)) {
        // Keep only r, g, b; a trailing fourth token is the alpha value.
        channels = normalized.split(/\s+/).slice(0, 3).map(Number);
    }
    else {
        const values = normalized.match(/^rgba?\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*(?:,\s*([0-9.]+)\s*)?\)$/);
        if (!values) {
            return undefined;
        }
        channels = values.slice(1, 4).map(Number);
    }
    // A truthiness test (`!r || !g || !b`) must not be used here: 0 is a
    // legitimate channel value and would be mistaken for a missing one.
    if (channels.some((channel) => !Number.isFinite(channel)))
        return undefined;
    const [r, g, b] = channels;
    if (!checkRgbBounds(r, g, b))
        return undefined;
    return `#${toHex(r)}${toHex(g)}${toHex(b)}`.toUpperCase();
}
|
|
39
|
+
/**
 * Extracts the value that follows `key` in a blob of CSS text.
 *
 * The key may be followed by `:` (a declaration such as `--x: #fff;`) or
 * by `,` (the fallback slot of `var(--x, #fff)`); `|` is also accepted
 * because the original character class contained it. Matching is
 * case-insensitive and only the first occurrence is considered.
 *
 * The captured value ends at `;`, `}` or an unmatched `)`, so it is
 * bounded in minified CSS where the last declaration has no trailing
 * semicolon, while a single level of parentheses (e.g. `rgb(1, 2, 3)`)
 * is kept intact.
 *
 * @param {string} cssString - CSS source to search.
 * @param {string} key - Property or custom-property name, matched literally.
 * @returns {string | undefined} Trimmed value, or `undefined` when the key
 *   is absent or its value is empty.
 */
function getCssValue(cssString, key) {
    // Escape regex metacharacters so the key is matched literally.
    const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const regex = new RegExp(`${escapedKey}\\s*[:|,]\\s*((?:[^;(){}]|\\([^)]*\\))+)`, 'i');
    const match = cssString.match(regex);
    const value = match?.[1]?.trim();
    return value ? value : undefined;
}
|
|
44
|
+
// Hex value shared by the `primary` and `dark` entries of the fallback palette.
const DEFAULT_BRAND_HEX = '#0D9373';

/**
 * Fallback palette returned by `downloadColors` when no usable colours
 * are found in the scraped page.
 */
export const defaultColors = {
    primary: DEFAULT_BRAND_HEX,
    light: '#55D799',
    dark: DEFAULT_BRAND_HEX,
};
|
|
49
|
+
/**
 * Derives the colour palette from the `<style>` elements of a scraped page.
 *
 * Docusaurus sites always get `defaultColors`. For other vendors every
 * `<style>` element with a single text child is inspected for the vendor's
 * primary / light custom properties; when both are present their values
 * are converted with `rgbToHex`. A later matching element overwrites the
 * result of an earlier one.
 *
 * @param hast - Root of the HAST tree to search.
 * @returns A promise for `{ primary, light, dark }` when both colours are
 *   valid hex, `{ primary, dark }` when only the primary is, and
 *   `defaultColors` otherwise. `dark` always mirrors `primary`.
 */
export async function downloadColors(hast) {
    const { vendor } = framework;
    if (vendor === 'docusaurus')
        return defaultColors;

    const isReadme = vendor === 'readme';
    // ReadMe uses the same custom property for both lookups.
    const primaryKey = isReadme ? '--color-link-primary' : '--primary-color-600';
    const lightKey = isReadme ? '--color-link-primary' : '--primary-color-400';

    let primary;
    let light;
    visit(hast, 'element', (element) => {
        if (element.tagName !== 'style')
            return CONTINUE;
        // GitBook: only attribute-less <style> elements are considered.
        if (vendor === 'gitbook' && Object.keys(element.properties).length > 0)
            return CONTINUE;
        // ReadMe: only the element titled 'rm-custom-css' is considered.
        if (isReadme && element.properties.title !== 'rm-custom-css')
            return CONTINUE;

        const [onlyChild] = element.children;
        const hasSingleTextChild = element.children.length === 1 && Boolean(onlyChild) && onlyChild.type === 'text';
        if (!hasSingleTextChild)
            return CONTINUE;

        const css = onlyChild.value;
        const rawPrimary = getCssValue(css, primaryKey);
        const rawLight = getCssValue(css, lightKey);
        if (!rawPrimary || !rawLight)
            return CONTINUE;

        primary = rgbToHex(rawPrimary);
        light = rgbToHex(rawLight);
    });

    if (!checkValidHex(primary))
        return defaultColors;
    if (checkValidHex(light))
        return { primary, light, dark: primary };
    return { primary, dark: primary };
}
|
|
91
|
+
//# sourceMappingURL=color.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"color.js","sourceRoot":"","sources":["../../src/scrapingPipeline/color.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAEnD,OAAO,EAAE,SAAS,EAAE,MAAM,6BAA6B,CAAC;AAExD,SAAS,KAAK,CAAC,KAAa;IAC1B,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AAClD,CAAC;AAED,SAAS,aAAa,CAAC,GAAuB;IAC5C,IAAI,CAAC,GAAG;QAAE,OAAO,KAAK,CAAC;IACvB,OAAO,oCAAoC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACxD,CAAC;AAED,SAAS,cAAc,CAAC,GAAG,OAAsB;IAC/C,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,IAAI,GAAG,GAAG,CAAC,IAAI,GAAG,GAAG,GAAG;YAAE,OAAO,KAAK,CAAC;IACzC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,QAAQ,CAAC,KAAa;IAC7B,IAAI,aAAa,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACvC,KAAK,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAEnC,IAAI,CAAqB,EAAE,CAAqB,EAAE,CAAqB,CAAC;IAExE,IAAI,gCAAgC,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QACjD,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IAC7C,CAAC;SAAM,CAAC;QACN,MAAM,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC,6CAA6C,CAAC,CAAC;QAE1E,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IACnC,CAAC;IAED,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC;QAAE,OAAO,SAAS,CAAC;IAErC,IAAI,CAAC,cAAc,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;QAAE,OAAO,SAAS,CAAC;IAE/C,OAAO,IAAI,KAAK,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,WAAW,EAAE,CAAC;AAC5D,CAAC;AAED,SAAS,WAAW,CAAC,SAAiB,EAAE,GAAW;IACjD,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,GAAG,GAAG,uBAAuB,EAAE,GAAG,CAAC,CAAC;IAC7D,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACrC,OAAO,KAAK,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;AACzD,CAAC;AAED,MAAM,CAAC,MAAM,aAAa,GAAG;IAC3B,OAAO,EAAE,SAAS;IAClB,KAAK,EAAE,SAAS;IAChB,IAAI,EAAE,SAAS;CAChB,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,IAAc;IACjD,IAAI,SAAS,CAAC,MAAM,KAAK,YAAY;QAAE,OAAO,aAAa,CAAC;IAE5D,IAAI,
cAAc,GAAuB,SAAS,CAAC;IACnD,IAAI,YAAY,GAAuB,SAAS,CAAC;IACjD,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,IAAI;QACnC,IAAI,IAAI,CAAC,OAAO,KAAK,OAAO;YAAE,OAAO,QAAQ,CAAC;QAC9C,IACE,CAAC,SAAS,CAAC,MAAM,KAAK,SAAS,IAAI,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC;YACzE,CAAC,SAAS,CAAC,MAAM,KAAK,QAAQ,IAAI,IAAI,CAAC,UAAU,CAAC,KAAK,KAAK,eAAe,CAAC;YAE5E,OAAO,QAAQ,CAAC;QAElB,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM;YACrF,OAAO,QAAQ,CAAC;QAElB,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;QACtC,MAAM,eAAe,GACnB,SAAS,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,qBAAqB,CAAC;QACjF,MAAM,aAAa,GACjB,SAAS,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,qBAAqB,CAAC;QAEjF,MAAM,oBAAoB,GAAG,WAAW,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;QAClE,MAAM,kBAAkB,GAAG,WAAW,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;QAC9D,IAAI,CAAC,oBAAoB,IAAI,CAAC,kBAAkB;YAAE,OAAO,QAAQ,CAAC;QAElE,cAAc,GAAG,QAAQ,CAAC,oBAAoB,CAAC,CAAC;QAChD,YAAY,GAAG,QAAQ,CAAC,kBAAkB,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,MAAM,cAAc,GAAG,aAAa,CAAC,cAAc,CAAC,CAAC;IACrD,MAAM,YAAY,GAAG,aAAa,CAAC,YAAY,CAAC,CAAC;IAEjD,IAAI,cAAc,IAAI,YAAY,EAAE,CAAC;QACnC,OAAO;YACL,OAAO,EAAE,cAAe;YACxB,KAAK,EAAE,YAAY;YACnB,IAAI,EAAE,cAAc;SACrB,CAAC;IACJ,CAAC;SAAM,IAAI,cAAc,EAAE,CAAC;QAC1B,OAAO;YACL,OAAO,EAAE,cAAe;YACxB,IAAI,EAAE,cAAc;SACrB,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,OAAO,aAAa,CAAC;IACvB,CAAC;AACH,CAAC"}
|
|
@@ -34,13 +34,11 @@ export async function scrapePageGroup(navGroup, needsBrowser, opts = { externalL
|
|
|
34
34
|
catch (error) {
|
|
35
35
|
const errorMessage = getErrorMessage(error);
|
|
36
36
|
log(`We encountered an error when scraping the page group from ${navGroup[0]?.origin ?? 'the URL provided'}${errorMessage}`);
|
|
37
|
-
console.error(error);
|
|
38
37
|
throw error;
|
|
39
38
|
}
|
|
40
39
|
finally {
|
|
41
|
-
if (browser)
|
|
40
|
+
if (browser)
|
|
42
41
|
await browser.close();
|
|
43
|
-
}
|
|
44
42
|
}
|
|
45
43
|
}
|
|
46
44
|
//# sourceMappingURL=group.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"group.js","sourceRoot":"","sources":["../../src/scrapingPipeline/group.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AACtC,OAAO,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACpE,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAEvC,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,QAAoB,EACpB,YAAqB,EACrB,OAGI,EAAE,aAAa,EAAE,KAAK,EAAE;IAE5B,MAAM,OAAO,GAAwB,YAAY,CAAC,CAAC,CAAC,MAAM,cAAc,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IAEvF,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAC3B,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE;YAChC,IAAI,CAAC;gBACH,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;oBACvB,MAAM,GAAG,GAAG,UAAU,CAAC,iBAAiB,KAAK,EAAE,EAAE,GAAG,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC;oBAC9E,OAAO,GAAG,CAAC;gBACb,CAAC;gBAED,IAAI,cAAc,GAAG,KAAK,CAAC;gBAC3B,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,kBAAkB,CAAC,EAAE,CAAC;oBAChD,cAAc,GAAG,IAAI,CAAC;oBACtB,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC,CAAC;gBAChE,CAAC;gBAED,MAAM,IAAI,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAC/C,MAAM,GAAG,GAAG,UAAU,CAAC,IAAI,EAAE,GAAG,EAAE;oBAChC,YAAY,EAAE,KAAK;oBACnB,cAAc;oBACd,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS;iBAC7D,CAAC,CAAC;gBACH,OAAO,GAAG,CAAC;YACb,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;gBAC5C,MAAM,IAAI,KAAK,CAAC,yCAAyC,GAAG,GAAG,YAAY,EAAE,CAAC,CAAC;YACjF,CAAC;QACH,CAAC,CAAC,CACH,CAAC;QACF,OAAO,GAAG,CAAC;IACb,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,GAAG,CACD,6DACE,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,kBACzB,GAAG,YAAY,EAAE,CAClB,CAAC;QACF,
|
|
1
|
+
{"version":3,"file":"group.js","sourceRoot":"","sources":["../../src/scrapingPipeline/group.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AACtC,OAAO,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACpE,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAEvC,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,QAAoB,EACpB,YAAqB,EACrB,OAGI,EAAE,aAAa,EAAE,KAAK,EAAE;IAE5B,MAAM,OAAO,GAAwB,YAAY,CAAC,CAAC,CAAC,MAAM,cAAc,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IAEvF,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,CAC3B,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE;YAChC,IAAI,CAAC;gBACH,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;oBACvB,MAAM,GAAG,GAAG,UAAU,CAAC,iBAAiB,KAAK,EAAE,EAAE,GAAG,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC;oBAC9E,OAAO,GAAG,CAAC;gBACb,CAAC;gBAED,IAAI,cAAc,GAAG,KAAK,CAAC;gBAC3B,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,kBAAkB,CAAC,EAAE,CAAC;oBAChD,cAAc,GAAG,IAAI,CAAC;oBACtB,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC,CAAC;gBAChE,CAAC;gBAED,MAAM,IAAI,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAC/C,MAAM,GAAG,GAAG,UAAU,CAAC,IAAI,EAAE,GAAG,EAAE;oBAChC,YAAY,EAAE,KAAK;oBACnB,cAAc;oBACd,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS;iBAC7D,CAAC,CAAC;gBACH,OAAO,GAAG,CAAC;YACb,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;gBAC5C,MAAM,IAAI,KAAK,CAAC,yCAAyC,GAAG,GAAG,YAAY,EAAE,CAAC,CAAC;YACjF,CAAC;QACH,CAAC,CAAC,CACH,CAAC;QACF,OAAO,GAAG,CAAC;IACb,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,GAAG,CACD,6DACE,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,kBACzB,GAAG,YAAY,EAAE,CAClB,CAAC;QACF,MAAM,KAAK,CAAC;IACd,CAAC;YAAS,CAAC;QACT,IAAI,OAAO;YAAE,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;IACrC,CAAC;AACH,CAAC"}
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
import type { Root as HastRoot } from 'hast';
|
|
2
|
-
export declare function downloadFavicon(hast: HastRoot): Promise<string
|
|
2
|
+
export declare function downloadFavicon(hast: HastRoot): Promise<string>;
|
|
@@ -4,19 +4,20 @@ export async function downloadFavicon(hast) {
|
|
|
4
4
|
let src = '';
|
|
5
5
|
visit(hast, 'element', function (node) {
|
|
6
6
|
if (node.tagName === 'link' &&
|
|
7
|
-
|
|
8
|
-
|
|
7
|
+
Array.isArray(node.properties.rel) &&
|
|
8
|
+
node.properties.rel.includes('icon')) {
|
|
9
9
|
src = node.properties.href;
|
|
10
10
|
return EXIT;
|
|
11
11
|
}
|
|
12
12
|
});
|
|
13
|
-
if (!src)
|
|
14
|
-
return
|
|
13
|
+
if (!src) {
|
|
14
|
+
return '/favicon.svg';
|
|
15
|
+
}
|
|
15
16
|
const res = await downloadImage(src, process.cwd());
|
|
16
17
|
if (!res.success)
|
|
17
|
-
return
|
|
18
|
+
return '/favicon.svg';
|
|
18
19
|
if (!res.data)
|
|
19
|
-
return
|
|
20
|
+
return '/favicon.svg';
|
|
20
21
|
return res.data[1];
|
|
21
22
|
}
|
|
22
23
|
//# sourceMappingURL=icon.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"icon.js","sourceRoot":"","sources":["../../src/scrapingPipeline/icon.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAE/C,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,IAAc;IAClD,IAAI,GAAG,GAAW,EAAE,CAAC;IACrB,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,IAAI;QACnC,IACE,IAAI,CAAC,OAAO,KAAK,MAAM;YACvB,OAAO,IAAI,CAAC,UAAU,CAAC,GAAG,
|
|
1
|
+
{"version":3,"file":"icon.js","sourceRoot":"","sources":["../../src/scrapingPipeline/icon.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAE/C,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,IAAc;IAClD,IAAI,GAAG,GAAW,EAAE,CAAC;IACrB,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,IAAI;QACnC,IACE,IAAI,CAAC,OAAO,KAAK,MAAM;YACvB,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAClC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,EACpC,CAAC;YACD,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC,IAAc,CAAC;YACrC,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,cAAc,CAAC;IACxB,CAAC;IAED,MAAM,GAAG,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;IACpD,IAAI,CAAC,GAAG,CAAC,OAAO;QAAE,OAAO,cAAc,CAAC;IACxC,IAAI,CAAC,GAAG,CAAC,IAAI;QAAE,OAAO,cAAc,CAAC;IAErC,OAAO,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACrB,CAAC"}
|
|
@@ -64,13 +64,19 @@ export async function downloadLogos(url, browser) {
|
|
|
64
64
|
const filepaths = [];
|
|
65
65
|
if (browser) {
|
|
66
66
|
const htmls = [];
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
67
|
+
try {
|
|
68
|
+
const page = await browser.newPage();
|
|
69
|
+
await page.goto(url.toString(), {
|
|
70
|
+
waitUntil: 'networkidle2',
|
|
71
|
+
});
|
|
72
|
+
htmls.push(await page.content());
|
|
73
|
+
await page.click('.rm-ThemeToggle');
|
|
74
|
+
htmls.push(await page.content());
|
|
75
|
+
}
|
|
76
|
+
catch {
|
|
77
|
+
// do nothing, it just means there's no theme toggle
|
|
78
|
+
}
|
|
79
|
+
await browser.close();
|
|
74
80
|
await Promise.all(htmls.map(async (html) => {
|
|
75
81
|
return await findLogosFromHtml(html, findReadmeLogoNodes, filepaths);
|
|
76
82
|
}));
|
|
@@ -85,8 +91,6 @@ export async function downloadLogos(url, browser) {
|
|
|
85
91
|
log(`Failed to retrieve logo from HTML: ${errorMessage}`);
|
|
86
92
|
}
|
|
87
93
|
}
|
|
88
|
-
if (browser)
|
|
89
|
-
await browser.close();
|
|
90
94
|
const uniqueFilepaths = [...new Set(filepaths).values()];
|
|
91
95
|
return uniqueFilepaths.length === 1
|
|
92
96
|
? uniqueFilepaths[0]
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"logo.js","sourceRoot":"","sources":["../../src/scrapingPipeline/logo.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAE/C,OAAO,EAAE,SAAS,EAAE,MAAM,6BAA6B,CAAC;AACxD,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AACtC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAEvC,SAAS,mBAAmB,CAAC,IAAc;IACzC,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,IAAI;QACnC,IACE,IAAI,CAAC,OAAO,KAAK,KAAK;YACtB,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;YACxC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC;YAEjD,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACxB,CAAC,CAAC,CAAC;IACH,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC;AAChD,CAAC;AAED,SAAS,oBAAoB,CAAC,IAAc;IAC1C,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,IAAI;QACnC,IAAI,IAAI,CAAC,OAAO,KAAK,KAAK,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,KAAK,MAAM,EAAE,CAAC;YAC7D,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACtB,CAAC;IACH,CAAC,CAAC,CAAC;IACH,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC;AAChD,CAAC;AAED,SAAS,uBAAuB,CAAC,IAAc;IAC7C,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,IAAI;QACnC,IACE,IAAI,CAAC,OAAO,KAAK,KAAK;YACtB,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;YACxC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,eAAe,CAAC,EACnD,CAAC;YACD,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,OAAO;gBACtC,IAAI,OAAO,CAAC,OAAO,KAAK,KAAK;oBAAE,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACxD,CAAC,CAAC,CAAC;YACH,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC,CAAC,CAAC;IACH,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC;AAChD,CAAC;AAED,KAAK,UAAU,iBAAiB,CAC9B,IAAY,EACZ,UAA0D,EAC1D,SAAwB;IAExB,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;IAC9B,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;IAElC,IAAI,QAAQ,EAAE,CAAC;QACb,SAAS,CAAC,IAAI,CACZ,GAAG,CAAC,MAAM,OAAO,CAAC,GAAG,CACnB,QAAQ,CAAC,GAAG,CAA
C,KAAK,EAAE,IAAI,EAAE,EAAE;YAC1B,MAAM,GAAG,GAAG,MAAM,aAAa,CAC7B,IAAI,CAAC,UAAU,CAAC,GAAa,EAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,QAAQ,CAAC,CAC9B,CAAC;YAEF,IAAI,GAAG,CAAC,OAAO,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;gBAC5B,OAAO,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACrB,CAAC;iBAAM,CAAC;gBACN,OAAO,EAAE,CAAC;YACZ,CAAC;QACH,CAAC,CAAC,CACH,CAAC,CACH,CAAC;IACJ,CAAC;IAED,SAAS,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,KAAK,EAAE,EAAE;QACpC,IAAI,CAAC,QAAQ;YAAE,SAAS,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,GAAiB,EACjB,OAA4B;IAE5B,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IACnB,MAAM,SAAS,GAAkB,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"logo.js","sourceRoot":"","sources":["../../src/scrapingPipeline/logo.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAE/C,OAAO,EAAE,SAAS,EAAE,MAAM,6BAA6B,CAAC;AACxD,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AACtC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAEvC,SAAS,mBAAmB,CAAC,IAAc;IACzC,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,IAAI;QACnC,IACE,IAAI,CAAC,OAAO,KAAK,KAAK;YACtB,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;YACxC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC;YAEjD,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACxB,CAAC,CAAC,CAAC;IACH,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC;AAChD,CAAC;AAED,SAAS,oBAAoB,CAAC,IAAc;IAC1C,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,IAAI;QACnC,IAAI,IAAI,CAAC,OAAO,KAAK,KAAK,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,KAAK,MAAM,EAAE,CAAC;YAC7D,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACtB,CAAC;IACH,CAAC,CAAC,CAAC;IACH,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC;AAChD,CAAC;AAED,SAAS,uBAAuB,CAAC,IAAc;IAC7C,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,IAAI;QACnC,IACE,IAAI,CAAC,OAAO,KAAK,KAAK;YACtB,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;YACxC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,QAAQ,CAAC,eAAe,CAAC,EACnD,CAAC;YACD,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,OAAO;gBACtC,IAAI,OAAO,CAAC,OAAO,KAAK,KAAK;oBAAE,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACxD,CAAC,CAAC,CAAC;YACH,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC,CAAC,CAAC;IACH,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC;AAChD,CAAC;AAED,KAAK,UAAU,iBAAiB,CAC9B,IAAY,EACZ,UAA0D,EAC1D,SAAwB;IAExB,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;IAC9B,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;IAElC,IAAI,QAAQ,EAAE,CAAC;QACb,SAAS,CAAC,IAAI,CACZ,GAAG,CAAC,MAAM,OAAO,CAAC,GAAG,CACnB,QAAQ,CAAC,GAAG,CAA
C,KAAK,EAAE,IAAI,EAAE,EAAE;YAC1B,MAAM,GAAG,GAAG,MAAM,aAAa,CAC7B,IAAI,CAAC,UAAU,CAAC,GAAa,EAC7B,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,QAAQ,CAAC,CAC9B,CAAC;YAEF,IAAI,GAAG,CAAC,OAAO,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;gBAC5B,OAAO,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACrB,CAAC;iBAAM,CAAC;gBACN,OAAO,EAAE,CAAC;YACZ,CAAC;QACH,CAAC,CAAC,CACH,CAAC,CACH,CAAC;IACJ,CAAC;IAED,SAAS,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,KAAK,EAAE,EAAE;QACpC,IAAI,CAAC,QAAQ;YAAE,SAAS,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,GAAiB,EACjB,OAA4B;IAE5B,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IACnB,MAAM,SAAS,GAAkB,EAAE,CAAC;IAEpC,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,KAAK,GAAkB,EAAE,CAAC;QAEhC,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;YACrC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE;gBAC9B,SAAS,EAAE,cAAc;aAC1B,CAAC,CAAC;YAEH,KAAK,CAAC,IAAI,CAAC,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;YACjC,MAAM,IAAI,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;YACpC,KAAK,CAAC,IAAI,CAAC,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;QACnC,CAAC;QAAC,MAAM,CAAC;YACP,oDAAoD;QACtD,CAAC;QAED,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;QAEtB,MAAM,OAAO,CAAC,GAAG,CACf,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;YACvB,OAAO,MAAM,iBAAiB,CAAC,IAAI,EAAE,mBAAmB,EAAE,SAAS,CAAC,CAAC;QACvE,CAAC,CAAC,CACH,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,aAAa,CAAC,GAAG,CAAC,CAAC;YACtC,MAAM,iBAAiB,CACrB,IAAI,EACJ,SAAS,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,uBAAuB,EAC/E,SAAS,CACV,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;YAC5C,GAAG,CAAC,sCAAsC,YAAY,EAAE,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IAED,MAAM,eAAe,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;IAEzD,OAAO,eAAe,CAAC,MAAM,KAAK,CAAC;QACjC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC;QACpB,CAAC,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC;YAC1B,CAAC,CAAC;gBACE,KAAK,EAAE,eAAe,CAAC,CAAC,CAAW;gBACnC,IAAI,EAAE,eAAe,CAAC,CAAC,CAAW;aACnC;YACH,CAAC,CAAC,SAAS,CAAC;AAClB,CAAC"}
|
|
@@ -8,18 +8,25 @@ import { createCallout, createCard, createAccordion, createAccordionGroup, creat
|
|
|
8
8
|
import { rehypeToRemarkCustomComponents } from '../customComponents/plugin.js';
|
|
9
9
|
import { selectiveRehypeRemark } from '../customComponents/selective.js';
|
|
10
10
|
import { retrieveRootContent } from '../root/retrieve.js';
|
|
11
|
+
import { unifiedRemoveBreaks } from '../utils/breaks.js';
|
|
11
12
|
import { unifiedRemoveClassNames } from '../utils/className.js';
|
|
13
|
+
import { unifiedRemoveCopyButtons } from '../utils/copyButton.js';
|
|
12
14
|
import { detectFramework, framework } from '../utils/detectFramework.js';
|
|
15
|
+
import { remarkRemoveEmptyEmphases } from '../utils/emptyEmphasis.js';
|
|
13
16
|
import { unifiedRemoveEmptyParagraphs } from '../utils/emptyParagraphs.js';
|
|
14
17
|
import { getErrorMessage, logErrorResults } from '../utils/errors.js';
|
|
15
|
-
import {
|
|
16
|
-
import {
|
|
18
|
+
import { writePage } from '../utils/file.js';
|
|
19
|
+
import { remarkProperlyFormatEmphasis } from '../utils/formatEmphasis.js';
|
|
17
20
|
import { removeHastComments } from '../utils/hastComments.js';
|
|
21
|
+
import { remarkSpaceListsOut } from '../utils/lists.js';
|
|
18
22
|
import { log } from '../utils/log.js';
|
|
23
|
+
import { remarkRemoveBottomMetadata } from '../utils/metadata.js';
|
|
19
24
|
import { unifiedRemoveNestedRoots } from '../utils/nestedRoots.js';
|
|
20
25
|
import { unifiedRemovePositions } from '../utils/position.js';
|
|
21
26
|
import { removeLeadingSlash, removeTrailingSlash } from '../utils/strings.js';
|
|
27
|
+
import { remarkRemoveCodeBlocksInCells } from '../utils/tableCells.js';
|
|
22
28
|
import { getDescriptionFromRoot, getTitleFromHeading } from '../utils/title.js';
|
|
29
|
+
import { remarkRemoveUpdatedAt } from '../utils/updatedAt.js';
|
|
23
30
|
import { downloadImagesFromFile } from './images.js';
|
|
24
31
|
import { htmlToHast } from './root.js';
|
|
25
32
|
export async function scrapePage(html, url, opts = { externalLink: false }) {
|
|
@@ -37,24 +44,25 @@ export async function scrapePage(html, url, opts = { externalLink: false }) {
|
|
|
37
44
|
const urlStr = url.toString();
|
|
38
45
|
const content = retrieveRootContent(hast);
|
|
39
46
|
if (!content)
|
|
40
|
-
return { success: false, message: `${urlStr}: ${CONTENT_FAILURE_MSG}
|
|
47
|
+
return { success: false, message: `${urlStr}: ${CONTENT_FAILURE_MSG}`, data: [urlStr, ''] };
|
|
41
48
|
const contentAsRoot = {
|
|
42
49
|
type: 'root',
|
|
43
50
|
children: [content],
|
|
44
51
|
};
|
|
45
52
|
const mdastTree = unified()
|
|
53
|
+
.use(unifiedRemoveBreaks)
|
|
54
|
+
.use(unifiedRemoveCopyButtons)
|
|
46
55
|
.use(createCard)
|
|
47
56
|
.use(createAccordion)
|
|
48
57
|
.use(createFrame)
|
|
49
|
-
.use(createTabs)
|
|
50
58
|
.use(createCallout)
|
|
51
59
|
.use(createCardGroup)
|
|
52
60
|
.use(createAccordionGroup)
|
|
53
61
|
.use(createCodeGroup)
|
|
62
|
+
.use(createTabs)
|
|
54
63
|
.use(unifiedRemoveClassNames)
|
|
55
|
-
.use(unifiedRemovePositions)
|
|
56
64
|
.use(unifiedRemoveEmptyParagraphs)
|
|
57
|
-
.use(
|
|
65
|
+
.use(unifiedRemovePositions)
|
|
58
66
|
.use(selectiveRehypeRemark)
|
|
59
67
|
// Cleans up any nested components left untouched
|
|
60
68
|
// by `selectiveRehypeRemark`, and converts them to
|
|
@@ -62,6 +70,13 @@ export async function scrapePage(html, url, opts = { externalLink: false }) {
|
|
|
62
70
|
.use(rehypeToRemarkCustomComponents)
|
|
63
71
|
.use(convertHeaderLinksToText)
|
|
64
72
|
.use(unifiedRemoveNestedRoots)
|
|
73
|
+
.use(remarkSpaceListsOut)
|
|
74
|
+
.use(remarkRemoveBottomMetadata)
|
|
75
|
+
.use(remarkRemoveUpdatedAt)
|
|
76
|
+
.use(remarkRemoveEmptyEmphases)
|
|
77
|
+
.use(remarkProperlyFormatEmphasis)
|
|
78
|
+
.use(remarkRemoveCodeBlocksInCells)
|
|
79
|
+
// @ts-expect-error moving some of the pipeline around results in contentAsRoot being treated differently than its type which is Root Element
|
|
65
80
|
.runSync(contentAsRoot);
|
|
66
81
|
try {
|
|
67
82
|
const imageResults = await downloadImagesFromFile(mdastTree, url);
|
|
@@ -81,13 +96,14 @@ export async function scrapePage(html, url, opts = { externalLink: false }) {
|
|
|
81
96
|
// @ts-expect-error remarkStringify errors even if used for valid code from documentation examples
|
|
82
97
|
.use(remarkStringify)
|
|
83
98
|
.stringify(mdastTree);
|
|
99
|
+
const resultStr = String(result).replace(/\n{3,}/g, '\n\n');
|
|
84
100
|
if (opts.rootPath) {
|
|
85
101
|
url = new URL(opts.rootPath, url.origin);
|
|
86
102
|
}
|
|
87
103
|
else if (url.origin === removeTrailingSlash(url.toString())) {
|
|
88
104
|
url = new URL('home', new URL(url).origin);
|
|
89
105
|
}
|
|
90
|
-
writePage(url, opts.isOverviewPage ? 'Overview' : title, description,
|
|
106
|
+
writePage(url, opts.isOverviewPage ? 'Overview' : title, description, resultStr);
|
|
91
107
|
return {
|
|
92
108
|
success: true,
|
|
93
109
|
data: opts.rootPath
|
|
@@ -96,9 +112,12 @@ export async function scrapePage(html, url, opts = { externalLink: false }) {
|
|
|
96
112
|
};
|
|
97
113
|
}
|
|
98
114
|
catch (error) {
|
|
99
|
-
write('error.json', JSON.stringify(mdastTree, undefined, 2));
|
|
100
115
|
const errorMessage = getErrorMessage(error);
|
|
101
|
-
return {
|
|
116
|
+
return {
|
|
117
|
+
success: false,
|
|
118
|
+
message: `${urlStr}: ${MDAST_FAILURE_MSG}${errorMessage}`,
|
|
119
|
+
data: [urlStr, ''],
|
|
120
|
+
};
|
|
102
121
|
}
|
|
103
122
|
}
|
|
104
123
|
//# sourceMappingURL=page.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"page.js","sourceRoot":"","sources":["../../src/scrapingPipeline/page.ts"],"names":[],"mappings":"AAEA,OAAO,SAAS,MAAM,YAAY,CAAC;AACnC,OAAO,SAAS,MAAM,YAAY,CAAC;AACnC,OAAO,eAAe,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAElC,OAAO,EAAE,wBAAwB,EAAE,MAAM,uBAAuB,CAAC;AACjE,OAAO,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AACzE,OAAO,EACL,aAAa,EACb,UAAU,EACV,eAAe,EACf,oBAAoB,EACpB,WAAW,EACX,eAAe,EACf,UAAU,EACV,eAAe,GAChB,MAAM,+BAA+B,CAAC;AACvC,OAAO,EAAE,8BAA8B,EAAE,MAAM,+BAA+B,CAAC;AAC/E,OAAO,EAAE,qBAAqB,EAAE,MAAM,kCAAkC,CAAC;AACzE,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAE1D,OAAO,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC;AAChE,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,6BAA6B,CAAC;AACzE,OAAO,EAAE,4BAA4B,EAAE,MAAM,6BAA6B,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACtE,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"page.js","sourceRoot":"","sources":["../../src/scrapingPipeline/page.ts"],"names":[],"mappings":"AAEA,OAAO,SAAS,MAAM,YAAY,CAAC;AACnC,OAAO,SAAS,MAAM,YAAY,CAAC;AACnC,OAAO,eAAe,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAElC,OAAO,EAAE,wBAAwB,EAAE,MAAM,uBAAuB,CAAC;AACjE,OAAO,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AACzE,OAAO,EACL,aAAa,EACb,UAAU,EACV,eAAe,EACf,oBAAoB,EACpB,WAAW,EACX,eAAe,EACf,UAAU,EACV,eAAe,GAChB,MAAM,+BAA+B,CAAC;AACvC,OAAO,EAAE,8BAA8B,EAAE,MAAM,+BAA+B,CAAC;AAC/E,OAAO,EAAE,qBAAqB,EAAE,MAAM,kCAAkC,CAAC;AACzE,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAE1D,OAAO,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC;AAChE,OAAO,EAAE,wBAAwB,EAAE,MAAM,wBAAwB,CAAC;AAClE,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,6BAA6B,CAAC;AACzE,OAAO,EAAE,yBAAyB,EAAE,MAAM,2BAA2B,CAAC;AACtE,OAAO,EAAE,4BAA4B,EAAE,MAAM,6BAA6B,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACtE,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,4BAA4B,EAAE,MAAM,4BAA4B,CAAC;AAC1E,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AACxD,OAAO,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AACtC,OAAO,EAAE,0BAA0B,EAAE,MAAM,sBAAsB,CAAC;AAClE,OAAO,EAAE,wBAAwB,EAAE,MAAM,yBAAyB,CAAC;AACnE,OAAO,EAAE,sBAAsB,EAAE,MAAM,sBAAsB,CAAC;AAC9D,OAAO,EAAE,kBAAkB,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAC9E,OAAO,EAAE,6BAA6B,EAAE,MAAM,wBAAwB,CAAC;AACvE,OAAO,EAAE,sBAAsB,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAChF,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAEvC,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,IAAY,EACZ,GAAiB,EACjB,OAII,EAAE,YAAY,EAAE,KAAK,EAAE;IAE3B,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IAEnB,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;QACtB,MAAM,QAAQ,GAAG,IAAI,CAAC;QACtB,MAAM,eAAe,GAAG,GAAG,QAAQ,MAAM,CAAC;QAC1C,SAAS,CAAC,eAAe,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;QACvD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,QAAQ,CAAC,EAA
E,CAAC;IAC7D,CAAC;IAED,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;IAC9B,kBAAkB,CAAC,IAAI,CAAC,CAAC;IAEzB,IAAI,CAAC,SAAS,CAAC,MAAM;QAAE,eAAe,CAAC,IAAI,CAAC,CAAC;IAE7C,MAAM,MAAM,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAC;IAC9B,MAAM,OAAO,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAC;IAC1C,IAAI,CAAC,OAAO;QACV,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,MAAM,KAAK,mBAAmB,EAAE,EAAE,IAAI,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC;IAE9F,MAAM,aAAa,GAAa;QAC9B,IAAI,EAAE,MAAM;QACZ,QAAQ,EAAE,CAAC,OAAO,CAAC;KACpB,CAAC;IAEF,MAAM,SAAS,GAAc,OAAO,EAAE;SACnC,GAAG,CAAC,mBAAmB,CAAC;SACxB,GAAG,CAAC,wBAAwB,CAAC;SAC7B,GAAG,CAAC,UAAU,CAAC;SACf,GAAG,CAAC,eAAe,CAAC;SACpB,GAAG,CAAC,WAAW,CAAC;SAChB,GAAG,CAAC,aAAa,CAAC;SAClB,GAAG,CAAC,eAAe,CAAC;SACpB,GAAG,CAAC,oBAAoB,CAAC;SACzB,GAAG,CAAC,eAAe,CAAC;SACpB,GAAG,CAAC,UAAU,CAAC;SACf,GAAG,CAAC,uBAAuB,CAAC;SAC5B,GAAG,CAAC,4BAA4B,CAAC;SACjC,GAAG,CAAC,sBAAsB,CAAC;SAC3B,GAAG,CAAC,qBAAqB,CAAC;QAE3B,iDAAiD;QACjD,mDAAmD;QACnD,4BAA4B;SAC3B,GAAG,CAAC,8BAA8B,CAAC;SACnC,GAAG,CAAC,wBAAwB,CAAC;SAC7B,GAAG,CAAC,wBAAwB,CAAC;SAC7B,GAAG,CAAC,mBAAmB,CAAC;SACxB,GAAG,CAAC,0BAA0B,CAAC;SAC/B,GAAG,CAAC,qBAAqB,CAAC;SAC1B,GAAG,CAAC,yBAAyB,CAAC;SAC9B,GAAG,CAAC,4BAA4B,CAAC;SACjC,GAAG,CAAC,6BAA6B,CAAC;QACnC,6IAA6I;SAC5I,OAAO,CAAC,aAAa,CAAc,CAAC;IAEvC,IAAI,CAAC;QACH,MAAM,YAAY,GAAG,MAAM,sBAAsB,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;QAClE,eAAe,CAAC,wBAAwB,GAAG,CAAC,QAAQ,EAAE,EAAE,EAAE,YAAY,CAAC,CAAC;IAC1E,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,GAAG,CAAC,yDAAyD,GAAG,CAAC,QAAQ,EAAE,GAAG,YAAY,EAAE,CAAC,CAAC;QAC9F,MAAM,KAAK,CAAC;IACd,CAAC;IAED,MAAM,KAAK,GAAG,mBAAmB,CAAC,SAAS,CAAC,CAAC;IAC7C,MAAM,WAAW,GAAG,sBAAsB,CAAC,SAAS,CAAC,CAAC;IAEtD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,OAAO,EAAE;aACrB,GAAG,CAAC,SAAS,CAAC;aACd,GAAG,CAAC,SAAS,CAAC;YACf,kGAAkG;aACjG,GAAG,CAAC,eAAe,CAAC;aACpB,SAAS,CAAC,SAAS,CAAC,CAAC;QAExB,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;QAE5D,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,GAAG,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;QA
C3C,CAAC;aAAM,IAAI,GAAG,CAAC,MAAM,KAAK,mBAAmB,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC;YAC9D,GAAG,GAAG,IAAI,GAAG,CAAC,MAAM,EAAE,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC;QAC7C,CAAC;QAED,SAAS,CAAC,GAAG,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,EAAE,WAAW,EAAE,SAAS,CAAC,CAAC;QACjF,OAAO;YACL,OAAO,EAAE,IAAI;YACb,IAAI,EAAE,IAAI,CAAC,QAAQ;gBACjB,CAAC,CAAC,CAAC,kBAAkB,CAAC,mBAAmB,CAAC,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,IAAI,CAAC,QAAQ,CAAC;gBACpF,CAAC,CAAC,SAAS;SACd,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,YAAY,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QAC5C,OAAO;YACL,OAAO,EAAE,KAAK;YACd,OAAO,EAAE,GAAG,MAAM,KAAK,iBAAiB,GAAG,YAAY,EAAE;YACzD,IAAI,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC;SACnB,CAAC;IACJ,CAAC;AACH,CAAC"}
|
|
@@ -7,11 +7,13 @@ import { detectFramework, framework } from '../utils/detectFramework.js';
|
|
|
7
7
|
import { logErrorResults } from '../utils/errors.js';
|
|
8
8
|
import { startPuppeteer } from '../utils/network.js';
|
|
9
9
|
import { INDEX_NAMES, iterateThroughReservedNames } from '../utils/reservedNames.js';
|
|
10
|
-
import { removeTrailingSlash } from '../utils/strings.js';
|
|
10
|
+
import { removeTrailingSlash, removeLeadingSlash } from '../utils/strings.js';
|
|
11
|
+
import { downloadColors } from './color.js';
|
|
11
12
|
import { scrapePageGroup } from './group.js';
|
|
12
13
|
import { downloadFavicon } from './icon.js';
|
|
13
14
|
import { downloadLogos } from './logo.js';
|
|
14
15
|
import { htmlToHast } from './root.js';
|
|
16
|
+
import { downloadTitle } from './title.js';
|
|
15
17
|
export async function scrapeSite(html, url, opts = {}) {
|
|
16
18
|
let hast = opts.hast;
|
|
17
19
|
if (!hast)
|
|
@@ -96,21 +98,76 @@ export async function scrapeSite(html, url, opts = {}) {
|
|
|
96
98
|
pages: [navItem],
|
|
97
99
|
};
|
|
98
100
|
});
|
|
101
|
+
const allErrors = [
|
|
102
|
+
...externalResults.filter((result) => !result.success),
|
|
103
|
+
...internalResults.filter((result) => !result.success),
|
|
104
|
+
...rootResults.filter((result) => !result.success),
|
|
105
|
+
];
|
|
106
|
+
const allErroredPaths = allErrors
|
|
107
|
+
.map((result) => {
|
|
108
|
+
if (result.data) {
|
|
109
|
+
const url = new URL(result.data[0]);
|
|
110
|
+
const pathname = url.pathname;
|
|
111
|
+
const normalizedPathname = removeLeadingSlash(removeTrailingSlash(pathname));
|
|
112
|
+
return normalizedPathname;
|
|
113
|
+
}
|
|
114
|
+
else {
|
|
115
|
+
return '';
|
|
116
|
+
}
|
|
117
|
+
})
|
|
118
|
+
.filter(Boolean);
|
|
119
|
+
traverse(navItems).forEach(function (value) {
|
|
120
|
+
if (typeof value === 'string' && allErroredPaths.includes(value)) {
|
|
121
|
+
this.remove();
|
|
122
|
+
}
|
|
123
|
+
else if (Array.isArray(value)) {
|
|
124
|
+
this.update(value
|
|
125
|
+
.filter((item) => typeof item === 'string' && allErroredPaths.includes(item) ? undefined : item)
|
|
126
|
+
.filter(Boolean));
|
|
127
|
+
}
|
|
128
|
+
});
|
|
129
|
+
let count = 1;
|
|
130
|
+
while (count > 0) {
|
|
131
|
+
count = 0;
|
|
132
|
+
traverse(navItems).forEach(function (value) {
|
|
133
|
+
if (Array.isArray(value) && value.filter(Boolean).length === 0) {
|
|
134
|
+
count++;
|
|
135
|
+
if (this.parent) {
|
|
136
|
+
this.parent.remove();
|
|
137
|
+
}
|
|
138
|
+
else {
|
|
139
|
+
this.remove();
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
});
|
|
143
|
+
}
|
|
144
|
+
traverse(navItems).forEach(function (value) {
|
|
145
|
+
if (typeof value === 'string' &&
|
|
146
|
+
(value.startsWith('https://') || value.startsWith('http://'))) {
|
|
147
|
+
this.remove();
|
|
148
|
+
}
|
|
149
|
+
else if (Array.isArray(value) &&
|
|
150
|
+
value.find((val) => typeof val === 'string' && (val.startsWith('https://') || val.startsWith('http://')))) {
|
|
151
|
+
this.update(value.filter((val) => !(typeof val === 'string' &&
|
|
152
|
+
(val.startsWith('https://') || val.startsWith('http://')))));
|
|
153
|
+
}
|
|
154
|
+
});
|
|
99
155
|
logErrorResults('linking to external pages', externalResults);
|
|
100
156
|
logErrorResults('scraping your docs', [...internalResults, ...rootResults]);
|
|
101
|
-
const
|
|
157
|
+
const needsBrowserForLogos = framework.vendor === 'readme';
|
|
158
|
+
const browser = needsBrowserForLogos ? await startPuppeteer() : undefined;
|
|
102
159
|
const favicon = await downloadFavicon(hast);
|
|
160
|
+
const colors = await downloadColors(hast);
|
|
103
161
|
const logo = await downloadLogos(url, browser);
|
|
162
|
+
const name = await downloadTitle(hast);
|
|
104
163
|
return {
|
|
105
164
|
success: true,
|
|
106
165
|
data: {
|
|
107
166
|
$schema: 'https://mintlify.com/schema.json',
|
|
108
|
-
name
|
|
167
|
+
name,
|
|
109
168
|
logo,
|
|
110
|
-
colors
|
|
111
|
-
|
|
112
|
-
},
|
|
113
|
-
favicon: favicon ?? '',
|
|
169
|
+
colors,
|
|
170
|
+
favicon,
|
|
114
171
|
navigation: navItems,
|
|
115
172
|
tabs: opts.tabs,
|
|
116
173
|
},
|