@mintlify/scraping 3.0.140 → 3.0.142

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. package/bin/browser.js +1 -1
  2. package/bin/browser.js.map +1 -1
  3. package/bin/checks.d.ts +8 -0
  4. package/bin/checks.js +24 -0
  5. package/bin/checks.js.map +1 -0
  6. package/bin/cli.js +49 -45
  7. package/bin/cli.js.map +1 -1
  8. package/bin/scraping/detectFramework.d.ts +7 -14
  9. package/bin/scraping/detectFramework.js +8 -14
  10. package/bin/scraping/detectFramework.js.map +1 -1
  11. package/bin/scraping/downloadAllImages.d.ts +2 -1
  12. package/bin/scraping/downloadAllImages.js +1 -2
  13. package/bin/scraping/downloadAllImages.js.map +1 -1
  14. package/bin/scraping/downloadLogoImage.js +0 -1
  15. package/bin/scraping/downloadLogoImage.js.map +1 -1
  16. package/bin/scraping/replaceImagePaths.d.ts +1 -1
  17. package/bin/scraping/replaceImagePaths.js +0 -3
  18. package/bin/scraping/replaceImagePaths.js.map +1 -1
  19. package/bin/scraping/scrapePageCommands.d.ts +3 -3
  20. package/bin/scraping/scrapePageCommands.js +22 -27
  21. package/bin/scraping/scrapePageCommands.js.map +1 -1
  22. package/bin/scraping/scrapeSectionCommands.d.ts +5 -5
  23. package/bin/scraping/scrapeSectionCommands.js +27 -30
  24. package/bin/scraping/scrapeSectionCommands.js.map +1 -1
  25. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js +1 -1
  26. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomPage.js.map +1 -1
  27. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js +2 -2
  28. package/bin/scraping/site-scrapers/Intercom/scrapeIntercomSection.js.map +1 -1
  29. package/bin/scraping/site-scrapers/alternateGroupTitle.d.ts +3 -1
  30. package/bin/scraping/site-scrapers/alternateGroupTitle.js +1 -1
  31. package/bin/scraping/site-scrapers/alternateGroupTitle.js.map +1 -1
  32. package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.d.ts +5 -1
  33. package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js +1 -1
  34. package/bin/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.js.map +1 -1
  35. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.d.ts +3 -1
  36. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js +1 -4
  37. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursively.js.map +1 -1
  38. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.d.ts +3 -1
  39. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js +0 -3
  40. package/bin/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.js.map +1 -1
  41. package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js +3 -3
  42. package/bin/scraping/site-scrapers/openNestedDocusaurusMenus.js.map +1 -1
  43. package/bin/scraping/site-scrapers/openNestedGitbookMenus.js +2 -2
  44. package/bin/scraping/site-scrapers/openNestedGitbookMenus.js.map +1 -1
  45. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js +1 -1
  46. package/bin/scraping/site-scrapers/scrapeDocusaurusPage.js.map +1 -1
  47. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js +2 -2
  48. package/bin/scraping/site-scrapers/scrapeDocusaurusSection.js.map +1 -1
  49. package/bin/scraping/site-scrapers/scrapeGitBookPage.js +1 -1
  50. package/bin/scraping/site-scrapers/scrapeGitBookPage.js.map +1 -1
  51. package/bin/scraping/site-scrapers/scrapeGitBookSection.js +1 -1
  52. package/bin/scraping/site-scrapers/scrapeGitBookSection.js.map +1 -1
  53. package/bin/scraping/site-scrapers/scrapeReadMePage.js +1 -1
  54. package/bin/scraping/site-scrapers/scrapeReadMePage.js.map +1 -1
  55. package/bin/scraping/site-scrapers/scrapeReadMeSection.js +2 -2
  56. package/bin/scraping/site-scrapers/scrapeReadMeSection.js.map +1 -1
  57. package/bin/tsconfig.build.tsbuildinfo +1 -1
  58. package/bin/util.d.ts +0 -1
  59. package/bin/util.js +9 -26
  60. package/bin/util.js.map +1 -1
  61. package/package.json +7 -7
  62. package/src/browser.ts +1 -1
  63. package/src/checks.ts +32 -0
  64. package/src/cli.ts +48 -74
  65. package/src/scraping/detectFramework.ts +20 -15
  66. package/src/scraping/downloadAllImages.ts +7 -7
  67. package/src/scraping/downloadLogoImage.ts +0 -1
  68. package/src/scraping/replaceImagePaths.ts +1 -5
  69. package/src/scraping/scrapePageCommands.ts +32 -29
  70. package/src/scraping/scrapeSectionCommands.ts +38 -34
  71. package/src/scraping/site-scrapers/Intercom/scrapeIntercomPage.ts +1 -1
  72. package/src/scraping/site-scrapers/Intercom/scrapeIntercomSection.ts +2 -2
  73. package/src/scraping/site-scrapers/alternateGroupTitle.ts +5 -2
  74. package/src/scraping/site-scrapers/links-per-group/getDocusaurusLinksPerGroup.ts +7 -5
  75. package/src/scraping/site-scrapers/links-per-group/getLinksRecursively.ts +8 -6
  76. package/src/scraping/site-scrapers/links-per-group/getLinksRecursivelyGitBook.ts +7 -5
  77. package/src/scraping/site-scrapers/openNestedDocusaurusMenus.ts +3 -3
  78. package/src/scraping/site-scrapers/openNestedGitbookMenus.ts +3 -3
  79. package/src/scraping/site-scrapers/scrapeDocusaurusPage.ts +1 -1
  80. package/src/scraping/site-scrapers/scrapeDocusaurusSection.ts +2 -2
  81. package/src/scraping/site-scrapers/scrapeGitBookPage.ts +1 -1
  82. package/src/scraping/site-scrapers/scrapeGitBookSection.ts +3 -3
  83. package/src/scraping/site-scrapers/scrapeReadMePage.ts +1 -1
  84. package/src/scraping/site-scrapers/scrapeReadMeSection.ts +3 -3
  85. package/src/util.ts +10 -26
  86. package/tsconfig.json +0 -1
  87. package/bin/validation/isValidLink.d.ts +0 -1
  88. package/bin/validation/isValidLink.js +0 -11
  89. package/bin/validation/isValidLink.js.map +0 -1
  90. package/bin/validation/stopIfInvalidLink.d.ts +0 -1
  91. package/bin/validation/stopIfInvalidLink.js +0 -9
  92. package/bin/validation/stopIfInvalidLink.js.map +0 -1
  93. package/src/validation/isValidLink.ts +0 -9
  94. package/src/validation/stopIfInvalidLink.ts +0 -9
package/bin/util.js CHANGED
@@ -1,7 +1,6 @@
1
- import { mkdirSync, writeFileSync } from 'fs';
1
+ import { existsSync, mkdirSync, writeFileSync } from 'fs';
2
2
  import Ora from 'ora';
3
3
  import path from 'path';
4
- import stopIfInvalidLink from './validation/stopIfInvalidLink.js';
5
4
  export const MintConfig = (name, color, ctaName, ctaUrl, filename) => {
6
5
  return {
7
6
  name,
@@ -68,35 +67,19 @@ export const createPage = (title, description, markdown, overwrite = false, root
68
67
  const writePath = path.join(rootDir, addMdx(fileName || toFilename(title)));
69
68
  // Create the folders needed if they're missing
70
69
  mkdirSync(rootDir, { recursive: true });
71
- // Write the page to memory
72
- if (overwrite) {
70
+ if (!overwrite && existsSync(writePath)) {
71
+ console.log(`❌ Skipping existing file ${writePath}`);
72
+ return;
73
+ }
74
+ // Write the page to disk
75
+ try {
73
76
  writeFileSync(writePath, Page(title, description, markdown));
74
77
  console.log('✏️ - ' + writePath);
75
78
  }
76
- else {
77
- try {
78
- writeFileSync(writePath, Page(title, description, markdown), {
79
- flag: 'wx',
80
- });
81
- console.log('✏️ - ' + writePath);
82
- }
83
- catch (e) {
84
- // We do a try-catch instead of an if-statement to avoid a race condition
85
- // of the file being created after we started writing.
86
- if (e?.code === 'EEXIST') {
87
- console.log(`❌ Skipping existing file ${writePath}`);
88
- }
89
- else {
90
- console.error(e);
91
- }
92
- }
79
+ catch (e) {
80
+ console.error(e);
93
81
  }
94
82
  };
95
- export function getHrefFromArgs(argv) {
96
- const href = argv.url;
97
- stopIfInvalidLink(href);
98
- return href;
99
- }
100
83
  export const buildLogger = (startText = '') => {
101
84
  const logger = Ora().start(startText);
102
85
  return logger;
package/bin/util.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAC9C,OAAO,GAAuB,MAAM,KAAK,CAAC;AAC1C,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,OAAO,iBAAiB,MAAM,mCAAmC,CAAC;AAElE,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,IAAY,EACZ,KAAa,EACb,OAAe,EACf,MAAc,EACd,QAAgB,EAChB,EAAE;IACF,OAAO;QACL,IAAI;QACJ,IAAI,EAAE,EAAE;QACR,OAAO,EAAE,EAAE;QACX,MAAM,EAAE;YACN,OAAO,EAAE,KAAK;SACf;QACD,WAAW,EAAE,EAAE;QACf,eAAe,EAAE;YACf,IAAI,EAAE,OAAO;YACb,GAAG,EAAE,MAAM;SACZ;QACD,OAAO,EAAE,EAAE;QACX,UAAU,EAAE;YACV;gBACE,KAAK,EAAE,MAAM;gBACb,KAAK,EAAE,CAAC,QAAQ,CAAC;aAClB;SACF;QACD,6DAA6D;KAC9D,CAAC;AACJ,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,IAAI,GAAG,CAAC,KAAa,EAAE,WAAoB,EAAE,QAAiB,EAAE,EAAE;IAC7E,uDAAuD;IACvD,yDAAyD;IACzD,wBAAwB;IACxB,MAAM,eAAe,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IAC9C,MAAM,aAAa,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IAC5C,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,KAAK,GAAG,GAAG,GAAG,KAAK,CAAC;IACtB,CAAC;IACD,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC;IACtB,CAAC;IAED,MAAM,mBAAmB,GAAG,WAAW,CAAC,CAAC,CAAC,mBAAmB,WAAW,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACjF,OAAO,eAAe,KAAK,GAAG,mBAAmB,YAAY,QAAQ,EAAE,CAAC;AAC1E,CAAC,CAAC;AAEF,MAAM,UAAU,SAAS,CAAC,GAAW;IACnC,+CAA+C;IAC/C,gDAAgD;IAChD,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;AAC7B,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,IAAuB;IACzD,4BAA4B;IAC5B,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AACrE,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,KAAa,EAAE,EAAE;IAC1C,sDAAsD;IACtD,uDAAuD;IACvD,OAAO,KAAK;SACT,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,IAAI,EAAE;SACN,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;SAClB,WAAW,EAAE,CAAC;AACnB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,MAAM,GAAG,CAAC,QAAgB,EAAE,EAAE;IACzC,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC9B,OAAO,QAAQ,CAAC;IAClB,CAAC;IACD,OAAO,QAAQ,GAAG,MAAM,CAAC;AAC3B,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,KAAa,EACb,WAAoB,EACpB,QAAiB,EACjB,SAAS,GAAG,KAAK,EACjB,OAAO
,GAAG,EAAE,EACZ,QAAiB,EACjB,EAAE;IACF,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,QAAQ,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE5E,+CAA+C;IAC/C,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAExC,2BAA2B;IAC3B,IAAI,SAAS,EAAE,CAAC;QACd,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC;QAC7D,OAAO,CAAC,GAAG,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;IACnC,CAAC;SAAM,CAAC;QACN,IAAI,CAAC;YACH,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,CAAC,EAAE;gBAC3D,IAAI,EAAE,IAAI;aACX,CAAC,CAAC;YACH,OAAO,CAAC,GAAG,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;QACnC,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,yEAAyE;YACzE,sDAAsD;YACtD,IAAK,CAAsB,EAAE,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC/C,OAAO,CAAC,GAAG,CAAC,4BAA4B,SAAS,EAAE,CAAC,CAAC;YACvD,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YACnB,CAAC;QACH,CAAC;IACH,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,UAAU,eAAe,CAAC,IAAS;IACvC,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC;IACtB,iBAAiB,CAAC,IAAI,CAAC,CAAC;IACxB,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,SAAS,GAAG,EAAE,EAAW,EAAE;IACrD,MAAM,MAAM,GAAG,GAAG,EAAE,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IACtC,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,QAAgB,EAAE,EAAE;IACnD,MAAM,GAAG,GAAG,QAAQ,CAAC,SAAS,CAAC,QAAQ,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC/E,IAAI,QAAQ,KAAK,GAAG;QAAE,OAAO,SAAS,CAAC;IACvC,OAAO,GAAG,CAAC,WAAW,EAAE,CAAC;AAC3B,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,wBAAwB,GAAG,CAAC,QAAgB,EAAE,EAAE;IAC3D,MAAM,SAAS,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAC7C,OAAO,SAAS,IAAI,CAAC,SAAS,KAAK,KAAK,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,KAAK,KAAK,CAAC,CAAC;AACzF,CAAC,CAAC"}
1
+ {"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAC1D,OAAO,GAAuB,MAAM,KAAK,CAAC;AAC1C,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,IAAY,EACZ,KAAa,EACb,OAAe,EACf,MAAc,EACd,QAAgB,EAChB,EAAE;IACF,OAAO;QACL,IAAI;QACJ,IAAI,EAAE,EAAE;QACR,OAAO,EAAE,EAAE;QACX,MAAM,EAAE;YACN,OAAO,EAAE,KAAK;SACf;QACD,WAAW,EAAE,EAAE;QACf,eAAe,EAAE;YACf,IAAI,EAAE,OAAO;YACb,GAAG,EAAE,MAAM;SACZ;QACD,OAAO,EAAE,EAAE;QACX,UAAU,EAAE;YACV;gBACE,KAAK,EAAE,MAAM;gBACb,KAAK,EAAE,CAAC,QAAQ,CAAC;aAClB;SACF;QACD,6DAA6D;KAC9D,CAAC;AACJ,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,IAAI,GAAG,CAAC,KAAa,EAAE,WAAoB,EAAE,QAAiB,EAAE,EAAE;IAC7E,uDAAuD;IACvD,yDAAyD;IACzD,wBAAwB;IACxB,MAAM,eAAe,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IAC9C,MAAM,aAAa,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IAC5C,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,KAAK,GAAG,GAAG,GAAG,KAAK,CAAC;IACtB,CAAC;IACD,IAAI,CAAC,aAAa,EAAE,CAAC;QACnB,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC;IACtB,CAAC;IAED,MAAM,mBAAmB,GAAG,WAAW,CAAC,CAAC,CAAC,mBAAmB,WAAW,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACjF,OAAO,eAAe,KAAK,GAAG,mBAAmB,YAAY,QAAQ,EAAE,CAAC;AAC1E,CAAC,CAAC;AAEF,MAAM,UAAU,SAAS,CAAC,GAAW;IACnC,+CAA+C;IAC/C,gDAAgD;IAChD,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;AAC7B,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,IAAuB;IACzD,4BAA4B;IAC5B,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AACrE,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,KAAa,EAAE,EAAE;IAC1C,sDAAsD;IACtD,uDAAuD;IACvD,OAAO,KAAK;SACT,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,IAAI,EAAE;SACN,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;SAClB,WAAW,EAAE,CAAC;AACnB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,MAAM,GAAG,CAAC,QAAgB,EAAE,EAAE;IACzC,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC9B,OAAO,QAAQ,CAAC;IAClB,CAAC;IACD,OAAO,QAAQ,GAAG,MAAM,CAAC;AAC3B,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,KAAa,EACb,WAAoB,EACpB,QAAiB,EACjB,SAAS,GAAG,KAAK,EACjB,OAAO,GAAG,EAAE,EACZ,QAAiB,EAC
jB,EAAE;IACF,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,QAAQ,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE5E,+CAA+C;IAC/C,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAExC,IAAI,CAAC,SAAS,IAAI,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QACxC,OAAO,CAAC,GAAG,CAAC,4BAA4B,SAAS,EAAE,CAAC,CAAC;QACrD,OAAO;IACT,CAAC;IAED,yBAAyB;IACzB,IAAI,CAAC;QACH,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC;QAC7D,OAAO,CAAC,GAAG,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;IACnC,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACnB,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,SAAS,GAAG,EAAE,EAAW,EAAE;IACrD,MAAM,MAAM,GAAG,GAAG,EAAE,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IACtC,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,QAAgB,EAAE,EAAE;IACnD,MAAM,GAAG,GAAG,QAAQ,CAAC,SAAS,CAAC,QAAQ,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC/E,IAAI,QAAQ,KAAK,GAAG;QAAE,OAAO,SAAS,CAAC;IACvC,OAAO,GAAG,CAAC,WAAW,EAAE,CAAC;AAC3B,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,wBAAwB,GAAG,CAAC,QAAgB,EAAE,EAAE;IAC3D,MAAM,SAAS,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAC7C,OAAO,SAAS,IAAI,CAAC,SAAS,KAAK,KAAK,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,KAAK,KAAK,CAAC,CAAC;AACzF,CAAC,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mintlify/scraping",
3
- "version": "3.0.140",
3
+ "version": "3.0.142",
4
4
  "description": "Scrape documentation frameworks to Mintlify docs",
5
5
  "engines": {
6
6
  "node": ">=18.0.0"
@@ -39,22 +39,22 @@
39
39
  },
40
40
  "dependencies": {
41
41
  "@apidevtools/swagger-parser": "^10.1.0",
42
- "@mintlify/common": "1.0.124",
42
+ "@mintlify/common": "1.0.125",
43
43
  "axios": "^1.2.2",
44
- "cheerio": "^0.22.0",
44
+ "cheerio": "^1.0.0-rc.12",
45
45
  "fs-extra": "^11.1.1",
46
46
  "node-html-markdown": "^1.3.0",
47
47
  "ora": "^6.1.2",
48
- "puppeteer": "^19.4.0",
48
+ "puppeteer": "^22.14.0",
49
49
  "yargs": "^17.6.0"
50
50
  },
51
51
  "devDependencies": {
52
52
  "@mintlify/eslint-config": "1.0.5",
53
53
  "@mintlify/eslint-config-typescript": "1.0.10",
54
- "@mintlify/models": "0.0.108",
54
+ "@mintlify/models": "0.0.109",
55
55
  "@mintlify/prettier-config": "1.0.4",
56
56
  "@mintlify/ts-config": "2.0.2",
57
- "@mintlify/validation": "0.1.170",
57
+ "@mintlify/validation": "0.1.171",
58
58
  "@trivago/prettier-plugin-sort-imports": "^4.2.1",
59
59
  "@tsconfig/recommended": "1.x",
60
60
  "@types/cheerio": "^0.22.31",
@@ -71,5 +71,5 @@
71
71
  "typescript": "^5.5.3",
72
72
  "vitest": "^2.0.4"
73
73
  },
74
- "gitHead": "214320b7644d2fbc47d85d1f618f3f65e162f6a4"
74
+ "gitHead": "74d38f577b4f02953bfc0f9b0d493371cbb256d2"
75
75
  }
package/src/browser.ts CHANGED
@@ -19,6 +19,6 @@ export async function getHtmlWithPuppeteer(href: string) {
19
19
  waitUntil: 'networkidle2',
20
20
  });
21
21
  const html = await page.content();
22
- browser.close();
22
+ void browser.close();
23
23
  return html;
24
24
  }
package/src/checks.ts ADDED
@@ -0,0 +1,32 @@
1
+ import { Framework } from './scraping/detectFramework.js';
2
+
3
+ // This checks the link is written correctly, not that the page exists.
4
+ export function checkUrl({ url }: { url: string }) {
5
+ try {
6
+ new URL(url);
7
+ } catch {
8
+ throw Error(`Invalid link: ${url}\nMake sure the link starts with http:// or https://`);
9
+ }
10
+ return true;
11
+ }
12
+
13
+ export function checkVersion({
14
+ tool,
15
+ docusaurusVersion,
16
+ }: {
17
+ tool: Framework | undefined;
18
+ docusaurusVersion: string | undefined;
19
+ }) {
20
+ if (tool === 'docusaurus') {
21
+ if (docusaurusVersion === undefined) {
22
+ throw Error(
23
+ 'When using Docusaurus, you must specify the version (1,2,3) using the --docusaurusVersion flag'
24
+ );
25
+ }
26
+ } else {
27
+ if (docusaurusVersion !== undefined) {
28
+ throw Error('The --docusaurusVersion flag is only applicable when using Docusaurus.');
29
+ }
30
+ }
31
+ return true;
32
+ }
package/src/cli.ts CHANGED
@@ -1,95 +1,71 @@
1
1
  #!/usr/bin/env node
2
-
3
- /* eslint-disable @typescript-eslint/no-empty-function */
4
2
  import yargs from 'yargs';
5
3
  import { hideBin } from 'yargs/helpers';
6
4
 
5
+ import { checkUrl, checkVersion } from './checks.js';
7
6
  import { generateOpenApiPages } from './openapi/generateOpenApiPages.js';
8
- import { scrapePageAutomatically, scrapePageWrapper } from './scraping/scrapePageCommands.js';
9
- import {
10
- scrapeSectionAutomatically,
11
- scrapeSectionAxiosWrapper,
12
- scrapeGitbookSectionCommand,
13
- } from './scraping/scrapeSectionCommands.js';
14
- import { scrapeIntercomPage } from './scraping/site-scrapers/Intercom/scrapeIntercomPage.js';
15
- import { scrapeIntercomSection } from './scraping/site-scrapers/Intercom/scrapeIntercomSection.js';
16
- import { scrapeGitBookPage } from './scraping/site-scrapers/scrapeGitBookPage.js';
17
- import { scrapeReadMePage } from './scraping/site-scrapers/scrapeReadMePage.js';
18
- import { scrapeReadMeSection } from './scraping/site-scrapers/scrapeReadMeSection.js';
7
+ import { FrameworkHint, frameworks } from './scraping/detectFramework.js';
8
+ import { scrapePageAutomatically } from './scraping/scrapePageCommands.js';
9
+ import { scrapeSectionAutomatically } from './scraping/scrapeSectionCommands.js';
19
10
 
20
11
  await yargs(hideBin(process.argv))
21
12
  .command(
22
- 'page [url]',
13
+ 'page <url>',
23
14
  'Scrapes a page',
24
- () => {},
25
- async (argv) => {
26
- await scrapePageAutomatically(argv);
27
- }
28
- )
29
- .command(
30
- 'gitbook-page [url]',
31
- 'Scrapes a GitBook page',
32
- () => {},
33
- async (argv) => {
34
- await scrapePageWrapper(argv, scrapeGitBookPage);
35
- }
36
- )
37
- .command(
38
- 'readme-page [url]',
39
- 'Scrapes a ReadMe page',
40
- () => {},
41
- async (argv) => {
42
- await scrapePageWrapper(argv, scrapeReadMePage);
43
- }
44
- )
45
- .command(
46
- 'intercom-page [url]',
47
- 'Scrapes a Intercom page',
48
- () => {},
49
- async (argv) => {
50
- await scrapePageWrapper(argv, scrapeIntercomPage);
15
+ (yargs) =>
16
+ yargs
17
+ .positional('url', { type: 'string', demandOption: true })
18
+ .check(checkUrl)
19
+ .option('overwrite', { alias: 'O', type: 'boolean', default: false })
20
+ .option('tool', { alias: 't', choices: frameworks })
21
+ .option('docusaurusVersion', {
22
+ alias: 'd',
23
+ type: 'string',
24
+ choices: ['1', '2', '3'] as const,
25
+ })
26
+ .check(checkVersion),
27
+ async ({ url, overwrite, tool, docusaurusVersion }) => {
28
+ const frameworkHint: FrameworkHint = {
29
+ framework: tool,
30
+ version: docusaurusVersion ?? '3',
31
+ };
32
+
33
+ await scrapePageAutomatically(url, overwrite, frameworkHint);
51
34
  }
52
35
  )
53
36
  .command(
54
- 'section [url]',
37
+ 'section <url>',
55
38
  'Scrapes the docs in the section',
56
- () => {},
57
- async (argv) => {
58
- await scrapeSectionAutomatically(argv);
59
- }
60
- )
61
- .command(
62
- 'gitbook-section [url]',
63
- 'Scrapes the Gitbook section',
64
- () => {},
65
- async (argv) => {
66
- await scrapeGitbookSectionCommand(argv);
67
- }
68
- )
69
- .command(
70
- 'readme-section [url]',
71
- 'Scrapes the ReadMe section',
72
- () => {},
73
- async (argv) => {
74
- await scrapeSectionAxiosWrapper(argv, scrapeReadMeSection);
75
- }
76
- )
77
- .command(
78
- 'intercom-section [url]',
79
- 'Scrapes the Intercom section',
80
- () => {},
81
- async (argv) => {
82
- await scrapeSectionAxiosWrapper(argv, scrapeIntercomSection);
39
+ (yargs) =>
40
+ yargs
41
+ .positional('url', { type: 'string', demandOption: true })
42
+ .check(checkUrl)
43
+ .option('overwrite', { alias: 'O', type: 'boolean', default: false })
44
+ .option('tool', { alias: 't', choices: frameworks })
45
+ .option('docusaurusVersion', {
46
+ alias: 'd',
47
+ type: 'string',
48
+ choices: ['1', '2', '3'] as const,
49
+ })
50
+ .check(checkVersion),
51
+ async ({ url, overwrite, tool, docusaurusVersion }) => {
52
+ const frameworkHint: FrameworkHint = {
53
+ framework: tool,
54
+ version: docusaurusVersion ?? '3',
55
+ };
56
+
57
+ await scrapeSectionAutomatically(url, overwrite, frameworkHint);
83
58
  }
84
59
  )
85
60
  .command(
86
61
  'openapi-file <openapiFilename>',
87
62
  'Creates MDX files from an OpenAPI spec',
88
- (yargs) => {
89
- return yargs
63
+ (yargs) =>
64
+ yargs
90
65
  .positional('openapiFilename', {
91
66
  describe: 'The filename of the OpenAPI spec',
92
67
  type: 'string',
68
+ demandOption: true,
93
69
  })
94
70
  .option('writeFiles', {
95
71
  describe: 'Whether or not to write the frontmatter files',
@@ -101,9 +77,7 @@ await yargs(hideBin(process.argv))
101
77
  describe: 'The folder in which to write any created frontmatter files',
102
78
  type: 'string',
103
79
  alias: 'o',
104
- })
105
- .demandOption('openapiFilename');
106
- },
80
+ }),
107
81
  async (argv) => {
108
82
  try {
109
83
  const { nav } = await generateOpenApiPages(
package/src/scraping/detectFramework.ts CHANGED
@@ -1,14 +1,19 @@
1
- import cheerio from 'cheerio';
1
+ import * as cheerio from 'cheerio';
2
2
 
3
- export enum Frameworks {
4
- DOCUSAURUS = 'DOCUSAURUS',
5
- GITBOOK = 'GITBOOK',
6
- README = 'README',
7
- INTERCOM = 'INTERCOM',
8
- }
3
+ export const frameworks = ['docusaurus', 'gitbook', 'readme', 'intercom'] as const;
4
+ export type Framework = (typeof frameworks)[number];
5
+
6
+ export type FrameworkHint =
7
+ | {
8
+ framework: 'docusaurus';
9
+ version: '1' | '2' | '3';
10
+ }
11
+ | {
12
+ framework: 'gitbook' | 'readme' | 'intercom' | undefined;
13
+ };
9
14
 
10
- export function detectFramework(html) {
11
- const $: cheerio.Root = cheerio.load(html);
15
+ export function detectFramework(html: string): FrameworkHint {
16
+ const $ = cheerio.load(html);
12
17
  const docusaurusMeta = $('meta[name="generator"]');
13
18
 
14
19
  if (
@@ -19,31 +24,31 @@ export function detectFramework(html) {
19
24
  ) {
20
25
  const metaAttrString = docusaurusMeta.attr('content') as string;
21
26
  if (metaAttrString.includes('v3')) {
22
- return { framework: Frameworks.DOCUSAURUS, version: '3' };
27
+ return { framework: 'docusaurus', version: '3' };
23
28
  }
24
29
  if (metaAttrString.includes('v2')) {
25
- return { framework: Frameworks.DOCUSAURUS, version: '2' };
30
+ return { framework: 'docusaurus', version: '2' };
26
31
  } else if (metaAttrString.includes('v1')) {
27
32
  console.warn(
28
33
  'WARNING: We detected Docusaurus version 1 but we only support scraping versions 2 and 3.'
29
34
  );
30
- return { framework: Frameworks.DOCUSAURUS, version: '1' };
35
+ return { framework: 'docusaurus', version: '1' };
31
36
  }
32
37
  }
33
38
 
34
39
  const isGitBook = $('.gitbook-root').length > 0;
35
40
  if (isGitBook) {
36
- return { framework: Frameworks.GITBOOK };
41
+ return { framework: 'gitbook' };
37
42
  }
38
43
 
39
44
  const isReadMe = $('meta[name="readme-deploy"]').length > 0;
40
45
  if (isReadMe) {
41
- return { framework: Frameworks.README };
46
+ return { framework: 'readme' };
42
47
  }
43
48
 
44
49
  const isIntercom = $("meta[name='intercom:trackingEvent']").length > 0;
45
50
  if (isIntercom) {
46
- return { framework: Frameworks.INTERCOM };
51
+ return { framework: 'intercom' };
47
52
  }
48
53
 
49
54
  return { framework: undefined };
package/src/scraping/downloadAllImages.ts CHANGED
@@ -1,3 +1,4 @@
1
+ import { Cheerio, CheerioAPI, Element } from 'cheerio';
1
2
  import path from 'path';
2
3
 
3
4
  import downloadImage, {
@@ -6,14 +7,13 @@ import downloadImage, {
6
7
  removeMetadataFromImageSrc,
7
8
  } from '../downloadImage.js';
8
9
 
9
- // To Do: Use CheerioElement instead of any when we bump the cheerio version
10
10
  export default async function downloadAllImages(
11
- $: any,
12
- content: any,
11
+ $: CheerioAPI,
12
+ content: Cheerio<Element>,
13
13
  origin: string,
14
14
  baseDir: string,
15
15
  overwrite: boolean,
16
- modifyFileName?: any
16
+ modifyFileName?: (fileName: string) => string
17
17
  ) {
18
18
  if (!baseDir) {
19
19
  console.debug('Skipping image downloading');
@@ -23,17 +23,17 @@ export default async function downloadAllImages(
23
23
  // We remove duplicates because some frameworks duplicate img tags
24
24
  // to show the image larger when clicked on.
25
25
  const imageSrcs = [
26
- ...new Set<string>(
26
+ ...new Set(
27
27
  content
28
28
  .find('img[src]')
29
- .map((i, image) => $(image).attr('src'))
29
+ .map((_, image) => $(image).attr('src'))
30
30
  .toArray()
31
31
  ),
32
32
  ];
33
33
 
34
34
  // Wait for all images to download before continuing
35
35
  const origToNewArray = await Promise.all(
36
- imageSrcs.map(async (imageSrc: string) => {
36
+ imageSrcs.map(async (imageSrc) => {
37
37
  if (!imageSrc || !isValidImageSrc(imageSrc)) {
38
38
  return {};
39
39
  }
package/src/scraping/downloadLogoImage.ts CHANGED
@@ -7,7 +7,6 @@ import downloadImage, {
7
7
  } from '../downloadImage.js';
8
8
  import { getFileExtension } from '../util.js';
9
9
 
10
- // To Do: Use CheerioElement instead of any when we bump the cheerio version
11
10
  export default async function downloadLogoImage(
12
11
  imageSrc: string | undefined,
13
12
  imageBaseDir: string,
package/src/scraping/replaceImagePaths.ts CHANGED
@@ -1,12 +1,8 @@
1
1
  export default function replaceImagePaths(
2
- origToWritePath: object,
2
+ origToWritePath: Record<string, string>,
3
3
  cliDir: string,
4
4
  markdown: string
5
5
  ) {
6
- if (origToWritePath == null) {
7
- return markdown;
8
- }
9
-
10
6
  // Change image paths to use the downloaded locations
11
7
  for (const [origHref, writePath] of Object.entries(origToWritePath)) {
12
8
  // Use relative paths within the folder we are in
package/src/scraping/scrapePageCommands.ts CHANGED
@@ -1,65 +1,68 @@
1
1
  import axios from 'axios';
2
- import { ArgumentsCamelCase } from 'yargs';
3
2
 
4
3
  import { getHtmlWithPuppeteer } from '../browser.js';
5
- import { getHrefFromArgs } from '../util.js';
6
- import { detectFramework, Frameworks } from './detectFramework.js';
4
+ import { detectFramework, Framework, FrameworkHint, frameworks } from './detectFramework.js';
7
5
  import { scrapePage, ScrapePageFn } from './scrapePage.js';
8
6
  import { scrapeIntercomPage } from './site-scrapers/Intercom/scrapeIntercomPage.js';
9
7
  import { scrapeDocusaurusPage } from './site-scrapers/scrapeDocusaurusPage.js';
10
8
  import { scrapeGitBookPage } from './site-scrapers/scrapeGitBookPage.js';
11
9
  import { scrapeReadMePage } from './site-scrapers/scrapeReadMePage.js';
12
10
 
13
- function validateFramework(framework) {
11
+ function validateFramework(framework: Framework | undefined) {
14
12
  if (!framework) {
15
- console.log('Could not detect the framework automatically. Please use one of:');
16
- console.log('scrape-page-docusaurus');
17
- console.log('scrape-page-gitbook');
18
- console.log('scrape-page-readme');
19
- console.log('scrape-page-intercom');
13
+ console.log(
14
+ `Could not detect the framework automatically. Please use the -t flag to specify one of: ${frameworks.join(
15
+ ', '
16
+ )}`
17
+ );
20
18
  return process.exit(1);
21
19
  }
22
20
  }
23
21
 
24
22
  export async function scrapePageWrapper(
25
- argv: ArgumentsCamelCase,
23
+ url: string,
24
+ overwrite: boolean,
26
25
  scrapeFunc: ScrapePageFn,
27
26
  options?: { version?: string; puppeteer?: boolean }
28
27
  ) {
29
- const href = getHrefFromArgs(argv);
30
28
  let html: string;
31
29
  if (options?.puppeteer) {
32
- html = await getHtmlWithPuppeteer(href);
30
+ html = await getHtmlWithPuppeteer(url);
33
31
  } else {
34
- const res = await axios.get(href);
32
+ const res = await axios.get(url);
35
33
  html = res.data;
36
34
  }
37
- await scrapePage(scrapeFunc, href, html, !!argv.overwrite, options?.version);
35
+ await scrapePage(scrapeFunc, url, html, overwrite, options?.version);
38
36
  process.exit(0);
39
37
  }
40
38
 
41
- export async function scrapePageAutomatically(argv: any) {
42
- const href = getHrefFromArgs(argv);
43
- const res = await axios.get(href);
39
+ export async function scrapePageAutomatically(
40
+ url: string,
41
+ overwrite: boolean,
42
+ frameworkHint: FrameworkHint
43
+ ) {
44
+ const res = await axios.get(url);
44
45
  const html = res.data;
45
- const { framework, version } = detectFramework(html);
46
+ frameworkHint = frameworkHint.framework ? frameworkHint : detectFramework(html);
46
47
 
47
- validateFramework(framework);
48
+ validateFramework(frameworkHint.framework);
48
49
 
49
- console.log('Detected framework: ' + framework);
50
+ console.log('Detected framework: ' + frameworkHint.framework);
50
51
 
51
- switch (framework) {
52
- case Frameworks.DOCUSAURUS:
53
- await scrapePageWrapper(argv, scrapeDocusaurusPage, { version });
52
+ switch (frameworkHint.framework) {
53
+ case 'docusaurus':
54
+ await scrapePageWrapper(url, overwrite, scrapeDocusaurusPage, {
55
+ version: frameworkHint.version,
56
+ });
54
57
  break;
55
- case Frameworks.GITBOOK:
56
- await scrapePageWrapper(argv, scrapeGitBookPage, { puppeteer: true });
58
+ case 'gitbook':
59
+ await scrapePageWrapper(url, overwrite, scrapeGitBookPage, { puppeteer: true });
57
60
  break;
58
- case Frameworks.README:
59
- await scrapePageWrapper(argv, scrapeReadMePage);
61
+ case 'readme':
62
+ await scrapePageWrapper(url, overwrite, scrapeReadMePage);
60
63
  break;
61
- case Frameworks.INTERCOM:
62
- await scrapePageWrapper(argv, scrapeIntercomPage);
64
+ case 'intercom':
65
+ await scrapePageWrapper(url, overwrite, scrapeIntercomPage);
63
66
  break;
64
67
  }
65
68
  }