@cosmocoder/mcp-web-docs 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240) hide show
  1. package/LICENSE +22 -0
  2. package/README.md +368 -0
  3. package/build/__mocks__/embeddings.d.ts +17 -0
  4. package/build/__mocks__/embeddings.js +66 -0
  5. package/build/__mocks__/embeddings.js.map +1 -0
  6. package/build/config.d.ts +44 -0
  7. package/build/config.js +158 -0
  8. package/build/config.js.map +1 -0
  9. package/build/config.test.d.ts +1 -0
  10. package/build/config.test.js +165 -0
  11. package/build/config.test.js.map +1 -0
  12. package/build/crawler/auth.d.ts +128 -0
  13. package/build/crawler/auth.js +546 -0
  14. package/build/crawler/auth.js.map +1 -0
  15. package/build/crawler/auth.test.d.ts +1 -0
  16. package/build/crawler/auth.test.js +174 -0
  17. package/build/crawler/auth.test.js.map +1 -0
  18. package/build/crawler/base.d.ts +24 -0
  19. package/build/crawler/base.js +149 -0
  20. package/build/crawler/base.js.map +1 -0
  21. package/build/crawler/base.test.d.ts +1 -0
  22. package/build/crawler/base.test.js +234 -0
  23. package/build/crawler/base.test.js.map +1 -0
  24. package/build/crawler/browser-config.d.ts +2 -0
  25. package/build/crawler/browser-config.js +29 -0
  26. package/build/crawler/browser-config.js.map +1 -0
  27. package/build/crawler/browser-config.test.d.ts +1 -0
  28. package/build/crawler/browser-config.test.js +56 -0
  29. package/build/crawler/browser-config.test.js.map +1 -0
  30. package/build/crawler/cheerio.d.ts +11 -0
  31. package/build/crawler/cheerio.js +134 -0
  32. package/build/crawler/cheerio.js.map +1 -0
  33. package/build/crawler/chromium.d.ts +21 -0
  34. package/build/crawler/chromium.js +596 -0
  35. package/build/crawler/chromium.js.map +1 -0
  36. package/build/crawler/content-extractor-types.d.ts +25 -0
  37. package/build/crawler/content-extractor-types.js +2 -0
  38. package/build/crawler/content-extractor-types.js.map +1 -0
  39. package/build/crawler/content-extractors.d.ts +9 -0
  40. package/build/crawler/content-extractors.js +9 -0
  41. package/build/crawler/content-extractors.js.map +1 -0
  42. package/build/crawler/content-utils.d.ts +2 -0
  43. package/build/crawler/content-utils.js +22 -0
  44. package/build/crawler/content-utils.js.map +1 -0
  45. package/build/crawler/content-utils.test.d.ts +1 -0
  46. package/build/crawler/content-utils.test.js +99 -0
  47. package/build/crawler/content-utils.test.js.map +1 -0
  48. package/build/crawler/crawlee-crawler.d.ts +63 -0
  49. package/build/crawler/crawlee-crawler.js +342 -0
  50. package/build/crawler/crawlee-crawler.js.map +1 -0
  51. package/build/crawler/crawlee-crawler.test.d.ts +1 -0
  52. package/build/crawler/crawlee-crawler.test.js +280 -0
  53. package/build/crawler/crawlee-crawler.test.js.map +1 -0
  54. package/build/crawler/default-extractor.d.ts +4 -0
  55. package/build/crawler/default-extractor.js +26 -0
  56. package/build/crawler/default-extractor.js.map +1 -0
  57. package/build/crawler/default-extractor.test.d.ts +1 -0
  58. package/build/crawler/default-extractor.test.js +200 -0
  59. package/build/crawler/default-extractor.test.js.map +1 -0
  60. package/build/crawler/default.d.ts +11 -0
  61. package/build/crawler/default.js +138 -0
  62. package/build/crawler/default.js.map +1 -0
  63. package/build/crawler/docs-crawler.d.ts +26 -0
  64. package/build/crawler/docs-crawler.js +97 -0
  65. package/build/crawler/docs-crawler.js.map +1 -0
  66. package/build/crawler/docs-crawler.test.d.ts +1 -0
  67. package/build/crawler/docs-crawler.test.js +185 -0
  68. package/build/crawler/docs-crawler.test.js.map +1 -0
  69. package/build/crawler/factory.d.ts +6 -0
  70. package/build/crawler/factory.js +83 -0
  71. package/build/crawler/factory.js.map +1 -0
  72. package/build/crawler/github-pages-extractor.d.ts +4 -0
  73. package/build/crawler/github-pages-extractor.js +33 -0
  74. package/build/crawler/github-pages-extractor.js.map +1 -0
  75. package/build/crawler/github-pages-extractor.test.d.ts +1 -0
  76. package/build/crawler/github-pages-extractor.test.js +184 -0
  77. package/build/crawler/github-pages-extractor.test.js.map +1 -0
  78. package/build/crawler/github.d.ts +20 -0
  79. package/build/crawler/github.js +181 -0
  80. package/build/crawler/github.js.map +1 -0
  81. package/build/crawler/github.test.d.ts +1 -0
  82. package/build/crawler/github.test.js +326 -0
  83. package/build/crawler/github.test.js.map +1 -0
  84. package/build/crawler/puppeteer.d.ts +16 -0
  85. package/build/crawler/puppeteer.js +191 -0
  86. package/build/crawler/puppeteer.js.map +1 -0
  87. package/build/crawler/queue-manager.d.ts +43 -0
  88. package/build/crawler/queue-manager.js +169 -0
  89. package/build/crawler/queue-manager.js.map +1 -0
  90. package/build/crawler/queue-manager.test.d.ts +1 -0
  91. package/build/crawler/queue-manager.test.js +509 -0
  92. package/build/crawler/queue-manager.test.js.map +1 -0
  93. package/build/crawler/site-rules.d.ts +11 -0
  94. package/build/crawler/site-rules.js +104 -0
  95. package/build/crawler/site-rules.js.map +1 -0
  96. package/build/crawler/site-rules.test.d.ts +1 -0
  97. package/build/crawler/site-rules.test.js +139 -0
  98. package/build/crawler/site-rules.test.js.map +1 -0
  99. package/build/crawler/storybook-extractor.d.ts +34 -0
  100. package/build/crawler/storybook-extractor.js +767 -0
  101. package/build/crawler/storybook-extractor.js.map +1 -0
  102. package/build/crawler/storybook-extractor.test.d.ts +1 -0
  103. package/build/crawler/storybook-extractor.test.js +491 -0
  104. package/build/crawler/storybook-extractor.test.js.map +1 -0
  105. package/build/embeddings/fastembed.d.ts +25 -0
  106. package/build/embeddings/fastembed.js +188 -0
  107. package/build/embeddings/fastembed.js.map +1 -0
  108. package/build/embeddings/fastembed.test.d.ts +1 -0
  109. package/build/embeddings/fastembed.test.js +307 -0
  110. package/build/embeddings/fastembed.test.js.map +1 -0
  111. package/build/embeddings/openai.d.ts +8 -0
  112. package/build/embeddings/openai.js +56 -0
  113. package/build/embeddings/openai.js.map +1 -0
  114. package/build/embeddings/types.d.ts +4 -0
  115. package/build/embeddings/types.js +2 -0
  116. package/build/embeddings/types.js.map +1 -0
  117. package/build/index.d.ts +2 -0
  118. package/build/index.js +1007 -0
  119. package/build/index.js.map +1 -0
  120. package/build/index.test.d.ts +1 -0
  121. package/build/index.test.js +364 -0
  122. package/build/index.test.js.map +1 -0
  123. package/build/indexing/queue-manager.d.ts +36 -0
  124. package/build/indexing/queue-manager.js +86 -0
  125. package/build/indexing/queue-manager.js.map +1 -0
  126. package/build/indexing/queue-manager.test.d.ts +1 -0
  127. package/build/indexing/queue-manager.test.js +257 -0
  128. package/build/indexing/queue-manager.test.js.map +1 -0
  129. package/build/indexing/status.d.ts +39 -0
  130. package/build/indexing/status.js +207 -0
  131. package/build/indexing/status.js.map +1 -0
  132. package/build/indexing/status.test.d.ts +1 -0
  133. package/build/indexing/status.test.js +246 -0
  134. package/build/indexing/status.test.js.map +1 -0
  135. package/build/processor/content.d.ts +16 -0
  136. package/build/processor/content.js +286 -0
  137. package/build/processor/content.js.map +1 -0
  138. package/build/processor/content.test.d.ts +1 -0
  139. package/build/processor/content.test.js +369 -0
  140. package/build/processor/content.test.js.map +1 -0
  141. package/build/processor/markdown.d.ts +11 -0
  142. package/build/processor/markdown.js +256 -0
  143. package/build/processor/markdown.js.map +1 -0
  144. package/build/processor/markdown.test.d.ts +1 -0
  145. package/build/processor/markdown.test.js +312 -0
  146. package/build/processor/markdown.test.js.map +1 -0
  147. package/build/processor/metadata-parser.d.ts +37 -0
  148. package/build/processor/metadata-parser.js +245 -0
  149. package/build/processor/metadata-parser.js.map +1 -0
  150. package/build/processor/metadata-parser.test.d.ts +1 -0
  151. package/build/processor/metadata-parser.test.js +357 -0
  152. package/build/processor/metadata-parser.test.js.map +1 -0
  153. package/build/processor/processor.d.ts +8 -0
  154. package/build/processor/processor.js +190 -0
  155. package/build/processor/processor.js.map +1 -0
  156. package/build/processor/processor.test.d.ts +1 -0
  157. package/build/processor/processor.test.js +357 -0
  158. package/build/processor/processor.test.js.map +1 -0
  159. package/build/rag/cache.d.ts +10 -0
  160. package/build/rag/cache.js +10 -0
  161. package/build/rag/cache.js.map +1 -0
  162. package/build/rag/code-generator.d.ts +11 -0
  163. package/build/rag/code-generator.js +30 -0
  164. package/build/rag/code-generator.js.map +1 -0
  165. package/build/rag/context-assembler.d.ts +23 -0
  166. package/build/rag/context-assembler.js +113 -0
  167. package/build/rag/context-assembler.js.map +1 -0
  168. package/build/rag/docs-search.d.ts +55 -0
  169. package/build/rag/docs-search.js +380 -0
  170. package/build/rag/docs-search.js.map +1 -0
  171. package/build/rag/pipeline.d.ts +26 -0
  172. package/build/rag/pipeline.js +91 -0
  173. package/build/rag/pipeline.js.map +1 -0
  174. package/build/rag/query-processor.d.ts +14 -0
  175. package/build/rag/query-processor.js +57 -0
  176. package/build/rag/query-processor.js.map +1 -0
  177. package/build/rag/reranker.d.ts +55 -0
  178. package/build/rag/reranker.js +210 -0
  179. package/build/rag/reranker.js.map +1 -0
  180. package/build/rag/response-generator.d.ts +20 -0
  181. package/build/rag/response-generator.js +101 -0
  182. package/build/rag/response-generator.js.map +1 -0
  183. package/build/rag/retriever.d.ts +19 -0
  184. package/build/rag/retriever.js +111 -0
  185. package/build/rag/retriever.js.map +1 -0
  186. package/build/rag/validator.d.ts +22 -0
  187. package/build/rag/validator.js +128 -0
  188. package/build/rag/validator.js.map +1 -0
  189. package/build/rag/version-manager.d.ts +23 -0
  190. package/build/rag/version-manager.js +98 -0
  191. package/build/rag/version-manager.js.map +1 -0
  192. package/build/setupTests.d.ts +4 -0
  193. package/build/setupTests.js +50 -0
  194. package/build/setupTests.js.map +1 -0
  195. package/build/storage/storage.d.ts +38 -0
  196. package/build/storage/storage.js +700 -0
  197. package/build/storage/storage.js.map +1 -0
  198. package/build/storage/storage.test.d.ts +1 -0
  199. package/build/storage/storage.test.js +338 -0
  200. package/build/storage/storage.test.js.map +1 -0
  201. package/build/types/rag.d.ts +27 -0
  202. package/build/types/rag.js +2 -0
  203. package/build/types/rag.js.map +1 -0
  204. package/build/types.d.ts +120 -0
  205. package/build/types.js +2 -0
  206. package/build/types.js.map +1 -0
  207. package/build/util/content-utils.d.ts +31 -0
  208. package/build/util/content-utils.js +120 -0
  209. package/build/util/content-utils.js.map +1 -0
  210. package/build/util/content.d.ts +1 -0
  211. package/build/util/content.js +16 -0
  212. package/build/util/content.js.map +1 -0
  213. package/build/util/docs.d.ts +1 -0
  214. package/build/util/docs.js +26 -0
  215. package/build/util/docs.js.map +1 -0
  216. package/build/util/docs.test.d.ts +1 -0
  217. package/build/util/docs.test.js +49 -0
  218. package/build/util/docs.test.js.map +1 -0
  219. package/build/util/favicon.d.ts +6 -0
  220. package/build/util/favicon.js +88 -0
  221. package/build/util/favicon.js.map +1 -0
  222. package/build/util/favicon.test.d.ts +1 -0
  223. package/build/util/favicon.test.js +140 -0
  224. package/build/util/favicon.test.js.map +1 -0
  225. package/build/util/logger.d.ts +17 -0
  226. package/build/util/logger.js +72 -0
  227. package/build/util/logger.js.map +1 -0
  228. package/build/util/logger.test.d.ts +1 -0
  229. package/build/util/logger.test.js +46 -0
  230. package/build/util/logger.test.js.map +1 -0
  231. package/build/util/security.d.ts +312 -0
  232. package/build/util/security.js +719 -0
  233. package/build/util/security.js.map +1 -0
  234. package/build/util/security.test.d.ts +1 -0
  235. package/build/util/security.test.js +524 -0
  236. package/build/util/security.test.js.map +1 -0
  237. package/build/util/site-detector.d.ts +22 -0
  238. package/build/util/site-detector.js +42 -0
  239. package/build/util/site-detector.js.map +1 -0
  240. package/package.json +112 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"site-rules.js","sourceRoot":"","sources":["../../src/crawler/site-rules.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAU5D,MAAM,CAAC,MAAM,SAAS,GAAwB;IAC5C;QACE,IAAI,EAAE,WAAW;QACjB,SAAS,EAAE,iBAAiB,CAAC,SAAS;QACtC,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE;YACrB,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;gBACxB,OAAO,CAAC,CAAC,CACP,QAAQ,CAAC,aAAa,CAAC,kDAAkD,CAAC;oBAC1E,QAAQ,CAAC,aAAa,CAAC,gCAAgC,CAAC;oBACxD,QAAQ,CAAC,OAAO,EAAE,QAAQ,CAAC,aAAa,CAAC;oBACzC,QAAQ,CAAC,OAAO,EAAE,QAAQ,CAAC,cAAc,CAAC;oBACzC,MAA4D,CAAC,wBAAwB,CACvF,CAAC;YACJ,CAAC,CAAC,CAAC;QACL,CAAC;QACD,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,EAAE,EAAE;YAC3B,MAAM,OAAO,CAAC,GAAG,CAAC;gBAChB,IAAI,CAAC,gBAAgB,CAAC,aAAa,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,0CAA0C,CAAC,CAAC;gBAC1H,IAAI;qBACD,eAAe,CAAC,6DAA6D,EAAE;oBAC9E,OAAO,EAAE,IAAI;iBACd,CAAC;qBACD,KAAK,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,yCAAyC,CAAC,CAAC;aACrE,CAAC,CAAC;YAEH,+BAA+B;YAC/B,MAAM,IAAI,CAAC,eAAe,CAAC,oBAAoB,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC,CAAC;YAE/G,mCAAmC;YACnC,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;gBACvB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,kCAAkC,CAAC,CAAC,CAAC;gBAC1F,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,EAAE,CAAE,MAA4B,CAAC,KAAK,EAAE,CAAC,CAAC;YACrE,CAAC,CAAC,CAAC;YAEH,qCAAqC;YACrC,MAAM,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;YAE/B,+CAA+C;YAC/C,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;gBACvB,MAAM,aAAa,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,yBAAyB,CAAC,CAAC,CAAC;gBACvF,aAAa,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,EAAE,CAAE,MAA4B,CAAC,KAAK,EAAE,CAAC,CAAC;YAC3E,CAAC,CAAC,CAAC;YAEH,0DAA0D;YAC1D,MAAM,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;YAEhC,8DAA8D;YAC9D,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,IAAI,EAAE;gBAC7B,0BAA0B;gBAC1B,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAsB,CAAC;gBACrE,MAAM,UAAU,GAAG,MAAM,EAAE,eAAe,IAAI,QAAQ,CAAC;gBAEvD,uCAAuC;gBACvC,MAAM,eAAe,GAAG,UAAU,CAAC,aAAa,CAAC,mCAAmC,CAAC,CAAC;gBACtF,IAAI,eAAe,EAAE,CAAC;oBACpB,eAAe,CAAC,QAAQ,CAAC,CAAC,EAAE,eAAe,CAAC,YAAY,CAAC,CAAC;oBAC1D,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC;oBACzD,eAAe,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBACjC,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,uDAAuD;YACvD,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;gBACvB,MAAM,eAAe,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CACrF,GAAG,CAAC,WAAW,EAAE,WAAW,EAAE,CAAC,QAAQ,CAAC,WAAW,CAAC,CACrD,CAAC;gBACF,eAAe,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,mBAAmB;YAChF,CAAC,CAAC,CAAC;YAEH,MAAM,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;YAE/B,qDAAqD;YACrD,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,IAAI,EAAE;gBAC7B,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAsB,CAAC;gBACrE,MAAM,UAAU,GAAG,MAAM,EAAE,eAAe,IAAI,QAAQ,CAAC;gBAEvD,qCAAqC;gBACrC,MAAM,UAAU,GAAG,UAAU,CAAC,gBAAgB,CAC5C,sDAAsD,GAAG,mDAAmD,CAC7G,CAAC;gBACF,UAAU,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE,CAAE,GAAyB,CAAC,KAAK,EAAE,CAAC,CAAC;gBAEhE,+DAA+D;gBAC/D,MAAM,YAAY,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAC3E,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,WAAW,EAAE,QAAQ,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,WAAW,EAAE,QAAQ,CAAC,MAAM,CAAC,CAChF,CAAC;gBACF,KAAK,MAAM,GAAG,IAAI,YAAY,EAAE,CAAC;oBAC/B,GAAG,CAAC,KAAK,EAAE,CAAC;oBACZ,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC;gBAC3D,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,MAAM,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;YAE/B,8CAA8C;YAC9C,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;gBACzC,OAAO,QAAQ,CAAC,gBAAgB,CAAC,0EAA0E,CAAC,CAAC,MAAM,CAAC;YACtH,CAAC,CAAC,CAAC;YACH,GAAG,CAAC,KAAK,CAAC,SAAS,SAAS,gCAAgC,CAAC,CAAC;QAChE,CAAC;QACD,aAAa,EAAE;YACb,iBAAiB;YACjB,0BAA0B;YAC1B,2BAA2B;YAC3B,8BAA8B;YAC9B,2BAA2B;YAC3B,kBAAkB;SACnB;KACF;IACD;QACE,IAAI,EAAE,QAAQ;QACd,SAAS,EAAE,iBAAiB,CAAC,MAAM;QACnC,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE;YACrB,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;gBACxB,OAAO,CACL,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,QAAQ,CAAC,aAAa,CAAC,4CAA4C,CAAC,KAAK,IAAI,CAChI,CAAC;YACJ,CAAC,CAAC,CAAC;QACL,CAAC;KACF;IACD;QACE,IAAI,EAAE,SAAS;QACf,SAAS,EAAE,iBAAiB,CAAC,OAAO;QACpC,MAAM,EAAE,KAAK,IAAI,EAAE,CAAC,IAAI;KACzB;CACF,CAAC"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,139 @@
1
+ import { siteRules } from './site-rules.js';
2
+ describe('Site Rules', () => {
3
+ const createMockPage = (evaluateResult = false) => {
4
+ return {
5
+ evaluate: vi.fn().mockResolvedValue(evaluateResult),
6
+ waitForLoadState: vi.fn().mockResolvedValue(undefined),
7
+ waitForSelector: vi.fn().mockResolvedValue(undefined),
8
+ waitForTimeout: vi.fn().mockResolvedValue(undefined),
9
+ };
10
+ };
11
+ const mockLog = {
12
+ info: vi.fn(),
13
+ debug: vi.fn(),
14
+ warning: vi.fn(),
15
+ error: vi.fn(),
16
+ };
17
+ describe('storybook rule', () => {
18
+ const storybookRule = siteRules.find((r) => r.type === 'storybook');
19
+ it('should detect storybook by #storybook-root element', async () => {
20
+ const page = createMockPage(true);
21
+ const result = await storybookRule.detect(page);
22
+ expect(result).toBe(true);
23
+ expect(page.evaluate).toHaveBeenCalled();
24
+ });
25
+ it('should not detect when no storybook elements found', async () => {
26
+ const page = createMockPage(false);
27
+ const result = await storybookRule.detect(page);
28
+ expect(result).toBe(false);
29
+ });
30
+ it('should have prepare function for storybook', () => {
31
+ expect(storybookRule.prepare).toBeDefined();
32
+ });
33
+ it('should have link selectors for storybook', () => {
34
+ expect(storybookRule.linkSelectors).toBeDefined();
35
+ expect(storybookRule.linkSelectors).toContain('.sidebar-item a');
36
+ expect(storybookRule.linkSelectors).toContain('[data-nodetype="story"] a');
37
+ });
38
+ it('should expand sidebar sections in prepare', async () => {
39
+ const page = {
40
+ evaluate: vi.fn().mockResolvedValue(5),
41
+ waitForLoadState: vi.fn().mockResolvedValue(undefined),
42
+ waitForSelector: vi.fn().mockResolvedValue(undefined),
43
+ waitForTimeout: vi.fn().mockResolvedValue(undefined),
44
+ };
45
+ await storybookRule.prepare?.(page, mockLog);
46
+ expect(page.waitForLoadState).toHaveBeenCalledWith('networkidle', { timeout: 5000 });
47
+ expect(page.evaluate).toHaveBeenCalled();
48
+ });
49
+ it('should handle timeout errors gracefully in prepare', async () => {
50
+ const page = {
51
+ evaluate: vi.fn().mockResolvedValue(0),
52
+ waitForLoadState: vi.fn().mockRejectedValue(new Error('Timeout')),
53
+ waitForSelector: vi.fn().mockRejectedValue(new Error('Timeout')),
54
+ waitForTimeout: vi.fn().mockResolvedValue(undefined),
55
+ };
56
+ // Should not throw
57
+ await expect(storybookRule.prepare?.(page, mockLog)).resolves.toBeUndefined();
58
+ });
59
+ it('should use StorybookExtractor', () => {
60
+ expect(storybookRule.extractor.constructor.name).toBe('StorybookExtractor');
61
+ });
62
+ });
63
+ describe('github rule', () => {
64
+ const githubRule = siteRules.find((r) => r.type === 'github');
65
+ it('should detect GitHub Pages sites', async () => {
66
+ const page = createMockPage(true);
67
+ const result = await githubRule.detect(page);
68
+ expect(result).toBe(true);
69
+ });
70
+ it('should not detect non-GitHub sites', async () => {
71
+ const page = createMockPage(false);
72
+ const result = await githubRule.detect(page);
73
+ expect(result).toBe(false);
74
+ });
75
+ it('should use GitHubPagesExtractor', () => {
76
+ expect(githubRule.extractor.constructor.name).toBe('GitHubPagesExtractor');
77
+ });
78
+ it('should not have prepare function', () => {
79
+ expect(githubRule.prepare).toBeUndefined();
80
+ });
81
+ it('should not have link selectors', () => {
82
+ expect(githubRule.linkSelectors).toBeUndefined();
83
+ });
84
+ });
85
+ describe('default rule', () => {
86
+ const defaultRule = siteRules.find((r) => r.type === 'default');
87
+ it('should always detect (fallback)', async () => {
88
+ const page = createMockPage(false);
89
+ const result = await defaultRule.detect(page);
90
+ expect(result).toBe(true);
91
+ });
92
+ it('should use DefaultExtractor', () => {
93
+ expect(defaultRule.extractor.constructor.name).toBe('DefaultExtractor');
94
+ });
95
+ it('should not have prepare function', () => {
96
+ expect(defaultRule.prepare).toBeUndefined();
97
+ });
98
+ it('should not have link selectors', () => {
99
+ expect(defaultRule.linkSelectors).toBeUndefined();
100
+ });
101
+ });
102
+ describe('rule ordering', () => {
103
+ it('should have storybook rule before default', () => {
104
+ const storybookIndex = siteRules.findIndex((r) => r.type === 'storybook');
105
+ const defaultIndex = siteRules.findIndex((r) => r.type === 'default');
106
+ expect(storybookIndex).toBeLessThan(defaultIndex);
107
+ });
108
+ it('should have github rule before default', () => {
109
+ const githubIndex = siteRules.findIndex((r) => r.type === 'github');
110
+ const defaultIndex = siteRules.findIndex((r) => r.type === 'default');
111
+ expect(githubIndex).toBeLessThan(defaultIndex);
112
+ });
113
+ it('should have default rule last', () => {
114
+ const defaultIndex = siteRules.findIndex((r) => r.type === 'default');
115
+ expect(defaultIndex).toBe(siteRules.length - 1);
116
+ });
117
+ });
118
+ describe('all rules', () => {
119
+ it('should have extractors for all rules', () => {
120
+ for (const rule of siteRules) {
121
+ expect(rule.extractor).toBeDefined();
122
+ expect(rule.extractor.extractContent).toBeDefined();
123
+ }
124
+ });
125
+ it('should have detect functions for all rules', () => {
126
+ for (const rule of siteRules) {
127
+ expect(rule.detect).toBeDefined();
128
+ expect(typeof rule.detect).toBe('function');
129
+ }
130
+ });
131
+ it('should have type strings for all rules', () => {
132
+ for (const rule of siteRules) {
133
+ expect(rule.type).toBeDefined();
134
+ expect(typeof rule.type).toBe('string');
135
+ }
136
+ });
137
+ });
138
+ });
139
+ //# sourceMappingURL=site-rules.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"site-rules.test.js","sourceRoot":"","sources":["../../src/crawler/site-rules.test.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAE5C,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;IAC1B,MAAM,cAAc,GAAG,CAAC,iBAA0B,KAAK,EAAQ,EAAE;QAC/D,OAAO;YACL,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,cAAc,CAAC;YACnD,gBAAgB,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,SAAS,CAAC;YACtD,eAAe,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,SAAS,CAAC;YACrD,cAAc,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,SAAS,CAAC;SAClC,CAAC;IACvB,CAAC,CAAC;IAEF,MAAM,OAAO,GAAQ;QACnB,IAAI,EAAE,EAAE,CAAC,EAAE,EAAE;QACb,KAAK,EAAE,EAAE,CAAC,EAAE,EAAE;QACd,OAAO,EAAE,EAAE,CAAC,EAAE,EAAE;QAChB,KAAK,EAAE,EAAE,CAAC,EAAE,EAAE;KACG,CAAC;IAEpB,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;QAC9B,MAAM,aAAa,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,WAAW,CAAE,CAAC;QAErE,EAAE,CAAC,oDAAoD,EAAE,KAAK,IAAI,EAAE;YAClE,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;YAElC,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC1B,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,gBAAgB,EAAE,CAAC;QAC3C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,oDAAoD,EAAE,KAAK,IAAI,EAAE;YAClE,MAAM,IAAI,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;YAEnC,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAEhD,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC7B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4CAA4C,EAAE,GAAG,EAAE;YACpD,MAAM,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;QAC9C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;YAClD,MAAM,CAAC,aAAa,CAAC,aAAa,CAAC,CAAC,WAAW,EAAE,CAAC;YAClD,MAAM,CAAC,aAAa,CAAC,aAAa,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;YACjE,MAAM,CAAC,aAAa,CAAC,aAAa,CAAC,CAAC,SAAS,CAAC,2BAA2B,CAAC,CAAC;QAC7E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;YACzD,MAAM,IAAI,GAAG;gBACX,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,CAAC,CAAC;gBACtC,gBAAgB,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,SAAS,CAAC;gBACtD,eAAe,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,SAAS,CAAC;gBACrD,cAAc,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,SAAS,CAAC;aAClC,CAAC;YAErB,MAAM,aAAa,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YAE7C,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,oBAAoB,CAAC,aAAa,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;YACrF,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,gBAAgB,EAAE,CAAC;QAC3C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,oDAAoD,EAAE,KAAK,IAAI,EAAE;YAClE,MAAM,IAAI,GAAG;gBACX,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,CAAC,CAAC;gBACtC,gBAAgB,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC;gBACjE,eAAe,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC;gBAChE,cAAc,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,SAAS,CAAC;aAClC,CAAC;YAErB,mBAAmB;YACnB,MAAM,MAAM,CAAC,aAAa,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,aAAa,EAAE,CAAC;QAChF,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;YACvC,MAAM,CAAC,aAAa,CAAC,SAAS,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;QAC9E,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,aAAa,EAAE,GAAG,EAAE;QAC3B,MAAM,UAAU,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAE,CAAC;QAE/D,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;YAChD,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;YAElC,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAE7C,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC5B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;YAClD,MAAM,IAAI,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;YAEnC,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAE7C,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC7B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;YACzC,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;QAC7E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC1C,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,aAAa,EAAE,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;YACxC,MAAM,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC,aAAa,EAAE,CAAC;QACnD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,cAAc,EAAE,GAAG,EAAE;QAC5B,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAE,CAAC;QAEjE,EAAE,CAAC,iCAAiC,EAAE,KAAK,IAAI,EAAE;YAC/C,MAAM,IAAI,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;YAEnC,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAE9C,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC5B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;YACrC,MAAM,CAAC,WAAW,CAAC,SAAS,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAC1E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC1C,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,aAAa,EAAE,CAAC;QAC9C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;YACxC,MAAM,CAAC,WAAW,CAAC,aAAa,CAAC,CAAC,aAAa,EAAE,CAAC;QACpD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;QAC7B,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;YACnD,MAAM,cAAc,GAAG,SAAS,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,WAAW,CAAC,CAAC;YAC1E,MAAM,YAAY,GAAG,SAAS,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC;YAEtE,MAAM,CAAC,cAAc,CAAC,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;QACpD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;YAChD,MAAM,WAAW,GAAG,SAAS,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;YACpE,MAAM,YAAY,GAAG,SAAS,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC;YAEtE,MAAM,CAAC,WAAW,CAAC,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;YACvC,MAAM,YAAY,GAAG,SAAS,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC;YAEtE,MAAM,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,WAAW,EAAE,GAAG,EAAE;QACzB,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;YAC9C,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;gBAC7B,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC;gBACrC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,WAAW,EAAE,CAAC;YACtD,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4CAA4C,EAAE,GAAG,EAAE;YACpD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;gBAC7B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;gBAClC,MAAM,CAAC,OAAO,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC9C,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;YAChD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;gBAC7B,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,CAAC;gBAChC,MAAM,CAAC,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC1C,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1,34 @@
1
+ import { ContentExtractor, ExtractedContent } from './content-extractor-types.js';
2
+ export declare class StorybookExtractor implements ContentExtractor {
3
+ private addContentToSections;
4
+ private isElementVisible;
5
+ private formatCodeBlock;
6
+ private extractLinks;
7
+ private processSection;
8
+ private processSectionContent;
9
+ private processPropsTable;
10
+ /**
11
+ * Find the closest heading element before the given element
12
+ * This helps detect if a Props table already has a component-prefixed heading
13
+ */
14
+ private findClosestPrecedingHeading;
15
+ /**
16
+ * Click all "Show more" buttons in a props table to expand type values
17
+ */
18
+ private expandAllTypeValues;
19
+ /**
20
+ * Extract type values from a table cell, handling Storybook's various DOM structures
21
+ */
22
+ private extractTypeFromCell;
23
+ private waitForSidebar;
24
+ private expandSidebarSections;
25
+ private waitForStorybookContent;
26
+ private processTableContent;
27
+ private processListContent;
28
+ /**
29
+ * Extract type annotations from inline code examples and prop documentation
30
+ */
31
+ private extractTypeAnnotations;
32
+ private waitForStorybookAPI;
33
+ extractContent(document: Document): Promise<ExtractedContent>;
34
+ }