@mseep/core 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (312) hide show
  1. package/CHANGELOG.md +285 -0
  2. package/LICENSE +21 -0
  3. package/README.ja.md +14 -0
  4. package/README.ko.md +14 -0
  5. package/README.md +227 -0
  6. package/README.pt-BR.md +14 -0
  7. package/README.skills.md +50 -0
  8. package/README.uk.md +14 -0
  9. package/README.zh-CN.md +14 -0
  10. package/bin/booklib-mcp.js +458 -0
  11. package/bin/booklib.js +2394 -0
  12. package/bin/skills.cjs +1292 -0
  13. package/community/registry.json +1616 -0
  14. package/hooks/hooks.json +52 -0
  15. package/hooks/posttooluse-capture.mjs +67 -0
  16. package/hooks/posttooluse-contradict.mjs +76 -0
  17. package/hooks/posttooluse-imports.mjs +67 -0
  18. package/hooks/pretooluse-inject.mjs +82 -0
  19. package/hooks/suggest.js +153 -0
  20. package/lib/agent-detector.js +96 -0
  21. package/lib/config-loader.js +39 -0
  22. package/lib/conflict-resolver.js +148 -0
  23. package/lib/connectors/context7.js +167 -0
  24. package/lib/connectors/github.js +223 -0
  25. package/lib/connectors/local.js +120 -0
  26. package/lib/connectors/notion.js +436 -0
  27. package/lib/connectors/web.js +134 -0
  28. package/lib/context-builder.js +574 -0
  29. package/lib/discovery-engine.js +298 -0
  30. package/lib/doctor/hook-installer.js +83 -0
  31. package/lib/doctor/usage-tracker.js +87 -0
  32. package/lib/engine/auditor.js +103 -0
  33. package/lib/engine/auto-linker.js +177 -0
  34. package/lib/engine/bm25-index.js +178 -0
  35. package/lib/engine/capture.js +120 -0
  36. package/lib/engine/context-map.js +641 -0
  37. package/lib/engine/corrections.js +194 -0
  38. package/lib/engine/decision-checker.js +203 -0
  39. package/lib/engine/doctor.js +207 -0
  40. package/lib/engine/embedding-provider.js +72 -0
  41. package/lib/engine/gap-detector.js +138 -0
  42. package/lib/engine/gap-resolver.js +135 -0
  43. package/lib/engine/graph-injector.js +137 -0
  44. package/lib/engine/graph-search.js +183 -0
  45. package/lib/engine/graph.js +170 -0
  46. package/lib/engine/handoff.js +411 -0
  47. package/lib/engine/import-checker.js +249 -0
  48. package/lib/engine/import-parser.js +145 -0
  49. package/lib/engine/indexer.js +334 -0
  50. package/lib/engine/lookup-priority.js +15 -0
  51. package/lib/engine/parser.js +257 -0
  52. package/lib/engine/principle-extractor.js +116 -0
  53. package/lib/engine/project-analyzer.js +353 -0
  54. package/lib/engine/query-expander.js +42 -0
  55. package/lib/engine/reasoning-modes.js +353 -0
  56. package/lib/engine/registries.js +524 -0
  57. package/lib/engine/reranker.js +45 -0
  58. package/lib/engine/rrf.js +59 -0
  59. package/lib/engine/scanner.js +151 -0
  60. package/lib/engine/searcher.js +223 -0
  61. package/lib/engine/session-coordinator.js +291 -0
  62. package/lib/engine/session-manager.js +375 -0
  63. package/lib/engine/source-detector.js +240 -0
  64. package/lib/engine/source-manager.js +142 -0
  65. package/lib/engine/structured-response.js +47 -0
  66. package/lib/engine/synthesis-templates.js +364 -0
  67. package/lib/installer.js +70 -0
  68. package/lib/instinct-block.js +21 -0
  69. package/lib/mcp-config-writer.js +107 -0
  70. package/lib/paths.js +62 -0
  71. package/lib/project-initializer.js +856 -0
  72. package/lib/registry/skills.js +102 -0
  73. package/lib/registry-searcher.js +107 -0
  74. package/lib/rules/rules-manager.js +169 -0
  75. package/lib/skill-fetcher.js +333 -0
  76. package/lib/well-known-builder.js +74 -0
  77. package/lib/wizard/index.js +1389 -0
  78. package/lib/wizard/integration-detector.js +41 -0
  79. package/lib/wizard/project-detector.js +146 -0
  80. package/lib/wizard/prompt.js +221 -0
  81. package/lib/wizard/registry-embeddings.js +107 -0
  82. package/lib/wizard/skill-recommender.js +69 -0
  83. package/package.json +70 -0
  84. package/skills/animation-at-work/SKILL.md +270 -0
  85. package/skills/animation-at-work/assets/example_asset.txt +1 -0
  86. package/skills/animation-at-work/evals/evals.json +44 -0
  87. package/skills/animation-at-work/evals/results.json +13 -0
  88. package/skills/animation-at-work/examples/after.md +64 -0
  89. package/skills/animation-at-work/examples/before.md +35 -0
  90. package/skills/animation-at-work/references/api_reference.md +369 -0
  91. package/skills/animation-at-work/references/review-checklist.md +79 -0
  92. package/skills/animation-at-work/scripts/audit_animations.py +295 -0
  93. package/skills/animation-at-work/scripts/example.py +1 -0
  94. package/skills/booklib-mcp-guide/SKILL.md +129 -0
  95. package/skills/booklib-mcp-guide/evals/evals.json +37 -0
  96. package/skills/booklib-mcp-guide/examples/after.md +34 -0
  97. package/skills/booklib-mcp-guide/examples/before.md +27 -0
  98. package/skills/booklib-mcp-guide/references/tool-catalog.md +9 -0
  99. package/skills/clean-code-reviewer/SKILL.md +444 -0
  100. package/skills/clean-code-reviewer/audit.json +35 -0
  101. package/skills/clean-code-reviewer/evals/evals.json +185 -0
  102. package/skills/clean-code-reviewer/evals/results.json +13 -0
  103. package/skills/clean-code-reviewer/examples/after.md +48 -0
  104. package/skills/clean-code-reviewer/examples/before.md +33 -0
  105. package/skills/clean-code-reviewer/references/api_reference.md +158 -0
  106. package/skills/clean-code-reviewer/references/practices-catalog.md +282 -0
  107. package/skills/clean-code-reviewer/references/review-checklist.md +254 -0
  108. package/skills/clean-code-reviewer/scripts/pre-review.py +206 -0
  109. package/skills/data-intensive-patterns/SKILL.md +267 -0
  110. package/skills/data-intensive-patterns/assets/example_asset.txt +1 -0
  111. package/skills/data-intensive-patterns/evals/evals.json +54 -0
  112. package/skills/data-intensive-patterns/evals/results.json +13 -0
  113. package/skills/data-intensive-patterns/examples/after.md +61 -0
  114. package/skills/data-intensive-patterns/examples/before.md +38 -0
  115. package/skills/data-intensive-patterns/references/api_reference.md +34 -0
  116. package/skills/data-intensive-patterns/references/patterns-catalog.md +551 -0
  117. package/skills/data-intensive-patterns/references/review-checklist.md +193 -0
  118. package/skills/data-intensive-patterns/scripts/adr.py +213 -0
  119. package/skills/data-intensive-patterns/scripts/example.py +1 -0
  120. package/skills/data-pipelines/SKILL.md +259 -0
  121. package/skills/data-pipelines/assets/example_asset.txt +1 -0
  122. package/skills/data-pipelines/evals/evals.json +45 -0
  123. package/skills/data-pipelines/evals/results.json +13 -0
  124. package/skills/data-pipelines/examples/after.md +97 -0
  125. package/skills/data-pipelines/examples/before.md +37 -0
  126. package/skills/data-pipelines/references/api_reference.md +301 -0
  127. package/skills/data-pipelines/references/review-checklist.md +181 -0
  128. package/skills/data-pipelines/scripts/example.py +1 -0
  129. package/skills/data-pipelines/scripts/new_pipeline.py +444 -0
  130. package/skills/design-patterns/SKILL.md +271 -0
  131. package/skills/design-patterns/assets/example_asset.txt +1 -0
  132. package/skills/design-patterns/evals/evals.json +46 -0
  133. package/skills/design-patterns/evals/results.json +13 -0
  134. package/skills/design-patterns/examples/after.md +52 -0
  135. package/skills/design-patterns/examples/before.md +29 -0
  136. package/skills/design-patterns/references/api_reference.md +1 -0
  137. package/skills/design-patterns/references/patterns-catalog.md +726 -0
  138. package/skills/design-patterns/references/review-checklist.md +173 -0
  139. package/skills/design-patterns/scripts/example.py +1 -0
  140. package/skills/design-patterns/scripts/scaffold.py +807 -0
  141. package/skills/domain-driven-design/SKILL.md +142 -0
  142. package/skills/domain-driven-design/assets/example_asset.txt +1 -0
  143. package/skills/domain-driven-design/evals/evals.json +48 -0
  144. package/skills/domain-driven-design/evals/results.json +13 -0
  145. package/skills/domain-driven-design/examples/after.md +80 -0
  146. package/skills/domain-driven-design/examples/before.md +43 -0
  147. package/skills/domain-driven-design/references/api_reference.md +1 -0
  148. package/skills/domain-driven-design/references/patterns-catalog.md +545 -0
  149. package/skills/domain-driven-design/references/review-checklist.md +158 -0
  150. package/skills/domain-driven-design/scripts/example.py +1 -0
  151. package/skills/domain-driven-design/scripts/scaffold.py +421 -0
  152. package/skills/effective-java/SKILL.md +227 -0
  153. package/skills/effective-java/assets/example_asset.txt +1 -0
  154. package/skills/effective-java/evals/evals.json +46 -0
  155. package/skills/effective-java/evals/results.json +13 -0
  156. package/skills/effective-java/examples/after.md +83 -0
  157. package/skills/effective-java/examples/before.md +37 -0
  158. package/skills/effective-java/references/api_reference.md +1 -0
  159. package/skills/effective-java/references/items-catalog.md +955 -0
  160. package/skills/effective-java/references/review-checklist.md +216 -0
  161. package/skills/effective-java/scripts/checkstyle_setup.py +211 -0
  162. package/skills/effective-java/scripts/example.py +1 -0
  163. package/skills/effective-kotlin/SKILL.md +271 -0
  164. package/skills/effective-kotlin/assets/example_asset.txt +1 -0
  165. package/skills/effective-kotlin/audit.json +29 -0
  166. package/skills/effective-kotlin/evals/evals.json +45 -0
  167. package/skills/effective-kotlin/evals/results.json +13 -0
  168. package/skills/effective-kotlin/examples/after.md +36 -0
  169. package/skills/effective-kotlin/examples/before.md +38 -0
  170. package/skills/effective-kotlin/references/api_reference.md +1 -0
  171. package/skills/effective-kotlin/references/practices-catalog.md +1228 -0
  172. package/skills/effective-kotlin/references/review-checklist.md +126 -0
  173. package/skills/effective-kotlin/scripts/example.py +1 -0
  174. package/skills/effective-python/SKILL.md +441 -0
  175. package/skills/effective-python/evals/evals.json +44 -0
  176. package/skills/effective-python/evals/results.json +13 -0
  177. package/skills/effective-python/examples/after.md +56 -0
  178. package/skills/effective-python/examples/before.md +40 -0
  179. package/skills/effective-python/ref-01-pythonic-thinking.md +202 -0
  180. package/skills/effective-python/ref-02-lists-and-dicts.md +146 -0
  181. package/skills/effective-python/ref-03-functions.md +186 -0
  182. package/skills/effective-python/ref-04-comprehensions-generators.md +211 -0
  183. package/skills/effective-python/ref-05-classes-interfaces.md +188 -0
  184. package/skills/effective-python/ref-06-metaclasses-attributes.md +209 -0
  185. package/skills/effective-python/ref-07-concurrency.md +213 -0
  186. package/skills/effective-python/ref-08-robustness-performance.md +248 -0
  187. package/skills/effective-python/ref-09-testing-debugging.md +253 -0
  188. package/skills/effective-python/ref-10-collaboration.md +175 -0
  189. package/skills/effective-python/references/api_reference.md +218 -0
  190. package/skills/effective-python/references/practices-catalog.md +483 -0
  191. package/skills/effective-python/references/review-checklist.md +190 -0
  192. package/skills/effective-python/scripts/lint.py +173 -0
  193. package/skills/effective-typescript/SKILL.md +262 -0
  194. package/skills/effective-typescript/audit.json +29 -0
  195. package/skills/effective-typescript/evals/evals.json +37 -0
  196. package/skills/effective-typescript/evals/results.json +13 -0
  197. package/skills/effective-typescript/examples/after.md +70 -0
  198. package/skills/effective-typescript/examples/before.md +47 -0
  199. package/skills/effective-typescript/references/api_reference.md +118 -0
  200. package/skills/effective-typescript/references/practices-catalog.md +371 -0
  201. package/skills/effective-typescript/scripts/review.py +169 -0
  202. package/skills/kotlin-in-action/SKILL.md +261 -0
  203. package/skills/kotlin-in-action/assets/example_asset.txt +1 -0
  204. package/skills/kotlin-in-action/evals/evals.json +43 -0
  205. package/skills/kotlin-in-action/evals/results.json +13 -0
  206. package/skills/kotlin-in-action/examples/after.md +53 -0
  207. package/skills/kotlin-in-action/examples/before.md +39 -0
  208. package/skills/kotlin-in-action/references/api_reference.md +1 -0
  209. package/skills/kotlin-in-action/references/practices-catalog.md +436 -0
  210. package/skills/kotlin-in-action/references/review-checklist.md +204 -0
  211. package/skills/kotlin-in-action/scripts/example.py +1 -0
  212. package/skills/kotlin-in-action/scripts/setup_detekt.py +224 -0
  213. package/skills/lean-startup/SKILL.md +160 -0
  214. package/skills/lean-startup/assets/example_asset.txt +1 -0
  215. package/skills/lean-startup/evals/evals.json +43 -0
  216. package/skills/lean-startup/evals/results.json +13 -0
  217. package/skills/lean-startup/examples/after.md +80 -0
  218. package/skills/lean-startup/examples/before.md +34 -0
  219. package/skills/lean-startup/references/api_reference.md +319 -0
  220. package/skills/lean-startup/references/review-checklist.md +137 -0
  221. package/skills/lean-startup/scripts/example.py +1 -0
  222. package/skills/lean-startup/scripts/new_experiment.py +286 -0
  223. package/skills/microservices-patterns/SKILL.md +384 -0
  224. package/skills/microservices-patterns/evals/evals.json +45 -0
  225. package/skills/microservices-patterns/evals/results.json +13 -0
  226. package/skills/microservices-patterns/examples/after.md +69 -0
  227. package/skills/microservices-patterns/examples/before.md +40 -0
  228. package/skills/microservices-patterns/references/patterns-catalog.md +391 -0
  229. package/skills/microservices-patterns/references/review-checklist.md +169 -0
  230. package/skills/microservices-patterns/scripts/new_service.py +583 -0
  231. package/skills/programming-with-rust/SKILL.md +209 -0
  232. package/skills/programming-with-rust/evals/evals.json +37 -0
  233. package/skills/programming-with-rust/evals/results.json +13 -0
  234. package/skills/programming-with-rust/examples/after.md +107 -0
  235. package/skills/programming-with-rust/examples/before.md +59 -0
  236. package/skills/programming-with-rust/references/api_reference.md +152 -0
  237. package/skills/programming-with-rust/references/practices-catalog.md +335 -0
  238. package/skills/programming-with-rust/scripts/review.py +142 -0
  239. package/skills/refactoring-ui/SKILL.md +362 -0
  240. package/skills/refactoring-ui/assets/example_asset.txt +1 -0
  241. package/skills/refactoring-ui/evals/evals.json +45 -0
  242. package/skills/refactoring-ui/evals/results.json +13 -0
  243. package/skills/refactoring-ui/examples/after.md +85 -0
  244. package/skills/refactoring-ui/examples/before.md +58 -0
  245. package/skills/refactoring-ui/references/api_reference.md +355 -0
  246. package/skills/refactoring-ui/references/review-checklist.md +114 -0
  247. package/skills/refactoring-ui/scripts/audit_css.py +250 -0
  248. package/skills/refactoring-ui/scripts/example.py +1 -0
  249. package/skills/rust-in-action/SKILL.md +350 -0
  250. package/skills/rust-in-action/evals/evals.json +38 -0
  251. package/skills/rust-in-action/evals/results.json +13 -0
  252. package/skills/rust-in-action/examples/after.md +156 -0
  253. package/skills/rust-in-action/examples/before.md +56 -0
  254. package/skills/rust-in-action/references/practices-catalog.md +346 -0
  255. package/skills/rust-in-action/scripts/review.py +147 -0
  256. package/skills/skill-router/SKILL.md +186 -0
  257. package/skills/skill-router/evals/evals.json +38 -0
  258. package/skills/skill-router/evals/results.json +13 -0
  259. package/skills/skill-router/examples/after.md +63 -0
  260. package/skills/skill-router/examples/before.md +39 -0
  261. package/skills/skill-router/references/api_reference.md +24 -0
  262. package/skills/skill-router/references/routing-heuristics.md +89 -0
  263. package/skills/skill-router/references/skill-catalog.md +174 -0
  264. package/skills/skill-router/scripts/route.py +266 -0
  265. package/skills/spring-boot-in-action/SKILL.md +340 -0
  266. package/skills/spring-boot-in-action/evals/evals.json +39 -0
  267. package/skills/spring-boot-in-action/evals/results.json +13 -0
  268. package/skills/spring-boot-in-action/examples/after.md +185 -0
  269. package/skills/spring-boot-in-action/examples/before.md +84 -0
  270. package/skills/spring-boot-in-action/references/practices-catalog.md +403 -0
  271. package/skills/spring-boot-in-action/scripts/review.py +184 -0
  272. package/skills/storytelling-with-data/SKILL.md +241 -0
  273. package/skills/storytelling-with-data/assets/example_asset.txt +1 -0
  274. package/skills/storytelling-with-data/evals/evals.json +47 -0
  275. package/skills/storytelling-with-data/evals/results.json +13 -0
  276. package/skills/storytelling-with-data/examples/after.md +50 -0
  277. package/skills/storytelling-with-data/examples/before.md +33 -0
  278. package/skills/storytelling-with-data/references/api_reference.md +379 -0
  279. package/skills/storytelling-with-data/references/review-checklist.md +111 -0
  280. package/skills/storytelling-with-data/scripts/chart_review.py +301 -0
  281. package/skills/storytelling-with-data/scripts/example.py +1 -0
  282. package/skills/system-design-interview/SKILL.md +233 -0
  283. package/skills/system-design-interview/assets/example_asset.txt +1 -0
  284. package/skills/system-design-interview/evals/evals.json +46 -0
  285. package/skills/system-design-interview/evals/results.json +13 -0
  286. package/skills/system-design-interview/examples/after.md +94 -0
  287. package/skills/system-design-interview/examples/before.md +27 -0
  288. package/skills/system-design-interview/references/api_reference.md +582 -0
  289. package/skills/system-design-interview/references/review-checklist.md +201 -0
  290. package/skills/system-design-interview/scripts/example.py +1 -0
  291. package/skills/system-design-interview/scripts/new_design.py +421 -0
  292. package/skills/using-asyncio-python/SKILL.md +290 -0
  293. package/skills/using-asyncio-python/assets/example_asset.txt +1 -0
  294. package/skills/using-asyncio-python/evals/evals.json +43 -0
  295. package/skills/using-asyncio-python/evals/results.json +13 -0
  296. package/skills/using-asyncio-python/examples/after.md +68 -0
  297. package/skills/using-asyncio-python/examples/before.md +39 -0
  298. package/skills/using-asyncio-python/references/api_reference.md +267 -0
  299. package/skills/using-asyncio-python/references/review-checklist.md +149 -0
  300. package/skills/using-asyncio-python/scripts/check_blocking.py +270 -0
  301. package/skills/using-asyncio-python/scripts/example.py +1 -0
  302. package/skills/web-scraping-python/SKILL.md +280 -0
  303. package/skills/web-scraping-python/assets/example_asset.txt +1 -0
  304. package/skills/web-scraping-python/evals/evals.json +46 -0
  305. package/skills/web-scraping-python/evals/results.json +13 -0
  306. package/skills/web-scraping-python/examples/after.md +109 -0
  307. package/skills/web-scraping-python/examples/before.md +40 -0
  308. package/skills/web-scraping-python/references/api_reference.md +393 -0
  309. package/skills/web-scraping-python/references/review-checklist.md +163 -0
  310. package/skills/web-scraping-python/scripts/example.py +1 -0
  311. package/skills/web-scraping-python/scripts/new_scraper.py +231 -0
  312. package/skills/writing-plans/audit.json +34 -0
@@ -0,0 +1,436 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+
4
const NOTION_API = 'https://api.notion.com/v1';
const NOTION_VERSION = '2022-06-28';
const RATE_MS = 334; // ~3 req/sec (Notion's limit)
const PAGE_SIZE = 100;
// Notion IDs are UUIDs (with or without hyphens)
const ID_PATTERN = /^[a-f0-9]{8}-?[a-f0-9]{4}-?[a-f0-9]{4}-?[a-f0-9]{4}-?[a-f0-9]{12}$/i;

/**
 * Downloads Notion pages, database entries and search hits and stores each
 * one as a markdown file on disk.
 */
export class NotionConnector {
  /**
   * @param {object} [opts]
   * @param {string} [opts.apiKey] - Integration token; falls back to NOTION_API_KEY.
   * @param {number} [opts.rateMs] - Delay in milliseconds inserted between requests.
   */
  constructor(opts = {}) {
    this.apiKey = opts.apiKey ?? process.env.NOTION_API_KEY;
    this.rateMs = opts.rateMs ?? RATE_MS;
  }

  /**
   * Check if API key is configured.
   * @returns {{ ok: boolean, error?: string }}
   */
  checkAuth() {
    if (!this.apiKey) {
      return {
        ok: false,
        error: 'NOTION_API_KEY not set. Create an integration at https://www.notion.so/my-integrations then: export NOTION_API_KEY=<token>',
      };
    }
    return { ok: true };
  }

  /**
   * Fetch a single page and its content blocks, save as markdown.
   * @param {string} pageId
   * @param {string} outputDir
   * @returns {Promise<{ pageCount: number, title: string }>}
   * @throws {Error} When pageId is not a UUID or the API responds with an error.
   */
  async fetchPage(pageId, outputDir) {
    this._validateId(pageId);
    fs.mkdirSync(outputDir, { recursive: true });

    const page = await this._apiGet(`/pages/${pageId}`);
    const title = this._extractTitle(page);
    const blocks = await this._getAllBlocks(pageId);

    const md = this._blocksToMarkdown(blocks, title);
    const filename = `${this._sanitizeFilename(title || pageId)}.md`;
    fs.writeFileSync(path.join(outputDir, filename), md);

    return { pageCount: 1, title };
  }

  /**
   * Fetch all entries from a Notion database, each as a markdown file.
   * Entries whose sanitized titles collide within one run get a suffix
   * instead of overwriting each other.
   * @param {string} databaseId
   * @param {string} outputDir
   * @param {object} [opts]
   * @param {string} [opts.since] - ISO date, only fetch entries edited after this
   * @returns {Promise<{ pageCount: number }>}
   * @throws {Error} When databaseId is not a UUID or the API responds with an error.
   */
  async fetchDatabase(databaseId, outputDir, opts = {}) {
    this._validateId(databaseId);
    fs.mkdirSync(outputDir, { recursive: true });

    const usedNames = new Set();
    let cursor;
    let count = 0;

    do {
      const body = { page_size: PAGE_SIZE };
      if (cursor) body.start_cursor = cursor;
      if (opts.since) {
        body.filter = {
          timestamp: 'last_edited_time',
          last_edited_time: { after: opts.since },
        };
      }

      const result = await this._apiPost(`/databases/${databaseId}/query`, body);

      for (const entry of result.results ?? []) {
        const title = this._extractTitle(entry);
        const blocks = await this._getAllBlocks(entry.id);
        const props = this._extractProperties(entry);
        const md = this._entryToMarkdown(title, props, blocks);
        const filename = this._uniqueFilename(title || entry.id, entry.id, usedNames);
        fs.writeFileSync(path.join(outputDir, filename), md);
        count++;
        await this._sleep(this.rateMs);
      }

      // Only continue when the API also supplied a cursor; otherwise the
      // same first page would be requested again forever.
      cursor = result.has_more ? result.next_cursor : undefined;
    } while (cursor);

    return { pageCount: count };
  }

  /**
   * Search Notion workspace and save matching pages.
   * Results are restricted to pages on the server side and paginated until
   * `limit` pages were written or the results are exhausted.
   * @param {string} query
   * @param {string} outputDir
   * @param {object} [opts]
   * @param {number} [opts.limit=20]
   * @returns {Promise<{ pageCount: number }>}
   */
  async fetchSearch(query, outputDir, opts = {}) {
    const { limit = 20 } = opts;
    fs.mkdirSync(outputDir, { recursive: true });

    const usedNames = new Set();
    let cursor;
    let count = 0;

    while (count < limit) {
      const body = {
        query,
        page_size: Math.min(limit - count, PAGE_SIZE),
        filter: { property: 'object', value: 'page' },
      };
      if (cursor) body.start_cursor = cursor;

      const result = await this._apiPost('/search', body);

      for (const item of result.results ?? []) {
        // Defensive: the request already filters to pages.
        if (item.object !== 'page') continue;
        if (count >= limit) break;

        const title = this._extractTitle(item);
        const blocks = await this._getAllBlocks(item.id);
        const md = this._blocksToMarkdown(blocks, title);
        const filename = this._uniqueFilename(title || item.id, item.id, usedNames);
        fs.writeFileSync(path.join(outputDir, filename), md);
        count++;
        await this._sleep(this.rateMs);
      }

      cursor = result.has_more ? result.next_cursor : undefined;
      if (!cursor) break;
    }

    return { pageCount: count };
  }

  // -- Block to Markdown Conversion --

  /**
   * Convert an array of Notion blocks to markdown string.
   * @param {Array} blocks
   * @param {string} [title] - Emitted as a level-1 heading when non-empty.
   * @returns {string}
   */
  _blocksToMarkdown(blocks, title) {
    const lines = [];
    if (title) lines.push(`# ${title}`, '');

    for (const block of blocks) {
      const md = this._blockToMd(block);
      if (md !== null) lines.push(md);
    }

    return `${lines.join('\n')}\n`;
  }

  /**
   * Convert a single Notion block to markdown.
   * @param {object} block
   * @param {number} [indent=0] - Number of leading spaces for indentable block types.
   * @returns {string|null} null when the block carries no payload for its type.
   */
  _blockToMd(block, indent = 0) {
    const prefix = ' '.repeat(indent);
    const type = block.type;
    const data = block[type];

    if (!data) return null;

    // Shared by every media-like block (image, video, pdf, file, audio).
    const mediaUrl = data.file?.url ?? data.external?.url ?? '';

    switch (type) {
      case 'paragraph':
        return prefix + this._richTextToMd(data.rich_text);
      // Page title occupies H1, so Notion headings are shifted down one level.
      case 'heading_1':
        return `## ${this._richTextToMd(data.rich_text)}`;
      case 'heading_2':
        return `### ${this._richTextToMd(data.rich_text)}`;
      case 'heading_3':
        return `#### ${this._richTextToMd(data.rich_text)}`;
      case 'bulleted_list_item':
        return `${prefix}- ${this._richTextToMd(data.rich_text)}`;
      case 'numbered_list_item':
        return `${prefix}1. ${this._richTextToMd(data.rich_text)}`;
      case 'to_do':
        return `${prefix}- [${data.checked ? 'x' : ' '}] ${this._richTextToMd(data.rich_text)}`;
      case 'toggle':
        return `${prefix}<details><summary>${this._richTextToMd(data.rich_text)}</summary>\n\n${prefix}_(toggle content)_\n\n${prefix}</details>`;
      case 'code':
        // Fenced code is literal: emphasis markers must not be injected.
        return `\`\`\`${data.language ?? ''}\n${this._plainText(data.rich_text)}\n\`\`\``;
      case 'quote':
        return `${prefix}> ${this._richTextToMd(data.rich_text)}`;
      case 'callout':
        return `${prefix}> ${data.icon?.emoji ?? '\u{1F4A1}'} ${this._richTextToMd(data.rich_text)}`;
      case 'divider':
        return '---';
      case 'table_of_contents':
        return '_[Table of Contents]_';
      case 'breadcrumb':
      case 'column_list':
      case 'column':
      case 'table':
        return '';
      case 'child_page':
        return `- \u{1F4C4} **[${data.title}]**`;
      case 'child_database':
        return `- \u{1F5C3}\u{FE0F} **[${data.title}]**`;
      case 'embed':
        return `[Embed: ${data.url ?? ''}](${data.url ?? ''})`;
      case 'image':
        return `![image](${mediaUrl})`;
      case 'video':
        return `[Video: ${mediaUrl}]`;
      case 'pdf':
        return `[PDF: ${mediaUrl}]`;
      case 'file':
        return `[File: ${mediaUrl}]`;
      case 'audio':
        return `[Audio: ${mediaUrl}]`;
      case 'bookmark':
        return `[${data.caption?.length ? this._richTextToMd(data.caption) : data.url}](${data.url})`;
      case 'equation':
        return `$$${data.expression}$$`;
      case 'link_preview':
        return `[Link: ${data.url}](${data.url})`;
      case 'link_to_page':
        return '[→ Linked page]';
      case 'synced_block':
        return '_(synced block)_';
      case 'template':
        return `_(template: ${this._richTextToMd(data.rich_text)})_`;
      case 'table_row':
        if (!data.cells) return null;
        return `| ${data.cells.map((cell) => this._richTextToMd(cell)).join(' | ')} |`;
      default:
        return `<!-- unknown block: ${type} -->`;
    }
  }

  /**
   * Convert Notion rich_text array to markdown string with formatting.
   * The code span is applied innermost so surrounding emphasis markers stay
   * outside the backticks, and links are applied even when a fragment has no
   * annotations object.
   * @param {Array} richText
   * @returns {string}
   */
  _richTextToMd(richText) {
    if (!Array.isArray(richText)) return '';
    return richText.map((fragment) => {
      const marks = fragment.annotations ?? {};
      let text = fragment.plain_text ?? '';
      if (marks.code) text = `\`${text}\``;
      if (marks.bold) text = `**${text}**`;
      if (marks.italic) text = `*${text}*`;
      if (marks.strikethrough) text = `~~${text}~~`;
      if (fragment.href) text = `[${text}](${fragment.href})`;
      return text;
    }).join('');
  }

  /**
   * Concatenate the plain text of a rich_text array without any formatting.
   * @param {Array} richText
   * @returns {string}
   */
  _plainText(richText) {
    if (!Array.isArray(richText)) return '';
    return richText.map((fragment) => fragment.plain_text ?? '').join('');
  }

  // -- Property Extraction (for database entries) --

  /**
   * Extract properties from a database entry as key-value pairs.
   * @param {object} entry - Notion page object with properties
   * @returns {Record<string, string>}
   */
  _extractProperties(entry) {
    const props = {};
    for (const [key, prop] of Object.entries(entry.properties ?? {})) {
      props[key] = this._propertyToString(prop);
    }
    return props;
  }

  /**
   * Convert a single Notion property value to a string.
   * @param {object} prop
   * @returns {string} Empty string for unknown types or missing values.
   */
  _propertyToString(prop) {
    switch (prop.type) {
      case 'title':
        return prop.title?.map((t) => t.plain_text).join('') ?? '';
      case 'rich_text':
        return prop.rich_text?.map((t) => t.plain_text).join('') ?? '';
      case 'number':
        return String(prop.number ?? '');
      case 'select':
        return prop.select?.name ?? '';
      case 'multi_select':
        return (prop.multi_select ?? []).map((s) => s.name).join(', ');
      case 'date':
        return prop.date?.start ?? '';
      case 'checkbox':
        return prop.checkbox ? 'Yes' : 'No';
      case 'url':
        return prop.url ?? '';
      case 'email':
        return prop.email ?? '';
      case 'phone_number':
        return prop.phone_number ?? '';
      case 'status':
        return prop.status?.name ?? '';
      case 'people':
        return (prop.people ?? []).map((p) => p.name ?? p.id).join(', ');
      case 'relation':
        return (prop.relation ?? []).map((r) => r.id).join(', ');
      case 'rollup':
        return JSON.stringify(prop.rollup?.array ?? []);
      case 'formula': {
        const formula = prop.formula;
        // Date-typed formulas carry their value under `date.start`.
        return String(
          formula?.string ?? formula?.number ?? formula?.boolean ?? formula?.date?.start ?? '',
        );
      }
      case 'created_time':
        return prop.created_time ?? '';
      case 'last_edited_time':
        return prop.last_edited_time ?? '';
      case 'created_by':
        return prop.created_by?.name ?? prop.created_by?.id ?? '';
      case 'last_edited_by':
        return prop.last_edited_by?.name ?? prop.last_edited_by?.id ?? '';
      case 'files':
        return (prop.files ?? []).map((f) => f.name ?? f.file?.url ?? f.external?.url ?? '').join(', ');
      case 'unique_id':
        return `${prop.unique_id?.prefix ?? ''}${prop.unique_id?.number ?? ''}`;
      case 'verification':
        return prop.verification?.state ?? '';
      default:
        return '';
    }
  }

  // -- Helpers --

  /**
   * Format a database entry as markdown with YAML-like frontmatter.
   * Empty values and the `Name`/`Title` keys are omitted; line breaks inside
   * a value are collapsed to a space so each property stays on one line.
   * @param {string} title
   * @param {Record<string, string>} props
   * @param {Array} blocks
   * @returns {string}
   */
  _entryToMarkdown(title, props, blocks) {
    const lines = ['---'];
    for (const [key, val] of Object.entries(props)) {
      if (val && key !== 'Name' && key !== 'Title') {
        lines.push(`${key}: ${String(val).replace(/[\r\n]+/g, ' ')}`);
      }
    }
    lines.push('---', '');
    lines.push(this._blocksToMarkdown(blocks, title));

    return lines.join('\n');
  }

  /**
   * Extract title from a Notion page object.
   * @param {object} page
   * @returns {string} Empty string when no title could be found.
   */
  _extractTitle(page) {
    for (const prop of Object.values(page.properties ?? {})) {
      if (prop.type === 'title' && prop.title?.length > 0) {
        return prop.title.map((t) => t.plain_text).join('');
      }
    }
    if (page.child_page?.title) return page.child_page.title;
    return '';
  }

  /**
   * Get all blocks for a page, handling pagination.
   * @param {string} pageId
   * @returns {Promise<Array>}
   */
  async _getAllBlocks(pageId) {
    const allBlocks = [];
    let cursor;

    do {
      let url = `/blocks/${pageId}/children?page_size=${PAGE_SIZE}`;
      if (cursor) url += `&start_cursor=${encodeURIComponent(cursor)}`;

      const result = await this._apiGet(url);
      allBlocks.push(...(result.results ?? []));

      // Stop when the API reports more data but gives no cursor to continue from.
      cursor = result.has_more ? result.next_cursor : undefined;
      if (cursor) await this._sleep(this.rateMs);
    } while (cursor);

    return allBlocks;
  }

  /**
   * Validate a Notion UUID. Prevents path traversal in API URLs.
   * @param {string} id
   * @throws {Error} When id is empty or not a UUID.
   */
  _validateId(id) {
    if (!id || !ID_PATTERN.test(id)) {
      throw new Error(`Invalid Notion ID: "${id}". Expected a UUID (e.g., 1a2b3c4d-5e6f-7a8b-9c0d-1e2f3a4b5c6d)`);
    }
  }

  /**
   * Sanitize a string for use as a filename.
   * @param {string} str
   * @returns {string} Lower-cased name, at most 100 characters, without extension.
   */
  _sanitizeFilename(str) {
    return str
      .replace(/[/\\:*?"<>|]/g, '_')
      .replace(/\s+/g, '-')
      .slice(0, 100)
      .toLowerCase();
  }

  /**
   * Build a `.md` filename that is unique within one fetch run.
   * The first use of a name is kept as-is; later collisions get the first
   * eight characters of the page id, then a counter, appended.
   * @param {string} name - Title (or id) to derive the filename from.
   * @param {string} id - Page id used to disambiguate collisions.
   * @param {Set<string>} used - Names already handed out; updated in place.
   * @returns {string}
   */
  _uniqueFilename(name, id, used) {
    const base = this._sanitizeFilename(name);
    let candidate = base;
    if (used.has(candidate)) {
      candidate = `${base}-${this._sanitizeFilename(String(id)).slice(0, 8)}`;
    }
    for (let n = 2; used.has(candidate); n++) {
      candidate = `${base}-${n}`;
    }
    used.add(candidate);
    return `${candidate}.md`;
  }

  /**
   * Rate-limited request to the Notion API shared by GET and POST.
   * @param {string} endpoint - Path appended to the API base URL.
   * @param {object} [init] - Extra fetch options (method, headers, body).
   * @returns {Promise<object>} Parsed JSON response.
   * @throws {Error} When the response status is not 2xx.
   */
  async _request(endpoint, init = {}) {
    await this._sleep(this.rateMs);
    const res = await fetch(`${NOTION_API}${endpoint}`, {
      ...init,
      headers: {
        'Authorization': `Bearer ${this.apiKey}`,
        'Notion-Version': NOTION_VERSION,
        ...init.headers,
      },
    });
    if (!res.ok) {
      const detail = await res.text().catch(() => 'unknown');
      throw new Error(`Notion API ${res.status}: ${detail.slice(0, 200)}`);
    }
    return res.json();
  }

  /** GET request to Notion API. */
  async _apiGet(endpoint) {
    return this._request(endpoint);
  }

  /** POST request to Notion API. */
  async _apiPost(endpoint, body) {
    return this._request(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    });
  }

  /** Resolve after `ms` milliseconds. */
  _sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }
}
@@ -0,0 +1,134 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import TurndownService from 'turndown';
4
+
5
/**
 * Crawls a documentation site and stores each HTML page as a markdown file.
 *
 * Crawling is restricted to the start URL's hostname and path prefix, is
 * rate limited, and refuses to fetch loopback / private-network addresses.
 */
export class WebConnector {
  /**
   * @param {object} [opts]
   * @param {number} [opts.depth=1] - How many link hops to follow from the start URL (0 = start page only).
   * @param {number} [opts.rateMs=1000] - Delay in milliseconds between page fetches.
   */
  constructor(opts = {}) {
    this.depth = opts.depth ?? 1;
    this.rateMs = opts.rateMs ?? 1000;
    this.turndown = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });
  }

  /**
   * Scrape a URL and optionally follow links up to this.depth.
   *
   * @param {string} url - Absolute start URL; its path is the crawl prefix.
   * @param {string} outputDir - Directory for the markdown files (created if missing).
   * @returns {Promise<{dir: string, pageCount: number}>} The output directory and
   *   the number of markdown files actually written (failed, skipped and
   *   non-HTML URLs are not counted).
   * @throws {TypeError} If `url` is not a valid absolute URL.
   */
  async scrape(url, outputDir) {
    fs.mkdirSync(outputDir, { recursive: true });
    const visited = new Set();
    const saved = new Set();
    const baseUrl = new URL(url);
    const basePath = baseUrl.pathname;

    await this._crawl(url, baseUrl, basePath, outputDir, 0, visited, saved);
    return { dir: outputDir, pageCount: saved.size };
  }

  /**
   * Fetch one page, write it as markdown, then recurse into its links.
   *
   * @param {string} url - Page to fetch.
   * @param {URL} baseUrl - Parsed start URL of the crawl.
   * @param {string} basePath - Path prefix that followed links must share.
   * @param {string} outputDir - Directory the markdown file is written to.
   * @param {number} currentDepth - Link distance of `url` from the start page.
   * @param {Set<string>} visited - URLs already attempted (mutated).
   * @param {Set<string>} [saved] - Filenames already written (mutated); used to
   *   count real output and to avoid overwriting on name collisions.
   * @returns {Promise<void>}
   */
  async _crawl(url, baseUrl, basePath, outputDir, currentDepth, visited, saved = new Set()) {
    if (visited.has(url) || currentDepth > this.depth) return;
    visited.add(url);

    // Checked before the rate-limit delay: no request is made, so no need to wait.
    if (this._isInternalUrl(url)) {
      console.error(` ⚠ Skipping internal URL: ${url}`);
      return;
    }

    // Rate limiting — skip delay for the first page
    if (visited.size > 1) await this._sleep(this.rateMs);

    let html;
    let pageUrl = url;
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), 30000);
    try {
      const res = await fetch(url, {
        headers: { 'User-Agent': 'BookLib/1.0 (documentation indexer)' },
        redirect: 'follow',
        signal: controller.signal,
      });
      if (!res.ok) { console.error(` ⚠ ${res.status} ${url}`); return; }

      // A redirect may have landed somewhere else: never store content served
      // from an internal address, and do not process the same page twice.
      if (res.url && res.url !== url) {
        if (this._isInternalUrl(res.url)) {
          console.error(` ⚠ Skipping internal URL: ${res.url}`);
          return;
        }
        if (visited.has(res.url)) return;
        visited.add(res.url);
        pageUrl = res.url;
      }

      // Only the declared size is checked; a missing header is treated as 0.
      const contentLength = Number.parseInt(res.headers.get('content-length') || '0', 10);
      if (contentLength > 10 * 1024 * 1024) {
        console.error(` ⚠ Skipping ${url} — too large (${contentLength} bytes)`);
        return;
      }
      const contentType = res.headers.get('content-type') || '';
      if (!contentType.includes('text/html')) return;
      html = await res.text();
    } catch (err) {
      console.error(` ⚠ Failed: ${url} — ${err.message}`);
      return;
    } finally {
      clearTimeout(timeout);
    }

    // Strip nav/footer/script/style, then convert to markdown
    const cleanHtml = this._extractContent(html);
    const markdown = this.turndown.turndown(cleanHtml);

    // Distinct URLs can map to the same name (e.g. differing only by query
    // string); add a numeric suffix instead of silently overwriting.
    let filename = this._urlToFilename(url, baseUrl);
    const stem = filename.slice(0, -'.md'.length);
    for (let n = 2; saved.has(filename); n += 1) {
      filename = `${stem}-${n}.md`;
    }

    fs.writeFileSync(path.join(outputDir, filename), markdown);
    saved.add(filename);
    console.log(` ✓ ${filename} (${markdown.length} chars)`);

    // Follow links only within the same domain + path prefix
    if (currentDepth < this.depth) {
      const links = this._extractLinks(html, baseUrl, basePath, pageUrl);
      for (const link of links) {
        await this._crawl(link, baseUrl, basePath, outputDir, currentDepth + 1, visited, saved);
      }
    }
  }

  /**
   * Remove page chrome and non-content markup before markdown conversion.
   *
   * @param {string} html - Raw page HTML.
   * @returns {string} HTML without script, style, nav, footer, header elements and comments.
   */
  _extractContent(html) {
    return html
      .replace(/<script[\s\S]*?<\/script>/gi, '')
      .replace(/<style[\s\S]*?<\/style>/gi, '')
      .replace(/<nav[\s\S]*?<\/nav>/gi, '')
      .replace(/<footer[\s\S]*?<\/footer>/gi, '')
      .replace(/<header[\s\S]*?<\/header>/gi, '')
      .replace(/<!--[\s\S]*?-->/g, '');
  }

  /**
   * Collect the crawlable links found in a page.
   *
   * A link qualifies when it is http(s), on the same hostname as `baseUrl`,
   * under `basePath`, and does not look like a static asset. Fragments are
   * stripped so `page.html#a` and `page.html#b` count as one page.
   *
   * @param {string} html - Raw page HTML.
   * @param {URL} baseUrl - Parsed start URL of the crawl.
   * @param {string} basePath - Path prefix that links must share.
   * @param {string} [pageUrl] - URL of the page `html` came from; relative
   *   hrefs are resolved against it. Defaults to the start URL.
   * @returns {string[]} De-duplicated absolute URLs in document order.
   */
  _extractLinks(html, baseUrl, basePath, pageUrl = baseUrl.href) {
    const found = new Set();
    const hrefRegex = /href=["']([^"']+)["']/gi;
    const assetRegex = /\.(png|jpe?g|gif|svg|webp|css|js|ico|woff2?|ttf|eot|pdf|zip)$/i;

    for (const match of html.matchAll(hrefRegex)) {
      let resolved;
      try {
        // Attribute values are HTML-escaped, so undo `&amp;` before parsing.
        resolved = new URL(match[1].replaceAll('&amp;', '&'), pageUrl);
      } catch {
        continue; // invalid URL, skip
      }

      const isHttp = resolved.protocol === 'http:' || resolved.protocol === 'https:';
      if (!isHttp) continue;
      if (resolved.hostname !== baseUrl.hostname) continue;
      if (!resolved.pathname.startsWith(basePath)) continue;
      if (assetRegex.test(resolved.pathname)) continue;

      resolved.hash = '';
      found.add(resolved.href);
    }
    return [...found];
  }

  /**
   * Derive a flat markdown filename from a page URL.
   *
   * The base path prefix is removed, remaining slashes become `-`, characters
   * that are invalid on common filesystems become `_`, and the name is capped
   * at 200 characters. The start page itself maps to `index.md`.
   *
   * @param {string} url - Page URL.
   * @param {URL} baseUrl - Parsed start URL of the crawl.
   * @returns {string} Filename ending in `.md`, never containing a path separator.
   */
  _urlToFilename(url, baseUrl) {
    const { pathname } = new URL(url);
    const prefix = baseUrl.pathname;
    // Strip the prefix only where it actually is a prefix.
    const relative = pathname.startsWith(prefix) ? pathname.slice(prefix.length) : pathname;

    let name = relative
      .replace(/^\/|\/$/g, '')
      .replace(/\//g, '-')
      .replace(/[\\:*?"<>|]/g, '_')
      .slice(0, 200) || 'index';

    // Defensive: never emit a name that could be read as a parent reference.
    if (name.includes('..')) name = `page-${Date.now()}`;
    return `${name}.md`;
  }

  /**
   * Decide whether a URL targets this machine or a private network.
   *
   * Recognises `localhost` names, IPv4 0/8, 10/8, 127/8, 169.254/16,
   * 172.16/12 and 192.168/16, and IPv6 `::`, `::1`, fc00::/7, fe80::/10 plus
   * IPv4-mapped forms. Only literal addresses are inspected; a public DNS
   * name that resolves to a private address is not detected here.
   *
   * @param {string} url - Absolute URL to inspect.
   * @returns {boolean} True when the URL must not be fetched.
   * @throws {TypeError} If `url` cannot be parsed.
   */
  _isInternalUrl(url) {
    // URL parsing already canonicalises numeric hosts (hex, decimal, short forms).
    let host = new URL(url).hostname.toLowerCase().replace(/\.$/, '');
    if (host === 'localhost' || host.endsWith('.localhost')) return true;

    if (host.startsWith('[')) {
      const v6 = host.slice(1, -1);
      if (v6 === '::' || v6 === '::1') return true;
      if (/^f[cd][0-9a-f]{2}:/.test(v6)) return true; // fc00::/7 unique local
      if (/^fe[89ab][0-9a-f]:/.test(v6)) return true; // fe80::/10 link-local

      // IPv4-mapped (::ffff:a.b.c.d) is serialised as two hex groups.
      const mapped = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(v6);
      if (!mapped) return false;
      const hi = Number.parseInt(mapped[1], 16);
      const lo = Number.parseInt(mapped[2], 16);
      host = [hi >> 8, hi & 0xff, lo >> 8, lo & 0xff].join('.');
    }

    // Range checks apply to IPv4 literals only, so that ordinary DNS names
    // like "10.example.com" are not mistaken for private addresses.
    if (!/^\d{1,3}(\.\d{1,3}){3}$/.test(host)) return false;
    const [a, b] = host.split('.').map(Number);
    if (a === 0 || a === 10 || a === 127) return true;
    if (a === 172 && b >= 16 && b <= 31) return true;
    if (a === 192 && b === 168) return true;
    if (a === 169 && b === 254) return true;
    return false;
  }

  /**
   * Pause for the given number of milliseconds.
   *
   * @param {number} ms - Delay passed to `setTimeout`.
   * @returns {Promise<void>} Resolves once the timer fires.
   */
  _sleep(ms) {
    return new Promise((r) => setTimeout(r, ms));
  }
}