@iflow-mcp/jakeliume-webpeel 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (547)
  1. package/LICENSE +15 -0
  2. package/README.md +313 -0
  3. package/dist/cache.d.ts +30 -0
  4. package/dist/cache.js +139 -0
  5. package/dist/cli/commands/auth.d.ts +5 -0
  6. package/dist/cli/commands/auth.js +411 -0
  7. package/dist/cli/commands/doctor.d.ts +37 -0
  8. package/dist/cli/commands/doctor.js +371 -0
  9. package/dist/cli/commands/fetch.d.ts +6 -0
  10. package/dist/cli/commands/fetch.js +1345 -0
  11. package/dist/cli/commands/guide.d.ts +2 -0
  12. package/dist/cli/commands/guide.js +183 -0
  13. package/dist/cli/commands/interact.d.ts +5 -0
  14. package/dist/cli/commands/interact.js +840 -0
  15. package/dist/cli/commands/jobs.d.ts +5 -0
  16. package/dist/cli/commands/jobs.js +997 -0
  17. package/dist/cli/commands/monitor.d.ts +12 -0
  18. package/dist/cli/commands/monitor.js +197 -0
  19. package/dist/cli/commands/observe.d.ts +12 -0
  20. package/dist/cli/commands/observe.js +158 -0
  21. package/dist/cli/commands/screenshot.d.ts +5 -0
  22. package/dist/cli/commands/screenshot.js +282 -0
  23. package/dist/cli/commands/search.d.ts +5 -0
  24. package/dist/cli/commands/search.js +1021 -0
  25. package/dist/cli/commands/setup.d.ts +13 -0
  26. package/dist/cli/commands/setup.js +244 -0
  27. package/dist/cli/commands/skill.d.ts +15 -0
  28. package/dist/cli/commands/skill.js +195 -0
  29. package/dist/cli/utils.d.ts +84 -0
  30. package/dist/cli/utils.js +806 -0
  31. package/dist/cli-auth.d.ts +75 -0
  32. package/dist/cli-auth.js +369 -0
  33. package/dist/cli.d.ts +17 -0
  34. package/dist/cli.js +99 -0
  35. package/dist/core/actions.d.ts +69 -0
  36. package/dist/core/actions.js +495 -0
  37. package/dist/core/agent.d.ts +98 -0
  38. package/dist/core/agent.js +558 -0
  39. package/dist/core/answer.d.ts +42 -0
  40. package/dist/core/answer.js +395 -0
  41. package/dist/core/application-tracker.d.ts +84 -0
  42. package/dist/core/application-tracker.js +184 -0
  43. package/dist/core/apply.d.ts +162 -0
  44. package/dist/core/apply.js +816 -0
  45. package/dist/core/auth-detection.d.ts +35 -0
  46. package/dist/core/auth-detection.js +358 -0
  47. package/dist/core/auto-extract.d.ts +82 -0
  48. package/dist/core/auto-extract.js +604 -0
  49. package/dist/core/auto-interact.d.ts +23 -0
  50. package/dist/core/auto-interact.js +246 -0
  51. package/dist/core/bm25-filter.d.ts +66 -0
  52. package/dist/core/bm25-filter.js +288 -0
  53. package/dist/core/branding.d.ts +54 -0
  54. package/dist/core/branding.js +234 -0
  55. package/dist/core/browser-fetch.d.ts +323 -0
  56. package/dist/core/browser-fetch.js +1600 -0
  57. package/dist/core/browser-pool.d.ts +91 -0
  58. package/dist/core/browser-pool.js +550 -0
  59. package/dist/core/budget.d.ts +42 -0
  60. package/dist/core/budget.js +324 -0
  61. package/dist/core/business-intel.d.ts +47 -0
  62. package/dist/core/business-intel.js +279 -0
  63. package/dist/core/cache.d.ts +13 -0
  64. package/dist/core/cache.js +121 -0
  65. package/dist/core/cf-worker-proxy.d.ts +32 -0
  66. package/dist/core/cf-worker-proxy.js +87 -0
  67. package/dist/core/challenge-detection.d.ts +26 -0
  68. package/dist/core/challenge-detection.js +468 -0
  69. package/dist/core/change-tracking.d.ts +75 -0
  70. package/dist/core/change-tracking.js +276 -0
  71. package/dist/core/chunker.d.ts +46 -0
  72. package/dist/core/chunker.js +249 -0
  73. package/dist/core/chunking.d.ts +42 -0
  74. package/dist/core/chunking.js +181 -0
  75. package/dist/core/circuit-breaker.d.ts +44 -0
  76. package/dist/core/circuit-breaker.js +85 -0
  77. package/dist/core/content-pruner.d.ts +47 -0
  78. package/dist/core/content-pruner.js +425 -0
  79. package/dist/core/cookie-cache.d.ts +60 -0
  80. package/dist/core/cookie-cache.js +163 -0
  81. package/dist/core/crawl-checkpoint.d.ts +54 -0
  82. package/dist/core/crawl-checkpoint.js +104 -0
  83. package/dist/core/crawler.d.ts +84 -0
  84. package/dist/core/crawler.js +349 -0
  85. package/dist/core/cross-verify.d.ts +27 -0
  86. package/dist/core/cross-verify.js +93 -0
  87. package/dist/core/deep-fetch.d.ts +74 -0
  88. package/dist/core/deep-fetch.js +405 -0
  89. package/dist/core/deep-research.d.ts +141 -0
  90. package/dist/core/deep-research.js +972 -0
  91. package/dist/core/design-analysis.d.ts +70 -0
  92. package/dist/core/design-analysis.js +490 -0
  93. package/dist/core/design-compare.d.ts +38 -0
  94. package/dist/core/design-compare.js +264 -0
  95. package/dist/core/diff.d.ts +61 -0
  96. package/dist/core/diff.js +289 -0
  97. package/dist/core/dns-cache.d.ts +20 -0
  98. package/dist/core/dns-cache.js +198 -0
  99. package/dist/core/documents.d.ts +23 -0
  100. package/dist/core/documents.js +123 -0
  101. package/dist/core/domain-memory.d.ts +66 -0
  102. package/dist/core/domain-memory.js +163 -0
  103. package/dist/core/domain-verify.d.ts +40 -0
  104. package/dist/core/domain-verify.js +379 -0
  105. package/dist/core/engine-ranker.d.ts +112 -0
  106. package/dist/core/engine-ranker.js +395 -0
  107. package/dist/core/extract-inline.d.ts +38 -0
  108. package/dist/core/extract-inline.js +215 -0
  109. package/dist/core/extract-listings.d.ts +38 -0
  110. package/dist/core/extract-listings.js +461 -0
  111. package/dist/core/extract.d.ts +9 -0
  112. package/dist/core/extract.js +139 -0
  113. package/dist/core/fetch-cache.d.ts +57 -0
  114. package/dist/core/fetch-cache.js +95 -0
  115. package/dist/core/fetcher.d.ts +13 -0
  116. package/dist/core/fetcher.js +12 -0
  117. package/dist/core/google-cache.d.ts +29 -0
  118. package/dist/core/google-cache.js +180 -0
  119. package/dist/core/google-serp-parser.d.ts +82 -0
  120. package/dist/core/google-serp-parser.js +287 -0
  121. package/dist/core/hotel-search.d.ts +122 -0
  122. package/dist/core/hotel-search.js +382 -0
  123. package/dist/core/http-fetch.d.ts +72 -0
  124. package/dist/core/http-fetch.js +820 -0
  125. package/dist/core/human.d.ts +175 -0
  126. package/dist/core/human.js +680 -0
  127. package/dist/core/image-caption.d.ts +44 -0
  128. package/dist/core/image-caption.js +271 -0
  129. package/dist/core/jobs.d.ts +75 -0
  130. package/dist/core/jobs.js +634 -0
  131. package/dist/core/json-ld.d.ts +15 -0
  132. package/dist/core/json-ld.js +617 -0
  133. package/dist/core/language-detect.d.ts +18 -0
  134. package/dist/core/language-detect.js +135 -0
  135. package/dist/core/links.d.ts +10 -0
  136. package/dist/core/links.js +44 -0
  137. package/dist/core/llm-extract.d.ts +71 -0
  138. package/dist/core/llm-extract.js +507 -0
  139. package/dist/core/llm-provider.d.ts +100 -0
  140. package/dist/core/llm-provider.js +702 -0
  141. package/dist/core/local-search.d.ts +60 -0
  142. package/dist/core/local-search.js +308 -0
  143. package/dist/core/logger.d.ts +28 -0
  144. package/dist/core/logger.js +104 -0
  145. package/dist/core/map.d.ts +33 -0
  146. package/dist/core/map.js +127 -0
  147. package/dist/core/markdown.d.ts +92 -0
  148. package/dist/core/markdown.js +809 -0
  149. package/dist/core/metadata.d.ts +34 -0
  150. package/dist/core/metadata.js +422 -0
  151. package/dist/core/observe.d.ts +113 -0
  152. package/dist/core/observe.js +395 -0
  153. package/dist/core/ocr.d.ts +12 -0
  154. package/dist/core/ocr.js +33 -0
  155. package/dist/core/paginate.d.ts +31 -0
  156. package/dist/core/paginate.js +106 -0
  157. package/dist/core/pdf.d.ts +8 -0
  158. package/dist/core/pdf.js +25 -0
  159. package/dist/core/peel-tls.d.ts +25 -0
  160. package/dist/core/peel-tls.js +220 -0
  161. package/dist/core/pipeline.d.ts +132 -0
  162. package/dist/core/pipeline.js +1666 -0
  163. package/dist/core/profiles.d.ts +61 -0
  164. package/dist/core/profiles.js +350 -0
  165. package/dist/core/prompt-guard.d.ts +30 -0
  166. package/dist/core/prompt-guard.js +119 -0
  167. package/dist/core/proxy-config.d.ts +90 -0
  168. package/dist/core/proxy-config.js +172 -0
  169. package/dist/core/quick-answer.d.ts +53 -0
  170. package/dist/core/quick-answer.js +833 -0
  171. package/dist/core/rate-governor.d.ts +80 -0
  172. package/dist/core/rate-governor.js +238 -0
  173. package/dist/core/readability.d.ts +57 -0
  174. package/dist/core/readability.js +533 -0
  175. package/dist/core/research.d.ts +66 -0
  176. package/dist/core/research.js +270 -0
  177. package/dist/core/retry.d.ts +60 -0
  178. package/dist/core/retry.js +119 -0
  179. package/dist/core/safe-browsing.d.ts +30 -0
  180. package/dist/core/safe-browsing.js +206 -0
  181. package/dist/core/schema-extraction.d.ts +66 -0
  182. package/dist/core/schema-extraction.js +352 -0
  183. package/dist/core/schema-postprocess.d.ts +32 -0
  184. package/dist/core/schema-postprocess.js +469 -0
  185. package/dist/core/schema-templates.d.ts +19 -0
  186. package/dist/core/schema-templates.js +143 -0
  187. package/dist/core/screenshot.d.ts +224 -0
  188. package/dist/core/screenshot.js +207 -0
  189. package/dist/core/search-engines.d.ts +25 -0
  190. package/dist/core/search-engines.js +182 -0
  191. package/dist/core/search-provider.d.ts +243 -0
  192. package/dist/core/search-provider.js +1629 -0
  193. package/dist/core/searxng-provider.d.ts +35 -0
  194. package/dist/core/searxng-provider.js +105 -0
  195. package/dist/core/selective-evidence.d.ts +151 -0
  196. package/dist/core/selective-evidence.js +389 -0
  197. package/dist/core/site-search.d.ts +44 -0
  198. package/dist/core/site-search.js +252 -0
  199. package/dist/core/sitemap.d.ts +23 -0
  200. package/dist/core/sitemap.js +105 -0
  201. package/dist/core/source-credibility.d.ts +29 -0
  202. package/dist/core/source-credibility.js +584 -0
  203. package/dist/core/source-scoring.d.ts +166 -0
  204. package/dist/core/source-scoring.js +396 -0
  205. package/dist/core/stemmer.d.ts +38 -0
  206. package/dist/core/stemmer.js +509 -0
  207. package/dist/core/strategies.d.ts +104 -0
  208. package/dist/core/strategies.js +1044 -0
  209. package/dist/core/strategy-hooks.d.ts +145 -0
  210. package/dist/core/strategy-hooks.js +74 -0
  211. package/dist/core/structured-extract.d.ts +43 -0
  212. package/dist/core/structured-extract.js +550 -0
  213. package/dist/core/summarize.d.ts +17 -0
  214. package/dist/core/summarize.js +78 -0
  215. package/dist/core/synonyms.d.ts +42 -0
  216. package/dist/core/synonyms.js +184 -0
  217. package/dist/core/system-monitor.d.ts +61 -0
  218. package/dist/core/system-monitor.js +133 -0
  219. package/dist/core/table-format.d.ts +30 -0
  220. package/dist/core/table-format.js +146 -0
  221. package/dist/core/threat-feeds.d.ts +23 -0
  222. package/dist/core/threat-feeds.js +104 -0
  223. package/dist/core/timing.d.ts +21 -0
  224. package/dist/core/timing.js +33 -0
  225. package/dist/core/transcript-export.d.ts +47 -0
  226. package/dist/core/transcript-export.js +107 -0
  227. package/dist/core/user-agents.d.ts +82 -0
  228. package/dist/core/user-agents.js +239 -0
  229. package/dist/core/vertical-search.d.ts +54 -0
  230. package/dist/core/vertical-search.js +158 -0
  231. package/dist/core/watch-manager.d.ts +175 -0
  232. package/dist/core/watch-manager.js +416 -0
  233. package/dist/core/watch.d.ts +101 -0
  234. package/dist/core/watch.js +389 -0
  235. package/dist/core/youtube.d.ts +130 -0
  236. package/dist/core/youtube.js +1175 -0
  237. package/dist/ee/challenge-re-export.d.ts +1 -0
  238. package/dist/ee/challenge-re-export.js +1 -0
  239. package/dist/ee/challenge-solver.d.ts +72 -0
  240. package/dist/ee/challenge-solver.js +720 -0
  241. package/dist/ee/domain-extractors.d.ts +8 -0
  242. package/dist/ee/domain-extractors.js +8 -0
  243. package/dist/ee/domain-intel.d.ts +16 -0
  244. package/dist/ee/domain-intel.js +133 -0
  245. package/dist/ee/extractors/allrecipes.d.ts +2 -0
  246. package/dist/ee/extractors/allrecipes.js +120 -0
  247. package/dist/ee/extractors/amazon.d.ts +2 -0
  248. package/dist/ee/extractors/amazon.js +78 -0
  249. package/dist/ee/extractors/arxiv.d.ts +2 -0
  250. package/dist/ee/extractors/arxiv.js +137 -0
  251. package/dist/ee/extractors/bestbuy.d.ts +2 -0
  252. package/dist/ee/extractors/bestbuy.js +78 -0
  253. package/dist/ee/extractors/carscom.d.ts +2 -0
  254. package/dist/ee/extractors/carscom.js +121 -0
  255. package/dist/ee/extractors/coingecko.d.ts +2 -0
  256. package/dist/ee/extractors/coingecko.js +134 -0
  257. package/dist/ee/extractors/craigslist.d.ts +2 -0
  258. package/dist/ee/extractors/craigslist.js +92 -0
  259. package/dist/ee/extractors/devto.d.ts +2 -0
  260. package/dist/ee/extractors/devto.js +135 -0
  261. package/dist/ee/extractors/ebay.d.ts +2 -0
  262. package/dist/ee/extractors/ebay.js +90 -0
  263. package/dist/ee/extractors/espn.d.ts +2 -0
  264. package/dist/ee/extractors/espn.js +260 -0
  265. package/dist/ee/extractors/etsy.d.ts +2 -0
  266. package/dist/ee/extractors/etsy.js +52 -0
  267. package/dist/ee/extractors/facebook.d.ts +2 -0
  268. package/dist/ee/extractors/facebook.js +46 -0
  269. package/dist/ee/extractors/github.d.ts +2 -0
  270. package/dist/ee/extractors/github.js +196 -0
  271. package/dist/ee/extractors/google-flights.d.ts +2 -0
  272. package/dist/ee/extractors/google-flights.js +176 -0
  273. package/dist/ee/extractors/hackernews.d.ts +2 -0
  274. package/dist/ee/extractors/hackernews.js +147 -0
  275. package/dist/ee/extractors/imdb.d.ts +2 -0
  276. package/dist/ee/extractors/imdb.js +172 -0
  277. package/dist/ee/extractors/index.d.ts +26 -0
  278. package/dist/ee/extractors/index.js +247 -0
  279. package/dist/ee/extractors/instagram.d.ts +2 -0
  280. package/dist/ee/extractors/instagram.js +102 -0
  281. package/dist/ee/extractors/kalshi.d.ts +2 -0
  282. package/dist/ee/extractors/kalshi.js +121 -0
  283. package/dist/ee/extractors/kayak-cars.d.ts +2 -0
  284. package/dist/ee/extractors/kayak-cars.js +270 -0
  285. package/dist/ee/extractors/linkedin.d.ts +2 -0
  286. package/dist/ee/extractors/linkedin.js +113 -0
  287. package/dist/ee/extractors/medium.d.ts +2 -0
  288. package/dist/ee/extractors/medium.js +130 -0
  289. package/dist/ee/extractors/news.d.ts +4 -0
  290. package/dist/ee/extractors/news.js +173 -0
  291. package/dist/ee/extractors/npm.d.ts +2 -0
  292. package/dist/ee/extractors/npm.js +86 -0
  293. package/dist/ee/extractors/pdf.d.ts +2 -0
  294. package/dist/ee/extractors/pdf.js +108 -0
  295. package/dist/ee/extractors/pinterest.d.ts +2 -0
  296. package/dist/ee/extractors/pinterest.js +34 -0
  297. package/dist/ee/extractors/polymarket.d.ts +2 -0
  298. package/dist/ee/extractors/polymarket.js +358 -0
  299. package/dist/ee/extractors/producthunt.d.ts +2 -0
  300. package/dist/ee/extractors/producthunt.js +88 -0
  301. package/dist/ee/extractors/pubmed.d.ts +2 -0
  302. package/dist/ee/extractors/pubmed.js +162 -0
  303. package/dist/ee/extractors/pypi.d.ts +2 -0
  304. package/dist/ee/extractors/pypi.js +80 -0
  305. package/dist/ee/extractors/reddit.d.ts +2 -0
  306. package/dist/ee/extractors/reddit.js +438 -0
  307. package/dist/ee/extractors/redfin.d.ts +2 -0
  308. package/dist/ee/extractors/redfin.js +156 -0
  309. package/dist/ee/extractors/semanticscholar.d.ts +2 -0
  310. package/dist/ee/extractors/semanticscholar.js +131 -0
  311. package/dist/ee/extractors/shared.d.ts +12 -0
  312. package/dist/ee/extractors/shared.js +76 -0
  313. package/dist/ee/extractors/soundcloud.d.ts +2 -0
  314. package/dist/ee/extractors/soundcloud.js +34 -0
  315. package/dist/ee/extractors/sportsbetting.d.ts +2 -0
  316. package/dist/ee/extractors/sportsbetting.js +37 -0
  317. package/dist/ee/extractors/spotify.d.ts +2 -0
  318. package/dist/ee/extractors/spotify.js +34 -0
  319. package/dist/ee/extractors/stackoverflow.d.ts +2 -0
  320. package/dist/ee/extractors/stackoverflow.js +61 -0
  321. package/dist/ee/extractors/substack.d.ts +2 -0
  322. package/dist/ee/extractors/substack.js +115 -0
  323. package/dist/ee/extractors/substackroot.d.ts +2 -0
  324. package/dist/ee/extractors/substackroot.js +46 -0
  325. package/dist/ee/extractors/tiktok.d.ts +2 -0
  326. package/dist/ee/extractors/tiktok.js +29 -0
  327. package/dist/ee/extractors/tradingview.d.ts +2 -0
  328. package/dist/ee/extractors/tradingview.js +182 -0
  329. package/dist/ee/extractors/twitch.d.ts +2 -0
  330. package/dist/ee/extractors/twitch.js +36 -0
  331. package/dist/ee/extractors/twitter.d.ts +2 -0
  332. package/dist/ee/extractors/twitter.js +327 -0
  333. package/dist/ee/extractors/types.d.ts +14 -0
  334. package/dist/ee/extractors/types.js +1 -0
  335. package/dist/ee/extractors/walmart.d.ts +2 -0
  336. package/dist/ee/extractors/walmart.js +50 -0
  337. package/dist/ee/extractors/weather.d.ts +2 -0
  338. package/dist/ee/extractors/weather.js +133 -0
  339. package/dist/ee/extractors/wikipedia.d.ts +4 -0
  340. package/dist/ee/extractors/wikipedia.js +235 -0
  341. package/dist/ee/extractors/yelp.d.ts +2 -0
  342. package/dist/ee/extractors/yelp.js +216 -0
  343. package/dist/ee/extractors/youtube.d.ts +2 -0
  344. package/dist/ee/extractors/youtube.js +189 -0
  345. package/dist/ee/extractors/zillow.d.ts +54 -0
  346. package/dist/ee/extractors/zillow.js +247 -0
  347. package/dist/ee/extractors-re-export.d.ts +1 -0
  348. package/dist/ee/extractors-re-export.js +1 -0
  349. package/dist/ee/premium-hooks.d.ts +20 -0
  350. package/dist/ee/premium-hooks.js +50 -0
  351. package/dist/ee/spa-detection.d.ts +2 -0
  352. package/dist/ee/spa-detection.js +2 -0
  353. package/dist/ee/stability.d.ts +4 -0
  354. package/dist/ee/stability.js +29 -0
  355. package/dist/ee/swr-cache.d.ts +14 -0
  356. package/dist/ee/swr-cache.js +34 -0
  357. package/dist/index.d.ts +143 -0
  358. package/dist/index.js +291 -0
  359. package/dist/integrations/index.d.ts +2 -0
  360. package/dist/integrations/index.js +2 -0
  361. package/dist/integrations/langchain.d.ts +64 -0
  362. package/dist/integrations/langchain.js +115 -0
  363. package/dist/integrations/llamaindex.d.ts +50 -0
  364. package/dist/integrations/llamaindex.js +91 -0
  365. package/dist/mcp/handlers/act.d.ts +5 -0
  366. package/dist/mcp/handlers/act.js +34 -0
  367. package/dist/mcp/handlers/definitions.d.ts +6 -0
  368. package/dist/mcp/handlers/definitions.js +395 -0
  369. package/dist/mcp/handlers/extract.d.ts +7 -0
  370. package/dist/mcp/handlers/extract.js +135 -0
  371. package/dist/mcp/handlers/fetch.d.ts +6 -0
  372. package/dist/mcp/handlers/fetch.js +98 -0
  373. package/dist/mcp/handlers/find.d.ts +5 -0
  374. package/dist/mcp/handlers/find.js +137 -0
  375. package/dist/mcp/handlers/index.d.ts +13 -0
  376. package/dist/mcp/handlers/index.js +63 -0
  377. package/dist/mcp/handlers/legacy.d.ts +25 -0
  378. package/dist/mcp/handlers/legacy.js +450 -0
  379. package/dist/mcp/handlers/meta.d.ts +6 -0
  380. package/dist/mcp/handlers/meta.js +40 -0
  381. package/dist/mcp/handlers/monitor.d.ts +5 -0
  382. package/dist/mcp/handlers/monitor.js +41 -0
  383. package/dist/mcp/handlers/observe.d.ts +8 -0
  384. package/dist/mcp/handlers/observe.js +37 -0
  385. package/dist/mcp/handlers/read.d.ts +6 -0
  386. package/dist/mcp/handlers/read.js +78 -0
  387. package/dist/mcp/handlers/see.d.ts +5 -0
  388. package/dist/mcp/handlers/see.js +75 -0
  389. package/dist/mcp/handlers/types.d.ts +29 -0
  390. package/dist/mcp/handlers/types.js +28 -0
  391. package/dist/mcp/server.d.ts +7 -0
  392. package/dist/mcp/server.js +108 -0
  393. package/dist/mcp/smart-router.d.ts +23 -0
  394. package/dist/mcp/smart-router.js +178 -0
  395. package/dist/server/app.d.ts +14 -0
  396. package/dist/server/app.js +632 -0
  397. package/dist/server/auth-store.d.ts +28 -0
  398. package/dist/server/auth-store.js +88 -0
  399. package/dist/server/bull-queues.d.ts +60 -0
  400. package/dist/server/bull-queues.js +90 -0
  401. package/dist/server/email-service.d.ts +55 -0
  402. package/dist/server/email-service.js +291 -0
  403. package/dist/server/job-queue.d.ts +100 -0
  404. package/dist/server/job-queue.js +145 -0
  405. package/dist/server/logger.d.ts +10 -0
  406. package/dist/server/logger.js +37 -0
  407. package/dist/server/middleware/audit-log.d.ts +14 -0
  408. package/dist/server/middleware/audit-log.js +73 -0
  409. package/dist/server/middleware/auth.d.ts +35 -0
  410. package/dist/server/middleware/auth.js +225 -0
  411. package/dist/server/middleware/rate-limit.d.ts +50 -0
  412. package/dist/server/middleware/rate-limit.js +270 -0
  413. package/dist/server/middleware/scope-guard.d.ts +25 -0
  414. package/dist/server/middleware/scope-guard.js +45 -0
  415. package/dist/server/middleware/url-validator.d.ts +15 -0
  416. package/dist/server/middleware/url-validator.js +201 -0
  417. package/dist/server/openapi.yaml +6418 -0
  418. package/dist/server/pg-auth-store.d.ts +146 -0
  419. package/dist/server/pg-auth-store.js +576 -0
  420. package/dist/server/pg-job-queue.d.ts +59 -0
  421. package/dist/server/pg-job-queue.js +375 -0
  422. package/dist/server/routes/activity.d.ts +6 -0
  423. package/dist/server/routes/activity.js +79 -0
  424. package/dist/server/routes/admin-active.d.ts +7 -0
  425. package/dist/server/routes/admin-active.js +120 -0
  426. package/dist/server/routes/admin-stats.d.ts +7 -0
  427. package/dist/server/routes/admin-stats.js +176 -0
  428. package/dist/server/routes/agent.d.ts +24 -0
  429. package/dist/server/routes/agent.js +480 -0
  430. package/dist/server/routes/answer.d.ts +5 -0
  431. package/dist/server/routes/answer.js +125 -0
  432. package/dist/server/routes/ask.d.ts +28 -0
  433. package/dist/server/routes/ask.js +295 -0
  434. package/dist/server/routes/batch.d.ts +6 -0
  435. package/dist/server/routes/batch.js +493 -0
  436. package/dist/server/routes/cache-warm.d.ts +25 -0
  437. package/dist/server/routes/cache-warm.js +212 -0
  438. package/dist/server/routes/cli-usage.d.ts +6 -0
  439. package/dist/server/routes/cli-usage.js +127 -0
  440. package/dist/server/routes/compat.d.ts +23 -0
  441. package/dist/server/routes/compat.js +652 -0
  442. package/dist/server/routes/crawl.d.ts +13 -0
  443. package/dist/server/routes/crawl.js +287 -0
  444. package/dist/server/routes/deep-fetch.d.ts +8 -0
  445. package/dist/server/routes/deep-fetch.js +57 -0
  446. package/dist/server/routes/deep-research.d.ts +11 -0
  447. package/dist/server/routes/deep-research.js +232 -0
  448. package/dist/server/routes/demo.d.ts +24 -0
  449. package/dist/server/routes/demo.js +517 -0
  450. package/dist/server/routes/do.d.ts +8 -0
  451. package/dist/server/routes/do.js +72 -0
  452. package/dist/server/routes/extract.d.ts +14 -0
  453. package/dist/server/routes/extract.js +325 -0
  454. package/dist/server/routes/feed.d.ts +15 -0
  455. package/dist/server/routes/feed.js +311 -0
  456. package/dist/server/routes/fetch-queue.d.ts +13 -0
  457. package/dist/server/routes/fetch-queue.js +357 -0
  458. package/dist/server/routes/fetch.d.ts +7 -0
  459. package/dist/server/routes/fetch.js +1274 -0
  460. package/dist/server/routes/go.d.ts +14 -0
  461. package/dist/server/routes/go.js +81 -0
  462. package/dist/server/routes/health.d.ts +11 -0
  463. package/dist/server/routes/health.js +141 -0
  464. package/dist/server/routes/jobs.d.ts +7 -0
  465. package/dist/server/routes/jobs.js +574 -0
  466. package/dist/server/routes/map.d.ts +11 -0
  467. package/dist/server/routes/map.js +116 -0
  468. package/dist/server/routes/mcp.d.ts +14 -0
  469. package/dist/server/routes/mcp.js +197 -0
  470. package/dist/server/routes/metrics.d.ts +37 -0
  471. package/dist/server/routes/metrics.js +149 -0
  472. package/dist/server/routes/oauth.d.ts +9 -0
  473. package/dist/server/routes/oauth.js +396 -0
  474. package/dist/server/routes/playground.d.ts +17 -0
  475. package/dist/server/routes/playground.js +283 -0
  476. package/dist/server/routes/reader.d.ts +18 -0
  477. package/dist/server/routes/reader.js +192 -0
  478. package/dist/server/routes/research.d.ts +14 -0
  479. package/dist/server/routes/research.js +482 -0
  480. package/dist/server/routes/screenshot.d.ts +22 -0
  481. package/dist/server/routes/screenshot.js +820 -0
  482. package/dist/server/routes/search.d.ts +6 -0
  483. package/dist/server/routes/search.js +874 -0
  484. package/dist/server/routes/session.d.ts +17 -0
  485. package/dist/server/routes/session.js +548 -0
  486. package/dist/server/routes/share.d.ts +18 -0
  487. package/dist/server/routes/share.js +462 -0
  488. package/dist/server/routes/smart-search/handlers/cars.d.ts +2 -0
  489. package/dist/server/routes/smart-search/handlers/cars.js +102 -0
  490. package/dist/server/routes/smart-search/handlers/flights.d.ts +2 -0
  491. package/dist/server/routes/smart-search/handlers/flights.js +72 -0
  492. package/dist/server/routes/smart-search/handlers/general.d.ts +13 -0
  493. package/dist/server/routes/smart-search/handlers/general.js +717 -0
  494. package/dist/server/routes/smart-search/handlers/hotels.d.ts +2 -0
  495. package/dist/server/routes/smart-search/handlers/hotels.js +88 -0
  496. package/dist/server/routes/smart-search/handlers/products.d.ts +2 -0
  497. package/dist/server/routes/smart-search/handlers/products.js +1309 -0
  498. package/dist/server/routes/smart-search/handlers/rental.d.ts +2 -0
  499. package/dist/server/routes/smart-search/handlers/rental.js +154 -0
  500. package/dist/server/routes/smart-search/handlers/restaurants.d.ts +2 -0
  501. package/dist/server/routes/smart-search/handlers/restaurants.js +225 -0
  502. package/dist/server/routes/smart-search/handlers/transit-verdict.d.ts +41 -0
  503. package/dist/server/routes/smart-search/handlers/transit-verdict.js +224 -0
  504. package/dist/server/routes/smart-search/index.d.ts +19 -0
  505. package/dist/server/routes/smart-search/index.js +546 -0
  506. package/dist/server/routes/smart-search/intent.d.ts +3 -0
  507. package/dist/server/routes/smart-search/intent.js +264 -0
  508. package/dist/server/routes/smart-search/llm.d.ts +16 -0
  509. package/dist/server/routes/smart-search/llm.js +70 -0
  510. package/dist/server/routes/smart-search/sources/reddit.d.ts +18 -0
  511. package/dist/server/routes/smart-search/sources/reddit.js +34 -0
  512. package/dist/server/routes/smart-search/sources/yelp.d.ts +25 -0
  513. package/dist/server/routes/smart-search/sources/yelp.js +171 -0
  514. package/dist/server/routes/smart-search/sources/youtube.d.ts +8 -0
  515. package/dist/server/routes/smart-search/sources/youtube.js +9 -0
  516. package/dist/server/routes/smart-search/types.d.ts +81 -0
  517. package/dist/server/routes/smart-search/types.js +1 -0
  518. package/dist/server/routes/smart-search/utils.d.ts +20 -0
  519. package/dist/server/routes/smart-search/utils.js +146 -0
  520. package/dist/server/routes/stats.d.ts +6 -0
  521. package/dist/server/routes/stats.js +71 -0
  522. package/dist/server/routes/stripe.d.ts +15 -0
  523. package/dist/server/routes/stripe.js +296 -0
  524. package/dist/server/routes/transcript-export.d.ts +10 -0
  525. package/dist/server/routes/transcript-export.js +178 -0
  526. package/dist/server/routes/usage.d.ts +9 -0
  527. package/dist/server/routes/usage.js +279 -0
  528. package/dist/server/routes/users.d.ts +8 -0
  529. package/dist/server/routes/users.js +1867 -0
  530. package/dist/server/routes/watch.d.ts +15 -0
  531. package/dist/server/routes/watch.js +309 -0
  532. package/dist/server/routes/webhooks.d.ts +26 -0
  533. package/dist/server/routes/webhooks.js +170 -0
  534. package/dist/server/routes/youtube.d.ts +6 -0
  535. package/dist/server/routes/youtube.js +130 -0
  536. package/dist/server/sentry.d.ts +14 -0
  537. package/dist/server/sentry.js +104 -0
  538. package/dist/server/types.d.ts +15 -0
  539. package/dist/server/types.js +7 -0
  540. package/dist/server/utils/response.d.ts +44 -0
  541. package/dist/server/utils/response.js +69 -0
  542. package/dist/server/utils/sse.d.ts +22 -0
  543. package/dist/server/utils/sse.js +38 -0
  544. package/dist/types.d.ts +552 -0
  545. package/dist/types.js +39 -0
  546. package/llms.txt +105 -0
  547. package/package.json +189 -0
@@ -0,0 +1,276 @@
1
+ /**
2
+ * Local-first content change tracking
3
+ * Stores snapshots in ~/.webpeel/snapshots/ and provides diffing
4
+ */
5
+ import { createHash } from 'crypto';
6
+ import { promises as fs } from 'fs';
7
+ import { join } from 'path';
8
+ import { homedir } from 'os';
9
// Folder that holds one JSON snapshot file per tracked URL: ~/.webpeel/snapshots
const SNAPSHOTS_DIR = join(homedir(), '.webpeel', 'snapshots');
/**
 * Map a URL to the file that stores its snapshot.
 *
 * The file name is the hex-encoded SHA-256 digest of the URL string followed
 * by `.json`, so the same URL always resolves to the same file.
 *
 * @param url - URL whose snapshot location is wanted
 * @returns Path of the snapshot file inside SNAPSHOTS_DIR
 */
function getSnapshotPath(url) {
    const hasher = createHash('sha256');
    hasher.update(url);
    const fileName = hasher.digest('hex') + '.json';
    return join(SNAPSHOTS_DIR, fileName);
}
18
/**
 * Create the snapshots directory, including any missing parent folders.
 *
 * `fs.mkdir` with `recursive: true` already resolves when the directory is
 * present, so the only error tolerated here is EEXIST. Any other failure
 * (for example a permission problem) is rethrown so the caller sees the real
 * cause instead of a less specific error from the next file operation.
 *
 * @throws Any error raised by `fs.mkdir` other than EEXIST
 */
async function ensureSnapshotsDir() {
    try {
        await fs.mkdir(SNAPSHOTS_DIR, { recursive: true });
    }
    catch (error) {
        const alreadyExists = typeof error === 'object'
            && error !== null
            && 'code' in error
            && error.code === 'EEXIST';
        if (!alreadyExists) {
            throw error;
        }
    }
}
29
/**
 * Load the stored snapshot of a URL, if there is one.
 *
 * Any failure while locating, reading or parsing the snapshot file is treated
 * as "no snapshot" rather than reported to the caller.
 *
 * @param url - URL to look up
 * @returns The parsed contents of the snapshot file, or null when it cannot be read or parsed
 *
 * @example
 * ```typescript
 * const snapshot = await getSnapshot('https://example.com');
 * if (snapshot) {
 *   console.log('Last scraped:', new Date(snapshot.timestamp));
 * }
 * ```
 */
export async function getSnapshot(url) {
    let snapshot = null;
    try {
        const raw = await fs.readFile(getSnapshotPath(url), 'utf-8');
        snapshot = JSON.parse(raw);
    }
    catch {
        // Missing, unreadable or malformed file: report "no snapshot".
        snapshot = null;
    }
    return snapshot;
}
53
/**
 * Persist a snapshot as pretty-printed JSON (2-space indent).
 *
 * The target file is derived from `snapshot.url`; an existing file for the
 * same URL is overwritten.
 *
 * @param snapshot - Snapshot object to store; must carry a `url` property
 */
async function saveSnapshot(snapshot) {
    await ensureSnapshotsDir();
    const target = getSnapshotPath(snapshot.url);
    const serialized = JSON.stringify(snapshot, null, 2);
    await fs.writeFile(target, serialized, 'utf-8');
}
61
/**
 * Line-based diff of two texts using a longest-common-subsequence table.
 *
 * The texts are split on '\n'. The LCS table needs (m + 1) x (n + 1) cells
 * for m old lines and n new lines.
 *
 * @param oldContent - Previous text
 * @param newContent - Current text
 * @returns An object with:
 *   - `text`: unified-diff style hunks with up to 3 lines of context, each
 *     introduced by `@@ -oldStart,oldCount +newStart,newCount @@`
 *     (empty string when the texts are identical);
 *   - `additions` / `deletions`: number of added / removed lines;
 *   - `changes`: every line in order as `{ type, line, content }`, where
 *     `type` is 'normal' | 'add' | 'del' and `line` is the 1-based line
 *     number in the new text (in the old text for 'del' entries).
 */
function computeDiff(oldContent, newContent) {
    const CONTEXT = 3;
    const oldLines = oldContent.split('\n');
    const newLines = newContent.split('\n');
    const m = oldLines.length;
    const n = newLines.length;
    // lcs[i][j] = LCS length of the first i old lines and the first j new lines
    const lcs = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));
    for (let i = 1; i <= m; i++) {
        for (let j = 1; j <= n; j++) {
            lcs[i][j] = oldLines[i - 1] === newLines[j - 1]
                ? lcs[i - 1][j - 1] + 1
                : Math.max(lcs[i - 1][j], lcs[i][j - 1]);
        }
    }
    // Backtrack from the bottom-right corner. Entries are collected in reverse
    // and flipped once at the end, which avoids a quadratic unshift per line.
    const changes = [];
    let i = m;
    let j = n;
    while (i > 0 || j > 0) {
        if (i > 0 && j > 0 && oldLines[i - 1] === newLines[j - 1]) {
            changes.push({ type: 'normal', line: j, content: newLines[j - 1] });
            i--;
            j--;
        }
        else if (j > 0 && (i === 0 || lcs[i][j - 1] >= lcs[i - 1][j])) {
            changes.push({ type: 'add', line: j, content: newLines[j - 1] });
            j--;
        }
        else {
            changes.push({ type: 'del', line: i, content: oldLines[i - 1] });
            i--;
        }
    }
    changes.reverse();
    // Count changes and record, for every position k, how many old / new
    // lines precede changes[k]; hunk headers are derived from these.
    let additions = 0;
    let deletions = 0;
    const oldBefore = [0];
    const newBefore = [0];
    for (const change of changes) {
        if (change.type === 'add')
            additions++;
        else if (change.type === 'del')
            deletions++;
        oldBefore.push(oldBefore[oldBefore.length - 1] + (change.type === 'add' ? 0 : 1));
        newBefore.push(newBefore[newBefore.length - 1] + (change.type === 'del' ? 0 : 1));
    }
    // Build hunks: each changed run plus CONTEXT unchanged lines on both sides.
    // Runs separated by at most 2 * CONTEXT unchanged lines share one hunk, so
    // no line is ever printed twice.
    const diffLines = [];
    let k = 0;
    while (k < changes.length) {
        if (changes[k].type === 'normal') {
            k++;
            continue;
        }
        const start = Math.max(0, k - CONTEXT);
        let lastChange = k;
        for (let scan = k + 1; scan < changes.length && scan - lastChange <= 2 * CONTEXT; scan++) {
            if (changes[scan].type !== 'normal')
                lastChange = scan;
        }
        const end = Math.min(changes.length, lastChange + CONTEXT + 1);
        const oldCount = oldBefore[end] - oldBefore[start];
        const newCount = newBefore[end] - newBefore[start];
        // Unified-diff convention: an empty range is anchored at the preceding line.
        const oldStart = oldCount === 0 ? oldBefore[start] : oldBefore[start] + 1;
        const newStart = newCount === 0 ? newBefore[start] : newBefore[start] + 1;
        diffLines.push(`@@ -${oldStart},${oldCount} +${newStart},${newCount} @@`);
        for (let p = start; p < end; p++) {
            const c = changes[p];
            const prefix = c.type === 'add' ? '+' : c.type === 'del' ? '-' : ' ';
            diffLines.push(`${prefix}${c.content}`);
        }
        k = end;
    }
    return {
        text: diffLines.join('\n'),
        additions,
        deletions,
        changes,
    };
}
141
/**
 * Compare the current content of a URL with its stored snapshot and store
 * the current state as the new snapshot.
 *
 * Outcomes:
 * - no stored snapshot: the content is saved and the status is 'new';
 * - stored fingerprint equals `fingerprint`: only the snapshot timestamp is
 *   refreshed and the status is 'same';
 * - otherwise: a line diff against the stored content is computed, the new
 *   snapshot is saved together with the previous fingerprint and timestamp,
 *   and the status is 'changed'.
 *
 * Any error raised along the way is logged with `console.error` and the call
 * then reports 'new' with `previousScrapeAt: null`.
 *
 * @param url - URL being tracked
 * @param content - Current content
 * @param fingerprint - Fingerprint of the current content
 * @returns `{ changeStatus, previousScrapeAt }`, plus `diff` when the status is 'changed'
 *
 * @example
 * ```typescript
 * const result = await trackChange('https://example.com', content, fingerprint);
 * if (result.changeStatus === 'changed') {
 *   console.log(`+${result.diff.additions} -${result.diff.deletions}`);
 * }
 * ```
 */
export async function trackChange(url, content, fingerprint) {
    const firstSeen = () => ({ changeStatus: 'new', previousScrapeAt: null });
    try {
        const prior = await getSnapshot(url);
        if (!prior) {
            // Nothing stored yet for this URL.
            await saveSnapshot({ url, fingerprint, content, timestamp: Date.now() });
            return firstSeen();
        }
        const unchanged = prior.fingerprint === fingerprint;
        if (!unchanged) {
            const diff = computeDiff(prior.content, content);
            const metadata = {
                previousFingerprint: prior.fingerprint,
                previousTimestamp: prior.timestamp,
            };
            await saveSnapshot({ url, fingerprint, content, timestamp: Date.now(), metadata });
            return {
                changeStatus: 'changed',
                previousScrapeAt: new Date(prior.timestamp).toISOString(),
                diff,
            };
        }
        // Same fingerprint: keep the stored snapshot, refresh its timestamp only.
        await saveSnapshot({ ...prior, timestamp: Date.now() });
        return {
            changeStatus: 'same',
            previousScrapeAt: new Date(prior.timestamp).toISOString(),
        };
    }
    catch (error) {
        console.error('Change tracking error:', error);
        return firstSeen();
    }
}
215
/**
 * Delete stored snapshots, optionally restricted to URLs matching a pattern.
 *
 * Only `*.json` files in the snapshots directory are considered. Files are
 * processed independently: a file that cannot be read, parsed or deleted is
 * skipped and the remaining files are still processed. The returned number is
 * the count of files actually deleted, also when an error interrupts the run.
 *
 * A pattern longer than 200 characters or one that is not a valid regular
 * expression is reported through `console.error`; nothing is deleted and 0
 * is returned.
 *
 * @param urlPattern - Optional regex source matched against each snapshot's `url` (omit to clear all)
 * @returns Number of snapshots deleted
 *
 * @example
 * ```typescript
 * // Clear all snapshots
 * const count = await clearSnapshots();
 *
 * // Clear specific domain
 * const count = await clearSnapshots('example\\.com');
 * ```
 */
export async function clearSnapshots(urlPattern) {
    // Declared outside the try block so a late failure still reports real deletions.
    let cleared = 0;
    try {
        await ensureSnapshotsDir();
        const files = await fs.readdir(SNAPSHOTS_DIR);
        let pattern = null;
        if (urlPattern) {
            // The length cap bounds the cost of a user-supplied expression.
            if (urlPattern.length > 200)
                throw new Error('URL pattern too long (max 200 chars)');
            try {
                pattern = new RegExp(urlPattern);
            }
            catch {
                throw new Error(`Invalid regex: ${urlPattern}`);
            }
        }
        for (const file of files) {
            if (!file.endsWith('.json'))
                continue;
            const path = join(SNAPSHOTS_DIR, file);
            try {
                if (pattern) {
                    // The stored URL decides whether this file is selected.
                    const snapshot = JSON.parse(await fs.readFile(path, 'utf-8'));
                    if (!pattern.test(snapshot.url))
                        continue;
                }
                await fs.unlink(path);
                cleared++;
            }
            catch (e) {
                if (!pattern) {
                    console.error('Clear snapshots error:', e);
                }
                else if (process.env.DEBUG) {
                    console.debug('[webpeel]', 'snapshot parse failed:', e instanceof Error ? e.message : e);
                }
            }
        }
    }
    catch (error) {
        console.error('Clear snapshots error:', error);
    }
    return cleared;
}
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Content chunker for RAG pipelines.
3
+ * Splits markdown content into overlapping chunks with rich metadata.
4
+ */
5
export interface ChunkOptions {
    /**
     * Upper bound on chunk size, in tokens.
     * Token counts are approximate: about 4 characters count as one token.
     */
    maxTokens?: number;
    /** Number of tokens shared between consecutive chunks. */
    overlap?: number;
    /** How the content is split: by heading section, by paragraph, or into fixed-size pieces. */
    strategy?: 'section' | 'paragraph' | 'fixed';
}
13
export interface ContentChunk {
    /** Zero-based position of the chunk in the result. */
    index: number;
    /** Text of the chunk. */
    text: string;
    /** Estimated number of tokens (about 4 characters per token). */
    tokenCount: number;
    /** Number of words in the chunk. */
    wordCount: number;
    /** Heading of the section the chunk belongs to, or null when there is none. */
    section: string | null;
    /** Heading level of that section (1 for h1, 2 for h2, ...), or null when there is none. */
    sectionDepth: number | null;
    /** Character offset in the original content where the chunk starts. */
    startOffset: number;
    /** Character offset in the original content where the chunk ends. */
    endOffset: number;
}
31
export interface ChunkResult {
    /** The chunks, in document order. */
    chunks: ContentChunk[];
    /** Number of entries in `chunks`. */
    totalChunks: number;
    /** Length of the original content, in characters. */
    originalLength: number;
    /** Name of the chunking strategy that was used. */
    strategy: string;
    /** The options that were applied, with every field filled in. */
    options: Required<ChunkOptions>;
}
43
/**
 * Split content into RAG-ready chunks with metadata.
 *
 * @param content - Markdown text to split
 * @param options - Optional size, overlap and strategy settings
 * @returns The chunks together with summary information about the run
 */
export declare function chunkContent(content: string, options?: ChunkOptions): ChunkResult;
@@ -0,0 +1,249 @@
1
+ /**
2
+ * Content chunker for RAG pipelines.
3
+ * Splits markdown content into overlapping chunks with rich metadata.
4
+ */
5
const DEFAULT_MAX_TOKENS = 512;
const DEFAULT_OVERLAP = 50;
const CHARS_PER_TOKEN = 4; // rough approximation
/**
 * Split content into RAG-ready chunks with metadata.
 *
 * Option handling:
 * - `maxTokens` must be a positive number, otherwise 512 is used;
 * - `overlap` must be a number >= 0, otherwise 50 is used. An explicit 0 is
 *   honoured and disables overlap;
 * - `strategy` must be 'section', 'paragraph' or 'fixed'; anything else
 *   falls back to 'section'.
 *
 * @param content - Markdown text to split
 * @param options - Optional chunking settings
 * @returns `chunks`, `totalChunks`, `originalLength` (characters), plus the
 *   `strategy` and `options` that were actually applied
 */
export function chunkContent(content, options = {}) {
    const maxTokens = typeof options.maxTokens === 'number' && options.maxTokens > 0
        ? options.maxTokens
        : DEFAULT_MAX_TOKENS;
    // Deliberately not `||`: 0 is a valid request meaning "no overlap".
    const overlap = typeof options.overlap === 'number' && options.overlap >= 0
        ? options.overlap
        : DEFAULT_OVERLAP;
    let strategy;
    let chunks;
    switch (options.strategy) {
        case 'paragraph':
            strategy = 'paragraph';
            chunks = chunkByParagraph(content, maxTokens, overlap);
            break;
        case 'fixed':
            strategy = 'fixed';
            chunks = chunkByFixed(content, maxTokens, overlap);
            break;
        default:
            // Covers 'section', a missing value and unknown names, so the
            // reported strategy always matches the one that ran.
            strategy = 'section';
            chunks = chunkBySection(content, maxTokens, overlap);
    }
    return {
        chunks,
        totalChunks: chunks.length,
        originalLength: content.length,
        strategy,
        options: { maxTokens, overlap, strategy },
    };
}
38
/**
 * Section-based chunking (recommended for RAG).
 *
 * The content is split into sections at markdown headings. A section whose
 * body fits into `maxTokens` becomes one chunk; a larger body is split at
 * blank lines into paragraphs that are packed greedily. A single paragraph
 * longer than the limit is kept whole. Every chunk text starts with its
 * section heading, when there is one, for context.
 *
 * Offsets refer to the original content:
 * - `startOffset` is the position of the heading line for the first chunk of
 *   a section, and the position of the first new paragraph otherwise;
 * - `endOffset` is the end of the last paragraph or body covered.
 * Overlap text copied from the previous chunk is not part of the offset range.
 *
 * @param content - Markdown text
 * @param maxTokens - Chunk size limit in tokens (CHARS_PER_TOKEN characters each)
 * @param overlap - Tokens of trailing text repeated at the start of the next chunk of a section
 * @returns Chunk records in document order
 */
function chunkBySection(content, maxTokens, overlap) {
    const maxChars = maxTokens * CHARS_PER_TOKEN;
    const overlapChars = overlap * CHARS_PER_TOKEN;
    const chunks = [];
    // Append one chunk record; `body` is the chunk text without the heading prefix.
    const emit = (section, body, startOffset, endOffset) => {
        const { heading, depth } = section;
        const trimmed = body.trim();
        const text = heading ? `${heading}\n\n${trimmed}` : trimmed;
        chunks.push({
            index: chunks.length,
            text,
            tokenCount: Math.ceil(text.length / CHARS_PER_TOKEN),
            wordCount: text.split(/\s+/).filter(Boolean).length,
            section: heading ? heading.replace(/^#+\s*/, '') : null,
            sectionDepth: depth,
            startOffset,
            endOffset,
        });
    };
    for (const section of splitByHeadings(content)) {
        const { heading, body, startOffset } = section;
        if (!body.trim())
            continue;
        // The section offset points at the heading line; the body starts after
        // that line and its newline.
        const bodyStart = heading ? startOffset + heading.length + 1 : startOffset;
        if (body.length <= maxChars) {
            emit(section, body, startOffset, bodyStart + body.length);
            continue;
        }
        // Large section: pack paragraphs, tracking where each one really sits.
        let currentText = '';
        let chunkStart = startOffset;
        let chunkEnd = bodyStart;
        let cursor = 0; // search position in body, just past the previous paragraph
        for (const para of body.split(/\n\n+/)) {
            if (!para.trim())
                continue;
            // Searching from the cursor keeps repeated paragraphs apart.
            const paraStart = body.indexOf(para, cursor);
            cursor = paraStart + para.length;
            const candidate = currentText ? `${currentText}\n\n${para}` : para;
            if (candidate.length > maxChars && currentText) {
                emit(section, currentText, chunkStart, chunkEnd);
                currentText = overlapChars > 0 && currentText.length > overlapChars
                    ? `${currentText.slice(-overlapChars)}\n\n${para}`
                    : para;
                chunkStart = bodyStart + paraStart;
            }
            else {
                currentText = candidate;
            }
            chunkEnd = bodyStart + cursor;
        }
        if (currentText.trim()) {
            emit(section, currentText, chunkStart, chunkEnd);
        }
    }
    return chunks;
}
119
/**
 * Paragraph-based chunking.
 *
 * Paragraphs (text separated by blank lines) are packed greedily into chunks
 * of at most `maxTokens`; a single paragraph longer than the limit is kept
 * whole. When a chunk is closed and it is longer than the overlap, its last
 * `overlap` tokens are repeated at the start of the next chunk.
 *
 * `section` / `sectionDepth` describe the most recent heading paragraph seen
 * up to and including the chunk's last paragraph. The heading of the
 * paragraph that opens the next chunk is never attributed to the chunk being
 * closed.
 *
 * `startOffset` is the position in `content` of the first new paragraph of
 * the chunk and `endOffset` the end of its last paragraph; overlap text is
 * not part of that range. `tokenCount` and `wordCount` are computed from the
 * trimmed chunk text.
 *
 * @param content - Text to split
 * @param maxTokens - Chunk size limit in tokens (CHARS_PER_TOKEN characters each)
 * @param overlap - Tokens of trailing text repeated in the following chunk
 * @returns Chunk records in document order
 */
function chunkByParagraph(content, maxTokens, overlap) {
    const maxChars = maxTokens * CHARS_PER_TOKEN;
    const overlapChars = overlap * CHARS_PER_TOKEN;
    const chunks = [];
    let currentText = '';
    let chunkStart = 0;
    let chunkEnd = 0;
    let cursor = 0; // search position in content, just past the previous paragraph
    let currentHeading = null;
    let currentDepth = null;
    // Append the chunk accumulated so far, using the heading state at this moment.
    const emit = () => {
        const text = currentText.trim();
        chunks.push({
            index: chunks.length,
            text,
            tokenCount: Math.ceil(text.length / CHARS_PER_TOKEN),
            wordCount: text.split(/\s+/).filter(Boolean).length,
            section: currentHeading,
            sectionDepth: currentDepth,
            startOffset: chunkStart,
            endOffset: chunkEnd,
        });
    };
    for (const para of content.split(/\n\n+/)) {
        if (!para.trim())
            continue;
        // Searching from the cursor keeps repeated paragraphs apart.
        const paraStart = content.indexOf(para, cursor);
        cursor = paraStart + para.length;
        const candidate = currentText ? `${currentText}\n\n${para}` : para;
        if (candidate.length > maxChars && currentText) {
            // Close the running chunk before this paragraph can change the heading.
            emit();
            currentText = overlapChars > 0 && currentText.length > overlapChars
                ? `${currentText.slice(-overlapChars)}\n\n${para}`
                : para;
            chunkStart = paraStart;
        }
        else {
            if (!currentText)
                chunkStart = paraStart;
            currentText = candidate;
        }
        const headingMatch = para.match(/^(#{1,6})\s+(.+)/);
        if (headingMatch) {
            currentHeading = headingMatch[2];
            currentDepth = headingMatch[1].length;
        }
        chunkEnd = cursor;
    }
    if (currentText.trim()) {
        emit();
    }
    return chunks;
}
179
/**
 * Fixed-size chunking with overlap.
 *
 * Slices the content into windows of `maxTokens` (CHARS_PER_TOKEN characters
 * per token). Consecutive windows start `step` characters apart, where
 * `step` is the window size minus the overlap, with two safeguards:
 * - it is at least 100 characters, so a large overlap cannot produce a flood
 *   of nearly identical chunks;
 * - it never exceeds the window size, so no part of the content is skipped
 *   when the window is smaller than 100 characters.
 *
 * Each chunk text is trimmed and windows that are empty after trimming are
 * dropped. `section` is the first markdown heading found inside the window.
 * `startOffset` / `endOffset` are the window bounds in `content`.
 *
 * @param content - Text to split
 * @param maxTokens - Window size in tokens
 * @param overlap - Tokens shared by consecutive windows
 * @returns Chunk records in document order
 */
function chunkByFixed(content, maxTokens, overlap) {
    const maxChars = maxTokens * CHARS_PER_TOKEN;
    const overlapChars = overlap * CHARS_PER_TOKEN;
    // The outer max(1, ...) guarantees forward progress for a non-positive window.
    const step = Math.max(1, Math.min(maxChars, Math.max(maxChars - overlapChars, 100)));
    const chunks = [];
    for (let i = 0; i < content.length; i += step) {
        const text = content.slice(i, i + maxChars).trim();
        if (!text)
            continue;
        // Try to find section heading within this chunk
        const headingMatch = text.match(/^(#{1,6})\s+(.+)/m);
        chunks.push({
            index: chunks.length,
            text,
            tokenCount: Math.ceil(text.length / CHARS_PER_TOKEN),
            wordCount: text.split(/\s+/).filter(Boolean).length,
            section: headingMatch ? headingMatch[2] : null,
            sectionDepth: headingMatch ? headingMatch[1].length : null,
            startOffset: i,
            endOffset: Math.min(i + maxChars, content.length),
        });
    }
    return chunks;
}
208
/**
 * Split content into sections based on markdown headings.
 *
 * A heading is a line starting with 1-6 `#` characters followed by whitespace
 * and text. Lines inside a fenced code block (opened by a line starting with
 * three or more backticks or tildes, closed by a line holding only a fence of
 * the same character that is at least as long) are never treated as
 * headings, so `# comment` lines in code samples stay in the section body.
 *
 * @param content - Markdown text
 * @returns Sections in document order, each as
 *   `{ heading, depth, body, startOffset }`: `heading` is the full heading
 *   line (null for text before the first heading), `depth` the number of `#`
 *   characters (null without heading), `body` the lines up to the next
 *   heading joined with '\n', and `startOffset` the character offset of the
 *   heading line (0 for text before the first heading).
 */
function splitByHeadings(content) {
    const sections = [];
    let currentHeading = null;
    let currentDepth = null;
    let currentBody = [];
    let currentStart = 0;
    let offset = 0;
    // Marker of the fence that opened the current code block, or null outside one.
    let openFence = null;
    const flush = () => {
        if (currentBody.length > 0 || currentHeading) {
            sections.push({
                heading: currentHeading,
                depth: currentDepth,
                body: currentBody.join('\n'),
                startOffset: currentStart,
            });
        }
    };
    for (const line of content.split('\n')) {
        const fenceMatch = line.match(/^ {0,3}(`{3,}|~{3,})/);
        if (fenceMatch) {
            const marker = fenceMatch[1];
            if (openFence === null) {
                openFence = marker;
            }
            else if (marker[0] === openFence[0]
                && marker.length >= openFence.length
                && line.trim() === marker) {
                openFence = null;
            }
        }
        const headingMatch = openFence === null ? line.match(/^(#{1,6})\s+(.+)/) : null;
        if (headingMatch) {
            // A new heading closes the section collected so far.
            flush();
            currentHeading = line;
            currentDepth = headingMatch[1].length;
            currentBody = [];
            currentStart = offset;
        }
        else {
            currentBody.push(line);
        }
        offset += line.length + 1; // +1 for newline
    }
    // Don't forget last section
    flush();
    return sections;
}
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Smart content chunking for LLM processing.
3
+ *
4
+ * Splits content into manageable pieces with configurable overlap and strategy.
5
+ * Zero external dependencies; target <1ms for typical page content.
6
+ */
7
export interface ChunkOptions {
    /**
     * Target number of tokens per chunk.
     * Default: 4000
     */
    chunkSize?: number;
    /**
     * Number of tokens shared between consecutive chunks.
     * Default: 200
     */
    overlap?: number;
    /**
     * Strategy used to split the content.
     * Default: 'semantic'
     */
    strategy?: 'fixed' | 'semantic' | 'paragraph';
}
15
export interface Chunk {
    /** Zero-based position of the chunk in the result. */
    index: number;
    /** Text of the chunk. */
    content: string;
    /** Estimated number of tokens in this chunk. */
    tokens: number;
    /** Character offset of the chunk in the original content. */
    startOffset: number;
    /** True for the final chunk only. */
    isLast: boolean;
}
27
export interface ChunkResult {
    /** The chunks, in document order. */
    chunks: Chunk[];
    /** Number of chunks produced. */
    totalChunks: number;
    /** Sum of the token estimates of all chunks. */
    totalTokens: number;
    /** Token estimate of the original content. */
    originalTokens: number;
}
37
/**
 * Estimate the token count of a text with the chars/4 heuristic.
 * Accurate within ±10%.
 *
 * @param text - Text to measure
 * @returns Estimated number of tokens
 */
export declare function estimateTokens(text: string): number;
39
/**
 * Split content into chunks suitable for LLM processing.
 *
 * @param content - Text to split
 * @param options - Optional chunk size, overlap and strategy settings
 * @returns The chunks together with chunk and token totals
 */
export declare function chunkContent(content: string, options?: ChunkOptions): ChunkResult;