webcontext-ai 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +583 -0
  3. package/dist/browser/manager.d.ts +47 -0
  4. package/dist/browser/manager.d.ts.map +1 -0
  5. package/dist/browser/manager.js +215 -0
  6. package/dist/browser/manager.js.map +1 -0
  7. package/dist/cache/cache.d.ts +22 -0
  8. package/dist/cache/cache.d.ts.map +1 -0
  9. package/dist/cache/cache.js +150 -0
  10. package/dist/cache/cache.js.map +1 -0
  11. package/dist/chunking/chunker.d.ts +26 -0
  12. package/dist/chunking/chunker.d.ts.map +1 -0
  13. package/dist/chunking/chunker.js +208 -0
  14. package/dist/chunking/chunker.js.map +1 -0
  15. package/dist/cli/index.d.ts +3 -0
  16. package/dist/cli/index.d.ts.map +1 -0
  17. package/dist/cli/index.js +406 -0
  18. package/dist/cli/index.js.map +1 -0
  19. package/dist/core/pipeline.d.ts +35 -0
  20. package/dist/core/pipeline.d.ts.map +1 -0
  21. package/dist/core/pipeline.js +476 -0
  22. package/dist/core/pipeline.js.map +1 -0
  23. package/dist/core/stream.d.ts +48 -0
  24. package/dist/core/stream.d.ts.map +1 -0
  25. package/dist/core/stream.js +72 -0
  26. package/dist/core/stream.js.map +1 -0
  27. package/dist/core/types.d.ts +259 -0
  28. package/dist/core/types.d.ts.map +1 -0
  29. package/dist/core/types.js +4 -0
  30. package/dist/core/types.js.map +1 -0
  31. package/dist/export/index.d.ts +3 -0
  32. package/dist/export/index.d.ts.map +1 -0
  33. package/dist/export/index.js +8 -0
  34. package/dist/export/index.js.map +1 -0
  35. package/dist/export/templates.d.ts +25 -0
  36. package/dist/export/templates.d.ts.map +1 -0
  37. package/dist/export/templates.js +76 -0
  38. package/dist/export/templates.js.map +1 -0
  39. package/dist/export/vectordb.d.ts +21 -0
  40. package/dist/export/vectordb.d.ts.map +1 -0
  41. package/dist/export/vectordb.js +101 -0
  42. package/dist/export/vectordb.js.map +1 -0
  43. package/dist/extractors/content.d.ts +23 -0
  44. package/dist/extractors/content.d.ts.map +1 -0
  45. package/dist/extractors/content.js +328 -0
  46. package/dist/extractors/content.js.map +1 -0
  47. package/dist/extractors/github.d.ts +19 -0
  48. package/dist/extractors/github.d.ts.map +1 -0
  49. package/dist/extractors/github.js +150 -0
  50. package/dist/extractors/github.js.map +1 -0
  51. package/dist/extractors/images.d.ts +20 -0
  52. package/dist/extractors/images.d.ts.map +1 -0
  53. package/dist/extractors/images.js +73 -0
  54. package/dist/extractors/images.js.map +1 -0
  55. package/dist/extractors/pdf.d.ts +11 -0
  56. package/dist/extractors/pdf.d.ts.map +1 -0
  57. package/dist/extractors/pdf.js +107 -0
  58. package/dist/extractors/pdf.js.map +1 -0
  59. package/dist/extractors/screenshot.d.ts +21 -0
  60. package/dist/extractors/screenshot.d.ts.map +1 -0
  61. package/dist/extractors/screenshot.js +85 -0
  62. package/dist/extractors/screenshot.js.map +1 -0
  63. package/dist/index.d.ts +70 -0
  64. package/dist/index.d.ts.map +1 -0
  65. package/dist/index.js +206 -0
  66. package/dist/index.js.map +1 -0
  67. package/dist/mcp-server.d.ts +3 -0
  68. package/dist/mcp-server.d.ts.map +1 -0
  69. package/dist/mcp-server.js +108 -0
  70. package/dist/mcp-server.js.map +1 -0
  71. package/dist/sdk/client.d.ts +48 -0
  72. package/dist/sdk/client.d.ts.map +1 -0
  73. package/dist/sdk/client.js +120 -0
  74. package/dist/sdk/client.js.map +1 -0
  75. package/dist/sdk/mcp.d.ts +12 -0
  76. package/dist/sdk/mcp.d.ts.map +1 -0
  77. package/dist/sdk/mcp.js +146 -0
  78. package/dist/sdk/mcp.js.map +1 -0
  79. package/dist/sdk/server.d.ts +5 -0
  80. package/dist/sdk/server.d.ts.map +1 -0
  81. package/dist/sdk/server.js +158 -0
  82. package/dist/sdk/server.js.map +1 -0
  83. package/dist/search/vector.d.ts +26 -0
  84. package/dist/search/vector.d.ts.map +1 -0
  85. package/dist/search/vector.js +142 -0
  86. package/dist/search/vector.js.map +1 -0
  87. package/dist/transformers/markdown.d.ts +21 -0
  88. package/dist/transformers/markdown.d.ts.map +1 -0
  89. package/dist/transformers/markdown.js +242 -0
  90. package/dist/transformers/markdown.js.map +1 -0
  91. package/dist/utils/dedup.d.ts +20 -0
  92. package/dist/utils/dedup.d.ts.map +1 -0
  93. package/dist/utils/dedup.js +61 -0
  94. package/dist/utils/dedup.js.map +1 -0
  95. package/dist/utils/index.d.ts +6 -0
  96. package/dist/utils/index.d.ts.map +1 -0
  97. package/dist/utils/index.js +15 -0
  98. package/dist/utils/index.js.map +1 -0
  99. package/dist/utils/metrics.d.ts +16 -0
  100. package/dist/utils/metrics.d.ts.map +1 -0
  101. package/dist/utils/metrics.js +28 -0
  102. package/dist/utils/metrics.js.map +1 -0
  103. package/dist/utils/scheduler.d.ts +19 -0
  104. package/dist/utils/scheduler.d.ts.map +1 -0
  105. package/dist/utils/scheduler.js +63 -0
  106. package/dist/utils/scheduler.js.map +1 -0
  107. package/dist/utils/sitemap.d.ts +17 -0
  108. package/dist/utils/sitemap.d.ts.map +1 -0
  109. package/dist/utils/sitemap.js +118 -0
  110. package/dist/utils/sitemap.js.map +1 -0
  111. package/dist/utils/validation.d.ts +142 -0
  112. package/dist/utils/validation.d.ts.map +1 -0
  113. package/dist/utils/validation.js +35 -0
  114. package/dist/utils/validation.js.map +1 -0
  115. package/dist/utils/webhook.d.ts +21 -0
  116. package/dist/utils/webhook.d.ts.map +1 -0
  117. package/dist/utils/webhook.js +108 -0
  118. package/dist/utils/webhook.js.map +1 -0
  119. package/package.json +109 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scheduler.d.ts","sourceRoot":"","sources":["../../src/utils/scheduler.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAE5D;;;GAGG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,IAAI,CAA0C;IAEtD,QAAQ,CAAC,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,GAAG,KAAK,OAAO,CAAC,WAAW,CAAC,GAAG,IAAI;IAYjH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI;IAKxB,SAAS,IAAI,IAAI;IAKjB,QAAQ,IAAI,MAAM,EAAE;IAEpB;;;;OAIG;IACH,OAAO,CAAC,cAAc;CAmBvB"}
@@ -0,0 +1,63 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CrawlScheduler = void 0;
4
/**
 * Simple cron-like scheduler for periodic re-crawling.
 *
 * Cron expressions are not evaluated against the wall clock. They are reduced
 * to a fixed millisecond period (see cronToInterval) and driven by setInterval,
 * so the first run fires one full period after schedule() is called.
 */
class CrawlScheduler {
    /** Active interval timers, keyed by job id. */
    jobs = new Map();
    /**
     * Register a periodic crawl job, replacing any existing job with the same id.
     *
     * @param id       Job identifier, later usable with cancel().
     * @param config   Reads `cron`, `urls`, `options` and the optional `onComplete` callback.
     * @param executor Called as `executor(url, config.options)` for every URL, one at a time.
     */
    schedule(id, config, executor) {
        this.cancel(id);
        const interval = this.cronToInterval(config.cron);
        // Guards against overlapping runs when one pass takes longer than the period.
        let running = false;
        const timer = setInterval(async () => {
            if (running)
                return;
            running = true;
            try {
                for (const url of config.urls) {
                    try {
                        const result = await executor(url, config.options);
                        config.onComplete?.(result);
                    }
                    catch (err) {
                        // One failing URL must not abort the rest of the batch, and a
                        // rejection escaping a timer callback would be unhandled.
                        console.error(`[CrawlScheduler] job "${id}" failed for ${url}:`, err);
                    }
                }
            }
            catch (err) {
                // e.g. config.urls is missing or not iterable.
                console.error(`[CrawlScheduler] job "${id}" failed:`, err);
            }
            finally {
                running = false;
            }
        }, interval);
        this.jobs.set(id, timer);
    }
    /** Stop and forget the job with the given id; does nothing when the id is unknown. */
    cancel(id) {
        const timer = this.jobs.get(id);
        if (timer !== undefined) {
            clearInterval(timer);
            this.jobs.delete(id);
        }
    }
    /** Stop every registered job. */
    cancelAll() {
        for (const timer of this.jobs.values())
            clearInterval(timer);
        this.jobs.clear();
    }
    /** Ids of all currently scheduled jobs. */
    listJobs() { return [...this.jobs.keys()]; }
    /**
     * Parse a cron expression into a millisecond interval.
     * Supports: *\/N for minutes/hours, day-of-week specific (runs daily),
     * and common patterns. Falls back to 1 hour for unsupported expressions,
     * including step values that are not positive integers.
     *
     * @param cron Five-field cron expression.
     * @returns Period in milliseconds; always a positive finite number.
     */
    cronToInterval(cron) {
        const MINUTE = 60 * 1000;
        const HOUR = 60 * MINUTE;
        const DAY = 24 * HOUR;
        const parts = cron.trim().split(/\s+/);
        if (parts.length < 5)
            return HOUR; // fallback: 1 hour
        const [minute, hour, dayOfMonth] = parts;
        // Extract N from a "*/N" field; null when N is not a positive integer.
        // A zero, negative or NaN period would make setInterval fire almost continuously.
        const stepOf = (field) => {
            const step = Number.parseInt(field.slice(2), 10);
            return Number.isInteger(step) && step > 0 ? step : null;
        };
        // */N minutes (e.g., "*/5 * * * *" = every 5 min)
        if (minute.startsWith('*/')) {
            const step = stepOf(minute);
            return step === null ? HOUR : step * MINUTE;
        }
        // */N hours (e.g., "0 */2 * * *" = every 2 hours)
        if (hour.startsWith('*/')) {
            const step = stepOf(hour);
            return step === null ? HOUR : step * HOUR;
        }
        // Daily at specific time (e.g., "0 9 * * *" or "0 9 * * MON")
        if (minute !== '*' && hour !== '*' && dayOfMonth === '*')
            return DAY;
        // Every hour (e.g., "0 * * * *")
        if (minute !== '*' && hour === '*')
            return HOUR;
        // Every minute ("* * * * *")
        if (minute === '*')
            return MINUTE;
        return HOUR; // fallback: 1 hour
    }
}
62
+ exports.CrawlScheduler = CrawlScheduler;
63
+ //# sourceMappingURL=scheduler.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scheduler.js","sourceRoot":"","sources":["../../src/utils/scheduler.ts"],"names":[],"mappings":";;;AAEA;;;GAGG;AACH,MAAa,cAAc;IACjB,IAAI,GAAgC,IAAI,GAAG,EAAE,CAAC;IAEtD,QAAQ,CAAC,EAAU,EAAE,MAAsB,EAAE,QAA6D;QACxG,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAChB,MAAM,QAAQ,GAAG,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAClD,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,IAAI,EAAE;YACnC,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;gBAC9B,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC,OAAO,CAAC,CAAC;gBACnD,MAAM,CAAC,UAAU,EAAE,CAAC,MAAM,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC,EAAE,QAAQ,CAAC,CAAC;QACb,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;IAC3B,CAAC;IAED,MAAM,CAAC,EAAU;QACf,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAChC,IAAI,KAAK,EAAE,CAAC;YAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAAC,CAAC;IAC5D,CAAC;IAED,SAAS;QACP,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAAE,aAAa,CAAC,KAAK,CAAC,CAAC;QAC7D,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;IACpB,CAAC;IAED,QAAQ,KAAe,OAAO,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAEtD;;;;OAIG;IACK,cAAc,CAAC,IAAY;QACjC,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,CAAC,mBAAmB;QAEhE,MAAM,CAAC,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,AAAD,EAAG,SAAS,CAAC,GAAG,KAAK,CAAC;QAEtD,kDAAkD;QAClD,IAAI,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC;YAAE,OAAO,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC;QAC1E,kDAAkD;QAClD,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;YAAE,OAAO,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;QAC3E,8DAA8D;QAC9D,IAAI,MAAM,KAAK,GAAG,IAAI,IAAI,KAAK,GAAG,IAAI,UAAU,KAAK,GAAG;YAAE,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;QACrF,iCAAiC;QACjC,IAAI,MAAM,KAAK,GAAG,IAAI,IAAI,KAAK,GAAG;YAAE,OAAO,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;QAC1D,6BAA6B;QAC7B,IAAI,MAAM,KAAK,GAAG;YAAE,OAAO,EAAE,GAAG,IAAI,CAAC;QAErC,OAAO,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,CAAC,mB
AAmB;IAC5C,CAAC;CACF;AAnDD,wCAmDC"}
@@ -0,0 +1,17 @@
1
+ import { SitemapEntry } from '../core/types';
2
+ /**
3
+ * Parse sitemap.xml and sitemap index files.
4
+ * Supports: standard sitemaps, sitemap indexes, gzipped sitemaps.
5
+ */
6
+ export declare class SitemapParser {
7
+ private userAgent;
8
+ constructor(userAgent?: string);
9
+ /** Parse a sitemap URL, handling both sitemap indexes and regular sitemaps */
10
+ parse(sitemapUrl: string): Promise<SitemapEntry[]>;
11
+ /** Discover sitemap URL from robots.txt or common locations */
12
+ discover(baseUrl: string): Promise<string | null>;
13
+ private fetchXml;
14
+ private parseEntries;
15
+ private parseSitemapIndex;
16
+ }
17
+ //# sourceMappingURL=sitemap.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sitemap.d.ts","sourceRoot":"","sources":["../../src/utils/sitemap.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAE7C;;;GAGG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,SAAS,CAAS;gBAEd,SAAS,GAAE,MAAyB;IAIhD,8EAA8E;IACxE,KAAK,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAYxD,+DAA+D;IACzD,QAAQ,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;YA4BzC,QAAQ;IAUtB,OAAO,CAAC,YAAY;IAuBpB,OAAO,CAAC,iBAAiB;CAW1B"}
@@ -0,0 +1,118 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || function (mod) {
19
+ if (mod && mod.__esModule) return mod;
20
+ var result = {};
21
+ if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
22
+ __setModuleDefault(result, mod);
23
+ return result;
24
+ };
25
+ Object.defineProperty(exports, "__esModule", { value: true });
26
+ exports.SitemapParser = void 0;
27
+ const cheerio = __importStar(require("cheerio"));
28
/**
 * Parse sitemap.xml and sitemap index files.
 * Supports: standard sitemaps, sitemap indexes (followed recursively) and
 * gzip-compressed sitemap bodies.
 */
class SitemapParser {
    /** Upper bound for a decompressed sitemap; the sitemap protocol caps files at 50 MB. */
    static MAX_DECOMPRESSED_BYTES = 50 * 1024 * 1024;
    /** Value sent in the User-Agent header of every request. */
    userAgent;
    constructor(userAgent = 'WebContext/1.0') {
        this.userAgent = userAgent;
    }
    /**
     * Parse a sitemap URL, handling both sitemap indexes and regular sitemaps.
     *
     * @param sitemapUrl URL of a sitemap or sitemap index.
     * @param visited    Internal: URLs already fetched during this traversal. Callers
     *                   normally omit it; it stops self-referencing indexes from
     *                   recursing forever.
     * @returns Entries of the sitemap, or of all child sitemaps for an index.
     * @throws Error when a sitemap (or any child sitemap) cannot be fetched.
     */
    async parse(sitemapUrl, visited = new Set()) {
        if (visited.has(sitemapUrl))
            return [];
        visited.add(sitemapUrl);
        const xml = await this.fetchXml(sitemapUrl);
        const sitemapUrls = this.parseSitemapIndex(xml);
        if (sitemapUrls.length > 0) {
            const results = await Promise.all(sitemapUrls.map((url) => this.parse(url, visited)));
            return results.flat();
        }
        return this.parseEntries(xml);
    }
    /**
     * Discover sitemap URL from robots.txt or common locations.
     *
     * @returns The first `Sitemap:` URL in robots.txt, else the first common
     *          path that answers a HEAD request successfully, else null.
     */
    async discover(baseUrl) {
        const base = baseUrl.replace(/\/$/, '');
        try {
            const res = await fetch(`${base}/robots.txt`, {
                headers: { 'User-Agent': this.userAgent },
            });
            if (res.ok) {
                const text = await res.text();
                const match = text.match(/^Sitemap:\s*(.+)$/im);
                if (match)
                    return match[1].trim();
            }
        }
        catch {
            // Best effort: an unreachable robots.txt just means we probe the common paths.
        }
        const commonPaths = ['/sitemap.xml', '/sitemap_index.xml'];
        for (const path of commonPaths) {
            try {
                const res = await fetch(`${base}${path}`, {
                    method: 'HEAD',
                    headers: { 'User-Agent': this.userAgent },
                });
                if (res.ok)
                    return `${base}${path}`;
            }
            catch {
                // Best effort: try the next candidate.
            }
        }
        return null;
    }
    /** Fetch a sitemap and return its XML text, decompressing gzip bodies. */
    async fetchXml(url) {
        const res = await fetch(url, {
            headers: { 'User-Agent': this.userAgent },
        });
        if (!res.ok) {
            throw new Error(`Failed to fetch sitemap: ${url} (${res.status})`);
        }
        return this.decodeBody(Buffer.from(await res.arrayBuffer()));
    }
    /**
     * Turn a raw response body into XML text.
     * Bodies starting with the gzip magic bytes (0x1f 0x8b) are decompressed first;
     * this covers `.xml.gz` files, which fetch does not decompress by itself.
     */
    decodeBody(body) {
        const isGzip = body.length >= 2 && body[0] === 0x1f && body[1] === 0x8b;
        const raw = isGzip
            ? require('node:zlib').gunzipSync(body, { maxOutputLength: SitemapParser.MAX_DECOMPRESSED_BYTES })
            : body;
        // TextDecoder decodes as UTF-8 and drops a leading BOM, like Response.text().
        return new TextDecoder('utf-8').decode(raw);
    }
    /** Extract the `<url>` entries (loc, lastmod, changefreq, priority) from sitemap XML. */
    parseEntries(xml) {
        const $ = cheerio.load(xml, { xmlMode: true });
        const entries = [];
        $('url').each((_, el) => {
            const node = $(el);
            const loc = node.find('loc').text().trim();
            if (!loc)
                return;
            const entry = { url: loc };
            const lastmod = node.find('lastmod').text().trim();
            const changefreq = node.find('changefreq').text().trim();
            const priority = Number.parseFloat(node.find('priority').text().trim());
            if (lastmod)
                entry.lastmod = lastmod;
            if (changefreq)
                entry.changefreq = changefreq;
            // Skip missing or non-numeric priorities instead of storing NaN.
            if (Number.isFinite(priority))
                entry.priority = priority;
            entries.push(entry);
        });
        return entries;
    }
    /** Extract child sitemap URLs from a sitemap index; empty for a regular sitemap. */
    parseSitemapIndex(xml) {
        const $ = cheerio.load(xml, { xmlMode: true });
        const urls = [];
        $('sitemapindex sitemap loc').each((_, el) => {
            const loc = $(el).text().trim();
            if (loc)
                urls.push(loc);
        });
        return urls;
    }
}
117
+ exports.SitemapParser = SitemapParser;
118
+ //# sourceMappingURL=sitemap.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sitemap.js","sourceRoot":"","sources":["../../src/utils/sitemap.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,iDAAmC;AAGnC;;;GAGG;AACH,MAAa,aAAa;IAChB,SAAS,CAAS;IAE1B,YAAY,YAAoB,gBAAgB;QAC9C,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,8EAA8E;IAC9E,KAAK,CAAC,KAAK,CAAC,UAAkB;QAC5B,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;QAC5C,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC;QAEhD,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAC7E,OAAO,OAAO,CAAC,IAAI,EAAE,CAAC;QACxB,CAAC;QAED,OAAO,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;IAChC,CAAC;IAED,+DAA+D;IAC/D,KAAK,CAAC,QAAQ,CAAC,OAAe;QAC5B,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAExC,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,aAAa,EAAE;gBAC5C,OAAO,EAAE,EAAE,YAAY,EAAE,IAAI,CAAC,SAAS,EAAE;aAC1C,CAAC,CAAC;YACH,IAAI,GAAG,CAAC,EAAE,EAAE,CAAC;gBACX,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;gBAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,qBAAqB,CAAC,CAAC;gBAChD,IAAI,KAAK;oBAAE,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YACpC,CAAC;QACH,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;QAEV,MAAM,WAAW,GAAG,CAAC,cAAc,EAAE,oBAAoB,CAAC,CAAC;QAC3D,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;YAC/B,IAAI,CAAC;gBACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,GAAG,IAAI,EAAE,EAAE;oBACxC,MAAM,EAAE,MAAM;oBACd,OAAO,EAAE,EAAE,YAAY,EAAE,IAAI,CAAC,SAAS,EAAE;iBAC1C,CAAC,CAAC;gBACH,IAAI,GAAG,CAAC,EAAE;oBAAE,OAAO,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC;YACtC,CAAC;YAAC,MAAM,CAAC,CAAA,CAAC;QACZ,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,KAAK,CAAC,QAAQ,CAAC,GAAW;QAChC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAC3B,OAAO,EAAE,EAAE,YAAY,EAAE,IAAI,CAAC,SAAS,EAAE;SAC1C,CAAC,CAAC;QACH,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,4BAA4B,GAAG,KAAK,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC;QACrE,CAAC;QACD,OAAO,GAAG,CAAC,IAAI,EAAE,CAAC;IACpB,CAAC;IAEO,YAAY,CAAC,GAAW;QAC9B,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,C
AAC,GAAG,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QAC/C,MAAM,OAAO,GAAmB,EAAE,CAAC;QAEnC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;YACtB,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YAC5C,IAAI,CAAC,GAAG;gBAAE,OAAO;YAEjB,MAAM,KAAK,GAAiB,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;YACzC,MAAM,OAAO,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YACpD,MAAM,UAAU,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YAC1D,MAAM,QAAQ,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YAEtD,IAAI,OAAO;gBAAE,KAAK,CAAC,OAAO,GAAG,OAAO,CAAC;YACrC,IAAI,UAAU;gBAAE,KAAK,CAAC,UAAU,GAAG,UAAU,CAAC;YAC9C,IAAI,QAAQ;gBAAE,KAAK,CAAC,QAAQ,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;YAEpD,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACtB,CAAC,CAAC,CAAC;QAEH,OAAO,OAAO,CAAC;IACjB,CAAC;IAEO,iBAAiB,CAAC,GAAW;QACnC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QAC/C,MAAM,IAAI,GAAa,EAAE,CAAC;QAE1B,CAAC,CAAC,0BAA0B,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;YAC3C,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YAChC,IAAI,GAAG;gBAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC1B,CAAC,CAAC,CAAC;QAEH,OAAO,IAAI,CAAC;IACd,CAAC;CACF;AA7FD,sCA6FC"}
@@ -0,0 +1,142 @@
1
+ import { z } from 'zod';
2
+ import { CrawlOptions, WebContextConfig } from '../core/types';
3
+ /** Zod schemas for input validation */
4
+ export declare const urlSchema: z.ZodString;
5
+ export declare const crawlOptionsSchema: z.ZodObject<{
6
+ url: z.ZodString;
7
+ depth: z.ZodOptional<z.ZodNumber>;
8
+ maxPages: z.ZodOptional<z.ZodNumber>;
9
+ timeout: z.ZodOptional<z.ZodNumber>;
10
+ delay: z.ZodOptional<z.ZodNumber>;
11
+ respectRobotsTxt: z.ZodOptional<z.ZodBoolean>;
12
+ includeSitemap: z.ZodOptional<z.ZodBoolean>;
13
+ includePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
14
+ excludePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
15
+ }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
16
+ url: z.ZodString;
17
+ depth: z.ZodOptional<z.ZodNumber>;
18
+ maxPages: z.ZodOptional<z.ZodNumber>;
19
+ timeout: z.ZodOptional<z.ZodNumber>;
20
+ delay: z.ZodOptional<z.ZodNumber>;
21
+ respectRobotsTxt: z.ZodOptional<z.ZodBoolean>;
22
+ includeSitemap: z.ZodOptional<z.ZodBoolean>;
23
+ includePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
24
+ excludePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
25
+ }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
26
+ url: z.ZodString;
27
+ depth: z.ZodOptional<z.ZodNumber>;
28
+ maxPages: z.ZodOptional<z.ZodNumber>;
29
+ timeout: z.ZodOptional<z.ZodNumber>;
30
+ delay: z.ZodOptional<z.ZodNumber>;
31
+ respectRobotsTxt: z.ZodOptional<z.ZodBoolean>;
32
+ includeSitemap: z.ZodOptional<z.ZodBoolean>;
33
+ includePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
34
+ excludePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
35
+ }, z.ZodTypeAny, "passthrough">>;
36
+ export declare const webContextConfigSchema: z.ZodObject<{
37
+ baseUrl: z.ZodString;
38
+ outputDir: z.ZodOptional<z.ZodString>;
39
+ crawlOptions: z.ZodOptional<z.ZodObject<{
40
+ url: z.ZodString;
41
+ depth: z.ZodOptional<z.ZodNumber>;
42
+ maxPages: z.ZodOptional<z.ZodNumber>;
43
+ timeout: z.ZodOptional<z.ZodNumber>;
44
+ delay: z.ZodOptional<z.ZodNumber>;
45
+ respectRobotsTxt: z.ZodOptional<z.ZodBoolean>;
46
+ includeSitemap: z.ZodOptional<z.ZodBoolean>;
47
+ includePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
48
+ excludePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
49
+ }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
50
+ url: z.ZodString;
51
+ depth: z.ZodOptional<z.ZodNumber>;
52
+ maxPages: z.ZodOptional<z.ZodNumber>;
53
+ timeout: z.ZodOptional<z.ZodNumber>;
54
+ delay: z.ZodOptional<z.ZodNumber>;
55
+ respectRobotsTxt: z.ZodOptional<z.ZodBoolean>;
56
+ includeSitemap: z.ZodOptional<z.ZodBoolean>;
57
+ includePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
58
+ excludePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
59
+ }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
60
+ url: z.ZodString;
61
+ depth: z.ZodOptional<z.ZodNumber>;
62
+ maxPages: z.ZodOptional<z.ZodNumber>;
63
+ timeout: z.ZodOptional<z.ZodNumber>;
64
+ delay: z.ZodOptional<z.ZodNumber>;
65
+ respectRobotsTxt: z.ZodOptional<z.ZodBoolean>;
66
+ includeSitemap: z.ZodOptional<z.ZodBoolean>;
67
+ includePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
68
+ excludePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
69
+ }, z.ZodTypeAny, "passthrough">>>;
70
+ }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
71
+ baseUrl: z.ZodString;
72
+ outputDir: z.ZodOptional<z.ZodString>;
73
+ crawlOptions: z.ZodOptional<z.ZodObject<{
74
+ url: z.ZodString;
75
+ depth: z.ZodOptional<z.ZodNumber>;
76
+ maxPages: z.ZodOptional<z.ZodNumber>;
77
+ timeout: z.ZodOptional<z.ZodNumber>;
78
+ delay: z.ZodOptional<z.ZodNumber>;
79
+ respectRobotsTxt: z.ZodOptional<z.ZodBoolean>;
80
+ includeSitemap: z.ZodOptional<z.ZodBoolean>;
81
+ includePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
82
+ excludePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
83
+ }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
84
+ url: z.ZodString;
85
+ depth: z.ZodOptional<z.ZodNumber>;
86
+ maxPages: z.ZodOptional<z.ZodNumber>;
87
+ timeout: z.ZodOptional<z.ZodNumber>;
88
+ delay: z.ZodOptional<z.ZodNumber>;
89
+ respectRobotsTxt: z.ZodOptional<z.ZodBoolean>;
90
+ includeSitemap: z.ZodOptional<z.ZodBoolean>;
91
+ includePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
92
+ excludePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
93
+ }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
94
+ url: z.ZodString;
95
+ depth: z.ZodOptional<z.ZodNumber>;
96
+ maxPages: z.ZodOptional<z.ZodNumber>;
97
+ timeout: z.ZodOptional<z.ZodNumber>;
98
+ delay: z.ZodOptional<z.ZodNumber>;
99
+ respectRobotsTxt: z.ZodOptional<z.ZodBoolean>;
100
+ includeSitemap: z.ZodOptional<z.ZodBoolean>;
101
+ includePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
102
+ excludePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
103
+ }, z.ZodTypeAny, "passthrough">>>;
104
+ }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
105
+ baseUrl: z.ZodString;
106
+ outputDir: z.ZodOptional<z.ZodString>;
107
+ crawlOptions: z.ZodOptional<z.ZodObject<{
108
+ url: z.ZodString;
109
+ depth: z.ZodOptional<z.ZodNumber>;
110
+ maxPages: z.ZodOptional<z.ZodNumber>;
111
+ timeout: z.ZodOptional<z.ZodNumber>;
112
+ delay: z.ZodOptional<z.ZodNumber>;
113
+ respectRobotsTxt: z.ZodOptional<z.ZodBoolean>;
114
+ includeSitemap: z.ZodOptional<z.ZodBoolean>;
115
+ includePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
116
+ excludePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
117
+ }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
118
+ url: z.ZodString;
119
+ depth: z.ZodOptional<z.ZodNumber>;
120
+ maxPages: z.ZodOptional<z.ZodNumber>;
121
+ timeout: z.ZodOptional<z.ZodNumber>;
122
+ delay: z.ZodOptional<z.ZodNumber>;
123
+ respectRobotsTxt: z.ZodOptional<z.ZodBoolean>;
124
+ includeSitemap: z.ZodOptional<z.ZodBoolean>;
125
+ includePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
126
+ excludePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
127
+ }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
128
+ url: z.ZodString;
129
+ depth: z.ZodOptional<z.ZodNumber>;
130
+ maxPages: z.ZodOptional<z.ZodNumber>;
131
+ timeout: z.ZodOptional<z.ZodNumber>;
132
+ delay: z.ZodOptional<z.ZodNumber>;
133
+ respectRobotsTxt: z.ZodOptional<z.ZodBoolean>;
134
+ includeSitemap: z.ZodOptional<z.ZodBoolean>;
135
+ includePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
136
+ excludePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
137
+ }, z.ZodTypeAny, "passthrough">>>;
138
+ }, z.ZodTypeAny, "passthrough">>;
139
+ export declare function validateUrl(url: string): string;
140
+ export declare function validateCrawlOptions(options: unknown): CrawlOptions;
141
+ export declare function validateConfig(config: unknown): WebContextConfig;
142
+ //# sourceMappingURL=validation.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"validation.d.ts","sourceRoot":"","sources":["../../src/utils/validation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAE/D,uCAAuC;AACvC,eAAO,MAAM,SAAS,aAAuC,CAAC;AAE9D,eAAO,MAAM,kBAAkB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gCAUf,CAAC;AAEjB,eAAO,MAAM,sBAAsB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gCAInB,CAAC;AAEjB,wBAAgB,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAE/C;AAED,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,OAAO,GAAG,YAAY,CAEnE;AAED,wBAAgB,cAAc,CAAC,MAAM,EAAE,OAAO,GAAG,gBAAgB,CAEhE"}
@@ -0,0 +1,35 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.validateConfig = exports.validateCrawlOptions = exports.validateUrl = exports.webContextConfigSchema = exports.crawlOptionsSchema = exports.urlSchema = void 0;
4
+ const zod_1 = require("zod");
5
/** Zod schemas for input validation */
// Optional integer restricted to [min, max]; each bound reports its own message.
const optionalBoundedInt = (min, minMessage, max, maxMessage) => zod_1.z.number().int().min(min, minMessage).max(max, maxMessage).optional();
// Optional boolean switch.
const optionalFlag = () => zod_1.z.boolean().optional();
// Optional list of strings.
const optionalStringList = () => zod_1.z.array(zod_1.z.string()).optional();
exports.urlSchema = zod_1.z.string().url('Invalid URL format');
// Crawl options: `url` is required, everything else is optional; unknown keys are kept.
const crawlOptionsShape = {
    url: exports.urlSchema,
    depth: optionalBoundedInt(0, 'Depth must be >= 0', 10, 'Depth must be <= 10'),
    maxPages: optionalBoundedInt(1, 'maxPages must be >= 1', 10000, 'maxPages must be <= 10000'),
    timeout: optionalBoundedInt(1000, 'Timeout must be >= 1000ms', 120000, 'Timeout must be <= 120000ms'),
    delay: optionalBoundedInt(0, 'Delay must be >= 0', 60000, 'Delay must be <= 60000ms'),
    respectRobotsTxt: optionalFlag(),
    includeSitemap: optionalFlag(),
    includePatterns: optionalStringList(),
    excludePatterns: optionalStringList(),
};
exports.crawlOptionsSchema = zod_1.z.object(crawlOptionsShape).passthrough();
// Top-level config: `baseUrl` is required; unknown keys are kept.
const webContextConfigShape = {
    baseUrl: exports.urlSchema,
    outputDir: zod_1.z.string().min(1, 'Output directory is required').optional(),
    crawlOptions: exports.crawlOptionsSchema.optional(),
};
exports.webContextConfigSchema = zod_1.z.object(webContextConfigShape).passthrough();
23
/**
 * Validate a URL string with urlSchema.
 * Returns the parsed value; zod's parse() throws when validation fails.
 */
function validateUrl(url) {
    const parsedUrl = exports.urlSchema.parse(url);
    return parsedUrl;
}
26
+ exports.validateUrl = validateUrl;
27
/**
 * Validate an untyped options value with crawlOptionsSchema.
 * Returns the parsed options; zod's parse() throws when validation fails.
 */
function validateCrawlOptions(options) {
    const parsedOptions = exports.crawlOptionsSchema.parse(options);
    return parsedOptions;
}
30
+ exports.validateCrawlOptions = validateCrawlOptions;
31
/**
 * Validate an untyped config value with webContextConfigSchema.
 * Returns the parsed config; zod's parse() throws when validation fails.
 */
function validateConfig(config) {
    const parsedConfig = exports.webContextConfigSchema.parse(config);
    return parsedConfig;
}
34
+ exports.validateConfig = validateConfig;
35
+ //# sourceMappingURL=validation.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"validation.js","sourceRoot":"","sources":["../../src/utils/validation.ts"],"names":[],"mappings":";;;AAAA,6BAAwB;AAGxB,uCAAuC;AAC1B,QAAA,SAAS,GAAG,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;AAEjD,QAAA,kBAAkB,GAAG,OAAC,CAAC,MAAM,CAAC;IACzC,GAAG,EAAE,iBAAS;IACd,KAAK,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,EAAE,oBAAoB,CAAC,CAAC,GAAG,CAAC,EAAE,EAAE,qBAAqB,CAAC,CAAC,QAAQ,EAAE;IAC9F,QAAQ,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,EAAE,uBAAuB,CAAC,CAAC,GAAG,CAAC,KAAK,EAAE,2BAA2B,CAAC,CAAC,QAAQ,EAAE;IAC7G,OAAO,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,2BAA2B,CAAC,CAAC,GAAG,CAAC,MAAM,EAAE,6BAA6B,CAAC,CAAC,QAAQ,EAAE;IACtH,KAAK,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,EAAE,oBAAoB,CAAC,CAAC,GAAG,CAAC,KAAK,EAAE,0BAA0B,CAAC,CAAC,QAAQ,EAAE;IACtG,gBAAgB,EAAE,OAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;IACxC,cAAc,EAAE,OAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE;IACtC,eAAe,EAAE,OAAC,CAAC,KAAK,CAAC,OAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;IAC/C,eAAe,EAAE,OAAC,CAAC,KAAK,CAAC,OAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;CAChD,CAAC,CAAC,WAAW,EAAE,CAAC;AAEJ,QAAA,sBAAsB,GAAG,OAAC,CAAC,MAAM,CAAC;IAC7C,OAAO,EAAE,iBAAS;IAClB,SAAS,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,EAAE,8BAA8B,CAAC,CAAC,QAAQ,EAAE;IACvE,YAAY,EAAE,0BAAkB,CAAC,QAAQ,EAAE;CAC5C,CAAC,CAAC,WAAW,EAAE,CAAC;AAEjB,SAAgB,WAAW,CAAC,GAAW;IACrC,OAAO,iBAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;AAC9B,CAAC;AAFD,kCAEC;AAED,SAAgB,oBAAoB,CAAC,OAAgB;IACnD,OAAO,0BAAkB,CAAC,KAAK,CAAC,OAAO,CAAiB,CAAC;AAC3D,CAAC;AAFD,oDAEC;AAED,SAAgB,cAAc,CAAC,MAAe;IAC5C,OAAO,8BAAsB,CAAC,KAAK,CAAC,MAAM,CAAqB,CAAC;AAClE,CAAC;AAFD,wCAEC"}
@@ -0,0 +1,21 @@
1
+ import { CrawlResult, ContentDiff } from '../core/types';
2
+ export interface WebhookConfig {
3
+ url: string;
4
+ secret?: string;
5
+ events: Array<'crawl.complete' | 'crawl.error' | 'content.changed'>;
6
+ headers?: Record<string, string>;
7
+ }
8
+ /**
9
+ * Webhook notification system for crawl events.
10
+ * Sends POST requests to configured URLs when events occur.
11
+ */
12
+ export declare class WebhookNotifier {
13
+ private configs;
14
+ register(config: WebhookConfig): void;
15
+ unregister(url: string): void;
16
+ notifyCrawlComplete(result: CrawlResult): Promise<void>;
17
+ notifyCrawlError(url: string, error: string): Promise<void>;
18
+ notifyContentChanged(diffs: ContentDiff[]): Promise<void>;
19
+ private send;
20
+ }
21
+ //# sourceMappingURL=webhook.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"webhook.d.ts","sourceRoot":"","sources":["../../src/utils/webhook.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAEzD,MAAM,WAAW,aAAa;IAC5B,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,KAAK,CAAC,gBAAgB,GAAG,aAAa,GAAG,iBAAiB,CAAC,CAAC;IACpE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC;AAED;;;GAGG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,OAAO,CAAuB;IAEtC,QAAQ,CAAC,MAAM,EAAE,aAAa,GAAG,IAAI;IAIrC,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;IAIvB,mBAAmB,CAAC,MAAM,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAgBvD,gBAAgB,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAU3D,oBAAoB,CAAC,KAAK,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;YAkBjD,IAAI;CAwBnB"}
@@ -0,0 +1,108 @@
1
+ "use strict";
2
// Interop helpers emitted by the TypeScript compiler. Each one reuses a helper
// of the same name found on `this` when one is present, otherwise it falls
// back to the local definition below.

// Exposes property `k` of module `m` on object `o` under the name `k2`
// (`k2` defaults to `k`).
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (o, m, k, k2) {
        var alias = k2 === undefined ? k : k2;
        var desc = Object.getOwnPropertyDescriptor(m, k);
        var useGetter;
        if (!desc) {
            useGetter = true;
        } else if ("get" in desc) {
            // Accessor property: keep it as-is only for real ES modules.
            useGetter = !m.__esModule;
        } else {
            // Data property that may still change: needs a live binding.
            useGetter = desc.writable || desc.configurable;
        }
        if (useGetter) {
            // The getter re-reads m[k] on every access, so later updates to
            // the source module stay visible through the binding.
            desc = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, alias, desc);
    }
    : function (o, m, k, k2) {
        // Environments without Object.create: plain value copy.
        o[k2 === undefined ? k : k2] = m[k];
    });

// Stores `v` as the enumerable `default` property of `o`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (o, v) {
        Object.defineProperty(o, "default", { enumerable: true, value: v });
    }
    : function (o, v) {
        o["default"] = v;
    });

// Returns `mod` unchanged when it is flagged `__esModule`; otherwise builds a
// namespace object that re-exposes the own properties of `mod` (except
// "default") and carries `mod` itself as `default`.
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var result = {};
    if (mod != null) {
        for (var k in mod) {
            if (k === "default" || !Object.prototype.hasOwnProperty.call(mod, k)) {
                continue;
            }
            __createBinding(result, mod, k);
        }
    }
    __setModuleDefault(result, mod);
    return result;
};
25
+ Object.defineProperty(exports, "__esModule", { value: true });
26
+ exports.WebhookNotifier = void 0;
27
/**
 * Webhook notification system for crawl events.
 * Sends POST requests to configured URLs when events occur.
 *
 * Delivery is best-effort: a failing or slow subscriber never makes a
 * `notify*` call reject and never prevents delivery to the other subscribers.
 */
class WebhookNotifier {
    /** Registered subscriber configurations, in registration order. */
    configs = [];

    /**
     * Adds a subscriber.
     * @param config Subscriber settings (`url`, `events`, optional `secret` and `headers`).
     */
    register(config) {
        this.configs.push(config);
    }

    /**
     * Removes every subscriber registered for the given URL.
     * @param url URL to compare against each subscriber's `url`.
     */
    unregister(url) {
        this.configs = this.configs.filter(c => c.url !== url);
    }

    /**
     * Notifies `crawl.complete` subscribers with a summary of the crawl.
     * @param result Crawl result; `context.source` and `stats` are read.
     */
    async notifyCrawlComplete(result) {
        const subscribers = this.configs.filter(c => c.events.includes('crawl.complete'));
        const payload = {
            event: 'crawl.complete',
            timestamp: new Date().toISOString(),
            data: {
                source: result.context.source,
                pagesProcessed: result.stats.pagesProcessed,
                totalTokens: result.stats.totalTokens,
                duration: result.stats.duration,
                // Only the number of errors is sent, not the errors themselves.
                errors: result.stats.errors.length,
            },
        };
        await this.send(subscribers, payload);
    }

    /**
     * Notifies `crawl.error` subscribers about a failed crawl.
     * @param url URL that failed.
     * @param error Error description.
     */
    async notifyCrawlError(url, error) {
        const subscribers = this.configs.filter(c => c.events.includes('crawl.error'));
        const payload = {
            event: 'crawl.error',
            timestamp: new Date().toISOString(),
            data: { url, error },
        };
        await this.send(subscribers, payload);
    }

    /**
     * Notifies `content.changed` subscribers about changed pages.
     * Does nothing when `diffs` is empty.
     * @param diffs Per-page diffs; `url`, `addedSections` and `removedSections` are forwarded.
     */
    async notifyContentChanged(diffs) {
        if (!diffs.length)
            return;
        const subscribers = this.configs.filter(c => c.events.includes('content.changed'));
        const payload = {
            event: 'content.changed',
            timestamp: new Date().toISOString(),
            data: {
                changedPages: diffs.length,
                diffs: diffs.map(d => ({
                    url: d.url,
                    addedSections: d.addedSections,
                    removedSections: d.removedSections,
                })),
            },
        };
        await this.send(subscribers, payload);
    }

    /**
     * POSTs one payload to every given subscriber in parallel.
     *
     * When a subscriber has a `secret`, the hex HMAC-SHA256 of the request
     * body is sent in the `X-Webhook-Signature` header.
     *
     * @param subscribers Subscriber configurations to deliver to.
     * @param payload JSON-serializable event envelope.
     * @returns A promise that resolves once every delivery attempt has finished.
     */
    async send(subscribers, payload) {
        if (!subscribers.length)
            return;
        // Serialize once: every subscriber receives the same bytes, and each
        // signature is computed over exactly the bytes that are sent.
        const body = JSON.stringify(payload);
        const deliveries = subscribers.map(async (config) => {
            try {
                // Headers matches names case-insensitively, so a caller-supplied
                // `content-type` replaces the default instead of being sent
                // alongside it, and the signature cannot be duplicated.
                const headers = new Headers(config.headers);
                if (!headers.has('Content-Type')) {
                    headers.set('Content-Type', 'application/json');
                }
                if (config.secret) {
                    // Loaded lazily: only signed webhooks need crypto.
                    const { createHmac } = require('crypto');
                    const signature = createHmac('sha256', config.secret)
                        .update(body)
                        .digest('hex');
                    headers.set('X-Webhook-Signature', signature);
                }
                const response = await fetch(config.url, {
                    method: 'POST',
                    headers,
                    body,
                    // Give up on a subscriber after 10 seconds.
                    signal: AbortSignal.timeout(10000),
                });
                // The reply is not used; cancel the body so the underlying
                // connection is released right away instead of at GC time.
                await response.body?.cancel();
            }
            catch {
                // Best-effort delivery: one subscriber's failure (network error,
                // timeout, invalid header) must not affect the others or the caller.
            }
        });
        await Promise.allSettled(deliveries);
    }
}
107
+ exports.WebhookNotifier = WebhookNotifier;
108
+ //# sourceMappingURL=webhook.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"webhook.js","sourceRoot":"","sources":["../../src/utils/webhook.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AASA;;;GAGG;AACH,MAAa,eAAe;IAClB,OAAO,GAAoB,EAAE,CAAC;IAEtC,QAAQ,CAAC,MAAqB;QAC5B,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC5B,CAAC;IAED,UAAU,CAAC,GAAW;QACpB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,GAAG,CAAC,CAAC;IACzD,CAAC;IAED,KAAK,CAAC,mBAAmB,CAAC,MAAmB;QAC3C,MAAM,WAAW,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC,CAAC;QAClF,MAAM,OAAO,GAAG;YACd,KAAK,EAAE,gBAAgB;YACvB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,IAAI,EAAE;gBACJ,MAAM,EAAE,MAAM,CAAC,OAAO,CAAC,MAAM;gBAC7B,cAAc,EAAE,MAAM,CAAC,KAAK,CAAC,cAAc;gBAC3C,WAAW,EAAE,MAAM,CAAC,KAAK,CAAC,WAAW;gBACrC,QAAQ,EAAE,MAAM,CAAC,KAAK,CAAC,QAAQ;gBAC/B,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM;aACnC;SACF,CAAC;QACF,MAAM,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IACxC,CAAC;IAED,KAAK,CAAC,gBAAgB,CAAC,GAAW,EAAE,KAAa;QAC/C,MAAM,WAAW,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC;QAC/E,MAAM,OAAO,GAAG;YACd,KAAK,EAAE,aAAa;YACpB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,IAAI,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE;SACrB,CAAC;QACF,MAAM,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IACxC,CAAC;IAED,KAAK,CAAC,oBAAoB,CAAC,KAAoB;QAC7C,IAAI,CAAC,KAAK,CAAC,MAAM;YAAE,OAAO;QAC1B,MAAM,WAAW,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC,CAAC;QACnF,MAAM,OAAO,GAAG;YACd,KAAK,EAAE,iBAAiB;YACxB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,IAAI,EAAE;gBACJ,YAAY,EAAE,KAAK,CAAC,MAAM;gBAC1B,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;oBACrB,GAAG,EAAE,CAAC,CAAC,GAAG;oBACV,aAAa,EAAE,CAAC,CAAC,aAAa;oBAC9B,eAAe,EAAE,CAAC,CAAC,eAAe;iBACnC,CAAC,CAAC;aACJ;SACF,CAAC;QACF,MAAM,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IACxC,CAAC;IAEO,KAAK,CAAC,IAAI,CAAC,WAA4B,EAAE,OAAY;QAC3D,MAAM,QAAQ,GAAG
,WAAW,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE;YAChD,IAAI,CAAC;gBACH,MAAM,OAAO,GAA2B;oBACtC,cAAc,EAAE,kBAAkB;oBAClC,GAAG,MAAM,CAAC,OAAO;iBAClB,CAAC;gBACF,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;oBAClB,MAAM,EAAE,UAAU,EAAE,GAAG,wDAAa,QAAQ,GAAC,CAAC;oBAC9C,MAAM,SAAS,GAAG,UAAU,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,CAAC;yBAClD,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;yBAC/B,MAAM,CAAC,KAAK,CAAC,CAAC;oBACjB,OAAO,CAAC,qBAAqB,CAAC,GAAG,SAAS,CAAC;gBAC7C,CAAC;gBACD,MAAM,KAAK,CAAC,MAAM,CAAC,GAAG,EAAE;oBACtB,MAAM,EAAE,MAAM;oBACd,OAAO;oBACP,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;oBAC7B,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,KAAK,CAAC;iBACnC,CAAC,CAAC;YACL,CAAC;YAAC,MAAM,CAAC,CAAA,CAAC;QACZ,CAAC,CAAC,CAAC;QACH,MAAM,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC;CACF;AA/ED,0CA+EC"}
package/package.json ADDED
@@ -0,0 +1,109 @@
1
+ {
2
+ "name": "webcontext-ai",
3
+ "version": "1.0.0",
4
+ "description": "Turn any web content into clean AI-ready context — with crawling, chunking, semantic search, and MCP tools",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "bin": {
8
+ "webcontext": "./dist/cli/index.js",
9
+ "webcontext-mcp": "./dist/mcp-server.js"
10
+ },
11
+ "exports": {
12
+ ".": {
13
+ "types": "./dist/index.d.ts",
14
+ "require": "./dist/index.js"
15
+ },
16
+ "./sdk/client": {
17
+ "types": "./dist/sdk/client.d.ts",
18
+ "require": "./dist/sdk/client.js"
19
+ },
20
+ "./sdk/mcp": {
21
+ "types": "./dist/sdk/mcp.d.ts",
22
+ "require": "./dist/sdk/mcp.js"
23
+ },
24
+ "./search": {
25
+ "types": "./dist/search/vector.d.ts",
26
+ "require": "./dist/search/vector.js"
27
+ },
28
+ "./utils": {
29
+ "types": "./dist/utils/index.d.ts",
30
+ "require": "./dist/utils/index.js"
31
+ },
32
+ "./export": {
33
+ "types": "./dist/export/index.d.ts",
34
+ "require": "./dist/export/index.js"
35
+ }
36
+ },
37
+ "files": [
38
+ "dist",
39
+ "README.md",
40
+ "LICENSE"
41
+ ],
42
+ "scripts": {
43
+ "build": "tsc",
44
+ "prepublishOnly": "npm run build",
45
+ "dev": "ts-node src/index.ts",
46
+ "cli": "ts-node src/cli/index.ts",
47
+ "start": "node dist/cli/index.js",
48
+ "serve": "node dist/cli/index.js serve",
49
+ "test": "jest",
50
+ "lint": "eslint src/"
51
+ },
52
+ "keywords": [
53
+ "web-scraping",
54
+ "ai-context",
55
+ "llm",
56
+ "rag",
57
+ "markdown",
58
+ "crawling",
59
+ "web-crawler",
60
+ "documentation",
61
+ "firecrawl",
62
+ "mcp",
63
+ "vector-search",
64
+ "sitemap",
65
+ "tfidf",
66
+ "semantic-search",
67
+ "mcp-server",
68
+ "langchain",
69
+ "pdf-extraction",
70
+ "github",
71
+ "deduplication"
72
+ ],
73
+ "author": "sumeethmoolya",
74
+ "repository": {
75
+ "type": "git",
76
+ "url": "https://github.com/Sumeeth-24/webScrapper-ai.git"
77
+ },
78
+ "homepage": "https://github.com/Sumeeth-24/webScrapper-ai#readme",
79
+ "license": "MIT",
80
+ "dependencies": {
81
+ "turndown": "7.2.0",
82
+ "cheerio": "1.0.0-rc.12",
83
+ "tiktoken": "1.0.15",
84
+ "commander": "12.1.0",
85
+ "ora": "5.4.1",
86
+ "chalk": "4.1.2",
87
+ "p-queue": "6.6.2",
88
+ "robots-parser": "3.0.1",
89
+ "lru-cache": "10.2.2",
90
+ "zod": "3.23.8",
91
+ "express": "4.19.2",
92
+ "cors": "2.8.5"
93
+ },
94
+ "optionalDependencies": {
95
+ "playwright": "1.44.0",
96
+ "pdf-parse": "1.1.1"
97
+ },
98
+ "devDependencies": {
99
+ "typescript": "5.4.5",
100
+ "@types/node": "20.12.12",
101
+ "@types/turndown": "5.0.4",
102
+ "@types/express": "4.17.21",
103
+ "@types/cors": "2.8.17",
104
+ "jest": "29.7.0",
105
+ "ts-jest": "29.1.4",
106
+ "ts-node": "10.9.2",
107
+ "eslint": "9.3.0"
108
+ }
109
+ }