webcontext-ai 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +583 -0
  3. package/dist/browser/manager.d.ts +47 -0
  4. package/dist/browser/manager.d.ts.map +1 -0
  5. package/dist/browser/manager.js +215 -0
  6. package/dist/browser/manager.js.map +1 -0
  7. package/dist/cache/cache.d.ts +22 -0
  8. package/dist/cache/cache.d.ts.map +1 -0
  9. package/dist/cache/cache.js +150 -0
  10. package/dist/cache/cache.js.map +1 -0
  11. package/dist/chunking/chunker.d.ts +26 -0
  12. package/dist/chunking/chunker.d.ts.map +1 -0
  13. package/dist/chunking/chunker.js +208 -0
  14. package/dist/chunking/chunker.js.map +1 -0
  15. package/dist/cli/index.d.ts +3 -0
  16. package/dist/cli/index.d.ts.map +1 -0
  17. package/dist/cli/index.js +406 -0
  18. package/dist/cli/index.js.map +1 -0
  19. package/dist/core/pipeline.d.ts +35 -0
  20. package/dist/core/pipeline.d.ts.map +1 -0
  21. package/dist/core/pipeline.js +476 -0
  22. package/dist/core/pipeline.js.map +1 -0
  23. package/dist/core/stream.d.ts +48 -0
  24. package/dist/core/stream.d.ts.map +1 -0
  25. package/dist/core/stream.js +72 -0
  26. package/dist/core/stream.js.map +1 -0
  27. package/dist/core/types.d.ts +259 -0
  28. package/dist/core/types.d.ts.map +1 -0
  29. package/dist/core/types.js +4 -0
  30. package/dist/core/types.js.map +1 -0
  31. package/dist/export/index.d.ts +3 -0
  32. package/dist/export/index.d.ts.map +1 -0
  33. package/dist/export/index.js +8 -0
  34. package/dist/export/index.js.map +1 -0
  35. package/dist/export/templates.d.ts +25 -0
  36. package/dist/export/templates.d.ts.map +1 -0
  37. package/dist/export/templates.js +76 -0
  38. package/dist/export/templates.js.map +1 -0
  39. package/dist/export/vectordb.d.ts +21 -0
  40. package/dist/export/vectordb.d.ts.map +1 -0
  41. package/dist/export/vectordb.js +101 -0
  42. package/dist/export/vectordb.js.map +1 -0
  43. package/dist/extractors/content.d.ts +23 -0
  44. package/dist/extractors/content.d.ts.map +1 -0
  45. package/dist/extractors/content.js +328 -0
  46. package/dist/extractors/content.js.map +1 -0
  47. package/dist/extractors/github.d.ts +19 -0
  48. package/dist/extractors/github.d.ts.map +1 -0
  49. package/dist/extractors/github.js +150 -0
  50. package/dist/extractors/github.js.map +1 -0
  51. package/dist/extractors/images.d.ts +20 -0
  52. package/dist/extractors/images.d.ts.map +1 -0
  53. package/dist/extractors/images.js +73 -0
  54. package/dist/extractors/images.js.map +1 -0
  55. package/dist/extractors/pdf.d.ts +11 -0
  56. package/dist/extractors/pdf.d.ts.map +1 -0
  57. package/dist/extractors/pdf.js +107 -0
  58. package/dist/extractors/pdf.js.map +1 -0
  59. package/dist/extractors/screenshot.d.ts +21 -0
  60. package/dist/extractors/screenshot.d.ts.map +1 -0
  61. package/dist/extractors/screenshot.js +85 -0
  62. package/dist/extractors/screenshot.js.map +1 -0
  63. package/dist/index.d.ts +70 -0
  64. package/dist/index.d.ts.map +1 -0
  65. package/dist/index.js +206 -0
  66. package/dist/index.js.map +1 -0
  67. package/dist/mcp-server.d.ts +3 -0
  68. package/dist/mcp-server.d.ts.map +1 -0
  69. package/dist/mcp-server.js +108 -0
  70. package/dist/mcp-server.js.map +1 -0
  71. package/dist/sdk/client.d.ts +48 -0
  72. package/dist/sdk/client.d.ts.map +1 -0
  73. package/dist/sdk/client.js +120 -0
  74. package/dist/sdk/client.js.map +1 -0
  75. package/dist/sdk/mcp.d.ts +12 -0
  76. package/dist/sdk/mcp.d.ts.map +1 -0
  77. package/dist/sdk/mcp.js +146 -0
  78. package/dist/sdk/mcp.js.map +1 -0
  79. package/dist/sdk/server.d.ts +5 -0
  80. package/dist/sdk/server.d.ts.map +1 -0
  81. package/dist/sdk/server.js +158 -0
  82. package/dist/sdk/server.js.map +1 -0
  83. package/dist/search/vector.d.ts +26 -0
  84. package/dist/search/vector.d.ts.map +1 -0
  85. package/dist/search/vector.js +142 -0
  86. package/dist/search/vector.js.map +1 -0
  87. package/dist/transformers/markdown.d.ts +21 -0
  88. package/dist/transformers/markdown.d.ts.map +1 -0
  89. package/dist/transformers/markdown.js +242 -0
  90. package/dist/transformers/markdown.js.map +1 -0
  91. package/dist/utils/dedup.d.ts +20 -0
  92. package/dist/utils/dedup.d.ts.map +1 -0
  93. package/dist/utils/dedup.js +61 -0
  94. package/dist/utils/dedup.js.map +1 -0
  95. package/dist/utils/index.d.ts +6 -0
  96. package/dist/utils/index.d.ts.map +1 -0
  97. package/dist/utils/index.js +15 -0
  98. package/dist/utils/index.js.map +1 -0
  99. package/dist/utils/metrics.d.ts +16 -0
  100. package/dist/utils/metrics.d.ts.map +1 -0
  101. package/dist/utils/metrics.js +28 -0
  102. package/dist/utils/metrics.js.map +1 -0
  103. package/dist/utils/scheduler.d.ts +19 -0
  104. package/dist/utils/scheduler.d.ts.map +1 -0
  105. package/dist/utils/scheduler.js +63 -0
  106. package/dist/utils/scheduler.js.map +1 -0
  107. package/dist/utils/sitemap.d.ts +17 -0
  108. package/dist/utils/sitemap.d.ts.map +1 -0
  109. package/dist/utils/sitemap.js +118 -0
  110. package/dist/utils/sitemap.js.map +1 -0
  111. package/dist/utils/validation.d.ts +142 -0
  112. package/dist/utils/validation.d.ts.map +1 -0
  113. package/dist/utils/validation.js +35 -0
  114. package/dist/utils/validation.js.map +1 -0
  115. package/dist/utils/webhook.d.ts +21 -0
  116. package/dist/utils/webhook.d.ts.map +1 -0
  117. package/dist/utils/webhook.js +108 -0
  118. package/dist/utils/webhook.js.map +1 -0
  119. package/package.json +109 -0
@@ -0,0 +1,259 @@
1
+ export interface SitemapEntry {
2
+ url: string;
3
+ lastmod?: string;
4
+ changefreq?: string;
5
+ priority?: number;
6
+ }
7
+ export interface RetryConfig {
8
+ maxRetries: number;
9
+ backoffMs: number;
10
+ backoffMultiplier: number;
11
+ retryOn: number[];
12
+ }
13
+ export interface RateLimitConfig {
14
+ requestsPerSecond: number;
15
+ burstSize: number;
16
+ }
17
+ export interface CrawlProgress {
18
+ pagesProcessed: number;
19
+ totalDiscovered: number;
20
+ currentUrl: string;
21
+ status: 'crawling' | 'paused' | 'complete' | 'error';
22
+ }
23
+ export interface PluginHook {
24
+ name: string;
25
+ phase: 'pre-fetch' | 'post-fetch' | 'pre-extract' | 'post-extract' | 'pre-transform' | 'post-transform' | 'pre-chunk' | 'post-chunk';
26
+ }
27
+ export interface WebContextPlugin {
28
+ name: string;
29
+ hooks: Record<string, (ctx: any) => Promise<any>>;
30
+ }
31
+ export interface CrawlOptions {
32
+ url: string;
33
+ depth?: number;
34
+ maxPages?: number;
35
+ includePatterns?: string[];
36
+ excludePatterns?: string[];
37
+ waitForSelector?: string;
38
+ timeout?: number;
39
+ headers?: Record<string, string>;
40
+ cookies?: Cookie[];
41
+ auth?: AuthConfig;
42
+ respectRobotsTxt?: boolean;
43
+ delay?: number;
44
+ javascript?: boolean;
45
+ focusMode?: FocusMode;
46
+ cache?: boolean;
47
+ cacheTTL?: number;
48
+ retry?: RetryConfig;
49
+ rateLimit?: RateLimitConfig;
50
+ sitemapUrl?: string;
51
+ checkpoint?: boolean;
52
+ checkpointDir?: string;
53
+ plugins?: WebContextPlugin[];
54
+ onProgress?: (progress: CrawlProgress) => void;
55
+ }
56
+ export interface Cookie {
57
+ name: string;
58
+ value: string;
59
+ domain: string;
60
+ path?: string;
61
+ }
62
+ export interface AuthConfig {
63
+ type: 'basic' | 'bearer' | 'cookie' | 'custom';
64
+ credentials: Record<string, string>;
65
+ }
66
+ export type FocusMode = 'full' | 'article' | 'code' | 'api' | 'readme' | 'section';
67
+ export interface ExtractedContent { // One extracted page; this is the input type of OutputFormatter.formatPage / formatPages.
68
+ url: string; // Substituted for the {{url}} placeholder by OutputFormatter.formatPage.
69
+ title: string; // Substituted for the {{title}} placeholder by OutputFormatter.formatPage.
70
+ description?: string; // Substituted for {{summary}} by formatPage; an empty string is used when absent.
71
+ markdown: string; // Substituted for {{markdown}}; formatPage also derives {{tokens}} as ceil(markdown.length / 4).
72
+ html?: string;
73
+ text: string;
74
+ codeBlocks: CodeBlock[];
75
+ headings: Heading[];
76
+ links: LinkInfo[];
77
+ metadata: PageMetadata;
78
+ timestamp: string; // NOTE(review): string format is not visible here (presumably ISO-8601) - confirm against the producer.
79
+ }
80
+ export interface CodeBlock {
81
+ language: string;
82
+ code: string;
83
+ context?: string;
84
+ lineNumbers?: boolean;
85
+ }
86
+ export interface Heading {
87
+ level: number;
88
+ text: string;
89
+ id?: string;
90
+ }
91
+ export interface LinkInfo {
92
+ href: string;
93
+ text: string;
94
+ isInternal: boolean;
95
+ }
96
+ export interface PageMetadata {
97
+ author?: string;
98
+ publishedDate?: string;
99
+ modifiedDate?: string;
100
+ language?: string;
101
+ framework?: string;
102
+ library?: string;
103
+ tags?: string[];
104
+ ogImage?: string;
105
+ canonical?: string;
106
+ siteName?: string;
107
+ type?: ContentType;
108
+ version?: string;
109
+ }
110
+ export type ContentType = 'documentation' | 'api-reference' | 'blog-post' | 'readme' | 'tutorial' | 'article' | 'changelog' | 'unknown';
111
+ export interface ChunkOptions {
112
+ maxTokens?: number;
113
+ overlap?: number;
114
+ strategy?: ChunkStrategy;
115
+ preserveCodeBlocks?: boolean;
116
+ preserveHeadings?: boolean;
117
+ }
118
+ export type ChunkStrategy = 'semantic' | 'fixed' | 'heading' | 'paragraph';
119
+ export interface ContentChunk { // One chunk of content; this is the unit VectorDBExporter serialises and OutputFormatter.formatContext joins.
120
+ id: string; // Emitted as the record `id` by the pinecone/chroma/qdrant exports and as the `chunkId` property by the weaviate export.
121
+ content: string; // Chunk text; formatContext joins the contents of all chunks with a blank line to fill {{markdown}}.
122
+ tokens: number; // Copied verbatim into exported records as `tokens`. NOTE(review): how it is computed is not visible here.
123
+ metadata: ChunkMetadata;
124
+ }
125
+ export interface ChunkMetadata { // Per-chunk provenance; VectorDBExporter flattens these fields into each exported record.
126
+ sourceUrl: string; // Exported under the key `source`.
127
+ title: string;
128
+ headingPath: string[]; // Joined with ' > ' into a single string on export.
129
+ chunkIndex: number; // NOTE(review): presumably the zero-based position within the source page - confirm against the chunker.
130
+ totalChunks: number; // Included in the pinecone, weaviate and qdrant exports; the chroma export omits it.
131
+ hasCode: boolean;
132
+ language?: string; // Exported as an empty string when unset.
133
+ }
134
+ export interface CrawlCheckpoint {
135
+ visitedUrls: string[];
136
+ pendingUrls: string[];
137
+ pages: ExtractedContent[];
138
+ errors: CrawlError[];
139
+ timestamp: string;
140
+ }
141
+ export interface EmbeddingResult {
142
+ id: string;
143
+ vector: number[];
144
+ content: string;
145
+ metadata: ChunkMetadata;
146
+ }
147
+ export interface SearchResult {
148
+ chunk: ContentChunk;
149
+ score: number;
150
+ }
151
+ export interface ContentDiff {
152
+ url: string;
153
+ previousHash: string;
154
+ currentHash: string;
155
+ changed: boolean;
156
+ addedSections: string[];
157
+ removedSections: string[];
158
+ }
159
+ export interface ScheduleConfig {
160
+ cron: string;
161
+ urls: string[];
162
+ options: Partial<CrawlOptions>;
163
+ onComplete?: (result: CrawlResult) => void;
164
+ }
165
+ export interface MetricsData {
166
+ crawlsTotal: number;
167
+ pagesTotal: number;
168
+ tokensTotal: number;
169
+ cacheHits: number;
170
+ cacheMisses: number;
171
+ avgDuration: number;
172
+ errors: number;
173
+ }
174
+ export interface ContextPacket { // Aggregated crawl output; this is the input type of OutputFormatter.formatContext.
175
+ id: string;
176
+ source: string; // formatContext substitutes this value for both {{title}} and {{url}}.
177
+ chunks: ContentChunk[]; // Contents are joined with a blank line for {{markdown}}; the array length fills {{chunks}}.
178
+ summary?: string; // Substituted for {{summary}}; an empty string is used when absent.
179
+ totalTokens: number; // Substituted (stringified) for {{tokens}} by formatContext.
180
+ metadata: PacketMetadata;
181
+ format: OutputFormat;
182
+ }
183
+ export interface PacketMetadata {
184
+ crawledAt: string;
185
+ pageCount: number;
186
+ contentType: ContentType;
187
+ framework?: string;
188
+ version?: string;
189
+ relationships: PageRelationship[];
190
+ }
191
+ export interface PageRelationship {
192
+ from: string;
193
+ to: string;
194
+ type: 'links-to' | 'parent-of' | 'related-to' | 'next' | 'previous';
195
+ }
196
+ export type OutputFormat = 'markdown' | 'json' | 'chunks' | 'context-packet';
197
+ export interface WebContextConfig {
198
+ browser?: BrowserConfig;
199
+ extraction?: ExtractionConfig;
200
+ chunking?: ChunkOptions;
201
+ output?: OutputConfig;
202
+ cache?: CacheConfig;
203
+ concurrency?: number;
204
+ retry?: RetryConfig;
205
+ rateLimit?: RateLimitConfig;
206
+ plugins?: WebContextPlugin[];
207
+ metrics?: boolean;
208
+ }
209
+ export interface BrowserConfig {
210
+ headless?: boolean;
211
+ proxy?: string;
212
+ userAgent?: string;
213
+ viewport?: {
214
+ width: number;
215
+ height: number;
216
+ };
217
+ }
218
+ export interface ExtractionConfig {
219
+ removeSelectors?: string[];
220
+ contentSelectors?: string[];
221
+ preserveImages?: boolean;
222
+ preserveTables?: boolean;
223
+ maxContentLength?: number;
224
+ }
225
+ export interface OutputConfig {
226
+ format: OutputFormat;
227
+ includeMetadata?: boolean;
228
+ includeSourceLinks?: boolean;
229
+ compressWhitespace?: boolean;
230
+ }
231
+ export interface CacheConfig {
232
+ enabled: boolean;
233
+ ttl: number;
234
+ maxSize: number;
235
+ directory?: string;
236
+ contentHashing?: boolean;
237
+ }
238
+ export interface CrawlResult {
239
+ pages: ExtractedContent[];
240
+ context: ContextPacket;
241
+ stats: CrawlStats;
242
+ diffs?: ContentDiff[];
243
+ }
244
+ export interface CrawlStats {
245
+ pagesProcessed: number;
246
+ totalTokens: number;
247
+ duration: number;
248
+ errors: CrawlError[];
249
+ cached: number;
250
+ cacheHits: number;
251
+ cacheMisses: number;
252
+ retries: number;
253
+ }
254
+ export interface CrawlError {
255
+ url: string;
256
+ error: string;
257
+ statusCode?: number;
258
+ }
259
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/core/types.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,YAAY;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,WAAW;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,MAAM,WAAW,eAAe;IAC9B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,aAAa;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,MAAM,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,UAAU,GAAG,QAAQ,GAAG,UAAU,GAAG,OAAO,CAAC;CACtD;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,WAAW,GAAG,YAAY,GAAG,aAAa,GAAG,cAAc,GAAG,eAAe,GAAG,gBAAgB,GAAG,WAAW,GAAG,YAAY,CAAC;CACtI;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,GAAG,KAAK,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;CACnD;AAED,MAAM,WAAW,YAAY;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,IAAI,CAAC,EAAE,UAAU,CAAC;IAClB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,CAAC,EAAE,eAAe,CAAC;IAC5B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,OAAO,CAAC,EAAE,gBAAgB,EAAE,CAAC;IAC7B,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,aAAa,KAAK,IAAI,CAAC;CAChD;AAED,MAAM,WAAW,MAAM;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,QAAQ,CAAC;IAC/C,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACrC;AAED,
MAAM,MAAM,SAAS,GAAG,MAAM,GAAG,SAAS,GAAG,MAAM,GAAG,KAAK,GAAG,QAAQ,GAAG,SAAS,CAAC;AAEnF,MAAM,WAAW,gBAAgB;IAC/B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,SAAS,EAAE,CAAC;IACxB,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,QAAQ,EAAE,YAAY,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,SAAS;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED,MAAM,WAAW,OAAO;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,CAAC,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,OAAO,CAAC;CACrB;AAED,MAAM,WAAW,YAAY;IAC3B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,IAAI,CAAC,EAAE,WAAW,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,MAAM,WAAW,GACnB,eAAe,GACf,eAAe,GACf,WAAW,GACX,QAAQ,GACR,UAAU,GACV,SAAS,GACT,WAAW,GACX,SAAS,CAAC;AAEd,MAAM,WAAW,YAAY;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED,MAAM,MAAM,aAAa,GAAG,UAAU,GAAG,OAAO,GAAG,SAAS,GAAG,WAAW,CAAC;AAE3E,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,aAAa,CAAC;CACzB;AAED,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,eAAe;IAC9B,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,KAAK,EAAE,gBAAgB,EAAE,CAAC;IAC1B,MAAM,EAAE,U
AAU,EAAE,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,aAAa,CAAC;CACzB;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,YAAY,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,OAAO,CAAC;IACjB,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,eAAe,EAAE,MAAM,EAAE,CAAC;CAC3B;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,OAAO,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC;IAC/B,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,WAAW,KAAK,IAAI,CAAC;CAC5C;AAED,MAAM,WAAW,WAAW;IAC1B,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,aAAa;IAC5B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,YAAY,EAAE,CAAC;IACvB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,cAAc,CAAC;IACzB,MAAM,EAAE,YAAY,CAAC;CACtB;AAED,MAAM,WAAW,cAAc;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,WAAW,CAAC;IACzB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,EAAE,gBAAgB,EAAE,CAAC;CACnC;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,UAAU,GAAG,WAAW,GAAG,YAAY,GAAG,MAAM,GAAG,UAAU,CAAC;CACrE;AAED,MAAM,MAAM,YAAY,GAAG,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,gBAAgB,CAAC;AAE7E,MAAM,WAAW,gBAAgB;IAC/B,OAAO,CAAC,EAAE,aAAa,CAAC;IACxB,UAAU,CAAC,EAAE,gBAAgB,CAAC;IAC9B,QAAQ,CAAC,EAAE,YAAY,CAAC;IACxB,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,SAAS,CAAC,EAAE,eAAe,CAAC;IAC5B,OAAO,CAAC,EAAE,gBAAgB,EAAE,CAAC;IAC7B,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC
;CAC9C;AAED,MAAM,WAAW,gBAAgB;IAC/B,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC5B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,YAAY,CAAC;IACrB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,kBAAkB,CAAC,EAAE,OAAO,CAAC;CAC9B;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,OAAO,CAAC;IACjB,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,gBAAgB,EAAE,CAAC;IAC1B,OAAO,EAAE,aAAa,CAAC;IACvB,KAAK,EAAE,UAAU,CAAC;IAClB,KAAK,CAAC,EAAE,WAAW,EAAE,CAAC;CACvB;AAED,MAAM,WAAW,UAAU;IACzB,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,UAAU,EAAE,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB"}
@@ -0,0 +1,4 @@
1
+ "use strict";
2
+ // Core types for the webcontext package
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/core/types.ts"],"names":[],"mappings":";AAAA,wCAAwC"}
@@ -0,0 +1,3 @@
1
+ export { VectorDBExporter, VectorDBExportOptions } from './vectordb';
2
+ export { OutputFormatter, OutputTemplate } from './templates';
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/export/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,MAAM,YAAY,CAAC;AACrE,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC"}
@@ -0,0 +1,8 @@
1
+ "use strict"; // Compiled CommonJS barrel for the export/ directory: re-exports the two exporter classes.
2
+ Object.defineProperty(exports, "__esModule", { value: true }); // Marks this module as transpiled from ES-module syntax.
3
+ exports.OutputFormatter = exports.VectorDBExporter = void 0; // Initialise both export names to undefined before the getters below are installed.
4
+ var vectordb_1 = require("./vectordb");
5
+ Object.defineProperty(exports, "VectorDBExporter", { enumerable: true, get: function () { return vectordb_1.VectorDBExporter; } }); // Getter re-export: each access reads the current binding from ./vectordb.
6
+ var templates_1 = require("./templates");
7
+ Object.defineProperty(exports, "OutputFormatter", { enumerable: true, get: function () { return templates_1.OutputFormatter; } }); // Getter re-export: each access reads the current binding from ./templates.
8
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/export/index.ts"],"names":[],"mappings":";;;AAAA,uCAAqE;AAA5D,4GAAA,gBAAgB,OAAA;AACzB,yCAA8D;AAArD,4GAAA,eAAe,OAAA"}
@@ -0,0 +1,25 @@
1
+ import { ExtractedContent, ContextPacket } from '../core/types';
2
+ export interface OutputTemplate { // A named template that OutputFormatter can render.
3
+ name: string; // Key under which OutputFormatter.register() stores the template; registering an existing name replaces that template.
4
+ /** Template string with placeholders: {{title}}, {{url}}, {{markdown}}, {{chunks}}, {{summary}}, {{tokens}}. Note: {{chunks}} is only substituted by formatContext; formatPage leaves it in the output unchanged. */
5
+ template: string;
6
+ }
7
+ /**
8
+ * Template-based output formatter.
9
+ * Renders extracted pages or context packets by filling {{placeholder}} tokens in a named template.
10
+ */
11
+ export declare class OutputFormatter {
12
+ private templates; // Registry of templates keyed by template name.
13
+ constructor(); // Pre-registers the built-in templates: 'default', 'llm', 'xml-tags', 'summary' and 'minimal'.
14
+ /** Register a custom template under template.name, replacing any template (built-in included) already stored under that name. */
15
+ register(template: OutputTemplate): void;
16
+ /** List the names of all registered templates, built-in and custom. */
17
+ list(): string[];
18
+ /** Format a single page. templateName defaults to 'default'; throws an Error listing the available names when the template is unknown. {{summary}} comes from page.description (empty string when absent) and {{tokens}} is estimated as ceil(page.markdown.length / 4). */
19
+ formatPage(page: ExtractedContent, templateName?: string): string;
20
+ /** Format a full context packet. templateName defaults to 'default'; throws an Error when the template is unknown. {{title}} and {{url}} both receive context.source, {{markdown}} is the chunk contents joined by blank lines, {{tokens}} is context.totalTokens and {{chunks}} is the chunk count. */
21
+ formatContext(context: ContextPacket, templateName?: string): string;
22
+ /** Format multiple pages: each page is rendered with formatPage and the results are joined with separator, which defaults to a '---' rule surrounded by blank lines. */
23
+ formatPages(pages: ExtractedContent[], templateName?: string, separator?: string): string;
24
+ }
25
+ //# sourceMappingURL=templates.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"templates.d.ts","sourceRoot":"","sources":["../../src/export/templates.ts"],"names":[],"mappings":"AAAA,OAAO,EAAgB,gBAAgB,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAE9E,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,+GAA+G;IAC/G,QAAQ,EAAE,MAAM,CAAC;CAClB;AAyBD;;;GAGG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,SAAS,CAA0C;;IAQ3D,iCAAiC;IACjC,QAAQ,CAAC,QAAQ,EAAE,cAAc,GAAG,IAAI;IAIxC,oCAAoC;IACpC,IAAI,IAAI,MAAM,EAAE;IAIhB,4CAA4C;IAC5C,UAAU,CAAC,IAAI,EAAE,gBAAgB,EAAE,YAAY,GAAE,MAAkB,GAAG,MAAM;IAY5E,mCAAmC;IACnC,aAAa,CAAC,OAAO,EAAE,aAAa,EAAE,YAAY,GAAE,MAAkB,GAAG,MAAM;IAa/E,4BAA4B;IAC5B,WAAW,CAAC,KAAK,EAAE,gBAAgB,EAAE,EAAE,YAAY,GAAE,MAAkB,EAAE,SAAS,GAAE,MAAsB,GAAG,MAAM;CAGpH"}
@@ -0,0 +1,76 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.OutputFormatter = void 0;
4
/**
 * Templates every OutputFormatter instance starts with, keyed by template name.
 * Placeholders use the `{{name}}` form and are filled in at format time.
 */
const BUILT_IN_TEMPLATES = {
    default: {
        name: 'default',
        template: '# {{title}}\n\nSource: {{url}}\n\n{{markdown}}',
    },
    llm: {
        name: 'llm',
        template: '<context source="{{url}}" tokens="{{tokens}}">\n{{markdown}}\n</context>',
    },
    'xml-tags': {
        name: 'xml-tags',
        template: '<document>\n<title>{{title}}</title>\n<source>{{url}}</source>\n<content>\n{{markdown}}\n</content>\n</document>',
    },
    summary: {
        name: 'summary',
        template: '## {{title}}\n\n> {{summary}}\n\n**Source:** {{url}} | **Tokens:** {{tokens}}\n\n---\n\n{{markdown}}',
    },
    minimal: {
        name: 'minimal',
        template: '{{markdown}}',
    },
};
/** Matches one `{{placeholder}}` token and captures the placeholder name. */
const PLACEHOLDER_PATTERN = /\{\{(\w+)\}\}/g;
/**
 * Fill every known placeholder of `template` in a single pass.
 *
 * A replacer function is used instead of a replacement string so that `$&`,
 * `$$`, `$1` and similar sequences inside the substituted content are emitted
 * literally rather than being interpreted as replacement patterns. Because the
 * template is scanned once, placeholder-looking text inside a substituted
 * value is never expanded again.
 *
 * @param template Template text containing `{{name}}` tokens.
 * @param values   Map of placeholder name to substitution value.
 * @returns The rendered text; placeholders without an entry in `values` are left untouched.
 */
function renderTemplate(template, values) {
    return template.replace(PLACEHOLDER_PATTERN, (token, key) =>
        // Own-property check keeps names like `{{constructor}}` from resolving via the prototype.
        Object.prototype.hasOwnProperty.call(values, key) ? values[key] : token);
}
/**
 * Custom output template engine.
 * Formats extracted content using configurable, named templates.
 */
class OutputFormatter {
    /** Registered templates keyed by template name. */
    templates = new Map();
    /** Creates a formatter pre-loaded with the built-in templates. */
    constructor() {
        for (const [key, tmpl] of Object.entries(BUILT_IN_TEMPLATES)) {
            this.templates.set(key, tmpl);
        }
    }
    /**
     * Register a custom template under `template.name`.
     * An existing template with the same name (built-in included) is replaced.
     */
    register(template) {
        this.templates.set(template.name, template);
    }
    /** List available template names. */
    list() {
        return [...this.templates.keys()];
    }
    /**
     * Format a single page using a template.
     *
     * Fills `{{title}}`, `{{url}}`, `{{markdown}}`, `{{summary}}` (the page
     * description, or an empty string) and `{{tokens}}` (estimated as
     * `ceil(markdown.length / 4)`). `{{chunks}}` has no page-level value and is
     * left in the output unchanged.
     *
     * @param page         Extracted page to render.
     * @param templateName Name of a registered template; defaults to 'default'.
     * @returns The rendered text.
     * @throws {Error} When no template is registered under `templateName`.
     */
    formatPage(page, templateName = 'default') {
        const tmpl = this.templates.get(templateName);
        if (!tmpl)
            throw new Error(`Unknown template: ${templateName}. Available: ${this.list().join(', ')}`);
        return renderTemplate(tmpl.template, {
            title: page.title,
            url: page.url,
            markdown: page.markdown,
            summary: page.description || '',
            tokens: String(Math.ceil(page.markdown.length / 4)),
        });
    }
    /**
     * Format a full context packet.
     *
     * `{{title}}` and `{{url}}` both receive `context.source`; `{{markdown}}` is
     * the chunk contents joined by blank lines; `{{summary}}` falls back to an
     * empty string; `{{tokens}}` is `context.totalTokens`; `{{chunks}}` is the
     * number of chunks.
     *
     * @param context      Context packet to render.
     * @param templateName Name of a registered template; defaults to 'default'.
     * @returns The rendered text.
     * @throws {Error} When no template is registered under `templateName`.
     */
    formatContext(context, templateName = 'default') {
        const tmpl = this.templates.get(templateName);
        if (!tmpl)
            throw new Error(`Unknown template: ${templateName}. Available: ${this.list().join(', ')}`);
        return renderTemplate(tmpl.template, {
            title: context.source,
            url: context.source,
            markdown: context.chunks.map(c => c.content).join('\n\n'),
            summary: context.summary || '',
            tokens: String(context.totalTokens),
            chunks: String(context.chunks.length),
        });
    }
    /**
     * Format multiple pages.
     *
     * @param pages        Pages to render, each through formatPage.
     * @param templateName Name of a registered template; defaults to 'default'.
     * @param separator    Text placed between rendered pages.
     * @returns The rendered pages joined by `separator`.
     * @throws {Error} When no template is registered under `templateName` (and `pages` is non-empty).
     */
    formatPages(pages, templateName = 'default', separator = '\n\n---\n\n') {
        return pages.map(p => this.formatPage(p, templateName)).join(separator);
    }
}
75
+ exports.OutputFormatter = OutputFormatter;
76
+ //# sourceMappingURL=templates.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"templates.js","sourceRoot":"","sources":["../../src/export/templates.ts"],"names":[],"mappings":";;;AAQA,MAAM,kBAAkB,GAAmC;IACzD,OAAO,EAAE;QACP,IAAI,EAAE,SAAS;QACf,QAAQ,EAAE,gDAAgD;KAC3D;IACD,GAAG,EAAE;QACH,IAAI,EAAE,KAAK;QACX,QAAQ,EAAE,0EAA0E;KACrF;IACD,UAAU,EAAE;QACV,IAAI,EAAE,UAAU;QAChB,QAAQ,EAAE,kHAAkH;KAC7H;IACD,OAAO,EAAE;QACP,IAAI,EAAE,SAAS;QACf,QAAQ,EAAE,sGAAsG;KACjH;IACD,OAAO,EAAE;QACP,IAAI,EAAE,SAAS;QACf,QAAQ,EAAE,cAAc;KACzB;CACF,CAAC;AAEF;;;GAGG;AACH,MAAa,eAAe;IAClB,SAAS,GAAgC,IAAI,GAAG,EAAE,CAAC;IAE3D;QACE,KAAK,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,kBAAkB,CAAC,EAAE,CAAC;YAC7D,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IAED,iCAAiC;IACjC,QAAQ,CAAC,QAAwB;QAC/B,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;IAC9C,CAAC;IAED,oCAAoC;IACpC,IAAI;QACF,OAAO,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;IACpC,CAAC;IAED,4CAA4C;IAC5C,UAAU,CAAC,IAAsB,EAAE,eAAuB,SAAS;QACjE,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;QAC9C,IAAI,CAAC,IAAI;YAAE,MAAM,IAAI,KAAK,CAAC,qBAAqB,YAAY,gBAAgB,IAAI,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEtG,OAAO,IAAI,CAAC,QAAQ;aACjB,OAAO,CAAC,gBAAgB,EAAE,IAAI,CAAC,KAAK,CAAC;aACrC,OAAO,CAAC,cAAc,EAAE,IAAI,CAAC,GAAG,CAAC;aACjC,OAAO,CAAC,mBAAmB,EAAE,IAAI,CAAC,QAAQ,CAAC;aAC3C,OAAO,CAAC,kBAAkB,EAAE,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;aACnD,OAAO,CAAC,iBAAiB,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7E,CAAC;IAED,mCAAmC;IACnC,aAAa,CAAC,OAAsB,EAAE,eAAuB,SAAS;QACpE,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;QAC9C,IAAI,CAAC,IAAI;YAAE,MAAM,IAAI,KAAK,CAAC,qBAAqB,YAAY,gBAAgB,IAAI,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEtG,OAAO,IAAI,CAAC,QAAQ;aACjB,OAAO,CAAC,gBAAgB,EAAE,OAAO,CAAC,MAAM,CAAC;aACzC,OAAO,CAAC,cAAc,EAAE,OAAO,CAAC,MAAM,CAAC;aACvC,OAAO,CAAC,mBAAmB,EAAE,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;
aAC7E,OAAO,CAAC,kBAAkB,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC;aAClD,OAAO,CAAC,iBAAiB,EAAE,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;aACvD,OAAO,CAAC,iBAAiB,EAAE,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;IAC/D,CAAC;IAED,4BAA4B;IAC5B,WAAW,CAAC,KAAyB,EAAE,eAAuB,SAAS,EAAE,YAAoB,aAAa;QACxG,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC1E,CAAC;CACF;AAlDD,0CAkDC"}
@@ -0,0 +1,21 @@
1
+ import { ContentChunk } from '../core/types';
2
+ export interface VectorDBExportOptions { // Options accepted by VectorDBExporter.exportChunks / exportToFile.
3
+ format: 'pinecone' | 'chroma' | 'weaviate' | 'qdrant' | 'json'; // Selects the output shape; 'json' (and, at runtime, any unrecognised value) yields the chunks serialised as-is.
4
+ namespace?: string; // Read by the pinecone export only: written at the top level ('' when unset) and, when set, also into each record's metadata.
5
+ collection?: string; // Collection name for chroma and qdrant (default 'webcontext'); used as the `class` of each weaviate record (default 'WebContent').
6
+ includeMetadata?: boolean; // NOTE(review): not read anywhere in the VectorDBExporter implementation (vectordb.js) of this version - it currently has no effect.
7
+ }
8
+ /**
9
+ * Serialises content chunks to pretty-printed JSON shaped for a target vector database.
10
+ * Records carry ids, chunk text and metadata only; no embedding vectors are included in the output.
11
+ */
12
+ export declare class VectorDBExporter {
13
+ exportChunks(chunks: ContentChunk[], options: VectorDBExportOptions): string; // Returns the JSON text (2-space indent) for options.format; falls back to plain chunk JSON for 'json' or an unrecognised format.
14
+ exportToFile(chunks: ContentChunk[], options: VectorDBExportOptions, outputPath: string): void; // Runs exportChunks and writes the result to outputPath synchronously via fs.writeFileSync.
15
+ private toPinecone; // Emits { vectors: [{ id, metadata }], namespace }.
16
+ private toChroma; // Emits { collection, documents: [{ id, document, metadata }] }.
17
+ private toWeaviate; // Emits an array of { class, properties } records.
18
+ private toQdrant; // Emits { collection, points: [{ id, payload }] }.
19
+ private toJSON; // Emits the chunk array serialised unchanged.
20
+ }
21
+ //# sourceMappingURL=vectordb.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vectordb.d.ts","sourceRoot":"","sources":["../../src/export/vectordb.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAG7C,MAAM,WAAW,qBAAqB;IACpC,MAAM,EAAE,UAAU,GAAG,QAAQ,GAAG,UAAU,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC/D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B;AAwBD;;;GAGG;AACH,qBAAa,gBAAgB;IAC3B,YAAY,CAAC,MAAM,EAAE,YAAY,EAAE,EAAE,OAAO,EAAE,qBAAqB,GAAG,MAAM;IAU5E,YAAY,CAAC,MAAM,EAAE,YAAY,EAAE,EAAE,OAAO,EAAE,qBAAqB,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI;IAK9F,OAAO,CAAC,UAAU;IAmBlB,OAAO,CAAC,QAAQ;IAoBhB,OAAO,CAAC,UAAU;IAmBlB,OAAO,CAAC,QAAQ;IAkBhB,OAAO,CAAC,MAAM;CAGf"}
@@ -0,0 +1,101 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.VectorDBExporter = void 0;
4
+ const fs_1 = require("fs");
5
/**
 * Turns content chunks into JSON documents shaped for a given vector database.
 * Every method returns (or writes) pretty-printed JSON with a 2-space indent.
 */
class VectorDBExporter {
    /**
     * Serialise `chunks` in the shape selected by `options.format`.
     * Any format other than the four database names produces plain chunk JSON.
     */
    exportChunks(chunks, options) {
        const { format } = options;
        if (format === 'pinecone') {
            return this.toPinecone(chunks, options);
        }
        if (format === 'chroma') {
            return this.toChroma(chunks, options);
        }
        if (format === 'weaviate') {
            return this.toWeaviate(chunks, options);
        }
        if (format === 'qdrant') {
            return this.toQdrant(chunks, options);
        }
        // 'json' and anything unrecognised share the raw serialisation.
        return this.toJSON(chunks);
    }
    /** Serialise `chunks` via exportChunks and write the text to `outputPath` synchronously. */
    exportToFile(chunks, options, outputPath) {
        const serialized = this.exportChunks(chunks, options);
        (0, fs_1.writeFileSync)(outputPath, serialized);
    }
    /** Pinecone shape: `{ vectors: [{ id, metadata }], namespace }`. */
    toPinecone(chunks, options) {
        const vectors = chunks.map((chunk) => {
            const { id, content, tokens, metadata: meta } = chunk;
            const metadata = {
                content,
                source: meta.sourceUrl,
                title: meta.title,
                headingPath: meta.headingPath.join(' > '),
                chunkIndex: meta.chunkIndex,
                totalChunks: meta.totalChunks,
                hasCode: meta.hasCode,
                language: meta.language || '',
                tokens,
            };
            // The namespace is mirrored into each record only when one was supplied.
            if (options.namespace) {
                metadata.namespace = options.namespace;
            }
            return { id, metadata };
        });
        const envelope = { vectors, namespace: options.namespace || '' };
        return JSON.stringify(envelope, null, 2);
    }
    /** Chroma shape: `{ collection, documents: [{ id, document, metadata }] }`. */
    toChroma(chunks, options) {
        const documents = chunks.map((chunk) => {
            const { id, content, tokens, metadata: meta } = chunk;
            return {
                id,
                document: content,
                metadata: {
                    source: meta.sourceUrl,
                    title: meta.title,
                    headingPath: meta.headingPath.join(' > '),
                    chunkIndex: meta.chunkIndex,
                    hasCode: meta.hasCode,
                    language: meta.language || '',
                    tokens,
                },
            };
        });
        const envelope = {
            collection: options.collection || 'webcontext',
            documents,
        };
        return JSON.stringify(envelope, null, 2);
    }
    /** Weaviate shape: an array of `{ class, properties }` records. */
    toWeaviate(chunks, options) {
        const objects = chunks.map((chunk) => {
            const { id, content, tokens, metadata: meta } = chunk;
            return {
                class: options.collection || 'WebContent',
                properties: {
                    content,
                    source: meta.sourceUrl,
                    title: meta.title,
                    headingPath: meta.headingPath.join(' > '),
                    chunkIndex: meta.chunkIndex,
                    totalChunks: meta.totalChunks,
                    hasCode: meta.hasCode,
                    language: meta.language || '',
                    tokens,
                    chunkId: id,
                },
            };
        });
        return JSON.stringify(objects, null, 2);
    }
    /** Qdrant shape: `{ collection, points: [{ id, payload }] }`. */
    toQdrant(chunks, options) {
        const points = chunks.map((chunk) => {
            const { id, content, tokens, metadata: meta } = chunk;
            return {
                id,
                payload: {
                    content,
                    source: meta.sourceUrl,
                    title: meta.title,
                    headingPath: meta.headingPath.join(' > '),
                    chunkIndex: meta.chunkIndex,
                    totalChunks: meta.totalChunks,
                    hasCode: meta.hasCode,
                    language: meta.language || '',
                    tokens,
                },
            };
        });
        const envelope = { collection: options.collection || 'webcontext', points };
        return JSON.stringify(envelope, null, 2);
    }
    /** Plain shape: the chunk array serialised unchanged. */
    toJSON(chunks) {
        return JSON.stringify(chunks, null, 2);
    }
}
100
+ exports.VectorDBExporter = VectorDBExporter;
101
+ //# sourceMappingURL=vectordb.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vectordb.js","sourceRoot":"","sources":["../../src/export/vectordb.ts"],"names":[],"mappings":";;;AACA,2BAAmC;AA+BnC;;;GAGG;AACH,MAAa,gBAAgB;IAC3B,YAAY,CAAC,MAAsB,EAAE,OAA8B;QACjE,QAAQ,OAAO,CAAC,MAAM,EAAE,CAAC;YACvB,KAAK,UAAU,CAAC,CAAC,OAAO,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YACzD,KAAK,QAAQ,CAAC,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YACrD,KAAK,UAAU,CAAC,CAAC,OAAO,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YACzD,KAAK,QAAQ,CAAC,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YACrD,KAAK,MAAM,CAAC;YAAC,OAAO,CAAC,CAAC,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QACnD,CAAC;IACH,CAAC;IAED,YAAY,CAAC,MAAsB,EAAE,OAA8B,EAAE,UAAkB;QACrF,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAChD,IAAA,kBAAa,EAAC,UAAU,EAAE,IAAI,CAAC,CAAC;IAClC,CAAC;IAEO,UAAU,CAAC,MAAsB,EAAE,OAA8B;QACvE,MAAM,OAAO,GAAqB,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YACrD,EAAE,EAAE,KAAK,CAAC,EAAE;YACZ,QAAQ,EAAE;gBACR,OAAO,EAAE,KAAK,CAAC,OAAO;gBACtB,MAAM,EAAE,KAAK,CAAC,QAAQ,CAAC,SAAS;gBAChC,KAAK,EAAE,KAAK,CAAC,QAAQ,CAAC,KAAK;gBAC3B,WAAW,EAAE,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC;gBACnD,UAAU,EAAE,KAAK,CAAC,QAAQ,CAAC,UAAU;gBACrC,WAAW,EAAE,KAAK,CAAC,QAAQ,CAAC,WAAW;gBACvC,OAAO,EAAE,KAAK,CAAC,QAAQ,CAAC,OAAO;gBAC/B,QAAQ,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ,IAAI,EAAE;gBACvC,MAAM,EAAE,KAAK,CAAC,MAAM;gBACpB,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aAC/D;SACF,CAAC,CAAC,CAAC;QACJ,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,CAAC,SAAS,IAAI,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;IAC3F,CAAC;IAEO,QAAQ,CAAC,MAAsB,EAAE,OAA8B;QACrE,MAAM,OAAO,GAAmB,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YACnD,EAAE,EAAE,KAAK,CAAC,EAAE;YACZ,QAAQ,EAAE,KAAK,CAAC,OAAO;YACvB,QAAQ,EAAE;gBACR,MAAM,EAAE,KAAK,CAAC,QAAQ,CAAC,SAAS;gBAChC,KAAK,EAAE,KAAK,CAAC,QAAQ,CAAC,KAAK;gBAC3B,WAAW,EAAE,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC;gBACnD,UAAU,EAAE,KAAK,CAAC,QAAQ,CAAC,UAAU;gBACrC,OAAO,EAAE,KAA
K,CAAC,QAAQ,CAAC,OAAO;gBAC/B,QAAQ,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ,IAAI,EAAE;gBACvC,MAAM,EAAE,KAAK,CAAC,MAAM;aACrB;SACF,CAAC,CAAC,CAAC;QACJ,OAAO,IAAI,CAAC,SAAS,CAAC;YACpB,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,YAAY;YAC9C,SAAS,EAAE,OAAO;SACnB,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;IACd,CAAC;IAEO,UAAU,CAAC,MAAsB,EAAE,OAA8B;QACvE,MAAM,OAAO,GAAqB,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YACrD,KAAK,EAAE,OAAO,CAAC,UAAU,IAAI,YAAY;YACzC,UAAU,EAAE;gBACV,OAAO,EAAE,KAAK,CAAC,OAAO;gBACtB,MAAM,EAAE,KAAK,CAAC,QAAQ,CAAC,SAAS;gBAChC,KAAK,EAAE,KAAK,CAAC,QAAQ,CAAC,KAAK;gBAC3B,WAAW,EAAE,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC;gBACnD,UAAU,EAAE,KAAK,CAAC,QAAQ,CAAC,UAAU;gBACrC,WAAW,EAAE,KAAK,CAAC,QAAQ,CAAC,WAAW;gBACvC,OAAO,EAAE,KAAK,CAAC,QAAQ,CAAC,OAAO;gBAC/B,QAAQ,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ,IAAI,EAAE;gBACvC,MAAM,EAAE,KAAK,CAAC,MAAM;gBACpB,OAAO,EAAE,KAAK,CAAC,EAAE;aAClB;SACF,CAAC,CAAC,CAAC;QACJ,OAAO,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;IAC1C,CAAC;IAEO,QAAQ,CAAC,MAAsB,EAAE,OAA8B;QACrE,MAAM,OAAO,GAAmB,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YACnD,EAAE,EAAE,KAAK,CAAC,EAAE;YACZ,OAAO,EAAE;gBACP,OAAO,EAAE,KAAK,CAAC,OAAO;gBACtB,MAAM,EAAE,KAAK,CAAC,QAAQ,CAAC,SAAS;gBAChC,KAAK,EAAE,KAAK,CAAC,QAAQ,CAAC,KAAK;gBAC3B,WAAW,EAAE,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC;gBACnD,UAAU,EAAE,KAAK,CAAC,QAAQ,CAAC,UAAU;gBACrC,WAAW,EAAE,KAAK,CAAC,QAAQ,CAAC,WAAW;gBACvC,OAAO,EAAE,KAAK,CAAC,QAAQ,CAAC,OAAO;gBAC/B,QAAQ,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ,IAAI,EAAE;gBACvC,MAAM,EAAE,KAAK,CAAC,MAAM;aACrB;SACF,CAAC,CAAC,CAAC;QACJ,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,YAAY,EAAE,MAAM,EAAE,OAAO,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;IACtG,CAAC;IAEO,MAAM,CAAC,MAAsB;QACnC,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;IACzC,CAAC;CACF;AA/FD,4CA+FC"}
@@ -0,0 +1,23 @@
1
+ import * as cheerio from 'cheerio';
2
+ import { ExtractedContent, CodeBlock, FocusMode } from '../core/types';
3
+ /**
4
+ * Content extractor that cleans HTML and extracts structured content.
5
+ * Uses Readability algorithm + custom heuristics for developer content.
6
+ */
7
+ export declare class ContentExtractor {
8
+ private static NOISE_SELECTORS;
9
+ private static CONTENT_SELECTORS;
10
+ extract(html: string, url: string, focusMode?: FocusMode): ExtractedContent;
11
+ private findContentElement;
12
+ private extractTitle;
13
+ private extractCodeBlocks;
14
+ private extractHeadings;
15
+ private extractLinks;
16
+ private extractMetadata;
17
+ private detectContentType;
18
+ private detectFramework;
19
+ private detectVersion;
20
+ extractOpenAPIEndpoints($: cheerio.CheerioAPI, container: cheerio.Cheerio<any>): CodeBlock[];
21
+ private detectLanguage;
22
+ }
23
+ //# sourceMappingURL=content.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"content.d.ts","sourceRoot":"","sources":["../../src/extractors/content.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,gBAAgB,EAAE,SAAS,EAAgD,SAAS,EAAE,MAAM,eAAe,CAAC;AAErH;;;GAGG;AACH,qBAAa,gBAAgB;IAE3B,OAAO,CAAC,MAAM,CAAC,eAAe,CAY5B;IAGF,OAAO,CAAC,MAAM,CAAC,iBAAiB,CAM9B;IAEF,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,SAAS,GAAE,SAAkB,GAAG,gBAAgB;IA6CnF,OAAO,CAAC,kBAAkB;IA8C1B,OAAO,CAAC,YAAY;IAMpB,OAAO,CAAC,iBAAiB;IAoBzB,OAAO,CAAC,eAAe;IAavB,OAAO,CAAC,YAAY;IAsBpB,OAAO,CAAC,eAAe;IAkBvB,OAAO,CAAC,iBAAiB;IAYzB,OAAO,CAAC,eAAe;IAUvB,OAAO,CAAC,aAAa;IAYrB,uBAAuB,CAAC,CAAC,EAAE,OAAO,CAAC,UAAU,EAAE,SAAS,EAAE,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,SAAS,EAAE;IAsC5F,OAAO,CAAC,cAAc;CAmCvB"}