@cosmocoder/mcp-web-docs 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240) hide show
  1. package/LICENSE +22 -0
  2. package/README.md +368 -0
  3. package/build/__mocks__/embeddings.d.ts +17 -0
  4. package/build/__mocks__/embeddings.js +66 -0
  5. package/build/__mocks__/embeddings.js.map +1 -0
  6. package/build/config.d.ts +44 -0
  7. package/build/config.js +158 -0
  8. package/build/config.js.map +1 -0
  9. package/build/config.test.d.ts +1 -0
  10. package/build/config.test.js +165 -0
  11. package/build/config.test.js.map +1 -0
  12. package/build/crawler/auth.d.ts +128 -0
  13. package/build/crawler/auth.js +546 -0
  14. package/build/crawler/auth.js.map +1 -0
  15. package/build/crawler/auth.test.d.ts +1 -0
  16. package/build/crawler/auth.test.js +174 -0
  17. package/build/crawler/auth.test.js.map +1 -0
  18. package/build/crawler/base.d.ts +24 -0
  19. package/build/crawler/base.js +149 -0
  20. package/build/crawler/base.js.map +1 -0
  21. package/build/crawler/base.test.d.ts +1 -0
  22. package/build/crawler/base.test.js +234 -0
  23. package/build/crawler/base.test.js.map +1 -0
  24. package/build/crawler/browser-config.d.ts +2 -0
  25. package/build/crawler/browser-config.js +29 -0
  26. package/build/crawler/browser-config.js.map +1 -0
  27. package/build/crawler/browser-config.test.d.ts +1 -0
  28. package/build/crawler/browser-config.test.js +56 -0
  29. package/build/crawler/browser-config.test.js.map +1 -0
  30. package/build/crawler/cheerio.d.ts +11 -0
  31. package/build/crawler/cheerio.js +134 -0
  32. package/build/crawler/cheerio.js.map +1 -0
  33. package/build/crawler/chromium.d.ts +21 -0
  34. package/build/crawler/chromium.js +596 -0
  35. package/build/crawler/chromium.js.map +1 -0
  36. package/build/crawler/content-extractor-types.d.ts +25 -0
  37. package/build/crawler/content-extractor-types.js +2 -0
  38. package/build/crawler/content-extractor-types.js.map +1 -0
  39. package/build/crawler/content-extractors.d.ts +9 -0
  40. package/build/crawler/content-extractors.js +9 -0
  41. package/build/crawler/content-extractors.js.map +1 -0
  42. package/build/crawler/content-utils.d.ts +2 -0
  43. package/build/crawler/content-utils.js +22 -0
  44. package/build/crawler/content-utils.js.map +1 -0
  45. package/build/crawler/content-utils.test.d.ts +1 -0
  46. package/build/crawler/content-utils.test.js +99 -0
  47. package/build/crawler/content-utils.test.js.map +1 -0
  48. package/build/crawler/crawlee-crawler.d.ts +63 -0
  49. package/build/crawler/crawlee-crawler.js +342 -0
  50. package/build/crawler/crawlee-crawler.js.map +1 -0
  51. package/build/crawler/crawlee-crawler.test.d.ts +1 -0
  52. package/build/crawler/crawlee-crawler.test.js +280 -0
  53. package/build/crawler/crawlee-crawler.test.js.map +1 -0
  54. package/build/crawler/default-extractor.d.ts +4 -0
  55. package/build/crawler/default-extractor.js +26 -0
  56. package/build/crawler/default-extractor.js.map +1 -0
  57. package/build/crawler/default-extractor.test.d.ts +1 -0
  58. package/build/crawler/default-extractor.test.js +200 -0
  59. package/build/crawler/default-extractor.test.js.map +1 -0
  60. package/build/crawler/default.d.ts +11 -0
  61. package/build/crawler/default.js +138 -0
  62. package/build/crawler/default.js.map +1 -0
  63. package/build/crawler/docs-crawler.d.ts +26 -0
  64. package/build/crawler/docs-crawler.js +97 -0
  65. package/build/crawler/docs-crawler.js.map +1 -0
  66. package/build/crawler/docs-crawler.test.d.ts +1 -0
  67. package/build/crawler/docs-crawler.test.js +185 -0
  68. package/build/crawler/docs-crawler.test.js.map +1 -0
  69. package/build/crawler/factory.d.ts +6 -0
  70. package/build/crawler/factory.js +83 -0
  71. package/build/crawler/factory.js.map +1 -0
  72. package/build/crawler/github-pages-extractor.d.ts +4 -0
  73. package/build/crawler/github-pages-extractor.js +33 -0
  74. package/build/crawler/github-pages-extractor.js.map +1 -0
  75. package/build/crawler/github-pages-extractor.test.d.ts +1 -0
  76. package/build/crawler/github-pages-extractor.test.js +184 -0
  77. package/build/crawler/github-pages-extractor.test.js.map +1 -0
  78. package/build/crawler/github.d.ts +20 -0
  79. package/build/crawler/github.js +181 -0
  80. package/build/crawler/github.js.map +1 -0
  81. package/build/crawler/github.test.d.ts +1 -0
  82. package/build/crawler/github.test.js +326 -0
  83. package/build/crawler/github.test.js.map +1 -0
  84. package/build/crawler/puppeteer.d.ts +16 -0
  85. package/build/crawler/puppeteer.js +191 -0
  86. package/build/crawler/puppeteer.js.map +1 -0
  87. package/build/crawler/queue-manager.d.ts +43 -0
  88. package/build/crawler/queue-manager.js +169 -0
  89. package/build/crawler/queue-manager.js.map +1 -0
  90. package/build/crawler/queue-manager.test.d.ts +1 -0
  91. package/build/crawler/queue-manager.test.js +509 -0
  92. package/build/crawler/queue-manager.test.js.map +1 -0
  93. package/build/crawler/site-rules.d.ts +11 -0
  94. package/build/crawler/site-rules.js +104 -0
  95. package/build/crawler/site-rules.js.map +1 -0
  96. package/build/crawler/site-rules.test.d.ts +1 -0
  97. package/build/crawler/site-rules.test.js +139 -0
  98. package/build/crawler/site-rules.test.js.map +1 -0
  99. package/build/crawler/storybook-extractor.d.ts +34 -0
  100. package/build/crawler/storybook-extractor.js +767 -0
  101. package/build/crawler/storybook-extractor.js.map +1 -0
  102. package/build/crawler/storybook-extractor.test.d.ts +1 -0
  103. package/build/crawler/storybook-extractor.test.js +491 -0
  104. package/build/crawler/storybook-extractor.test.js.map +1 -0
  105. package/build/embeddings/fastembed.d.ts +25 -0
  106. package/build/embeddings/fastembed.js +188 -0
  107. package/build/embeddings/fastembed.js.map +1 -0
  108. package/build/embeddings/fastembed.test.d.ts +1 -0
  109. package/build/embeddings/fastembed.test.js +307 -0
  110. package/build/embeddings/fastembed.test.js.map +1 -0
  111. package/build/embeddings/openai.d.ts +8 -0
  112. package/build/embeddings/openai.js +56 -0
  113. package/build/embeddings/openai.js.map +1 -0
  114. package/build/embeddings/types.d.ts +4 -0
  115. package/build/embeddings/types.js +2 -0
  116. package/build/embeddings/types.js.map +1 -0
  117. package/build/index.d.ts +2 -0
  118. package/build/index.js +1007 -0
  119. package/build/index.js.map +1 -0
  120. package/build/index.test.d.ts +1 -0
  121. package/build/index.test.js +364 -0
  122. package/build/index.test.js.map +1 -0
  123. package/build/indexing/queue-manager.d.ts +36 -0
  124. package/build/indexing/queue-manager.js +86 -0
  125. package/build/indexing/queue-manager.js.map +1 -0
  126. package/build/indexing/queue-manager.test.d.ts +1 -0
  127. package/build/indexing/queue-manager.test.js +257 -0
  128. package/build/indexing/queue-manager.test.js.map +1 -0
  129. package/build/indexing/status.d.ts +39 -0
  130. package/build/indexing/status.js +207 -0
  131. package/build/indexing/status.js.map +1 -0
  132. package/build/indexing/status.test.d.ts +1 -0
  133. package/build/indexing/status.test.js +246 -0
  134. package/build/indexing/status.test.js.map +1 -0
  135. package/build/processor/content.d.ts +16 -0
  136. package/build/processor/content.js +286 -0
  137. package/build/processor/content.js.map +1 -0
  138. package/build/processor/content.test.d.ts +1 -0
  139. package/build/processor/content.test.js +369 -0
  140. package/build/processor/content.test.js.map +1 -0
  141. package/build/processor/markdown.d.ts +11 -0
  142. package/build/processor/markdown.js +256 -0
  143. package/build/processor/markdown.js.map +1 -0
  144. package/build/processor/markdown.test.d.ts +1 -0
  145. package/build/processor/markdown.test.js +312 -0
  146. package/build/processor/markdown.test.js.map +1 -0
  147. package/build/processor/metadata-parser.d.ts +37 -0
  148. package/build/processor/metadata-parser.js +245 -0
  149. package/build/processor/metadata-parser.js.map +1 -0
  150. package/build/processor/metadata-parser.test.d.ts +1 -0
  151. package/build/processor/metadata-parser.test.js +357 -0
  152. package/build/processor/metadata-parser.test.js.map +1 -0
  153. package/build/processor/processor.d.ts +8 -0
  154. package/build/processor/processor.js +190 -0
  155. package/build/processor/processor.js.map +1 -0
  156. package/build/processor/processor.test.d.ts +1 -0
  157. package/build/processor/processor.test.js +357 -0
  158. package/build/processor/processor.test.js.map +1 -0
  159. package/build/rag/cache.d.ts +10 -0
  160. package/build/rag/cache.js +10 -0
  161. package/build/rag/cache.js.map +1 -0
  162. package/build/rag/code-generator.d.ts +11 -0
  163. package/build/rag/code-generator.js +30 -0
  164. package/build/rag/code-generator.js.map +1 -0
  165. package/build/rag/context-assembler.d.ts +23 -0
  166. package/build/rag/context-assembler.js +113 -0
  167. package/build/rag/context-assembler.js.map +1 -0
  168. package/build/rag/docs-search.d.ts +55 -0
  169. package/build/rag/docs-search.js +380 -0
  170. package/build/rag/docs-search.js.map +1 -0
  171. package/build/rag/pipeline.d.ts +26 -0
  172. package/build/rag/pipeline.js +91 -0
  173. package/build/rag/pipeline.js.map +1 -0
  174. package/build/rag/query-processor.d.ts +14 -0
  175. package/build/rag/query-processor.js +57 -0
  176. package/build/rag/query-processor.js.map +1 -0
  177. package/build/rag/reranker.d.ts +55 -0
  178. package/build/rag/reranker.js +210 -0
  179. package/build/rag/reranker.js.map +1 -0
  180. package/build/rag/response-generator.d.ts +20 -0
  181. package/build/rag/response-generator.js +101 -0
  182. package/build/rag/response-generator.js.map +1 -0
  183. package/build/rag/retriever.d.ts +19 -0
  184. package/build/rag/retriever.js +111 -0
  185. package/build/rag/retriever.js.map +1 -0
  186. package/build/rag/validator.d.ts +22 -0
  187. package/build/rag/validator.js +128 -0
  188. package/build/rag/validator.js.map +1 -0
  189. package/build/rag/version-manager.d.ts +23 -0
  190. package/build/rag/version-manager.js +98 -0
  191. package/build/rag/version-manager.js.map +1 -0
  192. package/build/setupTests.d.ts +4 -0
  193. package/build/setupTests.js +50 -0
  194. package/build/setupTests.js.map +1 -0
  195. package/build/storage/storage.d.ts +38 -0
  196. package/build/storage/storage.js +700 -0
  197. package/build/storage/storage.js.map +1 -0
  198. package/build/storage/storage.test.d.ts +1 -0
  199. package/build/storage/storage.test.js +338 -0
  200. package/build/storage/storage.test.js.map +1 -0
  201. package/build/types/rag.d.ts +27 -0
  202. package/build/types/rag.js +2 -0
  203. package/build/types/rag.js.map +1 -0
  204. package/build/types.d.ts +120 -0
  205. package/build/types.js +2 -0
  206. package/build/types.js.map +1 -0
  207. package/build/util/content-utils.d.ts +31 -0
  208. package/build/util/content-utils.js +120 -0
  209. package/build/util/content-utils.js.map +1 -0
  210. package/build/util/content.d.ts +1 -0
  211. package/build/util/content.js +16 -0
  212. package/build/util/content.js.map +1 -0
  213. package/build/util/docs.d.ts +1 -0
  214. package/build/util/docs.js +26 -0
  215. package/build/util/docs.js.map +1 -0
  216. package/build/util/docs.test.d.ts +1 -0
  217. package/build/util/docs.test.js +49 -0
  218. package/build/util/docs.test.js.map +1 -0
  219. package/build/util/favicon.d.ts +6 -0
  220. package/build/util/favicon.js +88 -0
  221. package/build/util/favicon.js.map +1 -0
  222. package/build/util/favicon.test.d.ts +1 -0
  223. package/build/util/favicon.test.js +140 -0
  224. package/build/util/favicon.test.js.map +1 -0
  225. package/build/util/logger.d.ts +17 -0
  226. package/build/util/logger.js +72 -0
  227. package/build/util/logger.js.map +1 -0
  228. package/build/util/logger.test.d.ts +1 -0
  229. package/build/util/logger.test.js +46 -0
  230. package/build/util/logger.test.js.map +1 -0
  231. package/build/util/security.d.ts +312 -0
  232. package/build/util/security.js +719 -0
  233. package/build/util/security.js.map +1 -0
  234. package/build/util/security.test.d.ts +1 -0
  235. package/build/util/security.test.js +524 -0
  236. package/build/util/security.test.js.map +1 -0
  237. package/build/util/site-detector.d.ts +22 -0
  238. package/build/util/site-detector.js +42 -0
  239. package/build/util/site-detector.js.map +1 -0
  240. package/package.json +112 -0
@@ -0,0 +1,357 @@
1
+ import { WebDocumentProcessor } from './processor.js';
2
+ import { createMockEmbeddings, createFailingEmbeddings } from '../__mocks__/embeddings.js';
3
+ describe('WebDocumentProcessor', () => {
4
+ let processor;
5
+ let mockEmbeddings;
6
+ beforeEach(() => { // Rebuilds the shared fixtures before every test so no state leaks between cases.
7
+ mockEmbeddings = createMockEmbeddings();
8
+ processor = new WebDocumentProcessor(mockEmbeddings, 500); // 500 is presumably the max chunk size (a later test is titled "maxChunkSize") - confirm against WebDocumentProcessor
9
+ });
10
+ describe('process', () => {
11
+ it('should process HTML content', async () => { // Happy path: a full HTML page goes in; URL, a non-empty title, chunk presence and vector width are checked.
12
+ const crawlResult = {
13
+ url: 'https://example.com/docs/page',
14
+ path: '/docs/page',
15
+ title: 'Test Page',
16
+ content: `
17
+ <html>
18
+ <body>
19
+ <main>
20
+ <h1>Test Documentation</h1>
21
+ <p>This is some test content for the documentation page.</p>
22
+ <h2>Features</h2>
23
+ <p>Here are some features of our product.</p>
24
+ </main>
25
+ </body>
26
+ </html>
27
+ `,
28
+ };
29
+ const result = await processor.process(crawlResult);
30
+ expect(result).toBeDefined();
31
+ expect(result.metadata.url).toBe(crawlResult.url);
32
+ // Title may come from crawl result or H1, depending on processor logic
33
+ expect(result.metadata.title).toBeTruthy();
34
+ expect(result.chunks.length).toBeGreaterThan(0);
35
+ expect(result.chunks[0].vector.length).toBe(mockEmbeddings.dimensions); // vector width must equal the mock's advertised dimensions
36
+ });
37
+ it('should process markdown content', async () => { // Markdown input served from a .md path; only a truthy title and at least one chunk are asserted.
38
+ const crawlResult = {
39
+ url: 'https://example.com/docs/readme.md',
40
+ path: '/docs/readme.md',
41
+ title: 'README',
42
+ content: `# Project README
43
+
44
+ This is the README for our project.
45
+
46
+ ## Installation
47
+
48
+ Run the following command:
49
+
50
+ \`\`\`bash
51
+ npm install example-package
52
+ \`\`\`
53
+
54
+ ## Usage
55
+
56
+ Here's how to use the package.
57
+ `,
58
+ };
59
+ const result = await processor.process(crawlResult);
60
+ expect(result).toBeDefined();
61
+ // Title may come from crawl result or H1
62
+ expect(result.metadata.title).toBeTruthy();
63
+ expect(result.chunks.length).toBeGreaterThan(0);
64
+ });
65
+ it('should process pre-extracted content from Storybook', async () => { // Markdown tagged with extractorUsed 'StorybookExtractor'; checks the resulting title and chunk presence.
66
+ const crawlResult = {
67
+ url: 'https://storybook.example.com/button',
68
+ path: '/button',
69
+ title: 'Button',
70
+ content: `# Button Component
71
+
72
+ A versatile button component.
73
+
74
+ ## Props
75
+
76
+ | Prop | Type |
77
+ |------|------|
78
+ | variant | string |
79
+
80
+ ## Example
81
+
82
+ \`\`\`jsx
83
+ <Button variant="primary">Click</Button>
84
+ \`\`\`
85
+ `,
86
+ extractorUsed: 'StorybookExtractor', // the test title treats this tag as "content was already extracted"
87
+ };
88
+ const result = await processor.process(crawlResult);
89
+ expect(result).toBeDefined();
90
+ expect(result.metadata.title).toBe('Button Component'); // expected title is the markdown H1 text, not crawlResult.title ('Button')
91
+ expect(result.chunks.length).toBeGreaterThan(0);
92
+ });
93
+ it('should process pre-extracted content from GitHub Pages', async () => { // Markdown tagged with extractorUsed 'GithubPagesExtractor'; only chunk presence is asserted.
94
+ const crawlResult = {
95
+ url: 'https://user.github.io/repo/',
96
+ path: '/',
97
+ title: 'GitHub Pages',
98
+ content: `# Welcome to GitHub Pages
99
+
100
+ This is documentation hosted on GitHub Pages.
101
+
102
+ ## Getting Started
103
+
104
+ Follow these steps to get started.
105
+ `,
106
+ extractorUsed: 'GithubPagesExtractor',
107
+ };
108
+ const result = await processor.process(crawlResult);
109
+ expect(result).toBeDefined();
110
+ expect(result.chunks.length).toBeGreaterThan(0);
111
+ });
112
+ it('should create chunks with proper metadata', async () => { // Per-chunk checks: source URL/path, vector width, and metadata.type drawn from a fixed set.
113
+ const crawlResult = {
114
+ url: 'https://example.com/api',
115
+ path: '/api',
116
+ title: 'API',
117
+ content: `
118
+ <html>
119
+ <body>
120
+ <main>
121
+ <h1>API Reference</h1>
122
+ <p>This document describes the API endpoints.</p>
123
+ <h2>GET /users</h2>
124
+ <p>Returns a list of users.</p>
125
+ <pre><code>
126
+ {
127
+ "users": [...]
128
+ }
129
+ </code></pre>
130
+ </main>
131
+ </body>
132
+ </html>
133
+ `,
134
+ };
135
+ const result = await processor.process(crawlResult);
136
+ expect(result).toBeDefined();
137
+ result.chunks.forEach((chunk) => { // NOTE(review): passes vacuously if chunks is empty; this test has no length assertion
138
+ expect(chunk.url).toBe(crawlResult.url);
139
+ expect(chunk.path).toBe(crawlResult.path);
140
+ expect(chunk.vector).toHaveLength(mockEmbeddings.dimensions);
141
+ expect(chunk.metadata).toBeDefined();
142
+ expect(['overview', 'api', 'example', 'usage']).toContain(chunk.metadata.type); // type must be one of these four labels
143
+ });
144
+ });
145
+ it('should handle large content by creating multiple chunks', async () => { // A long generated page must yield more than one chunk.
146
+ const longContent = Array(50) // 50 synthetic heading + paragraph sections
147
+ .fill(null)
148
+ .map((_, i) => `
149
+ <h2>Section ${i + 1}</h2>
150
+ <p>This is the content for section ${i + 1}. It contains some text that will need to be chunked appropriately for the embedding model. Lorem ipsum dolor sit amet, consectetur adipiscing elit.</p>
151
+ `)
152
+ .join('\n');
153
+ const crawlResult = {
154
+ url: 'https://example.com/long',
155
+ path: '/long',
156
+ title: 'Long Document',
157
+ content: `
158
+ <html>
159
+ <body>
160
+ <main>
161
+ <h1>Long Document</h1>
162
+ ${longContent}
163
+ </main>
164
+ </body>
165
+ </html>
166
+ `,
167
+ };
168
+ const result = await processor.process(crawlResult);
169
+ expect(result).toBeDefined();
170
+ expect(result.chunks.length).toBeGreaterThan(1);
171
+ });
172
+ it('should throw error for content that cannot be parsed', async () => { // Empty-string content is the "cannot be parsed" case exercised here.
173
+ const crawlResult = {
174
+ url: 'https://example.com/empty',
175
+ path: '/empty',
176
+ title: 'Empty',
177
+ content: '', // Empty content
178
+ };
179
+ await expect(processor.process(crawlResult)).rejects.toThrow(); // any rejection satisfies this; no message is pinned
180
+ });
181
+ it('should throw error for whitespace-only content', async () => { // Content made only of spaces and newlines must be rejected as well.
182
+ const crawlResult = {
183
+ url: 'https://example.com/whitespace',
184
+ path: '/whitespace',
185
+ title: 'Whitespace',
186
+ content: ' \n\n ',
187
+ };
188
+ await expect(processor.process(crawlResult)).rejects.toThrow(); // any rejection satisfies this; no message is pinned
189
+ });
190
+ it('should handle embedding failures gracefully', async () => { // Despite "gracefully", the asserted behaviour is that process() rejects with the embeddings error.
191
+ const failingEmbeddings = createFailingEmbeddings();
192
+ const failingProcessor = new WebDocumentProcessor(failingEmbeddings, 500); // dedicated processor so the shared one keeps working embeddings
193
+ const crawlResult = {
194
+ url: 'https://example.com/test',
195
+ path: '/test',
196
+ title: 'Test',
197
+ content: `
198
+ <html>
199
+ <body>
200
+ <main>
201
+ <h1>Test</h1>
202
+ <p>Some content here.</p>
203
+ </main>
204
+ </body>
205
+ </html>
206
+ `,
207
+ };
208
+ await expect(failingProcessor.process(crawlResult)).rejects.toThrow('Embeddings service unavailable');
209
+ });
210
+ it('should set lastIndexed date', async () => { // metadata.lastIndexed must fall between timestamps taken just before and just after the call.
211
+ const crawlResult = {
212
+ url: 'https://example.com/dated',
213
+ path: '/dated',
214
+ title: 'Dated',
215
+ content: `
216
+ <html>
217
+ <body>
218
+ <main>
219
+ <h1>Document</h1>
220
+ <p>Content with date.</p>
221
+ </main>
222
+ </body>
223
+ </html>
224
+ `,
225
+ };
226
+ const before = new Date();
227
+ const result = await processor.process(crawlResult);
228
+ const after = new Date();
229
+ expect(result.metadata.lastIndexed.getTime()).toBeGreaterThanOrEqual(before.getTime()); // inclusive lower bound
230
+ expect(result.metadata.lastIndexed.getTime()).toBeLessThanOrEqual(after.getTime()); // inclusive upper bound
231
+ });
232
+ it('should respect maxChunkSize parameter', async () => { // Builds a second processor with 100 instead of 500 as the size argument.
233
+ const smallChunkProcessor = new WebDocumentProcessor(mockEmbeddings, 100);
234
+ const crawlResult = {
235
+ url: 'https://example.com/chunks',
236
+ path: '/chunks',
237
+ title: 'Chunks',
238
+ content: `
239
+ <html>
240
+ <body>
241
+ <main>
242
+ <h1>Document</h1>
243
+ <p>This is a longer paragraph that should be split into multiple chunks when using a small chunk size. The semantic chunker should create appropriate boundaries.</p>
244
+ <p>Another paragraph with additional content that needs to be processed and chunked appropriately.</p>
245
+ </main>
246
+ </body>
247
+ </html>
248
+ `,
249
+ };
250
+ const result = await smallChunkProcessor.process(crawlResult);
251
+ // With smaller chunk size, should create more chunks
252
+ expect(result.chunks.length).toBeGreaterThanOrEqual(1); // NOTE(review): ">= 1" only proves a chunk exists; it does not verify the "more chunks" expectation stated above
253
+ });
254
+ it('should handle MDX files', async () => { // Markdown with an embedded JSX element at a .mdx path; only the title is asserted, chunks are not inspected.
255
+ const crawlResult = {
256
+ url: 'https://example.com/docs/component.mdx',
257
+ path: '/docs/component.mdx',
258
+ title: 'MDX Component',
259
+ content: `# MDX Component
260
+
261
+ This is an MDX file with JSX.
262
+
263
+ <MyComponent prop="value">
264
+ Children content
265
+ </MyComponent>
266
+
267
+ ## Usage
268
+
269
+ \`\`\`jsx
270
+ import { MyComponent } from 'library';
271
+ \`\`\`
272
+ `,
273
+ };
274
+ const result = await processor.process(crawlResult);
275
+ expect(result).toBeDefined();
276
+ expect(result.metadata.title).toBe('MDX Component'); // H1 and crawlResult.title are identical here, so this cannot tell which one the title came from
277
+ });
278
+ it('should process DefaultExtractor content', async () => { // Plain text (no HTML, no markdown headings) tagged 'DefaultExtractor'; only chunk presence is asserted.
279
+ const crawlResult = {
280
+ url: 'https://example.com/default',
281
+ path: '/default',
282
+ title: 'Default Extracted',
283
+ content: `Page Title
284
+
285
+ This is content extracted by the default extractor.
286
+ It's plain text without HTML markup.
287
+
288
+ Another section of content here.`,
289
+ extractorUsed: 'DefaultExtractor',
290
+ };
291
+ const result = await processor.process(crawlResult);
292
+ expect(result).toBeDefined();
293
+ expect(result.chunks.length).toBeGreaterThan(0);
294
+ });
295
+ });
296
+ describe('chunk metadata detection', () => {
297
+ it('should detect API content type', async () => { // API-reference style HTML (endpoint heading, Parameters, Response) is processed.
298
+ const crawlResult = {
299
+ url: 'https://example.com/api-ref',
300
+ path: '/api-ref',
301
+ title: 'API Reference',
302
+ content: `
303
+ <html>
304
+ <body>
305
+ <main>
306
+ <h1>API Reference</h1>
307
+ <h2>GET /api/users</h2>
308
+ <p>Returns array of users</p>
309
+ <h3>Parameters</h3>
310
+ <p>limit: number - Maximum results</p>
311
+ <h3>Response</h3>
312
+ <pre><code>{"users": []}</code></pre>
313
+ </main>
314
+ </body>
315
+ </html>
316
+ `,
317
+ };
318
+ const result = await processor.process(crawlResult);
319
+ expect(result).toBeDefined();
320
+ // Should detect API-related content
321
+ // May or may not detect as API depending on content
322
+ expect(result.chunks.length).toBeGreaterThan(0); // NOTE(review): despite the test name, no chunk's metadata.type is inspected - only that chunks exist
323
+ });
324
+ it('should detect example content type', async () => { // Page made of two <pre><code> samples; checks that code text reaches the chunks.
325
+ const crawlResult = {
326
+ url: 'https://example.com/examples',
327
+ path: '/examples',
328
+ title: 'Examples',
329
+ content: `
330
+ <html>
331
+ <body>
332
+ <main>
333
+ <h1>Code Examples</h1>
334
+ <h2>Basic Example</h2>
335
+ <pre><code>
336
+ const result = doSomething();
337
+ console.log(result);
338
+ </code></pre>
339
+ <h2>Advanced Example</h2>
340
+ <pre><code>
341
+ const config = { advanced: true };
342
+ const result = doSomething(config);
343
+ </code></pre>
344
+ </main>
345
+ </body>
346
+ </html>
347
+ `,
348
+ };
349
+ const result = await processor.process(crawlResult);
350
+ expect(result).toBeDefined();
351
+ // Should have extracted code blocks
352
+ const hasCodeContent = result.chunks.some((c) => c.content.includes('doSomething')); // 'doSomething' appears only inside the <pre><code> samples
353
+ expect(hasCodeContent).toBe(true); // NOTE(review): metadata.type is never checked, so the 'example' type named in the title is not verified
354
+ });
355
+ });
356
+ });
357
+ //# sourceMappingURL=processor.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"processor.test.js","sourceRoot":"","sources":["../../src/processor/processor.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AACtD,OAAO,EAAE,oBAAoB,EAAE,uBAAuB,EAAE,MAAM,4BAA4B,CAAC;AAI3F,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;IACpC,IAAI,SAA+B,CAAC;IACpC,IAAI,cAAkC,CAAC;IAEvC,UAAU,CAAC,GAAG,EAAE;QACd,cAAc,GAAG,oBAAoB,EAAE,CAAC;QACxC,SAAS,GAAG,IAAI,oBAAoB,CAAC,cAAc,EAAE,GAAG,CAAC,CAAC;IAC5D,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,SAAS,EAAE,GAAG,EAAE;QACvB,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;YAC3C,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,+BAA+B;gBACpC,IAAI,EAAE,YAAY;gBAClB,KAAK,EAAE,WAAW;gBAClB,OAAO,EAAE;;;;;;;;;;;SAWR;aACF,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YAEpD,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;YAC7B,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;YAClD,uEAAuE;YACvE,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;YAC3C,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;YAChD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,CAAC;QACzE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,iCAAiC,EAAE,KAAK,IAAI,EAAE;YAC/C,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,oCAAoC;gBACzC,IAAI,EAAE,iBAAiB;gBACvB,KAAK,EAAE,QAAQ;gBACf,OAAO,EAAE;;;;;;;;;;;;;;;CAehB;aACM,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YAEpD,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;YAC7B,yCAAyC;YACzC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;YAC3C,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,qDAAqD,EAAE,KAAK,IAAI,EAAE;YACnE,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,sCAAsC;gBAC3C,IAAI,EAAE,SAAS;gBACf,KAAK,EAAE,QAAQ;gBACf,OAAO,EAAE;;;;;;;;;;;;;;;CAehB;gBACO,aAAa,EAAE,oBAAoB;aACpC,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YAEpD,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;YAC7B,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;YACvD,MAAM,
CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wDAAwD,EAAE,KAAK,IAAI,EAAE;YACtE,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,8BAA8B;gBACnC,IAAI,EAAE,GAAG;gBACT,KAAK,EAAE,cAAc;gBACrB,OAAO,EAAE;;;;;;;CAOhB;gBACO,aAAa,EAAE,sBAAsB;aACtC,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YAEpD,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;YAC7B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;YACzD,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,yBAAyB;gBAC9B,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE,KAAK;gBACZ,OAAO,EAAE;;;;;;;;;;;;;;;;SAgBR;aACF,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YAEpD,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;YAC7B,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;gBAC9B,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;gBACxC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;gBAC1C,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,cAAc,CAAC,UAAU,CAAC,CAAC;gBAC7D,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;gBACrC,MAAM,CAAC,CAAC,UAAU,EAAE,KAAK,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;YACjF,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,yDAAyD,EAAE,KAAK,IAAI,EAAE;YACvE,MAAM,WAAW,GAAG,KAAK,CAAC,EAAE,CAAC;iBAC1B,IAAI,CAAC,IAAI,CAAC;iBACV,GAAG,CACF,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;sBACE,CAAC,GAAG,CAAC;6CACkB,CAAC,GAAG,CAAC;OAC3C,CACE;iBACA,IAAI,CAAC,IAAI,CAAC,CAAC;YAEd,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,0BAA0B;gBAC/B,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE,eAAe;gBACtB,OAAO,EAAE;;;;;kBAKC,WAAW;;;;SAIpB;aACF,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YAEpD,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;YAC7B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,sDAAsD,EAAE,KAAK,IAAI,EAAE;YACpE,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,2BAA2B;gBAChC,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,OAAO;gBACd,OAAO,E
AAE,EAAE,EAAE,gBAAgB;aAC9B,CAAC;YAEF,MAAM,MAAM,CAAC,SAAS,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QACjE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;YAC9D,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,gCAAgC;gBACrC,IAAI,EAAE,aAAa;gBACnB,KAAK,EAAE,YAAY;gBACnB,OAAO,EAAE,YAAY;aACtB,CAAC;YAEF,MAAM,MAAM,CAAC,SAAS,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QACjE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;YAC3D,MAAM,iBAAiB,GAAG,uBAAuB,EAAE,CAAC;YACpD,MAAM,gBAAgB,GAAG,IAAI,oBAAoB,CAAC,iBAAiB,EAAE,GAAG,CAAC,CAAC;YAE1E,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,0BAA0B;gBAC/B,IAAI,EAAE,OAAO;gBACb,KAAK,EAAE,MAAM;gBACb,OAAO,EAAE;;;;;;;;;SASR;aACF,CAAC;YAEF,MAAM,MAAM,CAAC,gBAAgB,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,gCAAgC,CAAC,CAAC;QACxG,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;YAC3C,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,2BAA2B;gBAChC,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,OAAO;gBACd,OAAO,EAAE;;;;;;;;;SASR;aACF,CAAC;YAEF,MAAM,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YAC1B,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YACpD,MAAM,KAAK,GAAG,IAAI,IAAI,EAAE,CAAC;YAEzB,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,WAAW,CAAC,OAAO,EAAE,CAAC,CAAC,sBAAsB,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;YACvF,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,WAAW,CAAC,OAAO,EAAE,CAAC,CAAC,mBAAmB,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QACrF,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;YACrD,MAAM,mBAAmB,GAAG,IAAI,oBAAoB,CAAC,cAAc,EAAE,GAAG,CAAC,CAAC;YAE1E,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,4BAA4B;gBACjC,IAAI,EAAE,SAAS;gBACf,KAAK,EAAE,QAAQ;gBACf,OAAO,EAAE;;;;;;;;;;SAUR;aACF,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,mBAAmB,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YAE9D,qDAAqD;YACrD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,yBAAyB,EAAE,KAAK,IAAI,EAAE;YACvC,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,wCAAwC;gBAC7C,IAAI,EAAE,qBAAqB;gBAC3B,KAAK,EAAE,eAAe;gBACtB,OAAO,EAAE;;;;;;;;;;;;;CAahB;aACM,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,WAAW,CAAC,C
AAC;YAEpD,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;YAC7B,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QACtD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;YACvD,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,6BAA6B;gBAClC,IAAI,EAAE,UAAU;gBAChB,KAAK,EAAE,mBAAmB;gBAC1B,OAAO,EAAE;;;;;iCAKgB;gBACzB,aAAa,EAAE,kBAAkB;aAClC,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YAEpD,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;YAC7B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,0BAA0B,EAAE,GAAG,EAAE;QACxC,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;YAC9C,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,6BAA6B;gBAClC,IAAI,EAAE,UAAU;gBAChB,KAAK,EAAE,eAAe;gBACtB,OAAO,EAAE;;;;;;;;;;;;;;SAcR;aACF,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YAEpD,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;YAC7B,oCAAoC;YACpC,oDAAoD;YACpD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;YAClD,MAAM,WAAW,GAAgB;gBAC/B,GAAG,EAAE,8BAA8B;gBACnC,IAAI,EAAE,WAAW;gBACjB,KAAK,EAAE,UAAU;gBACjB,OAAO,EAAE;;;;;;;;;;;;;;;;;;SAkBR;aACF,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YAEpD,MAAM,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;YAC7B,oCAAoC;YACpC,MAAM,cAAc,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC;YACpF,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1,10 @@
1
+ import { GeneratedResponse } from "./response-generator.js";
2
+ /** One cache entry: a query string paired with the response stored for it. */ export interface CachedResponse {
3
+ /** The query text the entry was stored under. */ query: string;
4
+ /** The generated response saved for that query. */ response: GeneratedResponse;
5
+ }
6
+ /** Cache of generated responses looked up by query string. */ export declare class RAGCache {
7
+ /** Backing store for the cached entries. */ private cache;
8
+ /** Resolves to the entry stored for `query`, or `null` when there is none. */ getCachedResponse(query: string): Promise<CachedResponse | null>;
9
+ /** Stores `response` under `query`. */ cacheResponse(query: string, response: GeneratedResponse): Promise<void>;
10
+ }
@@ -0,0 +1,10 @@
1
/**
 * In-memory cache of generated responses, keyed by the exact query string.
 * Entries are held in a Map owned by the instance; nothing here removes them.
 */
export class RAGCache {
    cache = new Map();

    /**
     * Look up the entry stored for a query.
     * @param query Query string used as the cache key.
     * @returns The stored `{ query, response }` entry, or `null` on a miss.
     */
    async getCachedResponse(query) {
        const entry = this.cache.get(query);
        if (!entry) {
            return null;
        }
        return entry;
    }

    /**
     * Store a response under its query, replacing any earlier entry for that query.
     * @param query Query string used as the cache key.
     * @param response Response to remember for the query.
     */
    async cacheResponse(query, response) {
        const entry = { query, response };
        this.cache.set(query, entry);
    }
}
10
+ //# sourceMappingURL=cache.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cache.js","sourceRoot":"","sources":["../../src/rag/cache.ts"],"names":[],"mappings":"AAOA,MAAM,OAAO,QAAQ;IACX,KAAK,GAAgC,IAAI,GAAG,EAAE,CAAC;IAEvD,KAAK,CAAC,iBAAiB,CAAC,KAAa;QACnC,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC;IACvC,CAAC;IAED,KAAK,CAAC,aAAa,CAAC,KAAa,EAAE,QAA2B;QAC5D,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;IAC7C,CAAC;CACF"}
@@ -0,0 +1,11 @@
1
/** The three strings produced by `CodeGenerator.generateCodeExample`. */
+ export interface CodeExample {
2
/** Import statement text of the form `import <name> from '<path>';`. */
+ imports: string;
3
/** Props rendered as `key: <JSON value>` pairs joined with `, ` (empty string when there are no props). */
+ props: string;
4
/** Self-closing tag text for the component, with the props string placed inside it when non-empty. */
+ usage: string;
5
+ }
6
/** Produces import / props / usage snippet strings from loosely typed component information. */
+ export declare class CodeGenerator {
7
/**
 * Build a code example for a component.
 * @param component Component info; the compiled implementation reads its `name` and `props` properties.
 * @param context Generation context; the compiled implementation reads its `importPath` property.
 * @returns The generated `imports`, `props` and `usage` strings.
 */
+ generateCodeExample(component: any, context: any): Promise<CodeExample>;
8
/** Builds the import statement string. */
+ private generateImports;
9
/** Renders the props object as a `key: value` listing. */
+ private generateProps;
10
/** Builds the self-closing usage tag string. */
+ private generateUsage;
11
+ }
@@ -0,0 +1,30 @@
1
+ export class CodeGenerator {
2
+ async generateCodeExample(component, context) {
3
+ // Extract component name and props from the component info
4
+ const componentName = component.name || 'Example';
5
+ const componentProps = component.props || {};
6
+ // Generate imports based on context
7
+ const imports = this.generateImports(componentName, context);
8
+ // Generate props string
9
+ const props = this.generateProps(componentProps);
10
+ // Generate usage example
11
+ const usage = this.generateUsage(componentName, props);
12
+ return { imports, props, usage };
13
+ }
14
+ generateImports(componentName, context) {
15
+ // Look for import path in context
16
+ const importPath = context.importPath || 'example';
17
+ return `import ${componentName} from '${importPath}';`;
18
+ }
19
+ generateProps(props) {
20
+ return Object.entries(props)
21
+ .map(([key, value]) => `${key}: ${JSON.stringify(value)}`)
22
+ .join(', ');
23
+ }
24
+ generateUsage(componentName, props) {
25
+ return props
26
+ ? `<${componentName} ${props} />`
27
+ : `<${componentName} />`;
28
+ }
29
+ }
30
+ //# sourceMappingURL=code-generator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"code-generator.js","sourceRoot":"","sources":["../../src/rag/code-generator.ts"],"names":[],"mappings":"AAMA,MAAM,OAAO,aAAa;IACxB,KAAK,CAAC,mBAAmB,CAAC,SAAc,EAAE,OAAY;QACpD,2DAA2D;QAC3D,MAAM,aAAa,GAAG,SAAS,CAAC,IAAI,IAAI,SAAS,CAAC;QAClD,MAAM,cAAc,GAAG,SAAS,CAAC,KAAK,IAAI,EAAE,CAAC;QAE7C,oCAAoC;QACpC,MAAM,OAAO,GAAG,IAAI,CAAC,eAAe,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;QAE7D,wBAAwB;QACxB,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,CAAC;QAEjD,yBAAyB;QACzB,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;QAEvD,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;IACnC,CAAC;IAEO,eAAe,CAAC,aAAqB,EAAE,OAAY;QACzD,kCAAkC;QAClC,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,SAAS,CAAC;QACnD,OAAO,UAAU,aAAa,UAAU,UAAU,IAAI,CAAC;IACzD,CAAC;IAEO,aAAa,CAAC,KAA0B;QAC9C,OAAO,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC;aACzB,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,KAAK,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC;aACzD,IAAI,CAAC,IAAI,CAAC,CAAC;IAChB,CAAC;IAEO,aAAa,CAAC,aAAqB,EAAE,KAAa;QACxD,OAAO,KAAK;YACV,CAAC,CAAC,IAAI,aAAa,IAAI,KAAK,KAAK;YACjC,CAAC,CAAC,IAAI,aAAa,KAAK,CAAC;IAC7B,CAAC;CACF"}
@@ -0,0 +1,23 @@
1
+ import { EnhancedChunk } from "../types/rag.js";
2
+ import { ComponentRelationship } from "../crawler/content-extractor-types.js";
3
/** Result returned by `ContextAssembler.assembleContext`. */
+ export interface AssembledContext {
4
/** Chunks ordered by type group: overview first, then api, usage and example. */
+ hierarchicalContext: EnhancedChunk[];
5
/** Relationships collected from all chunks, with duplicates removed. */
+ relationships: ComponentRelationship[];
6
/** Metadata consolidated across all input chunks. */
+ metadata: {
7
/** Space-joined contextual summaries taken from up to three chunks with the highest `sourceReliability`. */
+ summary: string;
8
/** Distinct semantic tags found on the chunks. */
+ topics: string[];
9
/** Highest complexity level seen among the chunks ('basic' when none is higher). */
+ complexity: 'basic' | 'intermediate' | 'advanced';
10
/** Distinct prerequisites found on the chunks. */
+ prerequisites: string[];
11
/** Distinct `framework` values found on the chunks. */
+ frameworks: string[];
12
/** Distinct `language` values found on the chunks. */
+ languages: string[];
13
+ };
14
+ }
15
/** Assembles a list of chunks into ordered context, relationships and consolidated metadata. */
+ export declare class ContextAssembler {
16
/**
 * Assemble the given chunks.
 * @param chunks Chunks to group, order and summarise.
 * @returns The ordered chunks, de-duplicated relationships and consolidated metadata.
 */
+ assembleContext(chunks: EnhancedChunk[]): Promise<AssembledContext>;
17
/** Buckets chunks by `metadata.type`, defaulting to 'overview'. */
+ private groupChunksByType;
18
/** Collects the `relationships` arrays of all chunks and removes duplicates. */
+ private extractRelationships;
19
/** Concatenates the overview, api, usage and example groups in that order. */
+ private buildHierarchy;
20
/** Merges tags, prerequisites, frameworks, languages and complexity across chunks. */
+ private consolidateMetadata;
21
/** Joins the contextual summaries of up to three chunks ranked by `sourceReliability`. */
+ private generateSummary;
22
/** Keeps the first relationship for each source / target / type combination. */
+ private deduplicateRelationships;
23
+ }
@@ -0,0 +1,113 @@
1
+ export class ContextAssembler {
2
+ async assembleContext(chunks) {
3
+ // Group chunks by type
4
+ const groups = this.groupChunksByType(chunks);
5
+ // Extract relationships between chunks
6
+ const relationships = this.extractRelationships(chunks);
7
+ // Build hierarchical context
8
+ const hierarchicalContext = this.buildHierarchy(groups);
9
+ // Consolidate metadata
10
+ const metadata = this.consolidateMetadata(chunks);
11
+ return {
12
+ hierarchicalContext,
13
+ relationships,
14
+ metadata
15
+ };
16
+ }
17
+ groupChunksByType(chunks) {
18
+ const groups = new Map();
19
+ for (const chunk of chunks) {
20
+ const type = chunk.metadata.type || 'overview';
21
+ if (!groups.has(type)) {
22
+ groups.set(type, { type, chunks: [] });
23
+ }
24
+ groups.get(type).chunks.push(chunk);
25
+ }
26
+ return groups;
27
+ }
28
+ extractRelationships(chunks) {
29
+ const relationships = [];
30
+ // Collect all relationships from chunks
31
+ for (const chunk of chunks) {
32
+ if (chunk.relationships) {
33
+ relationships.push(...chunk.relationships);
34
+ }
35
+ }
36
+ // Remove duplicates
37
+ return this.deduplicateRelationships(relationships);
38
+ }
39
+ buildHierarchy(groups) {
40
+ const hierarchy = [];
41
+ // Start with overview chunks
42
+ if (groups.has('overview')) {
43
+ hierarchy.push(...groups.get('overview').chunks);
44
+ }
45
+ // Add API documentation
46
+ if (groups.has('api')) {
47
+ hierarchy.push(...groups.get('api').chunks);
48
+ }
49
+ // Add usage examples
50
+ if (groups.has('usage')) {
51
+ hierarchy.push(...groups.get('usage').chunks);
52
+ }
53
+ // Add code examples
54
+ if (groups.has('example')) {
55
+ hierarchy.push(...groups.get('example').chunks);
56
+ }
57
+ return hierarchy;
58
+ }
59
+ consolidateMetadata(chunks) {
60
+ const topics = new Set();
61
+ const prerequisites = new Set();
62
+ const frameworks = new Set();
63
+ const languages = new Set();
64
+ let maxComplexity = 'basic';
65
+ for (const chunk of chunks) {
66
+ // Collect topics from semantic tags
67
+ chunk.metadata.semanticTags?.forEach(tag => topics.add(tag));
68
+ // Collect prerequisites
69
+ chunk.metadata.prerequisites?.forEach(prereq => prerequisites.add(prereq));
70
+ // Track frameworks and languages
71
+ if (chunk.metadata.framework)
72
+ frameworks.add(chunk.metadata.framework);
73
+ if (chunk.metadata.language)
74
+ languages.add(chunk.metadata.language);
75
+ // Determine highest complexity
76
+ if (chunk.metadata.complexity === 'advanced' ||
77
+ (chunk.metadata.complexity === 'intermediate' && maxComplexity === 'basic')) {
78
+ maxComplexity = chunk.metadata.complexity;
79
+ }
80
+ }
81
+ // Generate a summary based on the most relevant chunks
82
+ const summary = this.generateSummary(chunks);
83
+ return {
84
+ summary,
85
+ topics: Array.from(topics),
86
+ complexity: maxComplexity,
87
+ prerequisites: Array.from(prerequisites),
88
+ frameworks: Array.from(frameworks),
89
+ languages: Array.from(languages)
90
+ };
91
+ }
92
+ generateSummary(chunks) {
93
+ // Sort chunks by relevance (using sourceReliability as a proxy)
94
+ const sortedChunks = [...chunks].sort((a, b) => (b.metadata.sourceReliability || 0) - (a.metadata.sourceReliability || 0));
95
+ // Take the most relevant chunks' summaries
96
+ const summaries = sortedChunks
97
+ .slice(0, 3)
98
+ .map(chunk => chunk.metadata.contextualSummary || '')
99
+ .filter(Boolean);
100
+ return summaries.join(' ');
101
+ }
102
+ deduplicateRelationships(relationships) {
103
+ const seen = new Set();
104
+ return relationships.filter(rel => {
105
+ const key = `${rel.sourceComponent}-${rel.targetComponent}-${rel.type}`;
106
+ if (seen.has(key))
107
+ return false;
108
+ seen.add(key);
109
+ return true;
110
+ });
111
+ }
112
+ }
113
+ //# sourceMappingURL=context-assembler.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"context-assembler.js","sourceRoot":"","sources":["../../src/rag/context-assembler.ts"],"names":[],"mappings":"AAqBA,MAAM,OAAO,gBAAgB;IAC3B,KAAK,CAAC,eAAe,CAAC,MAAuB;QAC3C,uBAAuB;QACvB,MAAM,MAAM,GAAG,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC;QAE9C,uCAAuC;QACvC,MAAM,aAAa,GAAG,IAAI,CAAC,oBAAoB,CAAC,MAAM,CAAC,CAAC;QAExD,6BAA6B;QAC7B,MAAM,mBAAmB,GAAG,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;QAExD,uBAAuB;QACvB,MAAM,QAAQ,GAAG,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAC;QAElD,OAAO;YACL,mBAAmB;YACnB,aAAa;YACb,QAAQ;SACT,CAAC;IACJ,CAAC;IAEO,iBAAiB,CAAC,MAAuB;QAC/C,MAAM,MAAM,GAAG,IAAI,GAAG,EAAsB,CAAC;QAE7C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,CAAC,IAAI,IAAI,UAAU,CAAC;YAC/C,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBACtB,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,CAAC;YACzC,CAAC;YACD,MAAM,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACvC,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,oBAAoB,CAAC,MAAuB;QAClD,MAAM,aAAa,GAA4B,EAAE,CAAC;QAElD,wCAAwC;QACxC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,IAAI,KAAK,CAAC,aAAa,EAAE,CAAC;gBACxB,aAAa,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,aAAa,CAAC,CAAC;YAC7C,CAAC;QACH,CAAC;QAED,oBAAoB;QACpB,OAAO,IAAI,CAAC,wBAAwB,CAAC,aAAa,CAAC,CAAC;IACtD,CAAC;IAEO,cAAc,CAAC,MAA+B;QACpD,MAAM,SAAS,GAAoB,EAAE,CAAC;QAEtC,6BAA6B;QAC7B,IAAI,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YAC3B,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,UAAU,CAAE,CAAC,MAAM,CAAC,CAAC;QACpD,CAAC;QAED,wBAAwB;QACxB,IAAI,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YACtB,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,CAAE,CAAC,MAAM,CAAC,CAAC;QAC/C,CAAC;QAED,qBAAqB;QACrB,IAAI,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YACxB,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,OAAO,CAAE,CAAC,MAAM,CAAC,CAAC;QACjD,CAAC;QAED,oBAAoB;QACpB,IAAI,MAAM,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;YAC1B,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,SAAS,CAAE,CAAC,MAAM,CAAC,CAAC;QACnD,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,mBAAmB,CAAC,MAAuB;QACjD,MAA
M,MAAM,GAAG,IAAI,GAAG,EAAU,CAAC;QACjC,MAAM,aAAa,GAAG,IAAI,GAAG,EAAU,CAAC;QACxC,MAAM,UAAU,GAAG,IAAI,GAAG,EAAU,CAAC;QACrC,MAAM,SAAS,GAAG,IAAI,GAAG,EAAU,CAAC;QACpC,IAAI,aAAa,GAA0C,OAAO,CAAC;QAEnE,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,oCAAoC;YACpC,KAAK,CAAC,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;YAE7D,wBAAwB;YACxB,KAAK,CAAC,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;YAE3E,iCAAiC;YACjC,IAAI,KAAK,CAAC,QAAQ,CAAC,SAAS;gBAAE,UAAU,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;YACvE,IAAI,KAAK,CAAC,QAAQ,CAAC,QAAQ;gBAAE,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YAEpE,+BAA+B;YAC/B,IAAI,KAAK,CAAC,QAAQ,CAAC,UAAU,KAAK,UAAU;gBACzC,CAAC,KAAK,CAAC,QAAQ,CAAC,UAAU,KAAK,cAAc,IAAI,aAAa,KAAK,OAAO,CAAC,EAAE,CAAC;gBAC/E,aAAa,GAAG,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC;YAC5C,CAAC;QACH,CAAC;QAED,uDAAuD;QACvD,MAAM,OAAO,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC;QAE7C,OAAO;YACL,OAAO;YACP,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC;YAC1B,UAAU,EAAE,aAAa;YACzB,aAAa,EAAE,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC;YACxC,UAAU,EAAE,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC;YAClC,SAAS,EAAE,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC;SACjC,CAAC;IACJ,CAAC;IAEO,eAAe,CAAC,MAAuB;QAC7C,gEAAgE;QAChE,MAAM,YAAY,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAC7C,CAAC,CAAC,CAAC,QAAQ,CAAC,iBAAiB,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,iBAAiB,IAAI,CAAC,CAAC,CAC1E,CAAC;QAEF,2CAA2C;QAC3C,MAAM,SAAS,GAAG,YAAY;aAC3B,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;aACX,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,iBAAiB,IAAI,EAAE,CAAC;aACpD,MAAM,CAAC,OAAO,CAAC,CAAC;QAEnB,OAAO,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;IAEO,wBAAwB,CAAC,aAAsC;QACrE,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAC/B,OAAO,aAAa,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;YAChC,MAAM,GAAG,GAAG,GAAG,GAAG,CAAC,eAAe,IAAI,GAAG,CAAC,eAAe,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;YACxE,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,OAAO,KAAK,CAAC;YAChC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACd,OAAO,IAAI,CAAC;QACd,
CAAC,CAAC,CAAC;IACL,CAAC;CACF"}
@@ -0,0 +1,55 @@
1
+ import { DocumentStore } from "../storage/storage.js";
2
+ import { EmbeddingsProvider } from "../embeddings/types.js";
3
/** Value that `DocsSearch.search` resolves to. */
+ export interface DocsSearchResult {
4
/** Answer text. */
+ answer: string;
5
/** Code example strings accompanying the answer. */
+ codeExamples: string[];
6
/** Loosely typed component details; NOTE(review): element shape is not visible from this declaration. */
+ componentDetails: any[];
7
/** Validation outcome; presumably produced by the response validator held by DocsSearch — confirm against the implementation. */
+ validation: {
8
+ factCheck: boolean;
9
+ codeCheck: boolean;
10
+ consistencyCheck: boolean;
11
/** Free-form detail strings for the checks above. */
+ details: string[];
12
+ };
13
/** Limitation notes; presumably filled by `identifyLimitations` — confirm against the implementation. */
+ limitations: string[];
14
+ }
15
/** Options accepted by `DocsSearch.search`. Only `urls` is required. */
+ export interface DocsSearchOptions {
16
/** URLs to search. */
+ urls: string[];
17
/** Optional component names; presumably forwarded to query expansion, which documents a matching parameter — confirm. */
+ componentNames?: string[];
18
/** Optional prop names; presumably forwarded to query expansion, which documents a matching parameter — confirm. */
+ propNames?: string[];
19
/** NOTE(review): effect is not visible from this declaration file. */
+ strictMode?: boolean;
20
/** NOTE(review): presumably an upper bound on chunks used as sources — confirm against the implementation. */
+ maxSourceChunks?: number;
21
/** Optional tuning switches; their defaults are not visible from this declaration file. */
+ advanced?: {
22
+ useQueryExpansion?: boolean;
23
+ useMultiQuery?: boolean;
24
+ maxQueryVariations?: number;
25
+ mergeUrlResults?: boolean;
26
+ };
27
+ }
28
/**
 * Declaration of the documentation search entry point. Only signatures are
 * visible here; the private members below are untyped because they are private.
 */
+ export declare class DocsSearch {
29
+ private queryProcessor;
30
+ private contextRetriever;
31
+ private contextAssembler;
32
+ private responseGenerator;
33
+ private responseValidator;
34
+ private store;
35
+ private openai;
36
/**
 * @param openaiApiKey API key string; presumably used to create the `openai` member — confirm against the implementation.
 * @param store Document store to search.
 * @param embeddings Embeddings provider.
 */
+ constructor(openaiApiKey: string, store: DocumentStore, embeddings: EmbeddingsProvider);
37
+ /**
38
+ * Generate alternative phrasings of the query to improve search recall
39
+ * @param query The original query
40
+ * @param componentNames Optional component names to include in variations
41
+ * @param propNames Optional prop names to include in variations
42
+ * @returns Array of query variations
43
+ */
44
+ private expandQuery;
45
+ /**
46
+ * Search across multiple URLs and merge the results
47
+ * @param queryIntent The query intent
48
+ * @param urls Array of URLs to search
49
+ * @param options Retrieval options
50
+ * @returns Combined retrieval result
51
+ */
52
+ private searchAcrossUrls;
53
/**
 * Run a search for `query` using the given options.
 * @param query Query text.
 * @param options Search options; `urls` is required.
 * @returns Promise resolving to a DocsSearchResult.
 */
+ search(query: string, options: DocsSearchOptions): Promise<DocsSearchResult>;
54
/** NOTE(review): presumably supplies `DocsSearchResult.limitations` — confirm against the implementation. */
+ private identifyLimitations;
55
+ }