@twelvehart/supermemory-runtime 1.0.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/.env.example +57 -0
  2. package/README.md +374 -0
  3. package/dist/index.js +189 -0
  4. package/dist/mcp/index.js +1132 -0
  5. package/docker-compose.prod.yml +91 -0
  6. package/docker-compose.yml +358 -0
  7. package/drizzle/0000_dapper_the_professor.sql +159 -0
  8. package/drizzle/0001_api_keys.sql +51 -0
  9. package/drizzle/meta/0000_snapshot.json +1532 -0
  10. package/drizzle/meta/_journal.json +13 -0
  11. package/drizzle.config.ts +20 -0
  12. package/package.json +114 -0
  13. package/scripts/add-extraction-job.ts +122 -0
  14. package/scripts/benchmark-pgvector.ts +122 -0
  15. package/scripts/bootstrap.sh +209 -0
  16. package/scripts/check-runtime-pack.ts +111 -0
  17. package/scripts/claude-mcp-config.ts +336 -0
  18. package/scripts/docker-entrypoint.sh +183 -0
  19. package/scripts/doctor.ts +377 -0
  20. package/scripts/init-db.sql +33 -0
  21. package/scripts/install.sh +1110 -0
  22. package/scripts/mcp-setup.ts +271 -0
  23. package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
  24. package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
  25. package/scripts/migrations/003_create_hnsw_index.sql +94 -0
  26. package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
  27. package/scripts/migrations/005_create_chunks_table.sql +95 -0
  28. package/scripts/migrations/006_create_processing_queue.sql +45 -0
  29. package/scripts/migrations/generate_test_data.sql +42 -0
  30. package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
  31. package/scripts/migrations/run_migrations.sh +286 -0
  32. package/scripts/migrations/test_hnsw_index.sql +255 -0
  33. package/scripts/pre-commit-secrets +282 -0
  34. package/scripts/run-extraction-worker.ts +46 -0
  35. package/scripts/run-phase1-tests.sh +291 -0
  36. package/scripts/setup.ts +222 -0
  37. package/scripts/smoke-install.sh +12 -0
  38. package/scripts/test-health-endpoint.sh +328 -0
  39. package/src/api/index.ts +2 -0
  40. package/src/api/middleware/auth.ts +80 -0
  41. package/src/api/middleware/csrf.ts +308 -0
  42. package/src/api/middleware/errorHandler.ts +166 -0
  43. package/src/api/middleware/rateLimit.ts +360 -0
  44. package/src/api/middleware/validation.ts +514 -0
  45. package/src/api/routes/documents.ts +286 -0
  46. package/src/api/routes/profiles.ts +237 -0
  47. package/src/api/routes/search.ts +71 -0
  48. package/src/api/stores/index.ts +58 -0
  49. package/src/config/bootstrap-env.ts +3 -0
  50. package/src/config/env.ts +71 -0
  51. package/src/config/feature-flags.ts +25 -0
  52. package/src/config/index.ts +140 -0
  53. package/src/config/secrets.config.ts +291 -0
  54. package/src/db/client.ts +92 -0
  55. package/src/db/index.ts +73 -0
  56. package/src/db/postgres.ts +72 -0
  57. package/src/db/schema/chunks.schema.ts +31 -0
  58. package/src/db/schema/containers.schema.ts +46 -0
  59. package/src/db/schema/documents.schema.ts +49 -0
  60. package/src/db/schema/embeddings.schema.ts +32 -0
  61. package/src/db/schema/index.ts +11 -0
  62. package/src/db/schema/memories.schema.ts +72 -0
  63. package/src/db/schema/profiles.schema.ts +34 -0
  64. package/src/db/schema/queue.schema.ts +59 -0
  65. package/src/db/schema/relationships.schema.ts +42 -0
  66. package/src/db/schema.ts +223 -0
  67. package/src/db/worker-connection.ts +47 -0
  68. package/src/index.ts +235 -0
  69. package/src/mcp/CLAUDE.md +1 -0
  70. package/src/mcp/index.ts +1380 -0
  71. package/src/mcp/legacyState.ts +22 -0
  72. package/src/mcp/rateLimit.ts +358 -0
  73. package/src/mcp/resources.ts +309 -0
  74. package/src/mcp/results.ts +104 -0
  75. package/src/mcp/tools.ts +401 -0
  76. package/src/queues/config.ts +119 -0
  77. package/src/queues/index.ts +289 -0
  78. package/src/sdk/client.ts +225 -0
  79. package/src/sdk/errors.ts +266 -0
  80. package/src/sdk/http.ts +560 -0
  81. package/src/sdk/index.ts +244 -0
  82. package/src/sdk/resources/base.ts +65 -0
  83. package/src/sdk/resources/connections.ts +204 -0
  84. package/src/sdk/resources/documents.ts +163 -0
  85. package/src/sdk/resources/index.ts +10 -0
  86. package/src/sdk/resources/memories.ts +150 -0
  87. package/src/sdk/resources/search.ts +60 -0
  88. package/src/sdk/resources/settings.ts +36 -0
  89. package/src/sdk/types.ts +674 -0
  90. package/src/services/chunking/index.ts +451 -0
  91. package/src/services/chunking.service.ts +650 -0
  92. package/src/services/csrf.service.ts +252 -0
  93. package/src/services/documents.repository.ts +219 -0
  94. package/src/services/documents.service.ts +191 -0
  95. package/src/services/embedding.service.ts +404 -0
  96. package/src/services/extraction.service.ts +300 -0
  97. package/src/services/extractors/code.extractor.ts +451 -0
  98. package/src/services/extractors/index.ts +9 -0
  99. package/src/services/extractors/markdown.extractor.ts +461 -0
  100. package/src/services/extractors/pdf.extractor.ts +315 -0
  101. package/src/services/extractors/text.extractor.ts +118 -0
  102. package/src/services/extractors/url.extractor.ts +243 -0
  103. package/src/services/index.ts +235 -0
  104. package/src/services/ingestion.service.ts +177 -0
  105. package/src/services/llm/anthropic.ts +400 -0
  106. package/src/services/llm/base.ts +460 -0
  107. package/src/services/llm/contradiction-detector.service.ts +526 -0
  108. package/src/services/llm/heuristics.ts +148 -0
  109. package/src/services/llm/index.ts +309 -0
  110. package/src/services/llm/memory-classifier.service.ts +383 -0
  111. package/src/services/llm/memory-extension-detector.service.ts +523 -0
  112. package/src/services/llm/mock.ts +470 -0
  113. package/src/services/llm/openai.ts +398 -0
  114. package/src/services/llm/prompts.ts +438 -0
  115. package/src/services/llm/types.ts +373 -0
  116. package/src/services/memory.repository.ts +1769 -0
  117. package/src/services/memory.service.ts +1338 -0
  118. package/src/services/memory.types.ts +234 -0
  119. package/src/services/persistence/index.ts +295 -0
  120. package/src/services/pipeline.service.ts +509 -0
  121. package/src/services/profile.repository.ts +436 -0
  122. package/src/services/profile.service.ts +560 -0
  123. package/src/services/profile.types.ts +270 -0
  124. package/src/services/relationships/detector.ts +1128 -0
  125. package/src/services/relationships/index.ts +268 -0
  126. package/src/services/relationships/memory-integration.ts +459 -0
  127. package/src/services/relationships/strategies.ts +132 -0
  128. package/src/services/relationships/types.ts +370 -0
  129. package/src/services/search.service.ts +761 -0
  130. package/src/services/search.types.ts +220 -0
  131. package/src/services/secrets.service.ts +384 -0
  132. package/src/services/vectorstore/base.ts +327 -0
  133. package/src/services/vectorstore/index.ts +444 -0
  134. package/src/services/vectorstore/memory.ts +286 -0
  135. package/src/services/vectorstore/migration.ts +295 -0
  136. package/src/services/vectorstore/mock.ts +403 -0
  137. package/src/services/vectorstore/pgvector.ts +695 -0
  138. package/src/services/vectorstore/types.ts +247 -0
  139. package/src/startup.ts +389 -0
  140. package/src/types/api.types.ts +193 -0
  141. package/src/types/document.types.ts +103 -0
  142. package/src/types/index.ts +241 -0
  143. package/src/types/profile.base.ts +133 -0
  144. package/src/utils/errors.ts +447 -0
  145. package/src/utils/id.ts +15 -0
  146. package/src/utils/index.ts +101 -0
  147. package/src/utils/logger.ts +313 -0
  148. package/src/utils/sanitization.ts +501 -0
  149. package/src/utils/secret-validation.ts +273 -0
  150. package/src/utils/synonyms.ts +188 -0
  151. package/src/utils/validation.ts +581 -0
  152. package/src/workers/chunking.worker.ts +242 -0
  153. package/src/workers/embedding.worker.ts +358 -0
  154. package/src/workers/extraction.worker.ts +346 -0
  155. package/src/workers/indexing.worker.ts +505 -0
  156. package/tsconfig.json +38 -0
@@ -0,0 +1,451 @@
1
+ /**
2
+ * Code extractor - AST-aware extraction and chunking for source code
3
+ */
4
+
5
+ import { ExtractionResult, ExtractorInterface, ContentType } from '../../types/document.types.js'
6
+
7
/**
 * One logical unit found in a source file (an import, a class, a function, ...).
 */
export interface CodeBlock {
  /** Kind of construct this block represents. */
  type:
    | 'function'
    | 'class'
    | 'method'
    | 'interface'
    | 'type'
    | 'import'
    | 'export'
    | 'comment'
    | 'other'
  /** Identifier captured for the construct (for imports: the captured module specifier, or `'import'`). */
  name: string
  /** Source text of the block, lines joined with `\n`. */
  content: string
  /** 1-based line number of the first line of the block. */
  startLine: number
  /** 1-based line number of the last line of the block (inclusive). */
  endLine: number
  /** Language label the block was parsed as. */
  language: string
  /** Name of the enclosing class; set on `method` blocks. */
  parent?: string
  /** Documentation comment / docstring associated with the block, when one was found. */
  docstring?: string
}
17
+
18
/**
 * Regular expressions used to recognise the constructs of one language.
 * Patterns that capture a name expose it as capture group 1.
 */
interface LanguagePattern {
  /** File extensions (with leading dot) associated with the language. */
  extensions: string[]

  // Required patterns
  /** Matches a function definition. */
  functionPattern: RegExp
  /** Matches a class (or class-like) definition. */
  classPattern: RegExp
  /** Matches an import statement. */
  importPattern: RegExp
  /** Matches a comment. */
  commentPattern: RegExp

  // Optional patterns - omitted when the language has no such construct
  /** Matches a method definition. */
  methodPattern?: RegExp
  /** Matches an interface definition. */
  interfacePattern?: RegExp
  /** Matches a type alias definition. */
  typePattern?: RegExp
  /** Matches a documentation comment / docstring. */
  docstringPattern?: RegExp
}
29
+
30
/**
 * Extracts structural information (imports, classes, methods, functions, ...)
 * from source code.
 *
 * Parsing is heuristic: it is driven by per-language regular expressions plus
 * brace / indentation matching, not by a real parser. Braces and parentheses
 * inside string literals or comments are counted like any other, so unusual
 * formatting can produce imprecise block boundaries.
 */
export class CodeExtractor implements ExtractorInterface {
  /** Statement keywords that the method patterns could otherwise mistake for method names. */
  private static readonly CONTROL_KEYWORDS: ReadonlySet<string> = new Set([
    'if',
    'for',
    'while',
    'switch',
    'catch',
    'with',
    'function',
    'return',
  ])

  /** A line ending with one of these tokens continues on the next line. */
  private static readonly TRAILING_CONTINUATION = /(?:=>|[=|&,(\[<+\-*?:.]|\b(?:extends|implements))$/

  /** A line starting with one of these tokens continues the previous line. */
  private static readonly LEADING_CONTINUATION = /^(?:[{|&.?:=)\],]|(?:extends|implements)\b)/

  /** Captures the receiver type of a Go method declaration, e.g. `func (r *Repo) Save()` -> `Repo`. */
  private static readonly GO_RECEIVER = /^func\s+\(\s*(?:\w+\s+)?\*?\s*(\w+)/

  /**
   * Core language patterns - supports TypeScript, JavaScript, Python, and Go.
   * Other languages can be added as needed based on usage patterns.
   */
  private readonly languages: Record<string, LanguagePattern> = {
    typescript: {
      extensions: ['.ts', '.tsx'],
      functionPattern: /^(?:export\s+)?(?:async\s+)?function\s+(\w+)/m,
      classPattern: /^(?:export\s+)?(?:abstract\s+)?class\s+(\w+)/m,
      // Accepts any run of modifiers so `private foo() {` and `static async bar() {` are recognised.
      methodPattern:
        /^\s+(?:(?:public|private|protected|static|override|async|abstract)\s+)*(?:get\s+|set\s+)?(\w+)\s*\([^)]*\)\s*(?::\s*\S+)?\s*\{/m,
      interfacePattern: /^(?:export\s+)?interface\s+(\w+)/m,
      typePattern: /^(?:export\s+)?type\s+(\w+)/m,
      importPattern: /^import\s+.*from\s+['"](.+)['"]/m,
      commentPattern: /\/\/.*$|\/\*[\s\S]*?\*\//,
      docstringPattern: /\/\*\*[\s\S]*?\*\//,
    },
    javascript: {
      extensions: ['.js', '.jsx', '.mjs', '.cjs'],
      functionPattern: /^(?:export\s+)?(?:async\s+)?function\s+(\w+)/m,
      classPattern: /^(?:export\s+)?class\s+(\w+)/m,
      methodPattern: /^\s+(?:async\s+)?(?:static\s+)?(?:get\s+|set\s+)?(\w+)\s*\([^)]*\)\s*\{/m,
      importPattern: /^(?:import\s+.*from\s+['"](.+)['"]|require\(['"](.+)['"]\))/m,
      commentPattern: /\/\/.*$|\/\*[\s\S]*?\*\//,
      docstringPattern: /\/\*\*[\s\S]*?\*\//,
    },
    python: {
      extensions: ['.py', '.pyw'],
      functionPattern: /^(?:async\s+)?def\s+(\w+)/m,
      classPattern: /^class\s+(\w+)/m,
      methodPattern: /^\s+(?:async\s+)?def\s+(\w+)/m,
      importPattern: /^(?:from\s+(\S+)\s+)?import\s+/m,
      commentPattern: /#.*$/,
      docstringPattern: /"""[\s\S]*?"""|'''[\s\S]*?'''/,
    },
    go: {
      extensions: ['.go'],
      functionPattern: /^func\s+(\w+)/m,
      classPattern: /^type\s+(\w+)\s+struct/m,
      methodPattern: /^func\s+\([^)]+\)\s+(\w+)/m,
      interfacePattern: /^type\s+(\w+)\s+interface/m,
      importPattern: /^import\s+(?:\(\s*)?["']([^"']+)["']/m,
      commentPattern: /\/\/.*$|\/\*[\s\S]*?\*\//,
    },
  }

  /**
   * Check if content appears to be source code.
   *
   * Each heuristic that fires adds one point; content scoring two or more
   * points is treated as code.
   *
   * @param content - Text to inspect.
   * @returns `true` when the content looks like source code; `false` for
   *   non-string or empty input.
   */
  canHandle(content: string): boolean {
    if (typeof content !== 'string' || content.length === 0) {
      return false
    }

    // Check for common code patterns (focusing on core languages)
    const codePatterns = [
      /^import\s+/m, // JS/TS/Python/Go
      /^export\s+/m, // JS/TS
      /^(?:const|let|var)\s+\w+\s*=/m, // JS/TS
      /^function\s+\w+/m, // JS/TS
      /^class\s+\w+/m, // JS/TS/Python
      /^def\s+\w+/m, // Python
      /^func\s+\w+/m, // Go
      /^package\s+\w+/m, // Go
      /:\s*(?:string|number|boolean)/m, // TS
      /^\s+self\./m, // Python
      /:\s*=\s*/, // Go
      /^(?:async\s+)?function\s+/m, // JS/TS
      /=>\s*\{/, // JS/TS arrow functions
    ]

    let score = 0
    for (const pattern of codePatterns) {
      if (pattern.test(content)) score++
    }

    // Check bracket/brace balance (code usually has balanced braces)
    const openBraces = (content.match(/\{/g) ?? []).length
    const closeBraces = (content.match(/\}/g) ?? []).length
    if (openBraces > 0 && Math.abs(openBraces - closeBraces) < openBraces * 0.1) {
      score++
    }

    return score >= 2
  }

  /**
   * Extract code content together with structural metadata.
   *
   * @param content - Source text.
   * @param options - Optional settings; a non-empty string `language` skips
   *   auto-detection. Any other value for `language` is ignored.
   * @returns The unchanged content plus metadata, including a `codeBlocks`
   *   summary (type, name, start and end line of every block).
   */
  async extract(content: string, options?: Record<string, unknown>): Promise<ExtractionResult> {
    const requested = options?.language
    // Only trust an explicit language when it really is a usable string.
    const language = typeof requested === 'string' && requested.length > 0 ? requested : this.detectLanguage(content)
    const blocks = this.parseCodeBlocks(content, language)
    const metadata = this.extractMetadata(content, blocks, language)

    return {
      content,
      contentType: 'code' as ContentType,
      metadata: {
        ...metadata,
        codeBlocks: blocks.map((b) => ({
          type: b.type,
          name: b.name,
          startLine: b.startLine,
          endLine: b.endLine,
        })),
      },
      rawContent: content,
    }
  }

  /**
   * Detect programming language from content - supports TypeScript,
   * JavaScript, Python, and Go.
   *
   * @param content - Source text.
   * @returns `'typescript'`, `'python'`, `'go'`, or `'javascript'` (the default).
   */
  detectLanguage(content: string): string {
    // TypeScript indicators (check first since it's a superset of JavaScript)
    if (
      /:\s*(?:string|number|boolean|void|any|unknown|never)\b/.test(content) ||
      /interface\s+\w+/.test(content) ||
      /<\w+>.*>/.test(content) ||
      /as\s+(?:string|number|boolean)/.test(content)
    ) {
      return 'typescript'
    }

    // Python indicators
    if (
      /^(?:async\s+)?def\s+\w+.*:\s*$/m.test(content) ||
      /^class\s+\w+.*:\s*$/m.test(content) ||
      /^\s+self\./m.test(content)
    ) {
      return 'python'
    }

    // Go indicators. The receiver check needs the `m` flag, otherwise it only
    // matches when the very first line of the content is a method.
    if (/^package\s+\w+/m.test(content) || /^func\s+\([^)]+\)/m.test(content) || /:=/.test(content)) {
      return 'go'
    }

    // Default to JavaScript
    return 'javascript'
  }

  /**
   * Parse code into logical blocks.
   *
   * Blocks are returned in source order. A class block covers the whole
   * class; its methods are reported as additional `method` blocks nested
   * inside that range with `parent` set to the class name. Go receiver
   * methods are reported as `method` blocks whose `parent` is the receiver
   * type.
   *
   * @param content - Source text.
   * @param language - Language label; unknown labels fall back to the
   *   JavaScript patterns.
   * @returns The blocks that were recognised (possibly empty).
   */
  parseCodeBlocks(content: string, language: string): CodeBlock[] {
    const blocks: CodeBlock[] = []
    const pattern = this.languages[language] ?? this.languages['javascript']
    if (!pattern) {
      return blocks
    }

    const lines = content.split('\n')
    let currentClass: string | undefined
    let classEndIndex = -1
    let i = 0

    while (i < lines.length) {
      const line = lines[i] ?? ''

      // Leaving the class body: later lines must not be attributed to it.
      if (currentClass !== undefined && i > classEndIndex) {
        currentClass = undefined
      }

      // Check for imports
      const importMatch = line.match(pattern.importPattern)
      if (importMatch) {
        blocks.push({
          type: 'import',
          name: importMatch[1] ?? importMatch[2] ?? 'import',
          content: line,
          startLine: i + 1,
          endLine: i + 1,
          language,
        })
        i++
        continue
      }

      const definition = this.matchDefinition(line, pattern, language, currentClass)
      if (!definition) {
        i++
        continue
      }

      const block = this.extractBlock(lines, i, language)
      const entry: CodeBlock = {
        type: definition.type,
        name: definition.name,
        content: block.content,
        startLine: i + 1,
        endLine: block.endLine + 1,
        language,
        docstring: this.findDocstring(lines, i, block.endLine, pattern, language),
      }
      if (definition.parent !== undefined) {
        entry.parent = definition.parent
      }
      blocks.push(entry)

      if (definition.type === 'class') {
        // Step INTO the class body so that its methods are discovered too.
        currentClass = definition.name
        classEndIndex = block.endLine
        i++
        continue
      }

      if (definition.type === 'function') {
        currentClass = undefined
      }
      i = block.endLine + 1
    }

    return blocks
  }

  /**
   * Classify a single line as the start of a definition.
   * Precedence: class, interface, type, method, function, arrow function.
   *
   * @returns The kind, name and (for methods) parent, or `undefined` when
   *   the line does not start a definition.
   */
  private matchDefinition(
    line: string,
    pattern: LanguagePattern,
    language: string,
    currentClass: string | undefined
  ): { type: CodeBlock['type']; name: string; parent?: string } | undefined {
    const className = line.match(pattern.classPattern)?.[1]
    if (className) {
      return { type: 'class', name: className }
    }

    // Interface definitions (TypeScript/Go)
    const interfaceName = pattern.interfacePattern ? line.match(pattern.interfacePattern)?.[1] : undefined
    if (interfaceName) {
      return { type: 'interface', name: interfaceName }
    }

    const typeName = pattern.typePattern ? line.match(pattern.typePattern)?.[1] : undefined
    if (typeName) {
      return { type: 'type', name: typeName }
    }

    const methodName = pattern.methodPattern ? line.match(pattern.methodPattern)?.[1] : undefined
    if (methodName && !CodeExtractor.CONTROL_KEYWORDS.has(methodName)) {
      const indented = line.startsWith(' ') || line.startsWith('\t')
      if (indented && currentClass) {
        return { type: 'method', name: methodName, parent: currentClass }
      }
      // Go methods live at top level; the receiver type plays the role of the class.
      if (!indented && language === 'go') {
        const receiver = line.match(CodeExtractor.GO_RECEIVER)?.[1]
        return receiver ? { type: 'method', name: methodName, parent: receiver } : { type: 'method', name: methodName }
      }
    }

    const functionName = line.match(pattern.functionPattern)?.[1]
    if (functionName) {
      return { type: 'function', name: functionName }
    }

    // Arrow functions assigned to const/let
    const arrowName = line.match(/^(?:export\s+)?(?:const|let)\s+(\w+)\s*=\s*(?:async\s*)?\([^)]*\)\s*=>/)?.[1]
    if (arrowName) {
      return { type: 'function', name: arrowName }
    }

    return undefined
  }

  /**
   * Find the documentation attached to the definition starting at `startIndex`.
   *
   * Python: the docstring is the first thing in the body, after the header
   * line that ends with `:`. Other languages: the doc comment immediately
   * precedes the definition (decorator lines in between are skipped).
   *
   * @returns The documentation text, or `undefined` when there is none or
   *   the language defines no docstring pattern.
   */
  private findDocstring(
    lines: string[],
    startIndex: number,
    endIndex: number,
    pattern: LanguagePattern,
    language: string
  ): string | undefined {
    const docPattern = pattern.docstringPattern
    if (!docPattern) {
      return undefined
    }

    if (language === 'python') {
      let headerEnd = startIndex
      while (headerEnd <= endIndex && !(lines[headerEnd] ?? '').trimEnd().endsWith(':')) {
        headerEnd++
      }
      if (headerEnd > endIndex) {
        return undefined
      }
      const body = lines
        .slice(headerEnd + 1, endIndex + 1)
        .join('\n')
        .trimStart()
      const match = body.match(docPattern)
      const text = match?.[0]
      // Only a string literal that opens the body is a docstring.
      return match?.index === 0 && text !== undefined ? text : undefined
    }

    let last = startIndex - 1
    while (last >= 0 && (lines[last] ?? '').trim().startsWith('@')) {
      last--
    }
    if (last < 0 || !(lines[last] ?? '').trim().endsWith('*/')) {
      return undefined
    }

    let first = last
    while (first >= 0 && !(lines[first] ?? '').includes('/*')) {
      first--
    }
    if (first < 0) {
      return undefined
    }

    const candidate = lines
      .slice(first, last + 1)
      .join('\n')
      .trim()
    // The whole candidate must be exactly one doc comment (rejects plain `/* */` and trailing comments).
    return candidate.match(docPattern)?.[0] === candidate ? candidate : undefined
  }

  /**
   * Extract a complete code block starting at `startIndex`.
   *
   * Python blocks are delimited by indentation. For other languages the
   * block ends on the line where the braces opened so far are balanced
   * again. A definition that never opens a brace (for example
   * `type ID = string` or an expression-bodied arrow function) ends at the
   * end of its statement instead of running on into the next braced block.
   *
   * @returns The block text and the 0-based index of its last line.
   */
  private extractBlock(lines: string[], startIndex: number, language: string): { content: string; endLine: number } {
    if (language === 'python') {
      return this.extractPythonBlock(lines, startIndex)
    }

    // Brace-based languages
    const baseIndent = this.indentWidth(lines[startIndex] ?? '')
    let braceCount = 0
    let groupDepth = 0 // open ( and [ seen before the first brace
    let started = false
    let endIndex = startIndex

    for (let i = startIndex; i < lines.length; i++) {
      const line = lines[i] ?? ''

      for (const char of line) {
        if (char === '{') {
          braceCount++
          started = true
        } else if (char === '}') {
          braceCount--
        } else if (!started) {
          if (char === '(' || char === '[') {
            groupDepth++
          } else if (char === ')' || char === ']') {
            groupDepth--
          }
        }
      }

      endIndex = i

      if (started) {
        if (braceCount === 0) {
          break
        }
        continue
      }

      // No brace yet: stop once the statement is finished.
      if (groupDepth <= 0 && this.isStatementComplete(lines, i, baseIndent)) {
        break
      }
    }

    const content = lines.slice(startIndex, endIndex + 1).join('\n')
    return { content, endLine: endIndex }
  }

  /**
   * Decide whether a brace-less statement ends on line `index`.
   * It continues when the line ends with a continuation token, or when the
   * next line starts with one or is indented deeper than the definition.
   */
  private isStatementComplete(lines: string[], index: number, baseIndent: number): boolean {
    const current = (lines[index] ?? '').trim()
    if (CodeExtractor.TRAILING_CONTINUATION.test(current)) {
      return false
    }

    const next = lines[index + 1]
    if (next === undefined || next.trim() === '') {
      return true
    }
    if (CodeExtractor.LEADING_CONTINUATION.test(next.trim())) {
      return false
    }
    return this.indentWidth(next) <= baseIndent
  }

  /**
   * Extract Python block (indentation-based).
   *
   * The block ends before the first non-blank line that is indented no
   * deeper than the header. Blank lines inside the block are kept; blank
   * lines trailing the block are not part of it.
   *
   * @returns The block text and the 0-based index of its last line.
   */
  private extractPythonBlock(lines: string[], startIndex: number): { content: string; endLine: number } {
    const baseIndent = this.indentWidth(lines[startIndex] ?? '')
    let endIndex = startIndex

    for (let i = startIndex + 1; i < lines.length; i++) {
      const line = lines[i] ?? ''

      // Blank lines neither end the block nor extend it on their own.
      if (line.trim() === '') {
        continue
      }

      // Block ends when we return to same or less indentation
      if (this.indentWidth(line) <= baseIndent) {
        break
      }

      endIndex = i
    }

    const content = lines.slice(startIndex, endIndex + 1).join('\n')
    return { content, endLine: endIndex }
  }

  /** Number of leading whitespace characters of a line. */
  private indentWidth(line: string): number {
    return line.match(/^(\s*)/)?.[1]?.length ?? 0
  }

  /**
   * Extract metadata from code content.
   *
   * @param content - Source text.
   * @param blocks - Blocks previously produced by `parseCodeBlocks`.
   * @param language - Language label to report.
   * @returns Size counters, per-kind block counts and the `isTestFile` /
   *   `hasDocstrings` flags.
   */
  private extractMetadata(content: string, blocks: CodeBlock[], language: string): ExtractionResult['metadata'] {
    const lines = content.split('\n')
    const words = content.split(/\s+/).filter((w) => w.length > 0)

    const countOf = (kind: CodeBlock['type']): number => blocks.filter((b) => b.type === kind).length

    // Detect test file (check for common test patterns in content).
    // The lookbehind requires a free-standing call, so `split(`, `emit(` or
    // `pattern.test(` do not count as `it(` / `test(`.
    const isTestFile =
      /(?<![\w.$])(?:describe|it|test)\s*\(/.test(content) ||
      /def\s+test_/.test(content) ||
      /#\[test\]/.test(content) ||
      /(?<![\w.$])(?:assert|expect)\s*\(/.test(content)

    return {
      source: 'code',
      language,
      mimeType: this.getMimeType(language),
      wordCount: words.length,
      charCount: content.length,
      lineCount: lines.length,
      functionCount: countOf('function'),
      classCount: countOf('class'),
      interfaceCount: countOf('interface'),
      importCount: countOf('import'),
      isTestFile,
      hasDocstrings: blocks.some((b) => Boolean(b.docstring)),
    }
  }

  /**
   * Get MIME type for language - supports TypeScript, JavaScript, Python, and Go.
   *
   * @returns The MIME type, or `'text/plain'` for any other language label.
   */
  private getMimeType(language: string): string {
    const mimeTypes: Record<string, string> = {
      typescript: 'text/typescript',
      javascript: 'text/javascript',
      python: 'text/x-python',
      go: 'text/x-go',
    }

    return mimeTypes[language] ?? 'text/plain'
  }

  /**
   * Get supported languages.
   *
   * @returns The keys of the language pattern table.
   */
  getSupportedLanguages(): string[] {
    return Object.keys(this.languages)
  }
}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Extractors barrel export
3
+ */
4
+
5
+ export { TextExtractor } from './text.extractor.js'
6
+ export { UrlExtractor } from './url.extractor.js'
7
+ export { PdfExtractor } from './pdf.extractor.js'
8
+ export { MarkdownExtractor, type MarkdownSection } from './markdown.extractor.js'
9
+ export { CodeExtractor, type CodeBlock } from './code.extractor.js'