@dependabit/detector 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/CHANGELOG.md +10 -0
  2. package/LICENSE +21 -0
  3. package/README.md +32 -0
  4. package/dist/detector.d.ts +64 -0
  5. package/dist/detector.d.ts.map +1 -0
  6. package/dist/detector.js +578 -0
  7. package/dist/detector.js.map +1 -0
  8. package/dist/diff-parser.d.ts +53 -0
  9. package/dist/diff-parser.d.ts.map +1 -0
  10. package/dist/diff-parser.js +203 -0
  11. package/dist/diff-parser.js.map +1 -0
  12. package/dist/index.d.ts +14 -0
  13. package/dist/index.d.ts.map +1 -0
  14. package/dist/index.js +9 -0
  15. package/dist/index.js.map +1 -0
  16. package/dist/llm/client.d.ts +65 -0
  17. package/dist/llm/client.d.ts.map +1 -0
  18. package/dist/llm/client.js +12 -0
  19. package/dist/llm/client.js.map +1 -0
  20. package/dist/llm/copilot.d.ts +15 -0
  21. package/dist/llm/copilot.d.ts.map +1 -0
  22. package/dist/llm/copilot.js +119 -0
  23. package/dist/llm/copilot.js.map +1 -0
  24. package/dist/llm/prompts.d.ts +10 -0
  25. package/dist/llm/prompts.d.ts.map +1 -0
  26. package/dist/llm/prompts.js +94 -0
  27. package/dist/llm/prompts.js.map +1 -0
  28. package/dist/parsers/code-comments.d.ts +23 -0
  29. package/dist/parsers/code-comments.d.ts.map +1 -0
  30. package/dist/parsers/code-comments.js +139 -0
  31. package/dist/parsers/code-comments.js.map +1 -0
  32. package/dist/parsers/package-files.d.ts +31 -0
  33. package/dist/parsers/package-files.d.ts.map +1 -0
  34. package/dist/parsers/package-files.js +130 -0
  35. package/dist/parsers/package-files.js.map +1 -0
  36. package/dist/parsers/readme.d.ts +23 -0
  37. package/dist/parsers/readme.d.ts.map +1 -0
  38. package/dist/parsers/readme.js +151 -0
  39. package/dist/parsers/readme.js.map +1 -0
  40. package/package.json +41 -0
  41. package/src/detector.ts +746 -0
  42. package/src/diff-parser.ts +257 -0
  43. package/src/index.ts +43 -0
  44. package/src/llm/client.ts +85 -0
  45. package/src/llm/copilot.ts +147 -0
  46. package/src/llm/prompts.ts +102 -0
  47. package/src/parsers/code-comments.ts +178 -0
  48. package/src/parsers/package-files.ts +156 -0
  49. package/src/parsers/readme.ts +185 -0
  50. package/test/detector.test.ts +102 -0
  51. package/test/diff-parser.test.ts +187 -0
  52. package/test/llm/client.test.ts +31 -0
  53. package/test/llm/copilot.test.ts +55 -0
  54. package/test/parsers/code-comments.test.ts +98 -0
  55. package/test/parsers/package-files.test.ts +52 -0
  56. package/test/parsers/readme.test.ts +52 -0
  57. package/tsconfig.json +10 -0
  58. package/tsconfig.tsbuildinfo +1 -0
@@ -0,0 +1,746 @@
1
+ /**
2
+ * Detector Orchestrator
3
+ * Coordinates content parsers and LLM analysis to detect external dependencies
4
+ */
5
+
6
+ import { readdir, readFile } from 'node:fs/promises';
7
+ import { join, relative, resolve, normalize, sep } from 'node:path';
8
+ import { randomUUID } from 'node:crypto';
9
+ import type { LLMProvider } from './llm/client.js';
10
+ import { createClassificationPrompt } from './llm/prompts.js';
11
+ import { parseReadme } from './parsers/readme.js';
12
+ import { parseCodeComments } from './parsers/code-comments.js';
13
+ import {
14
+ parsePackageJson,
15
+ parseRequirementsTxt,
16
+ parseCargoToml,
17
+ parseGoMod
18
+ } from './parsers/package-files.js';
19
+ import type {
20
+ DependencyEntry,
21
+ DependencyType,
22
+ AccessMethod,
23
+ DetectionMethod
24
+ } from '@dependabit/manifest';
25
+
26
/**
 * Configuration accepted by the {@link Detector} constructor.
 */
export interface DetectorOptions {
  /** Root directory of the repository to scan. */
  repoPath: string;

  /** LLM backend used for document analysis and as a classification fallback. */
  llmProvider: LLMProvider;

  /**
   * File and directory name patterns to skip while walking the repository.
   * When omitted, the detector falls back to its built-in default list.
   */
  ignorePatterns?: string[];
}
31
+
32
/**
 * Outcome of a detection run: the manifest entries that were built plus
 * bookkeeping counters for the run.
 */
export interface DetectionResult {
  /** One manifest entry per distinct URL that was discovered. */
  dependencies: DependencyEntry[];

  /** Counters accumulated while scanning and classifying. */
  statistics: {
    /** Number of files that were read and parsed. */
    filesScanned: number;
    /** Number of distinct URLs collected. */
    urlsFound: number;
    /** Number of requests sent to the LLM provider. */
    llmCalls: number;
    /** Sum of the token usage reported by the LLM provider. */
    totalTokens: number;
    /** Accumulated LLM latency in milliseconds. */
    totalLatencyMs: number;
  };
}
42
+
43
/**
 * Name patterns skipped during the repository walk when the caller does not
 * supply `ignorePatterns`: dependency folders, VCS metadata, build output,
 * virtual environments and caches.
 */
const DEFAULT_IGNORE_PATTERNS: string[] = [
  // Dependency and VCS directories
  'node_modules',
  '.git',
  // Build output
  'dist',
  'build',
  'target',
  'vendor',
  // Python environments and caches
  '.venv',
  'venv',
  '__pycache__',
  // Tooling output
  'coverage',
  '.next',
  '.nuxt'
];
57
+
58
/** Where a URL was seen: repo-relative file, optional line number and nearby text. */
type ReferenceContext = { file: string; line?: number; text: string };

/** Everything collected about a single URL across all scanned files. */
type CollectedReference = {
  url: string;
  contexts: ReferenceContext[];
  detectionMethod: DetectionMethod;
};

/** URL -> collected reference data, in first-seen order. */
type ReferenceMap = Map<string, CollectedReference>;

/** Access methods offered to the LLM in the fallback prompt; anything else is rejected. */
const KNOWN_ACCESS_METHODS: readonly AccessMethod[] = [
  'github-api',
  'arxiv',
  'openapi',
  'context7',
  'http'
];

/** Upper bound on source files scanned for comments in a full repository scan. */
const MAX_SOURCE_FILES = 50;

/** Upper bound on README files sent to the LLM for a second-pass analysis. */
const MAX_LLM_READMES = 5;

/** Number of README characters embedded in the LLM analysis prompt. */
const README_PROMPT_CHARS = 5000;

const README_PATTERN = /^README/i;
const PACKAGE_FILE_PATTERN = /^(package\.json|requirements\.txt|Cargo\.toml|go\.mod)$/;
const SOURCE_FILE_PATTERN = /\.(ts|js|tsx|jsx|py|rs|go|java|kt|cs|rb|php)$/;
const DOC_FILE_PATTERN = /\.(md|txt|rst|adoc)$/;

/**
 * Detector orchestrator.
 *
 * Walks a repository, collects URLs from README files, package manifests and
 * code comments, then turns each distinct URL into a manifest entry.
 * Classification is programmatic first; the LLM provider is only consulted
 * when the URL/context heuristics cannot decide.
 */
export class Detector {
  private options: Required<DetectorOptions>;

  /**
   * @param options Repository path, LLM provider and optional ignore patterns.
   *   Missing `ignorePatterns` are replaced by `DEFAULT_IGNORE_PATTERNS`.
   */
  constructor(options: DetectorOptions) {
    this.options = {
      ...options,
      ignorePatterns: options.ignorePatterns || DEFAULT_IGNORE_PATTERNS
    };
  }

  /**
   * Detect all external dependencies in the repository.
   *
   * Pipeline:
   * 1. Programmatic parsing of README files, package files and code comments.
   * 2. LLM second pass over the first few READMEs for resources that are
   *    mentioned but not linked.
   * 3. Programmatic type categorization from URL patterns and context.
   * 4. LLM fallback for URLs whose type could not be determined.
   * 5. Programmatic access-method determination from URL patterns.
   * 6. LLM fallback for access methods that could not be determined.
   * 7. Manifest entry creation.
   *
   * Files that cannot be read are logged and skipped instead of aborting the
   * scan. LLM failures are logged and replaced by conservative defaults.
   *
   * @returns The manifest entries plus scan statistics.
   */
  async detectDependencies(): Promise<DetectionResult> {
    const { repoPath, llmProvider } = this.options;
    const allReferences: ReferenceMap = new Map();

    let filesScanned = 0;
    let llmCalls = 0;
    let totalTokens = 0;
    let totalLatencyMs = 0;

    // Step 1a: README files. Contents of the first few are kept so that the
    // LLM pass in step 2 does not have to read them from disk again.
    const readmeFiles = await this.findFiles(repoPath, README_PATTERN);
    const llmReadmes: Array<{ file: string; relPath: string; content: string }> = [];
    for (const [index, file] of readmeFiles.entries()) {
      const content = await this.readTextFile(file);
      if (content === undefined) {
        continue;
      }
      const relPath = relative(repoPath, file);
      this.collectDocumentReferences(allReferences, content, relPath);
      if (index < MAX_LLM_READMES) {
        llmReadmes.push({ file, relPath, content });
      }
      filesScanned++;
    }

    // Step 1b: package files, used for project metadata URLs (NOT dependencies).
    const packageFiles = await this.findPackageFiles(repoPath);
    for (const file of packageFiles) {
      const content = await this.readTextFile(file);
      if (content === undefined) {
        continue;
      }
      this.collectPackageReferences(allReferences, file, content, relative(repoPath, file));
      filesScanned++;
    }

    // Step 1c: code comments, capped for performance.
    const sourceFiles = await this.findSourceFiles(repoPath);
    for (const file of sourceFiles.slice(0, MAX_SOURCE_FILES)) {
      const content = await this.readTextFile(file);
      if (content === undefined) {
        continue;
      }
      this.collectCodeCommentReferences(allReferences, content, relative(repoPath, file));
      filesScanned++;
    }

    // Step 2: LLM second pass over READMEs for resources the parser missed.
    for (const { file, relPath, content } of llmReadmes) {
      try {
        const detectionPrompt = [
          'Analyze this README file and identify any external dependencies or resources that might be referenced but not explicitly linked:',
          '',
          `File: ${relPath}`,
          'Content:',
          content.slice(0, README_PROMPT_CHARS),
          '',
          'Identify:',
          '1. Documentation sites mentioned but not linked',
          '2. Tools or libraries referenced in text',
          '3. API services mentioned',
          '4. Research papers or specifications cited',
          '',
          'Return as JSON with "dependencies" array.'
        ].join('\n');

        const response = await llmProvider.analyze(content, detectionPrompt);
        llmCalls++;
        totalTokens += response.usage.totalTokens;
        totalLatencyMs += response.usage.latencyMs;

        // Only URLs that step 1 did not already find are added.
        for (const dep of response.dependencies) {
          if (dep.url && !allReferences.has(dep.url)) {
            this.addReference(
              allReferences,
              dep.url,
              { file: relPath, text: dep.description || 'Discovered by LLM analysis' },
              'llm-analysis'
            );
          }
        }
      } catch (error) {
        console.error(`LLM document analysis failed for ${file}:`, error);
      }
    }

    // Steps 3-7: classify every URL and build its manifest entry.
    const dependencies: DependencyEntry[] = [];
    const now = new Date().toISOString();

    for (const [url, data] of allReferences) {
      // Full context (all occurrences) is only needed for the LLM prompt.
      const context = data.contexts
        .map((c) => `${c.file}${c.line ? `:${c.line}` : ''}: ${c.text}`)
        .join('\n');
      const firstContext = data.contexts[0]?.text || '';

      // Step 3: programmatic type categorization.
      let type: DependencyType | null = this.determineDependencyType(url, firstContext);
      let typeConfidence = type ? 0.9 : 0.5; // High confidence for programmatic

      // Step 4: LLM fallback for the type.
      if (!type) {
        try {
          const classificationPrompt = createClassificationPrompt(url, context);
          const response = await llmProvider.analyze('', classificationPrompt);
          llmCalls++;
          totalTokens += response.usage.totalTokens;
          totalLatencyMs += response.usage.latencyMs;

          const dep = response.dependencies[0];
          if (dep) {
            type = dep.type as DependencyType;
            typeConfidence = dep.confidence;
          }
        } catch (error) {
          console.error(`LLM classification failed for ${url}:`, error);
        }
      }

      // Default to 'other' if still not determined.
      if (!type) {
        type = 'other';
        typeConfidence = 0.3;
      }

      // Step 5: programmatic access method.
      let accessMethod: AccessMethod | null = this.determineAccessMethod(url);

      // Step 6: LLM fallback for the access method.
      if (!accessMethod) {
        try {
          const accessMethodPrompt = [
            `Determine the best access method for this URL: ${url}`,
            '',
            `Context: ${firstContext}`,
            '',
            'Choose ONE of these access methods:',
            '- "github-api": For GitHub repositories',
            '- "arxiv": For arXiv papers',
            '- "openapi": For API specifications',
            '- "context7": For Context7 documentation',
            '- "http": For general web resources',
            '',
            'Return as JSON: {"accessMethod": "...", "confidence": 0.0-1.0}'
          ].join('\n');

          const response = await llmProvider.analyze('', accessMethodPrompt);
          llmCalls++;
          totalTokens += response.usage.totalTokens;
          totalLatencyMs += response.usage.latencyMs;

          accessMethod = this.parseAccessMethodResponse(response.rawResponse);
        } catch (error) {
          console.error(`LLM access method determination failed for ${url}:`, error);
          accessMethod = 'http';
        }
      }

      // Step 7: manifest entry.
      const entry: DependencyEntry = {
        id: randomUUID(),
        url,
        type,
        accessMethod: accessMethod ?? 'http',
        name: this.extractName(url),
        description: firstContext,
        currentVersion: undefined,
        // NOTE(review): analyzeFiles() uses 'sha256:pending' as the placeholder
        // here; confirm which value the manifest schema expects.
        currentStateHash: '', // Will be populated by monitor
        detectionMethod: data.detectionMethod,
        detectionConfidence: typeConfidence,
        detectedAt: now,
        lastChecked: now,
        auth: undefined,
        monitoring: {
          enabled: true,
          checkFrequency: 'daily',
          ignoreChanges: false
        },
        referencedIn: data.contexts.map((c) => ({
          file: c.file,
          line: c.line,
          context: c.text
        })),
        changeHistory: []
      };

      dependencies.push(entry);
    }

    return {
      dependencies,
      statistics: {
        filesScanned,
        urlsFound: allReferences.size,
        llmCalls,
        totalTokens,
        totalLatencyMs
      }
    };
  }

  /**
   * Record one occurrence of `url`.
   *
   * The first occurrence creates the map entry and fixes its detection
   * method; later occurrences only append their context.
   */
  private addReference(
    map: ReferenceMap,
    url: string,
    context: ReferenceContext,
    detectionMethod: DetectionMethod
  ): void {
    let entry = map.get(url);
    if (!entry) {
      entry = { url, contexts: [], detectionMethod };
      map.set(url, entry);
    }
    entry.contexts.push(context);
  }

  /**
   * Collect links from a README or other documentation file.
   *
   * NOTE(review): these programmatically parsed links are recorded with the
   * 'llm-analysis' detection method, as in the original implementation;
   * confirm whether a dedicated DetectionMethod value exists for them.
   */
  private collectDocumentReferences(refs: ReferenceMap, content: string, relPath: string): void {
    for (const ref of parseReadme(content, relPath)) {
      this.addReference(
        refs,
        ref.url,
        {
          file: relPath,
          ...(ref.line !== undefined && { line: ref.line }),
          text: ref.context
        },
        'llm-analysis'
      );
    }
  }

  /** Collect metadata URLs (urls, repository, homepage, documentation) from a package file. */
  private collectPackageReferences(
    refs: ReferenceMap,
    filePath: string,
    content: string,
    relPath: string
  ): void {
    const metadata = this.parsePackageFile(filePath, content);
    const candidates = [
      ...(metadata.urls || []),
      metadata.repository,
      metadata.homepage,
      metadata.documentation
    ];

    for (const url of candidates) {
      if (!url) {
        continue;
      }
      this.addReference(refs, url, { file: relPath, text: 'Package metadata' }, 'package-json');
    }
  }

  /** Collect URLs found in the comments of a source file. */
  private collectCodeCommentReferences(refs: ReferenceMap, content: string, relPath: string): void {
    for (const ref of parseCodeComments(content, relPath)) {
      this.addReference(
        refs,
        ref.url,
        { file: ref.file, line: ref.line, text: ref.context },
        'code-comment'
      );
    }
  }

  /**
   * Read a file as UTF-8 text.
   *
   * @returns The content, or `undefined` (after logging a warning) when the
   *   file cannot be read, so that one bad file does not abort a whole scan.
   */
  private async readTextFile(file: string): Promise<string | undefined> {
    try {
      return await readFile(file, 'utf-8');
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.warn(`Failed to read ${file}: ${message}`);
      return undefined;
    }
  }

  /**
   * Validate the raw LLM answer to the access-method prompt.
   *
   * @returns The `accessMethod` field when the response is valid JSON and
   *   names one of the methods offered in the prompt; otherwise `'http'`.
   */
  private parseAccessMethodResponse(rawResponse: string | null | undefined): AccessMethod {
    try {
      const parsed: unknown = JSON.parse(rawResponse || '{}');
      const candidate = (parsed as { accessMethod?: unknown } | null)?.accessMethod;
      return KNOWN_ACCESS_METHODS.find((method) => method === candidate) ?? 'http';
    } catch {
      // Not JSON at all: fall back to plain HTTP access.
      return 'http';
    }
  }

  /**
   * Whether `url` points at `domain` or one of its subdomains.
   *
   * The hostname is compared when the string parses as a URL with a host, so
   * a domain that merely appears in a path or query string does not match.
   * Strings without a usable host (e.g. scheme-less `github.com/owner/repo`)
   * fall back to a substring check.
   */
  private hostMatches(url: string, domain: string): boolean {
    let host = '';
    try {
      host = new URL(url).hostname.toLowerCase();
    } catch {
      // Not an absolute URL; handled by the substring fallback below.
    }
    if (!host) {
      return url.includes(domain);
    }
    return host === domain || host.endsWith(`.${domain}`);
  }

  /** Last segment of a path, accepting both `/` and `\` as separators. */
  private fileNameOf(filePath: string): string {
    return filePath.split(/[\\/]/).pop() || '';
  }

  /**
   * Programmatically determine access method based on URL patterns.
   *
   * @returns The access method, or `null` if it cannot be determined
   *   programmatically.
   */
  private determineAccessMethod(url: string): AccessMethod | null {
    // GitHub URLs
    if (this.hostMatches(url, 'github.com')) return 'github-api';

    // arXiv papers
    if (this.hostMatches(url, 'arxiv.org')) return 'arxiv';

    // OpenAPI/Swagger specs
    if (
      url.includes('openapi') ||
      url.includes('swagger') ||
      url.endsWith('.yaml') ||
      url.endsWith('.json') ||
      url.includes('/api/spec') ||
      url.includes('/api-docs')
    ) {
      return 'openapi';
    }

    // Context7 documentation
    if (url.includes('context7')) return 'context7';

    // Cannot determine programmatically - needs LLM
    return null;
  }

  /**
   * Programmatically determine dependency type based on URL patterns and context.
   *
   * Both inputs are lower-cased and matched by substring; the checks run in
   * order and the first matching category wins.
   *
   * @returns The dependency type, or `null` if it cannot be determined
   *   programmatically.
   */
  private determineDependencyType(url: string, context: string): DependencyType | null {
    const lowerUrl = url.toLowerCase();
    const lowerContext = context.toLowerCase();
    const urlHas = (needle: string): boolean => lowerUrl.includes(needle);
    const contextHas = (needle: string): boolean => lowerContext.includes(needle);

    // Research papers
    if (urlHas('arxiv.org') || contextHas('paper') || contextHas('research')) {
      return 'research-paper';
    }

    // Schemas
    if (['schema', 'openapi', 'swagger', 'graphql', 'protobuf'].some(urlHas)) {
      return 'schema';
    }

    // Documentation
    if (
      ['/docs', '/documentation', '/guide', '/tutorial', '/reference'].some(urlHas) ||
      contextHas('documentation') ||
      contextHas('docs')
    ) {
      return 'documentation';
    }

    // Reference implementations (GitHub repos)
    if (
      urlHas('github.com') &&
      (contextHas('example') || contextHas('implementation') || contextHas('reference'))
    ) {
      return 'reference-implementation';
    }

    // API examples
    if (contextHas('example') && (contextHas('api') || contextHas('endpoint'))) {
      return 'api-example';
    }

    // Cannot determine programmatically - needs LLM
    return null;
  }

  /**
   * Derive a display name from a URL.
   *
   * Uses the last path segment with its extension removed, the hostname when
   * the path is empty, and the raw input when it is not a parseable URL.
   */
  private extractName(url: string): string {
    try {
      const urlObj = new URL(url);
      const lastPart = urlObj.pathname.split('/').filter(Boolean).pop();
      if (lastPart) {
        // A dot-file segment such as ".well-known" would be stripped to an
        // empty string; keep the full segment in that case.
        return lastPart.replace(/\.[^.]+$/, '') || lastPart;
      }
      return urlObj.hostname;
    } catch {
      return url;
    }
  }

  /**
   * Dispatch a package file to the matching parser based on its file name.
   *
   * @returns The parser's metadata, or `{ urls: [] }` for unknown file names.
   */
  private parsePackageFile(
    filePath: string,
    content: string
  ): { urls: string[]; repository?: string; homepage?: string; documentation?: string } {
    // Paths built with node:path use "\" on Windows, so split on both separators.
    switch (this.fileNameOf(filePath)) {
      case 'package.json':
        return parsePackageJson(content);
      case 'requirements.txt':
        return parseRequirementsTxt(content);
      case 'Cargo.toml':
        return parseCargoToml(content);
      case 'go.mod':
        return parseGoMod(content);
      default:
        return { urls: [] };
    }
  }

  /**
   * Recursively list files under `dir` whose name matches `pattern`.
   *
   * Entries rejected by `shouldIgnore` are skipped (directories are not
   * descended into). Directories that cannot be listed yield no files.
   */
  private async findFiles(dir: string, pattern: RegExp): Promise<string[]> {
    const files: string[] = [];

    try {
      const entries = await readdir(dir, { withFileTypes: true });

      for (const entry of entries) {
        if (this.shouldIgnore(entry.name)) {
          continue;
        }

        const fullPath = join(dir, entry.name);
        if (entry.isDirectory()) {
          files.push(...(await this.findFiles(fullPath, pattern)));
        } else if (pattern.test(entry.name)) {
          files.push(fullPath);
        }
      }
    } catch {
      // Best effort: ignore errors (permission denied, etc.)
    }

    return files;
  }

  /** Find supported package manifests (package.json, requirements.txt, Cargo.toml, go.mod). */
  private async findPackageFiles(dir: string): Promise<string[]> {
    return this.findFiles(dir, PACKAGE_FILE_PATTERN);
  }

  /** Find source files by extension. */
  private async findSourceFiles(dir: string): Promise<string[]> {
    return this.findFiles(dir, SOURCE_FILE_PATTERN);
  }

  /**
   * Whether a directory entry name is excluded from the walk.
   *
   * NOTE(review): this is a substring match, so the default pattern 'dist'
   * also skips e.g. "distance.ts" and 'build' skips "builder.ts". Kept as-is
   * because user-supplied patterns may rely on substring semantics.
   */
  private shouldIgnore(name: string): boolean {
    return this.options.ignorePatterns.some((pattern) => name.includes(pattern));
  }

  /**
   * Analyze only specific files for dependencies (for incremental updates).
   * This is more efficient than a full repository scan when only a few files
   * changed.
   *
   * Paths are resolved against the repository root; anything that resolves
   * outside of it is skipped to prevent path traversal. Files that cannot be
   * read or parsed are logged and skipped.
   *
   * @param filePaths Paths relative to the repository root.
   * @returns The manifest entries plus scan statistics.
   */
  async analyzeFiles(filePaths: string[]): Promise<DetectionResult> {
    const allReferences: ReferenceMap = new Map();

    let filesScanned = 0;
    let llmCalls = 0;
    let totalTokens = 0;
    let totalLatencyMs = 0;

    // The repository boundary is the same for every file, so compute it once.
    const normalizedRepoPath = resolve(normalize(this.options.repoPath));
    const repoPathWithSep = normalizedRepoPath.endsWith(sep)
      ? normalizedRepoPath
      : normalizedRepoPath + sep;

    for (const filePath of filePaths) {
      const fullPath = resolve(normalizedRepoPath, normalize(filePath));

      // Ensure the resolved path is within the repository boundaries.
      if (!fullPath.startsWith(repoPathWithSep) && fullPath !== normalizedRepoPath) {
        if (process.env['DEBUG']) {
          console.warn(`Skipping file outside repository: ${filePath}`);
        }
        continue;
      }

      try {
        const content = await readFile(fullPath, 'utf-8');
        const relativePath = relative(this.options.repoPath, fullPath);
        const fileName = this.fileNameOf(fullPath);

        // Order matters: requirements.txt must hit the package branch before
        // the generic ".txt" documentation branch.
        if (README_PATTERN.test(fileName)) {
          this.collectDocumentReferences(allReferences, content, relativePath);
        } else if (/^(package\.json|requirements\.txt|Cargo\.toml|go\.mod)$/i.test(fileName)) {
          this.collectPackageReferences(allReferences, fullPath, content, relativePath);
        } else if (SOURCE_FILE_PATTERN.test(fileName)) {
          this.collectCodeCommentReferences(allReferences, content, relativePath);
        } else if (DOC_FILE_PATTERN.test(fileName)) {
          this.collectDocumentReferences(allReferences, content, relativePath);
        }

        filesScanned++;
      } catch (error) {
        // Skip files that can't be read - log but don't throw.
        const message = error instanceof Error ? error.message : String(error);
        console.warn(`Failed to analyze ${filePath}: ${message}`);
        if (process.env['DEBUG']) {
          console.debug('Full error while analyzing %s:', filePath, error);
        }
      }
    }

    const dependencies: DependencyEntry[] = [];

    for (const [url, refData] of allReferences.entries()) {
      const contextText = refData.contexts.map((c) => c.text).join(' ');

      // Step 3: programmatic type categorization.
      let type = this.determineDependencyType(url, contextText);

      // Step 4: LLM fallback for type categorization (if needed).
      if (!type && refData.contexts.length > 0) {
        const startTime = Date.now();
        try {
          const prompt = createClassificationPrompt(url, contextText);
          const response = await this.options.llmProvider.analyze('', prompt);

          llmCalls++;
          totalTokens += response.usage?.totalTokens || 0;
          totalLatencyMs += Date.now() - startTime;

          // Classify by keyword in the raw response; first match wins.
          const responseText = (response.rawResponse || '').toLowerCase();
          if (responseText.includes('schema')) {
            type = 'schema';
          } else if (responseText.includes('documentation')) {
            type = 'documentation';
          } else if (responseText.includes('research') || responseText.includes('paper')) {
            type = 'research-paper';
          } else if (responseText.includes('implementation')) {
            type = 'reference-implementation';
          } else if (responseText.includes('example')) {
            type = 'api-example';
          } else {
            type = 'other';
          }
        } catch (error) {
          console.error(`LLM classification failed for ${url}:`, error);
          type = 'other';
        }
      }

      if (!type) {
        type = 'other';
      }

      // Step 5: programmatic access method, defaulting to plain HTTP.
      const accessMethod = this.determineAccessMethod(url) ?? 'http';

      const dependency: DependencyEntry = {
        id: randomUUID(),
        url,
        type,
        accessMethod,
        name: this.extractName(url),
        currentStateHash: `sha256:pending`,
        detectionMethod: refData.detectionMethod,
        detectionConfidence: refData.detectionMethod === 'manual' ? 1.0 : 0.85,
        detectedAt: new Date().toISOString(),
        lastChecked: new Date().toISOString(),
        auth: undefined,
        referencedIn: refData.contexts.map((ctx) => ({
          file: ctx.file,
          line: ctx.line,
          context: ctx.text
        })),
        changeHistory: []
      };

      dependencies.push(dependency);
    }

    return {
      dependencies,
      statistics: {
        filesScanned,
        urlsFound: allReferences.size,
        llmCalls,
        totalTokens,
        totalLatencyMs
      }
    };
  }
}