@dependabit/detector 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/LICENSE +21 -0
- package/README.md +32 -0
- package/dist/detector.d.ts +64 -0
- package/dist/detector.d.ts.map +1 -0
- package/dist/detector.js +578 -0
- package/dist/detector.js.map +1 -0
- package/dist/diff-parser.d.ts +53 -0
- package/dist/diff-parser.d.ts.map +1 -0
- package/dist/diff-parser.js +203 -0
- package/dist/diff-parser.js.map +1 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -0
- package/dist/llm/client.d.ts +65 -0
- package/dist/llm/client.d.ts.map +1 -0
- package/dist/llm/client.js +12 -0
- package/dist/llm/client.js.map +1 -0
- package/dist/llm/copilot.d.ts +15 -0
- package/dist/llm/copilot.d.ts.map +1 -0
- package/dist/llm/copilot.js +119 -0
- package/dist/llm/copilot.js.map +1 -0
- package/dist/llm/prompts.d.ts +10 -0
- package/dist/llm/prompts.d.ts.map +1 -0
- package/dist/llm/prompts.js +94 -0
- package/dist/llm/prompts.js.map +1 -0
- package/dist/parsers/code-comments.d.ts +23 -0
- package/dist/parsers/code-comments.d.ts.map +1 -0
- package/dist/parsers/code-comments.js +139 -0
- package/dist/parsers/code-comments.js.map +1 -0
- package/dist/parsers/package-files.d.ts +31 -0
- package/dist/parsers/package-files.d.ts.map +1 -0
- package/dist/parsers/package-files.js +130 -0
- package/dist/parsers/package-files.js.map +1 -0
- package/dist/parsers/readme.d.ts +23 -0
- package/dist/parsers/readme.d.ts.map +1 -0
- package/dist/parsers/readme.js +151 -0
- package/dist/parsers/readme.js.map +1 -0
- package/package.json +41 -0
- package/src/detector.ts +746 -0
- package/src/diff-parser.ts +257 -0
- package/src/index.ts +43 -0
- package/src/llm/client.ts +85 -0
- package/src/llm/copilot.ts +147 -0
- package/src/llm/prompts.ts +102 -0
- package/src/parsers/code-comments.ts +178 -0
- package/src/parsers/package-files.ts +156 -0
- package/src/parsers/readme.ts +185 -0
- package/test/detector.test.ts +102 -0
- package/test/diff-parser.test.ts +187 -0
- package/test/llm/client.test.ts +31 -0
- package/test/llm/copilot.test.ts +55 -0
- package/test/parsers/code-comments.test.ts +98 -0
- package/test/parsers/package-files.test.ts +52 -0
- package/test/parsers/readme.test.ts +52 -0
- package/tsconfig.json +10 -0
- package/tsconfig.tsbuildinfo +1 -0
package/src/parsers/code-comments.ts
@@ -0,0 +1,178 @@
+/**
+ * Code Comment Parser
+ * Extracts URLs and references from code comments
+ */
+
+export interface CommentReference {
+  url: string;
+  context: string;
+  file: string;
+  line: number;
+  commentType: 'single-line' | 'multi-line' | 'jsdoc';
+}
+
+/**
+ * Parse code files and extract references from comments
+ */
+export function parseCodeComments(content: string, filePath: string): CommentReference[] {
+  const references: CommentReference[] = [];
+  const extension = getFileExtension(filePath);
+  const commentStyle = getCommentStyle(extension);
+
+  if (!commentStyle) {
+    return references; // Unsupported file type
+  }
+
+  const lines = content.split('\n');
+  let inMultiLineComment = false;
+
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i];
+    if (!line) continue;
+
+    const lineNumber = i + 1;
+
+    // Check for multi-line comment start/end
+    if (commentStyle.multiLine) {
+      if (line.includes(commentStyle.multiLine.start)) {
+        inMultiLineComment = true;
+      }
+      if (inMultiLineComment) {
+        const urls = extractUrls(line);
+        for (const url of urls) {
+          references.push({
+            url,
+            context: line.trim(),
+            file: filePath,
+            line: lineNumber,
+            commentType: line.includes('/**') ? 'jsdoc' : 'multi-line'
+          });
+        }
+        if (line.includes(commentStyle.multiLine.end)) {
+          inMultiLineComment = false;
+        }
+        continue; // only skip the single-line check while inside a block comment
+      }
+    }
+
+    // Check for single-line comments
+    if (commentStyle.singleLine) {
+      const commentStart = line.indexOf(commentStyle.singleLine);
+      if (commentStart !== -1) {
+        const comment = line.substring(commentStart);
+        const urls = extractUrls(comment);
+        for (const url of urls) {
+          references.push({
+            url,
+            context: comment.trim(),
+            file: filePath,
+            line: lineNumber,
+            commentType: 'single-line'
+          });
+        }
+      }
+    }
+  }
+
+  return references;
+}
+
+function getFileExtension(filePath: string): string {
+  const match = filePath.match(/\.([^.]+)$/);
+  const ext = match?.[1];
+  return ext ? ext.toLowerCase() : '';
+}
+
+interface CommentStyle {
+  singleLine?: string;
+  multiLine?: { start: string; end: string };
+}
+
+function getCommentStyle(extension: string): CommentStyle | null {
+  const styles: Record<string, CommentStyle> = {
+    // JavaScript/TypeScript
+    js: { singleLine: '//', multiLine: { start: '/*', end: '*/' } },
+    ts: { singleLine: '//', multiLine: { start: '/*', end: '*/' } },
+    jsx: { singleLine: '//', multiLine: { start: '/*', end: '*/' } },
+    tsx: { singleLine: '//', multiLine: { start: '/*', end: '*/' } },
+
+    // Python
+    py: { singleLine: '#' },
+
+    // Ruby
+    rb: { singleLine: '#', multiLine: { start: '=begin', end: '=end' } },
+
+    // Go
+    go: { singleLine: '//', multiLine: { start: '/*', end: '*/' } },
+
+    // Rust
+    rs: { singleLine: '//', multiLine: { start: '/*', end: '*/' } },
+
+    // C/C++
+    c: { singleLine: '//', multiLine: { start: '/*', end: '*/' } },
+    cpp: { singleLine: '//', multiLine: { start: '/*', end: '*/' } },
+    h: { singleLine: '//', multiLine: { start: '/*', end: '*/' } },
+
+    // Java/Kotlin
+    java: { singleLine: '//', multiLine: { start: '/*', end: '*/' } },
+    kt: { singleLine: '//', multiLine: { start: '/*', end: '*/' } },
+
+    // C#
+    cs: { singleLine: '//', multiLine: { start: '/*', end: '*/' } },
+
+    // PHP
+    php: { singleLine: '//', multiLine: { start: '/*', end: '*/' } },
+
+    // Shell
+    sh: { singleLine: '#' },
+    bash: { singleLine: '#' },
+
+    // YAML
+    yml: { singleLine: '#' },
+    yaml: { singleLine: '#' }
+  };
+
+  return styles[extension] || null;
+}
+
+function extractUrls(text: string): string[] {
+  const urls: string[] = [];
+  const regex = /https?:\/\/[^\s<>()[\]'"]+/g;
+  let match;
+
+  while ((match = regex.exec(text)) !== null) {
+    urls.push(match[0]);
+  }
+
+  return urls;
+}
+
+/**
+ * Extract specification and RFC references from comments
+ */
+export function extractSpecReferences(content: string): Array<{ spec: string; context: string }> {
+  const references: Array<{ spec: string; context: string }> = [];
+  const lines = content.split('\n');
+
+  for (const line of lines) {
+    // Match RFC references
+    const rfcMatch = /RFC\s*(\d+)/i.exec(line);
+    if (rfcMatch) {
+      references.push({
+        spec: `RFC ${rfcMatch[1]}`,
+        context: line.trim()
+      });
+    }
+
+    // Match standard references (ISO, IEEE, etc.)
+    const standardMatch = /(ISO|IEEE|ECMA|W3C)[\s-]*(\d+(?:[-.]\d+)*)/i.exec(line);
+    if (standardMatch) {
+      references.push({
+        spec: `${standardMatch[1]} ${standardMatch[2]}`,
+        context: line.trim()
+      });
+    }
+  }
+
+  return references;
+}
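Editor's note: a minimal sketch of driving the two exports above. The sample source string is invented, and the relative import path assumes a caller sitting next to `src/parsers/` as in the file list; the package's real export surface is defined in `src/index.ts`, not shown here.

```ts
// Hypothetical driver for the parser above; the input text is made up.
import { parseCodeComments, extractSpecReferences } from './parsers/code-comments';

const source = [
  '// See https://example.com/spec for details',
  'const x = 1; /* docs: https://docs.example.com/x */',
  '// framing follows RFC 6455'
].join('\n');

// The '.ts' extension selects the '//' and '/* ... */' comment styles.
const refs = parseCodeComments(source, 'example.ts');
// -> one 'single-line' and one 'multi-line' CommentReference

const specs = extractSpecReferences(source);
// -> [{ spec: 'RFC 6455', context: '// framing follows RFC 6455' }]
console.log(refs, specs);
```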
package/src/parsers/package-files.ts
@@ -0,0 +1,156 @@
+/**
+ * Package File Parser
+ * Extracts metadata and references from package manager files
+ * EXCLUDES actual dependencies (handled by dependabot)
+ */
+
+export interface PackageMetadata {
+  repository?: string;
+  homepage?: string;
+  documentation?: string;
+  urls: string[]; // URLs found in descriptions, etc.
+}
+
+/**
+ * Parse package.json and extract metadata URLs (NOT dependencies)
+ */
+export function parsePackageJson(content: string): PackageMetadata {
+  try {
+    const pkg = JSON.parse(content);
+    const urls: string[] = [];
+
+    // Extract repository URL
+    let repository: string | undefined = undefined;
+    if (typeof pkg.repository === 'string') {
+      repository = pkg.repository;
+    } else if (pkg.repository && pkg.repository.url) {
+      repository = pkg.repository.url;
+    }
+
+    // Extract homepage
+    const homepage: string | undefined = pkg.homepage;
+
+    // Extract documentation (not standard but sometimes present)
+    const documentation: string | undefined = pkg.documentation || pkg.docs;
+
+    // Extract URLs from description
+    if (pkg.description) {
+      const descUrls = extractUrls(pkg.description);
+      urls.push(...descUrls);
+    }
+
+    // Note: We DO NOT extract dependencies/devDependencies
+    // Those are handled by dependabot
+
+    return {
+      ...(repository !== undefined && { repository }),
+      ...(homepage !== undefined && { homepage }),
+      ...(documentation !== undefined && { documentation }),
+      urls
+    };
+  } catch {
+    return { urls: [] };
+  }
+}
+
+/**
+ * Parse requirements.txt and extract URLs from comments
+ * EXCLUDES actual packages (handled by dependabot)
+ */
+export function parseRequirementsTxt(content: string): PackageMetadata {
+  const urls: string[] = [];
+  const lines = content.split('\n');
+
+  for (const line of lines) {
+    // Only extract URLs from comments
+    if (line.trim().startsWith('#')) {
+      const commentUrls = extractUrls(line);
+      urls.push(...commentUrls);
+    }
+    // Skip actual package lines - dependabot handles those
+  }
+
+  return { urls };
+}
+
+/**
+ * Parse Cargo.toml and extract metadata URLs
+ * EXCLUDES actual dependencies (handled by dependabot)
+ */
+export function parseCargoToml(content: string): PackageMetadata {
+  const urls: string[] = [];
+  let repository: string | undefined = undefined;
+  let homepage: string | undefined = undefined;
+  let documentation: string | undefined = undefined;
+
+  const lines = content.split('\n');
+  let inPackageSection = false;
+
+  for (const line of lines) {
+    if (line.trim() === '[package]') {
+      inPackageSection = true;
+      continue;
+    }
+
+    if (line.trim().startsWith('[') && line.trim() !== '[package]') {
+      inPackageSection = false;
+      continue;
+    }
+
+    if (inPackageSection) {
+      const repoMatch = /repository\s*=\s*"([^"]+)"/.exec(line);
+      if (repoMatch && repoMatch[1]) repository = repoMatch[1];
+
+      const homepageMatch = /homepage\s*=\s*"([^"]+)"/.exec(line);
+      if (homepageMatch && homepageMatch[1]) homepage = homepageMatch[1];
+
+      const docMatch = /documentation\s*=\s*"([^"]+)"/.exec(line);
+      if (docMatch && docMatch[1]) documentation = docMatch[1];
+    }
+
+    // Extract URLs from comments
+    if (line.trim().startsWith('#')) {
+      const commentUrls = extractUrls(line);
+      urls.push(...commentUrls);
+    }
+  }
+
+  return {
+    ...(repository !== undefined && { repository }),
+    ...(homepage !== undefined && { homepage }),
+    ...(documentation !== undefined && { documentation }),
+    urls
+  };
+}
+
+/**
+ * Parse go.mod and extract URLs from comments
+ * EXCLUDES actual dependencies (handled by dependabot)
+ */
+export function parseGoMod(content: string): PackageMetadata {
+  const urls: string[] = [];
+  const lines = content.split('\n');
+
+  for (const line of lines) {
+    // Only extract URLs from comments
+    if (line.trim().startsWith('//')) {
+      const commentUrls = extractUrls(line);
+      urls.push(...commentUrls);
+    }
+    // Skip actual require lines - dependabot handles those
+  }
+
+  return { urls };
+}
+
+function extractUrls(text: string): string[] {
+  const urls: string[] = [];
+  const regex = /https?:\/\/[^\s<>()[\]'"]+/g;
+  let match;
+
+  while ((match = regex.exec(text)) !== null) {
+    urls.push(match[0]);
+  }
+
+  return urls;
+}
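Editor's note: a short usage sketch for the metadata parsers above. The manifest contents are invented, and the import path again assumes a caller next to `src/parsers/`.

```ts
// Hypothetical inputs; only metadata is extracted, never dependency lists.
import { parsePackageJson, parseCargoToml } from './parsers/package-files';

const pkgJson = JSON.stringify({
  description: 'Widget toolkit, spec at https://example.com/widget-spec',
  repository: { type: 'git', url: 'https://github.com/acme/demo.git' },
  homepage: 'https://acme.github.io/demo',
  dependencies: { 'left-pad': '^1.3.0' } // ignored by design
});
const meta = parsePackageJson(pkgJson);
// meta.repository === 'https://github.com/acme/demo.git'
// meta.urls === ['https://example.com/widget-spec']

const cargo = [
  '[package]',
  'repository = "https://github.com/acme/demo"',
  '# design notes: https://example.com/design',
  '[dependencies]',
  'serde = "1"' // ignored: dependabot territory
].join('\n');
const cargoMeta = parseCargoToml(cargo);
// cargoMeta.repository is set; cargoMeta.urls === ['https://example.com/design']
console.log(meta, cargoMeta);
```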
package/src/parsers/readme.ts
@@ -0,0 +1,185 @@
+/**
+ * README Parser
+ * Extracts URLs and references from README and markdown files
+ */
+
+export interface ExtractedReference {
+  url: string;
+  context: string; // Surrounding text
+  line?: number;
+  type: 'markdown-link' | 'bare-url' | 'reference-link';
+}
+
+// Patterns to skip (package managers, CI badges, shields.io)
+const SKIP_PATTERNS = [
+  /npmjs\.com\/package/,
+  /pypi\.org\/project/,
+  /crates\.io\/crates/,
+  /rubygems\.org\/gems/,
+  /packagist\.org\/packages/,
+  /shields\.io/,
+  /badge(s)?\..*\.svg/,
+  /travis-ci\.(org|com)/,
+  /circleci\.com/,
+  /github\.com\/.*\/actions/ // GitHub Actions badges
+];
+
+/**
+ * Parse README content and extract external references
+ */
+export function parseReadme(content: string, filePath = 'README.md'): ExtractedReference[] {
+  const references: ExtractedReference[] = [];
+  const lines = content.split('\n');
+
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i];
+    if (!line) continue; // Skip undefined or empty lines
+
+    const lineNumber = i + 1;
+
+    // Extract markdown links [text](url)
+    const markdownLinks = extractMarkdownLinks(line);
+    for (const { url, text } of markdownLinks) {
+      if (!shouldSkipUrl(url)) {
+        references.push({
+          url,
+          context: text || line.trim(),
+          line: lineNumber,
+          type: 'markdown-link'
+        });
+      }
+    }
+
+    // Extract reference-style links [text]: url
+    const referenceLinks = extractReferenceLinks(line);
+    for (const { url, text } of referenceLinks) {
+      if (!shouldSkipUrl(url)) {
+        references.push({
+          url,
+          context: text || line.trim(),
+          line: lineNumber,
+          type: 'reference-link'
+        });
+      }
+    }
+
+    // Extract bare URLs
+    const bareUrls = extractBareUrls(line);
+    for (const url of bareUrls) {
+      if (!shouldSkipUrl(url)) {
+        references.push({
+          url,
+          context: line.trim(),
+          line: lineNumber,
+          type: 'bare-url'
+        });
+      }
+    }
+  }
+
+  // Deduplicate by URL
+  return deduplicateReferences(references);
+}
+
+function extractMarkdownLinks(line: string): Array<{ url: string; text: string }> {
+  const links: Array<{ url: string; text: string }> = [];
+  const regex = /\[([^\]]+)\]\(([^)]+)\)/g;
+  let match;
+
+  while ((match = regex.exec(line)) !== null) {
+    const text = match[1];
+    const url = match[2];
+    if (text !== undefined && url !== undefined) {
+      links.push({ text, url });
+    }
+  }
+
+  return links;
+}
+
+function extractReferenceLinks(line: string): Array<{ url: string; text: string }> {
+  const links: Array<{ url: string; text: string }> = [];
+  const regex = /^\[([^\]]+)\]:\s+(.+)$/;
+  const match = regex.exec(line);
+
+  if (match) {
+    const text = match[1];
+    const url = match[2];
+    if (text !== undefined && url !== undefined) {
+      links.push({ text, url });
+    }
+  }
+
+  return links;
+}
+
+function extractBareUrls(line: string): string[] {
+  const urls: string[] = [];
+  const regex = /https?:\/\/[^\s<>()[\]]+/g;
+  let match;
+
+  while ((match = regex.exec(line)) !== null) {
+    urls.push(match[0]);
+  }
+
+  return urls;
+}
+
+function shouldSkipUrl(url: string): boolean {
+  return SKIP_PATTERNS.some((pattern) => pattern.test(url));
+}
+
+function deduplicateReferences(references: ExtractedReference[]): ExtractedReference[] {
+  const seen = new Set<string>();
+  return references.filter((ref) => {
+    if (seen.has(ref.url)) {
+      return false;
+    }
+    seen.add(ref.url);
+    return true;
+  });
+}
+
+/**
+ * Extract GitHub repository mentions (owner/repo format)
+ */
+export function extractGitHubReferences(
+  content: string
+): Array<{ owner: string; repo: string; context: string }> {
+  const references: Array<{ owner: string; repo: string; context: string }> = [];
+  const lines = content.split('\n');
+
+  for (const line of lines) {
+    // Match owner/repo pattern not in URLs, with basic length and context constraints
+    const regex =
+      /(?<!https?:\/\/github\.com\/)(?:^|[\s(])([a-zA-Z0-9_-]{2,}\/[a-zA-Z0-9_.-]{2,})(?=$|[\s),.;])/g;
+    let match;
+
+    while ((match = regex.exec(line)) !== null) {
+      const ownerRepo = match[1];
+      if (ownerRepo) {
+        const parts = ownerRepo.split('/');
+        const owner = parts[0];
+        const repo = parts[1];
+
+        if (
+          owner &&
+          repo &&
+          owner.length >= 2 &&
+          repo.length >= 2 &&
+          owner !== 'owner' &&
+          repo !== 'repo' &&
+          !(/^\d+$/.test(owner) && /^\d+$/.test(repo))
+        ) {
+          references.push({
+            owner,
+            repo,
+            context: line.trim()
+          });
+        }
+      }
+    }
+  }
+
+  return references;
+}
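Editor's note: a sketch exercising the badge filter, dedup, and repo-mention extraction above. The README text is invented; the import path assumes a caller next to `src/parsers/`.

```ts
// Hypothetical README content exercising SKIP_PATTERNS and dedup.
import { parseReadme, extractGitHubReferences } from './parsers/readme';

const readme = [
  '# demo',
  'Based on [the Transformer paper](https://arxiv.org/abs/1706.03762).',
  'Docs: https://docs.example.com/demo',
  '![build](https://img.shields.io/badge/build-passing.svg)',
  'Inspired by lodash/lodash'
].join('\n');

const refs = parseReadme(readme);
// The arXiv link (kept once, as 'markdown-link' despite also matching as a
// bare URL) and the bare docs URL survive; the shields.io badge is dropped.

const repos = extractGitHubReferences(readme);
// -> [{ owner: 'lodash', repo: 'lodash', context: 'Inspired by lodash/lodash' }]
console.log(refs, repos);
```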
package/test/detector.test.ts
@@ -0,0 +1,102 @@
+import { describe, it, expect, beforeEach, vi } from 'vitest';
+
+describe('Detector', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  describe('detectDependencies', () => {
+    it('should orchestrate all parsers', async () => {
+      // Expected: Call README parser, code comment parser, package file parser
+      expect(true).toBe(true);
+    });
+
+    it('should aggregate results from all parsers', async () => {
+      // Expected: Combine results from multiple parsers
+      expect(true).toBe(true);
+    });
+
+    it('should send aggregated content to LLM for analysis', async () => {
+      // Expected: Pass extracted content to LLM provider
+      expect(true).toBe(true);
+    });
+
+    it('should deduplicate dependencies by URL', async () => {
+      // Expected: Same URL from multiple sources = one dependency
+      expect(true).toBe(true);
+    });
+
+    it('should calculate confidence scores', async () => {
+      // Expected: LLM confidence * detection method weight
+      expect(true).toBe(true);
+    });
+
+    it('should generate UUIDs for each dependency', async () => {
+      expect(true).toBe(true);
+    });
+
+    it('should include detection metadata', async () => {
+      // Expected: detectionMethod, detectedAt, referencedIn
+      expect(true).toBe(true);
+    });
+
+    it('should handle empty repository', async () => {
+      // Expected: Return empty dependencies array
+      expect(true).toBe(true);
+    });
+
+    it('should handle LLM failures gracefully', async () => {
+      // Expected: Continue with parser results, log error
+      expect(true).toBe(true);
+    });
+  });
+
+  describe('classifyDependency', () => {
+    it('should classify GitHub URLs as repository', () => {
+      const url = 'https://github.com/owner/repo';
+      expect(url).toContain('github.com');
+    });
+
+    it('should classify arXiv URLs as research-paper', () => {
+      const url = 'https://arxiv.org/abs/1706.03762';
+      expect(url).toContain('arxiv.org');
+    });
+
+    it('should classify OpenAPI specs as schema', () => {
+      const url = 'https://api.example.com/openapi.yaml';
+      expect(url).toContain('openapi');
+    });
+
+    it('should classify documentation sites as documentation', () => {
+      const url = 'https://docs.example.com/guide';
+      expect(url).toContain('docs.');
+    });
+
+    it('should use LLM for ambiguous URLs', () => {
+      const url = 'https://example.com/some-resource';
+      expect(typeof url).toBe('string');
+    });
+  });
+
+  describe('determineAccessMethod', () => {
+    it('should use github-api for GitHub URLs', () => {
+      const url = 'https://github.com/owner/repo';
+      expect(url).toContain('github.com');
+    });
+
+    it('should use arxiv for arXiv URLs', () => {
+      const url = 'https://arxiv.org/abs/1234.5678';
+      expect(url).toContain('arxiv.org');
+    });
+
+    it('should use openapi for OpenAPI spec URLs', () => {
+      const url = 'https://api.example.com/openapi.json';
+      expect(url).toContain('openapi');
+    });
+
+    it('should use http as fallback', () => {
+      const url = 'https://example.com/docs';
+      expect(url).toContain('http');
+    });
+  });
+});
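Editor's note: these specs are placeholders (`expect(true).toBe(true)`) awaiting the detector implementation. As an illustration only, one of them could be made concrete against the already-implemented README parser; the relative import mirrors the test/ to src/ layout in the file list.

```ts
// Sketch of a concrete assertion, not part of the published package.
import { describe, it, expect } from 'vitest';
import { parseReadme } from '../src/parsers/readme';

describe('parseReadme dedup', () => {
  it('keeps one reference when a URL appears as a link and as bare text', () => {
    const content = '[spec](https://example.com/spec)\nSee https://example.com/spec';
    const refs = parseReadme(content);
    expect(refs).toHaveLength(1);
    expect(refs[0]?.type).toBe('markdown-link');
  });
});
```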