doc-freshness-checker 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +305 -0
- package/dist/cache/cacheManager.d.ts +42 -0
- package/dist/cache/cacheManager.js +138 -0
- package/dist/cache/cacheManager.js.map +1 -0
- package/dist/cache/cacheManager.test.d.ts +1 -0
- package/dist/cache/cacheManager.test.js +142 -0
- package/dist/cache/cacheManager.test.js.map +1 -0
- package/dist/cli.d.ts +32 -0
- package/dist/cli.js +137 -0
- package/dist/cli.js.map +1 -0
- package/dist/cli.test.d.ts +1 -0
- package/dist/cli.test.js +184 -0
- package/dist/cli.test.js.map +1 -0
- package/dist/config/defaults.d.ts +5 -0
- package/dist/config/defaults.js +135 -0
- package/dist/config/defaults.js.map +1 -0
- package/dist/config/defineConfig.d.ts +28 -0
- package/dist/config/defineConfig.js +30 -0
- package/dist/config/defineConfig.js.map +1 -0
- package/dist/config/defineConfig.test.d.ts +1 -0
- package/dist/config/defineConfig.test.js +10 -0
- package/dist/config/defineConfig.test.js.map +1 -0
- package/dist/config/loader.d.ts +7 -0
- package/dist/config/loader.js +250 -0
- package/dist/config/loader.js.map +1 -0
- package/dist/config/loader.test.d.ts +1 -0
- package/dist/config/loader.test.js +276 -0
- package/dist/config/loader.test.js.map +1 -0
- package/dist/git/changeTracker.d.ts +44 -0
- package/dist/git/changeTracker.js +149 -0
- package/dist/git/changeTracker.js.map +1 -0
- package/dist/git/changeTracker.test.d.ts +1 -0
- package/dist/git/changeTracker.test.js +184 -0
- package/dist/git/changeTracker.test.js.map +1 -0
- package/dist/graph/codeDocGraph.d.ts +43 -0
- package/dist/graph/codeDocGraph.js +103 -0
- package/dist/graph/codeDocGraph.js.map +1 -0
- package/dist/graph/codeDocGraph.test.d.ts +1 -0
- package/dist/graph/codeDocGraph.test.js +78 -0
- package/dist/graph/codeDocGraph.test.js.map +1 -0
- package/dist/graph/graphBuilder.d.ts +17 -0
- package/dist/graph/graphBuilder.js +76 -0
- package/dist/graph/graphBuilder.js.map +1 -0
- package/dist/graph/graphBuilder.test.d.ts +1 -0
- package/dist/graph/graphBuilder.test.js +87 -0
- package/dist/graph/graphBuilder.test.js.map +1 -0
- package/dist/index.d.ts +37 -0
- package/dist/index.js +37 -0
- package/dist/index.js.map +1 -0
- package/dist/parsers/documentParser.d.ts +22 -0
- package/dist/parsers/documentParser.js +76 -0
- package/dist/parsers/documentParser.js.map +1 -0
- package/dist/parsers/documentParser.test.d.ts +1 -0
- package/dist/parsers/documentParser.test.js +116 -0
- package/dist/parsers/documentParser.test.js.map +1 -0
- package/dist/parsers/extractors/baseExtractor.d.ts +19 -0
- package/dist/parsers/extractors/baseExtractor.js +33 -0
- package/dist/parsers/extractors/baseExtractor.js.map +1 -0
- package/dist/parsers/extractors/baseExtractor.test.d.ts +1 -0
- package/dist/parsers/extractors/baseExtractor.test.js +43 -0
- package/dist/parsers/extractors/baseExtractor.test.js.map +1 -0
- package/dist/parsers/extractors/codePatternExtractor.d.ts +13 -0
- package/dist/parsers/extractors/codePatternExtractor.js +108 -0
- package/dist/parsers/extractors/codePatternExtractor.js.map +1 -0
- package/dist/parsers/extractors/codePatternExtractor.test.d.ts +1 -0
- package/dist/parsers/extractors/codePatternExtractor.test.js +49 -0
- package/dist/parsers/extractors/codePatternExtractor.test.js.map +1 -0
- package/dist/parsers/extractors/dependencyExtractor.d.ts +12 -0
- package/dist/parsers/extractors/dependencyExtractor.js +92 -0
- package/dist/parsers/extractors/dependencyExtractor.js.map +1 -0
- package/dist/parsers/extractors/dependencyExtractor.test.d.ts +1 -0
- package/dist/parsers/extractors/dependencyExtractor.test.js +48 -0
- package/dist/parsers/extractors/dependencyExtractor.test.js.map +1 -0
- package/dist/parsers/extractors/directoryStructureExtractor.d.ts +34 -0
- package/dist/parsers/extractors/directoryStructureExtractor.js +168 -0
- package/dist/parsers/extractors/directoryStructureExtractor.js.map +1 -0
- package/dist/parsers/extractors/directoryStructureExtractor.test.d.ts +1 -0
- package/dist/parsers/extractors/directoryStructureExtractor.test.js +121 -0
- package/dist/parsers/extractors/directoryStructureExtractor.test.js.map +1 -0
- package/dist/parsers/extractors/externalUrlExtractor.d.ts +14 -0
- package/dist/parsers/extractors/externalUrlExtractor.js +53 -0
- package/dist/parsers/extractors/externalUrlExtractor.js.map +1 -0
- package/dist/parsers/extractors/externalUrlExtractor.test.d.ts +1 -0
- package/dist/parsers/extractors/externalUrlExtractor.test.js +85 -0
- package/dist/parsers/extractors/externalUrlExtractor.test.js.map +1 -0
- package/dist/parsers/extractors/filePathExtractor.d.ts +18 -0
- package/dist/parsers/extractors/filePathExtractor.js +72 -0
- package/dist/parsers/extractors/filePathExtractor.js.map +1 -0
- package/dist/parsers/extractors/filePathExtractor.test.d.ts +1 -0
- package/dist/parsers/extractors/filePathExtractor.test.js +73 -0
- package/dist/parsers/extractors/filePathExtractor.test.js.map +1 -0
- package/dist/parsers/extractors/versionExtractor.d.ts +11 -0
- package/dist/parsers/extractors/versionExtractor.js +74 -0
- package/dist/parsers/extractors/versionExtractor.js.map +1 -0
- package/dist/parsers/extractors/versionExtractor.test.d.ts +1 -0
- package/dist/parsers/extractors/versionExtractor.test.js +55 -0
- package/dist/parsers/extractors/versionExtractor.test.js.map +1 -0
- package/dist/plugins/plugin.d.ts +32 -0
- package/dist/plugins/plugin.js +40 -0
- package/dist/plugins/plugin.js.map +1 -0
- package/dist/plugins/plugin.test.d.ts +1 -0
- package/dist/plugins/plugin.test.js +23 -0
- package/dist/plugins/plugin.test.js.map +1 -0
- package/dist/reporters/consoleReporter.d.ts +15 -0
- package/dist/reporters/consoleReporter.js +73 -0
- package/dist/reporters/consoleReporter.js.map +1 -0
- package/dist/reporters/consoleReporter.test.d.ts +1 -0
- package/dist/reporters/consoleReporter.test.js +155 -0
- package/dist/reporters/consoleReporter.test.js.map +1 -0
- package/dist/reporters/enhancedReporter.d.ts +12 -0
- package/dist/reporters/enhancedReporter.js +81 -0
- package/dist/reporters/enhancedReporter.js.map +1 -0
- package/dist/reporters/enhancedReporter.test.d.ts +1 -0
- package/dist/reporters/enhancedReporter.test.js +152 -0
- package/dist/reporters/enhancedReporter.test.js.map +1 -0
- package/dist/reporters/jsonReporter.d.ts +11 -0
- package/dist/reporters/jsonReporter.js +20 -0
- package/dist/reporters/jsonReporter.js.map +1 -0
- package/dist/reporters/jsonReporter.test.d.ts +1 -0
- package/dist/reporters/jsonReporter.test.js +31 -0
- package/dist/reporters/jsonReporter.test.js.map +1 -0
- package/dist/reporters/markdownReporter.d.ts +11 -0
- package/dist/reporters/markdownReporter.js +55 -0
- package/dist/reporters/markdownReporter.js.map +1 -0
- package/dist/reporters/markdownReporter.test.d.ts +1 -0
- package/dist/reporters/markdownReporter.test.js +136 -0
- package/dist/reporters/markdownReporter.test.js.map +1 -0
- package/dist/runner.d.ts +9 -0
- package/dist/runner.js +265 -0
- package/dist/runner.js.map +1 -0
- package/dist/runner.test.d.ts +1 -0
- package/dist/runner.test.js +353 -0
- package/dist/runner.test.js.map +1 -0
- package/dist/scoring/freshnessScorer.d.ts +40 -0
- package/dist/scoring/freshnessScorer.js +170 -0
- package/dist/scoring/freshnessScorer.js.map +1 -0
- package/dist/scoring/freshnessScorer.test.d.ts +1 -0
- package/dist/scoring/freshnessScorer.test.js +397 -0
- package/dist/scoring/freshnessScorer.test.js.map +1 -0
- package/dist/semantic/vectorSearch.d.ts +84 -0
- package/dist/semantic/vectorSearch.js +484 -0
- package/dist/semantic/vectorSearch.js.map +1 -0
- package/dist/semantic/vectorSearch.test.d.ts +1 -0
- package/dist/semantic/vectorSearch.test.js +660 -0
- package/dist/semantic/vectorSearch.test.js.map +1 -0
- package/dist/setupTests.d.ts +4 -0
- package/dist/setupTests.js +11 -0
- package/dist/setupTests.js.map +1 -0
- package/dist/test-utils/console.d.ts +2 -0
- package/dist/test-utils/console.js +3 -0
- package/dist/test-utils/console.js.map +1 -0
- package/dist/test-utils/factories.d.ts +3 -0
- package/dist/test-utils/factories.js +25 -0
- package/dist/test-utils/factories.js.map +1 -0
- package/dist/test-utils/tempFiles.d.ts +1 -0
- package/dist/test-utils/tempFiles.js +12 -0
- package/dist/test-utils/tempFiles.js.map +1 -0
- package/dist/types.d.ts +304 -0
- package/dist/types.js +5 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/boundedMap.d.ts +8 -0
- package/dist/utils/boundedMap.js +22 -0
- package/dist/utils/boundedMap.js.map +1 -0
- package/dist/utils/boundedMap.test.d.ts +1 -0
- package/dist/utils/boundedMap.test.js +57 -0
- package/dist/utils/boundedMap.test.js.map +1 -0
- package/dist/utils/illustrativePatterns.d.ts +28 -0
- package/dist/utils/illustrativePatterns.js +80 -0
- package/dist/utils/illustrativePatterns.js.map +1 -0
- package/dist/utils/illustrativePatterns.test.d.ts +1 -0
- package/dist/utils/illustrativePatterns.test.js +48 -0
- package/dist/utils/illustrativePatterns.test.js.map +1 -0
- package/dist/utils/incremental.d.ts +36 -0
- package/dist/utils/incremental.js +87 -0
- package/dist/utils/incremental.js.map +1 -0
- package/dist/utils/incremental.test.d.ts +1 -0
- package/dist/utils/incremental.test.js +84 -0
- package/dist/utils/incremental.test.js.map +1 -0
- package/dist/utils/parallel.d.ts +14 -0
- package/dist/utils/parallel.js +43 -0
- package/dist/utils/parallel.js.map +1 -0
- package/dist/utils/parallel.test.d.ts +1 -0
- package/dist/utils/parallel.test.js +48 -0
- package/dist/utils/parallel.test.js.map +1 -0
- package/dist/utils/pathSecurity.d.ts +12 -0
- package/dist/utils/pathSecurity.js +22 -0
- package/dist/utils/pathSecurity.js.map +1 -0
- package/dist/utils/pathSecurity.test.d.ts +1 -0
- package/dist/utils/pathSecurity.test.js +34 -0
- package/dist/utils/pathSecurity.test.js.map +1 -0
- package/dist/utils/similarity.d.ts +12 -0
- package/dist/utils/similarity.js +64 -0
- package/dist/utils/similarity.js.map +1 -0
- package/dist/utils/similarity.test.d.ts +1 -0
- package/dist/utils/similarity.test.js +49 -0
- package/dist/utils/similarity.test.js.map +1 -0
- package/dist/utils/validation.d.ts +13 -0
- package/dist/utils/validation.js +24 -0
- package/dist/utils/validation.js.map +1 -0
- package/dist/utils/validation.test.d.ts +1 -0
- package/dist/utils/validation.test.js +28 -0
- package/dist/utils/validation.test.js.map +1 -0
- package/dist/validators/codePatternValidator.d.ts +28 -0
- package/dist/validators/codePatternValidator.js +200 -0
- package/dist/validators/codePatternValidator.js.map +1 -0
- package/dist/validators/codePatternValidator.test.d.ts +1 -0
- package/dist/validators/codePatternValidator.test.js +86 -0
- package/dist/validators/codePatternValidator.test.js.map +1 -0
- package/dist/validators/dependencyValidator.d.ts +12 -0
- package/dist/validators/dependencyValidator.js +102 -0
- package/dist/validators/dependencyValidator.js.map +1 -0
- package/dist/validators/dependencyValidator.test.d.ts +1 -0
- package/dist/validators/dependencyValidator.test.js +179 -0
- package/dist/validators/dependencyValidator.test.js.map +1 -0
- package/dist/validators/directoryValidator.d.ts +30 -0
- package/dist/validators/directoryValidator.js +192 -0
- package/dist/validators/directoryValidator.js.map +1 -0
- package/dist/validators/directoryValidator.test.d.ts +1 -0
- package/dist/validators/directoryValidator.test.js +193 -0
- package/dist/validators/directoryValidator.test.js.map +1 -0
- package/dist/validators/fileValidator.d.ts +16 -0
- package/dist/validators/fileValidator.js +114 -0
- package/dist/validators/fileValidator.js.map +1 -0
- package/dist/validators/fileValidator.test.d.ts +1 -0
- package/dist/validators/fileValidator.test.js +108 -0
- package/dist/validators/fileValidator.test.js.map +1 -0
- package/dist/validators/urlValidator.d.ts +25 -0
- package/dist/validators/urlValidator.js +320 -0
- package/dist/validators/urlValidator.js.map +1 -0
- package/dist/validators/urlValidator.test.d.ts +1 -0
- package/dist/validators/urlValidator.test.js +252 -0
- package/dist/validators/urlValidator.test.js.map +1 -0
- package/dist/validators/validationEngine.d.ts +23 -0
- package/dist/validators/validationEngine.js +117 -0
- package/dist/validators/validationEngine.js.map +1 -0
- package/dist/validators/validationEngine.test.d.ts +1 -0
- package/dist/validators/validationEngine.test.js +82 -0
- package/dist/validators/validationEngine.test.js.map +1 -0
- package/dist/validators/versionValidator.d.ts +18 -0
- package/dist/validators/versionValidator.js +211 -0
- package/dist/validators/versionValidator.js.map +1 -0
- package/dist/validators/versionValidator.test.d.ts +1 -0
- package/dist/validators/versionValidator.test.js +308 -0
- package/dist/validators/versionValidator.test.js.map +1 -0
- package/package.json +98 -0
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import { DirectoryStructureExtractor } from './directoryStructureExtractor.js';
|
|
2
|
+
/**
 * Builds a markdown document fixture around the supplied text.
 *
 * @param {string} content - Raw document text; it is also split on '\n' to fill `lines`.
 * @returns {object} Document-shaped object with fixed paths, 'markdown' format and no references.
 */
function makeDoc(content) {
    const lines = content.split('\n');
    const fixture = {
        path: 'docs/test.md',
        absolutePath: '/project/docs/test.md',
        content,
        format: 'markdown',
        lines,
        references: [],
    };
    return fixture;
}
|
|
12
|
+
// Test suite for DirectoryStructureExtractor: feeds ASCII directory trees
// (inside markdown fences, a reStructuredText code-block, or AsciiDoc
// delimiters) to `extract()` and checks the reference values it returns.
//
// Indentation inside the tree fixtures is significant and must be kept:
// a nested entry sits four columns in from its parent's connector, and the
// reStructuredText sample is indented three columns under its directive.
describe('DirectoryStructureExtractor', () => {
    const extractor = new DirectoryStructureExtractor();

    it('extracts paths from ASCII tree structures', () => {
        const tree = [
            '```',
            'src/',
            '├── index.ts',
            '├── utils/',
            '│   └── helper.ts',
            '└── config/',
            '    └── defaults.ts',
            '```',
        ].join('\n');
        const refs = extractor.extract(makeDoc(tree));
        const values = refs.map((r) => r.value);
        expect(values).toContain('src');
        expect(values).toContain('src/index.ts');
        expect(values).toContain('src/utils');
        expect(values).toContain('src/utils/helper.ts');
        expect(values).toContain('src/config');
        expect(values).toContain('src/config/defaults.ts');
    });

    it('ignores code blocks without tree characters', () => {
        const doc = makeDoc('```\nconst x = 1;\n```');
        expect(extractor.extract(doc)).toHaveLength(0);
    });

    it('marks illustrative paths', () => {
        const tree = ['```', 'project/', '├── YourComponent.tsx', '└── real-file.ts', '```'].join('\n');
        const refs = extractor.extract(makeDoc(tree));
        const illustrative = refs.filter((r) => r.isIllustrative);
        expect(illustrative.length).toBeGreaterThan(0);
    });

    it('skips comments and ellipsis entries', () => {
        const tree = ['```', 'src/', '├── ...', '├── # comment', '└── real.ts', '```'].join('\n');
        const refs = extractor.extract(makeDoc(tree));
        const values = refs.map((r) => r.value);
        // NOTE(review): other tests in this suite expect parent-prefixed values
        // (e.g. 'src/real.ts'), so a bare '...' or '# comment' may never appear
        // even if the entry were not skipped; confirm these checks can fail.
        expect(values).not.toContain('...');
        expect(values).not.toContain('# comment');
    });

    // NOTE(review): despite the title, this fixture uses box-drawing connectors;
    // the "`--" connector is exercised by 'handles backtick-dash connector in trees'.
    it('handles backtick-dash style trees', () => {
        const tree = ['```', 'root/', '├── file.ts', '└── nested/', '    └── deep.ts', '```'].join('\n');
        const refs = extractor.extract(makeDoc(tree));
        const values = refs.map((r) => r.value);
        expect(values).toContain('root/file.ts');
        expect(values).toContain('root/nested');
        expect(values).toContain('root/nested/deep.ts');
    });

    it('skips separator and dash entries', () => {
        const tree = ['```', 'src/', '├── -', '├── ---', '├── ___', '├── ===', '└── real.ts', '```'].join('\n');
        const refs = extractor.extract(makeDoc(tree));
        const values = refs.map((r) => r.value);
        // NOTE(review): as above, bare '-' / '---' would presumably be emitted
        // with a 'src/' prefix if they were extracted; '___' and '===' are not
        // asserted at all. Confirm whether stricter checks are wanted.
        expect(values).not.toContain('-');
        expect(values).not.toContain('---');
        expect(values).toContain('src/real.ts');
    });

    it('skips short single-segment paths', () => {
        const tree = ['```', 'ab', '├── long-name.ts', '```'].join('\n');
        const refs = extractor.extract(makeDoc(tree));
        const values = refs.map((r) => r.value);
        expect(values).not.toContain('ab');
        expect(values).toContain('ab/long-name.ts');
    });

    it('handles backtick-dash connector in trees', () => {
        const tree = ['```', 'project/', '├── src/', '│   └── index.ts', '`-- config.ts', '```'].join('\n');
        const refs = extractor.extract(makeDoc(tree));
        const values = refs.map((r) => r.value);
        expect(values).toContain('project');
        expect(values).toContain('project/config.ts');
    });

    it('extracts from restructuredtext format', () => {
        const content = [
            '.. code-block::',
            '',
            '   project/',
            '   ├── src/',
            '   │   └── main.ts',
            '   └── README.md',
        ].join('\n');
        const doc = {
            path: 'docs/test.rst',
            absolutePath: '/project/docs/test.rst',
            content,
            format: 'restructuredtext',
            lines: content.split('\n'),
            references: [],
        };
        const refs = extractor.extract(doc);
        expect(refs.length).toBeGreaterThan(0);
    });

    it('extracts from asciidoc format', () => {
        const content = ['----', 'project/', '├── src/', '│   └── main.ts', '----'].join('\n');
        const doc = {
            path: 'docs/test.adoc',
            absolutePath: '/project/docs/test.adoc',
            content,
            format: 'asciidoc',
            lines: content.split('\n'),
            references: [],
        };
        const refs = extractor.extract(doc);
        expect(refs.length).toBeGreaterThan(0);
    });

    it('handles plaintext format with fallback to markdown pattern', () => {
        const tree = ['```', 'app/', '├── main.py', '```'].join('\n');
        const doc = {
            path: 'docs/test.txt',
            absolutePath: '/project/docs/test.txt',
            content: tree,
            format: 'plaintext',
            lines: tree.split('\n'),
            references: [],
        };
        const refs = extractor.extract(doc);
        expect(refs.length).toBeGreaterThan(0);
    });

    it('uses custom illustrative patterns from config', () => {
        // A dedicated extractor instance so the custom rule does not leak into other tests.
        const ext = new DirectoryStructureExtractor({
            rules: { 'directory-structure': { illustrativePatterns: ['^custom-'] } },
        });
        const tree = ['```', 'project/', '├── custom-example.ts', '└── real.ts', '```'].join('\n');
        const refs = ext.extract(makeDoc(tree));
        const illustrative = refs.filter((r) => r.isIllustrative);
        expect(illustrative.some((r) => r.value.includes('custom-example'))).toBe(true);
    });
});
|
|
121
|
+
//# sourceMappingURL=directoryStructureExtractor.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"directoryStructureExtractor.test.js","sourceRoot":"","sources":["../../../src/parsers/extractors/directoryStructureExtractor.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,2BAA2B,EAAE,MAAM,kCAAkC,CAAC;AAG/E,SAAS,OAAO,CAAC,OAAe;IAC9B,OAAO;QACL,IAAI,EAAE,cAAc;QACpB,YAAY,EAAE,uBAAuB;QACrC,OAAO;QACP,MAAM,EAAE,UAAU;QAClB,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC;QAC1B,UAAU,EAAE,EAAE;KACf,CAAC;AACJ,CAAC;AAED,QAAQ,CAAC,6BAA6B,EAAE,GAAG,EAAE;IAC3C,MAAM,SAAS,GAAG,IAAI,2BAA2B,EAAE,CAAC;IAEpD,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,IAAI,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,cAAc,EAAE,YAAY,EAAE,mBAAmB,EAAE,aAAa,EAAE,qBAAqB,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAExI,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;QAC9C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QAChC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,qBAAqB,CAAC,CAAC;QAChD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,wBAAwB,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;QACrD,MAAM,GAAG,GAAG,OAAO,CAAC,wBAAwB,CAAC,CAAC;QAC9C,MAAM,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0BAA0B,EAAE,GAAG,EAAE;QAClC,MAAM,IAAI,GAAG,CAAC,KAAK,EAAE,UAAU,EAAE,uBAAuB,EAAE,kBAAkB,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEhG,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;QAC9C,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC;QAC1D,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,IAAI,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,eAAe,EAAE,aAAa,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE1F,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,C
AAC,CAAC;QAC9C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QACpC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,MAAM,IAAI,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,iBAAiB,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEjG,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;QAC9C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,qBAAqB,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC1C,MAAM,IAAI,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,aAAa,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAExG,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;QAC9C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QACpC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC1C,MAAM,IAAI,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,kBAAkB,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEjE,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;QAC9C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,IAAI,GAAG,CAAC,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,kBAAkB,EAAE,eAAe,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEpG,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;QAC9C,MAAM,MAAM,GAAG,IAAI,C
AAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;QACpC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,mBAAmB,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;QAC/C,MAAM,OAAO,GAAG,CAAC,iBAAiB,EAAE,EAAE,EAAE,aAAa,EAAE,aAAa,EAAE,oBAAoB,EAAE,kBAAkB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE3H,MAAM,GAAG,GAAG;YACV,IAAI,EAAE,eAAe;YACrB,YAAY,EAAE,wBAAwB;YACtC,OAAO;YACP,MAAM,EAAE,kBAA2B;YACnC,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC;YAC1B,UAAU,EAAE,EAAE;SACf,CAAC;QACF,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;QACvC,MAAM,OAAO,GAAG,CAAC,MAAM,EAAE,UAAU,EAAE,UAAU,EAAE,iBAAiB,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEvF,MAAM,GAAG,GAAG;YACV,IAAI,EAAE,gBAAgB;YACtB,YAAY,EAAE,yBAAyB;YACvC,OAAO;YACP,MAAM,EAAE,UAAmB;YAC3B,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC;YAC1B,UAAU,EAAE,EAAE;SACf,CAAC;QACF,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4DAA4D,EAAE,GAAG,EAAE;QACpE,MAAM,IAAI,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,aAAa,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE9D,MAAM,GAAG,GAAG;YACV,IAAI,EAAE,eAAe;YACrB,YAAY,EAAE,wBAAwB;YACtC,OAAO,EAAE,IAAI;YACb,MAAM,EAAE,WAAoB;YAC5B,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC;YACvB,UAAU,EAAE,EAAE;SACf,CAAC;QACF,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,GAAG,EAAE;QACvD,MAAM,GAAG,GAAG,IAAI,2BAA2B,CAAC;YAC1C,KAAK,EAAE,EAAE,qBAAqB,EAAE,EAAE,oBAAoB,EAAE,CAAC,UAAU,CAAC,EAAE,EAAE;SACzE,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,CAAC,KAAK,EAAE,UAAU,EAAE,uBAAuB,EAAE,aAAa,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE3F,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;QACxC,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,C
AAC,CAAC,CAAC,cAAc,CAAC,CAAC;QAC1D,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAClF,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { BaseExtractor } from './baseExtractor.js';
|
|
2
|
+
import type { Document, Reference } from '../../types.js';
|
|
3
|
+
/**
 * Extractor that collects external (http:// and https://) URL references
 * from a document.
 */
export declare class ExternalUrlExtractor extends BaseExtractor {
    constructor();
    /**
     * Scans the document and returns the URL references found in it.
     */
    extract(document: Document): Reference[];
    /**
     * Removes punctuation that trails a matched URL, keeping a closing
     * parenthesis when it is balanced by an opening one
     * (e.g. Wikipedia-style URLs like .../Example_(disambiguation)).
     */
    private cleanTrailingPunctuation;
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { BaseExtractor } from './baseExtractor.js';
|
|
2
|
+
/**
 * Extracts external URL references (http:// and https://) from a document.
 */
export class ExternalUrlExtractor extends BaseExtractor {
    constructor() {
        super('external-url');
    }
    /**
     * Finds every http(s) URL in the document content.
     *
     * @param {object} document - Document whose `content` string is scanned and
     *   whose `path` is recorded on each reference.
     * @returns {Array<object>} References in order of appearance, each with
     *   `type`, `value` (cleaned URL), `lineNumber`, `raw` (text as matched)
     *   and `sourceFile`.
     */
    extract(document) {
        const references = [];
        // A URL runs until whitespace or a character that cannot appear bare in
        // a URL and usually closes surrounding markup: > ] " ' or a backtick
        // (so a URL inside a Markdown inline-code span is not captured with
        // the closing backtick attached).
        const pattern = /https?:\/\/[^\s>\]"'`]+/g;
        let match;
        while ((match = pattern.exec(document.content)) !== null) {
            const url = this.cleanTrailingPunctuation(match[0]);
            // Cleaning never removes the scheme's final '/', so the result is
            // never empty; what must be skipped is a match that was nothing but
            // punctuation after the scheme (e.g. "https://...").
            if (/^https?:\/\/$/.test(url)) {
                continue;
            }
            references.push({
                type: this.type,
                value: url,
                lineNumber: this.findLineNumber(document.content, match.index),
                raw: match[0],
                sourceFile: document.path,
            });
        }
        return references;
    }
    /**
     * Strip trailing punctuation while preserving balanced parentheses
     * (handles Wikipedia-style URLs like .../Example_(disambiguation)).
     *
     * @param {string} url - URL text as matched.
     * @returns {string} The URL without trailing punctuation; a trailing ')'
     *   is removed only while closing parentheses outnumber opening ones.
     */
    cleanTrailingPunctuation(url) {
        const trailingPunctuation = '.,;:!?>}]\'"';
        // Count parentheses once: stripping never removes '(', and each
        // stripped ')' lowers the closing count by exactly one.
        const opens = url.split('(').length - 1;
        let closes = url.split(')').length - 1;
        let end = url.length;
        while (end > 0) {
            const last = url[end - 1];
            if (last === ')') {
                if (closes <= opens) {
                    break;
                }
                closes -= 1;
                end -= 1;
                continue;
            }
            if (!trailingPunctuation.includes(last)) {
                break;
            }
            end -= 1;
        }
        return url.slice(0, end);
    }
}
|
|
53
|
+
//# sourceMappingURL=externalUrlExtractor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"externalUrlExtractor.js","sourceRoot":"","sources":["../../../src/parsers/extractors/externalUrlExtractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAGnD;;GAEG;AACH,MAAM,OAAO,oBAAqB,SAAQ,aAAa;IACrD;QACE,KAAK,CAAC,cAAc,CAAC,CAAC;IACxB,CAAC;IAED,OAAO,CAAC,QAAkB;QACxB,MAAM,UAAU,GAAgB,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,yBAAyB,CAAC;QAE1C,IAAI,KAA6B,CAAC;QAClC,OAAO,CAAC,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACzD,IAAI,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACnB,GAAG,GAAG,IAAI,CAAC,wBAAwB,CAAC,GAAG,CAAC,CAAC;YAEzC,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACnB,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,KAAK,EAAE,GAAG;oBACV,UAAU,EAAE,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,CAAC;oBAC9D,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC;oBACb,UAAU,EAAE,QAAQ,CAAC,IAAI;iBAC1B,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;IAED;;;OAGG;IACK,wBAAwB,CAAC,GAAW;QAC1C,OAAO,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,MAAM,IAAI,GAAG,GAAG,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YAEjC,IAAI,IAAI,KAAK,GAAG,EAAE,CAAC;gBACjB,MAAM,KAAK,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;gBAC9C,MAAM,MAAM,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;gBAC/C,IAAI,MAAM,GAAG,KAAK,EAAE,CAAC;oBACnB,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;oBACvB,SAAS;gBACX,CAAC;gBACD,MAAM;YACR,CAAC;YAED,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBAClC,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;gBACvB,SAAS;YACX,CAAC;YAED,MAAM;QACR,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { ExternalUrlExtractor } from './externalUrlExtractor.js';
|
|
2
|
+
/**
 * Wraps raw text in a markdown document fixture for the extractor tests.
 *
 * @param {string} content - Document text; `lines` is this text split on '\n'.
 * @returns {object} Document-shaped object with fixed paths and an empty reference list.
 */
function makeDoc(content) {
    const splitLines = content.split('\n');
    return {
        path: 'docs/test.md',
        absolutePath: '/project/docs/test.md',
        content,
        format: 'markdown',
        lines: splitLines,
        references: [],
    };
}
|
|
12
|
+
// Test suite for ExternalUrlExtractor: URL detection, trailing-punctuation
// cleanup, parenthesis balancing and line-number reporting.
describe('ExternalUrlExtractor', () => {
    const extractor = new ExternalUrlExtractor();
    // Runs the shared extractor over a markdown fixture built from `text`.
    const extractFrom = (text) => extractor.extract(makeDoc(text));

    it('extracts HTTP and HTTPS URLs', () => {
        const found = extractFrom('Visit https://example.com and http://test.org/path');
        expect(found).toHaveLength(2);
        const [first, second] = found;
        expect(first.value).toBe('https://example.com');
        expect(second.value).toBe('http://test.org/path');
    });

    it('strips trailing punctuation', () => {
        const [first, second] = extractFrom('See https://example.com. Also https://test.org,');
        expect(first.value).toBe('https://example.com');
        expect(second.value).toBe('https://test.org');
    });

    it('preserves balanced parentheses in Wikipedia-style URLs', () => {
        const [only] = extractFrom('See https://en.wikipedia.org/wiki/Example_(disambiguation)');
        expect(only.value).toBe('https://en.wikipedia.org/wiki/Example_(disambiguation)');
    });

    it('strips unbalanced trailing parenthesis', () => {
        const [only] = extractFrom('(visit https://example.com)');
        expect(only.value).toBe('https://example.com');
    });

    it('sets correct line numbers', () => {
        const [only] = extractFrom('line1\nhttps://example.com\nline3');
        expect(only.lineNumber).toBe(2);
    });

    it('extracts URLs with query params and fragments', () => {
        const [only] = extractFrom('https://example.com/page?foo=bar&baz=1#section');
        expect(only.value).toBe('https://example.com/page?foo=bar&baz=1#section');
    });

    it('strips multiple trailing punctuation characters', () => {
        const [only] = extractFrom('See https://example.com/path...');
        expect(only.value).toBe('https://example.com/path');
    });

    it('handles URL ending with semicolon and colon', () => {
        const [first, second] = extractFrom('Visit https://example.com/page; and https://example.com/other:');
        expect(first.value).toBe('https://example.com/page');
        expect(second.value).toBe('https://example.com/other');
    });

    it('handles multiple unbalanced trailing parens', () => {
        const [only] = extractFrom('(see (https://example.com))');
        expect(only.value).toBe('https://example.com');
    });

    it('preserves URL with balanced nested parens', () => {
        const [only] = extractFrom('https://en.wikipedia.org/wiki/A_(B_(C))');
        expect(only.value).toBe('https://en.wikipedia.org/wiki/A_(B_(C))');
    });

    it('strips trailing bracket characters', () => {
        const [only] = extractFrom('[https://example.com/page]');
        expect(only.value).toBe('https://example.com/page');
    });

    it('strips trailing single and double quotes', () => {
        const [only] = extractFrom("see 'https://example.com/page'");
        expect(only.value).toBe('https://example.com/page');
    });

    it('strips trailing exclamation and question marks', () => {
        const [first, second] = extractFrom('Visit https://example.com/page! or https://example.com/other?');
        expect(first.value).toBe('https://example.com/page');
        expect(second.value).toBe('https://example.com/other');
    });
});
|
|
85
|
+
//# sourceMappingURL=externalUrlExtractor.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"externalUrlExtractor.test.js","sourceRoot":"","sources":["../../../src/parsers/extractors/externalUrlExtractor.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,2BAA2B,CAAC;AAGjE,SAAS,OAAO,CAAC,OAAe;IAC9B,OAAO;QACL,IAAI,EAAE,cAAc;QACpB,YAAY,EAAE,uBAAuB;QACrC,OAAO;QACP,MAAM,EAAE,UAAU;QAClB,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC;QAC1B,UAAU,EAAE,EAAE;KACf,CAAC;AACJ,CAAC;AAED,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;IACpC,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAE7C,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,GAAG,GAAG,OAAO,CAAC,oDAAoD,CAAC,CAAC;QAC1E,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QAClD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,GAAG,GAAG,OAAO,CAAC,iDAAiD,CAAC,CAAC;QACvE,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QAClD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wDAAwD,EAAE,GAAG,EAAE;QAChE,MAAM,GAAG,GAAG,OAAO,CAAC,4DAA4D,CAAC,CAAC;QAClF,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,wDAAwD,CAAC,CAAC;IACvF,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,MAAM,GAAG,GAAG,OAAO,CAAC,6BAA6B,CAAC,CAAC;QACnD,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACnC,MAAM,GAAG,GAAG,OAAO,CAAC,mCAAmC,CAAC,CAAC;QACzD,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,GAAG,EAAE;QACvD,MAAM,GAAG,GAAG,OAAO,CAAC
,gDAAgD,CAAC,CAAC;QACtE,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,gDAAgD,CAAC,CAAC;IAC/E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iDAAiD,EAAE,GAAG,EAAE;QACzD,MAAM,GAAG,GAAG,OAAO,CAAC,iCAAiC,CAAC,CAAC;QACvD,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;QACrD,MAAM,GAAG,GAAG,OAAO,CAAC,gEAAgE,CAAC,CAAC;QACtF,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;QACvD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;QACrD,MAAM,GAAG,GAAG,OAAO,CAAC,6BAA6B,CAAC,CAAC;QACnD,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,GAAG,GAAG,OAAO,CAAC,yCAAyC,CAAC,CAAC;QAC/D,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;IACxE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;QAC5C,MAAM,GAAG,GAAG,OAAO,CAAC,4BAA4B,CAAC,CAAC;QAClD,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,GAAG,GAAG,OAAO,CAAC,gCAAgC,CAAC,CAAC;QACtD,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,MAAM,GAAG,GAAG,OAAO,CAAC,+DAA+D,CAAC,CAAC;QACrF,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;QACvD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK
,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { BaseExtractor } from './baseExtractor.js';
|
|
2
|
+
import type { Document, Reference } from '../../types.js';
|
|
3
|
+
/**
|
|
4
|
+
* Extracts file/path references from documentation links
|
|
5
|
+
* Supports: Markdown, RST, AsciiDoc link formats
|
|
6
|
+
*/
|
|
7
|
+
export declare class FilePathExtractor extends BaseExtractor {
|
|
8
|
+
constructor();
|
|
9
|
+
extract(document: Document): Reference[];
|
|
10
|
+
/**
|
|
11
|
+
* Extract line number reference from a file path and return the clean path
|
|
12
|
+
* Examples:
|
|
13
|
+
* "../src/file.ts:1" -> { path: "../src/file.ts", lineRef: "1" }
|
|
14
|
+
* "../src/file.ts:26-38" -> { path: "../src/file.ts", lineRef: "26-38" }
|
|
15
|
+
* "../src/file.ts#L123" -> { path: "../src/file.ts", lineRef: "123" }
|
|
16
|
+
*/
|
|
17
|
+
private extractLineReference;
|
|
18
|
+
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { BaseExtractor } from './baseExtractor.js';
|
|
2
|
+
/**
 * Line number suffix patterns that should be stripped from file paths
 * Supports: :N, :N-M, :N-, #LN, #LN-LM, #LN-M (GitHub-style)
 *
 * Both patterns are anchored at the end of the string and carry no `g` flag,
 * so they keep no `lastIndex` state and are safe to share between calls.
 */
const LINE_NUMBER_PATTERNS = [
    /:[0-9]+(?:-[0-9]*)?$/, // :1, :26-38, :10-
    /#L[0-9]+(?:-L?[0-9]+)?$/, // #L123, #L123-L456, #L123-456
];
/**
 * Extracts file/path references from documentation links
 * Supports: Markdown, RST, AsciiDoc link formats
 */
export class FilePathExtractor extends BaseExtractor {
    constructor() {
        super('file-path');
    }
    /**
     * Scan a document's content for links that point at files.
     *
     * The link syntax is chosen from `document.format`; any format other than
     * "restructuredtext" or "asciidoc" is scanned with the Markdown pattern.
     * Targets starting with http:// or https:// and pure anchors ("#...") are
     * skipped. A trailing line-number suffix is removed from the path and
     * reported separately as `lineRef`.
     *
     * @param document - object providing `content`, `format` and `path`
     * @returns one reference per matching link, in document order, with the
     *   fields `type`, `value` (clean path), `linkText`, `lineNumber`, `raw`,
     *   `sourceFile` and, only when a suffix was present, `lineRef`
     */
    extract(document) {
        const references = [];
        // Format-specific patterns. They are built on every call on purpose:
        // a `g` regex is stateful through `lastIndex`, so a shared instance
        // could resume in the middle of the next document.
        const patterns = {
            markdown: /\[([^\]]*)\]\((\.\.[/\\][^)]+|\.\/[^)]+|[a-zA-Z0-9_\-/\\]+\.[a-zA-Z]{1,10})\)/g,
            restructuredtext: /`([^`]+)\s+<([^>]+)>`_/g,
            asciidoc: /link:([^[]+)\[([^\]]*)\]/g,
        };
        const pattern = patterns[document.format] || patterns.markdown;
        // AsciiDoc writes target-then-text; Markdown and RST write text-then-target.
        const targetFirst = document.format === 'asciidoc';
        let match;
        while ((match = pattern.exec(document.content)) !== null) {
            const refPath = targetFirst ? match[1] : match[2];
            const linkText = targetFirst ? match[2] : match[1];
            // Skip URLs
            if (refPath.startsWith('http://') || refPath.startsWith('https://')) {
                continue;
            }
            // Skip anchors
            if (refPath.startsWith('#')) {
                continue;
            }
            // Extract and strip line number suffixes
            const { path: cleanPath, lineRef } = this.extractLineReference(refPath);
            references.push({
                type: this.type,
                value: cleanPath,
                linkText,
                // findLineNumber is not defined in this class; presumably
                // inherited from BaseExtractor.
                lineNumber: this.findLineNumber(document.content, match.index),
                raw: match[0],
                sourceFile: document.path,
                // Store the line reference metadata if present
                ...(lineRef && { lineRef }),
            });
        }
        return references;
    }
    /**
     * Extract line number reference from a file path and return the clean path.
     *
     * The returned `lineRef` never carries the ":" / "#" separator or the
     * GitHub "L" markers, so every range form comes back in the same "N-M"
     * shape regardless of which syntax the link used.
     *
     * Examples:
     *   "../src/file.ts:1"         -> { path: "../src/file.ts", lineRef: "1" }
     *   "../src/file.ts:26-38"     -> { path: "../src/file.ts", lineRef: "26-38" }
     *   "../src/file.ts:10-"       -> { path: "../src/file.ts", lineRef: "10-" }
     *   "../src/file.ts#L123"      -> { path: "../src/file.ts", lineRef: "123" }
     *   "../src/file.ts#L123-L456" -> { path: "../src/file.ts", lineRef: "123-456" }
     *   "../src/file.ts"           -> { path: "../src/file.ts" }
     *
     * @param refPath - link target as written in the document
     * @returns `{ path, lineRef }` when a suffix was found, else `{ path }`
     */
    extractLineReference(refPath) {
        for (const pattern of LINE_NUMBER_PATTERNS) {
            const match = pattern.exec(refPath);
            if (match) {
                const lineRef = match[0]
                    .replace(/^[:#]L?/, '') // Remove leading :, #, or #L
                    .replace('-L', '-'); // "#L1-L9" ranges: drop the second "L" as well
                // The patterns are end-anchored, so everything before the
                // match is exactly the path without its suffix.
                return { path: refPath.slice(0, match.index), lineRef };
            }
        }
        return { path: refPath };
    }
}
|
|
72
|
+
//# sourceMappingURL=filePathExtractor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"filePathExtractor.js","sourceRoot":"","sources":["../../../src/parsers/extractors/filePathExtractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAKnD;;;GAGG;AACH,MAAM,oBAAoB,GAAG;IAC3B,sBAAsB,EAAE,mBAAmB;IAC3C,yBAAyB,EAAE,+BAA+B;CAC3D,CAAC;AAEF;;;GAGG;AACH,MAAM,OAAO,iBAAkB,SAAQ,aAAa;IAClD;QACE,KAAK,CAAC,WAAW,CAAC,CAAC;IACrB,CAAC;IAED,OAAO,CAAC,QAAkB;QACxB,MAAM,UAAU,GAAgB,EAAE,CAAC;QAEnC,2BAA2B;QAC3B,MAAM,QAAQ,GAAe;YAC3B,QAAQ,EAAE,gFAAgF;YAC1F,gBAAgB,EAAE,yBAAyB;YAC3C,QAAQ,EAAE,2BAA2B;SACtC,CAAC;QAEF,MAAM,OAAO,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC;QAE/D,IAAI,KAA6B,CAAC;QAClC,OAAO,CAAC,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACzD,MAAM,OAAO,GAAG,QAAQ,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAErE,YAAY;YACZ,IAAI,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,OAAO,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;gBACpE,SAAS;YACX,CAAC;YAED,eAAe;YACf,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC5B,SAAS;YACX,CAAC;YAED,yCAAyC;YACzC,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,GAAG,IAAI,CAAC,oBAAoB,CAAC,OAAO,CAAC,CAAC;YAExE,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,KAAK,EAAE,SAAS;gBAChB,QAAQ,EAAE,QAAQ,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;gBAC9D,UAAU,EAAE,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,CAAC;gBAC9D,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC;gBACb,UAAU,EAAE,QAAQ,CAAC,IAAI;gBACzB,+CAA+C;gBAC/C,GAAG,CAAC,OAAO,IAAI,EAAE,OAAO,EAAE,CAAC;aAC5B,CAAC,CAAC;QACL,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;IAED;;;;;;OAMG;IACK,oBAAoB,CAAC,OAAe;QAC1C,KAAK,MAAM,OAAO,IAAI,oBAAoB,EAAE,CAAC;YAC3C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACrC,IAAI,KAAK,EAAE,CAAC;gBACV,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,CAAC,6BAA6B;gBAC9E,MAAM,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;gBAC/C,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,CAAC;YACtC,CAAC;QACH,C
AAC;QACD,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;IAC3B,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { FilePathExtractor } from './filePathExtractor.js';
|
|
2
|
+
/**
 * Build a document fixture for the extractor tests.
 *
 * @param content - raw document text; also split on "\n" to fill `lines`
 * @param format - document format tag, "markdown" when omitted
 * @returns a document object with fixed paths and an empty reference list
 */
function makeDoc(content, format = 'markdown') {
    const lines = content.split('\n');
    const fixture = {
        path: 'docs/test.md',
        absolutePath: '/project/docs/test.md',
        content,
        format,
        lines,
        references: [],
    };
    return fixture;
}
|
|
12
|
+
// Behavioural suite for FilePathExtractor, grouped by document format.
describe('FilePathExtractor', () => {
    const extractor = new FilePathExtractor();
    // Runs the extractor on a fixture; an omitted format falls back to
    // makeDoc's default ("markdown").
    const extractFrom = (content, format) => extractor.extract(makeDoc(content, format));

    it('has type "file-path"', () => {
        expect(extractor.type).toBe('file-path');
    });

    describe('markdown format', () => {
        it('extracts relative file paths from links', () => {
            const found = extractFrom('See [config](./config/loader.ts) and [readme](../README.md)');
            expect(found).toHaveLength(2);
            expect(found[0].value).toBe('./config/loader.ts');
            expect(found[1].value).toBe('../README.md');
        });

        it('extracts paths with extensions', () => {
            const found = extractFrom('Check [file](src/utils/helper.ts)');
            expect(found).toHaveLength(1);
            expect(found[0].value).toBe('src/utils/helper.ts');
        });

        it('skips URLs and anchors', () => {
            const found = extractFrom('[link](https://example.com) [anchor](#heading)');
            expect(found).toHaveLength(0);
        });

        it('strips line number suffixes and stores lineRef', () => {
            const found = extractFrom('[file](../src/file.ts:26-38)');
            expect(found).toHaveLength(1);
            expect(found[0].value).toBe('../src/file.ts');
            expect(found[0].lineRef).toBe('26-38');
        });

        it('handles GitHub-style line references', () => {
            const found = extractFrom('[file](../src/file.ts#L123)');
            expect(found[0].value).toBe('../src/file.ts');
            expect(found[0].lineRef).toBe('123');
        });

        it('preserves linkText', () => {
            const found = extractFrom('[My Link Text](./file.ts)');
            expect(found[0].linkText).toBe('My Link Text');
        });
    });

    describe('restructuredtext format', () => {
        it('extracts RST link references', () => {
            const found = extractFrom('`Configuration <../config.rst>`_', 'restructuredtext');
            expect(found).toHaveLength(1);
            expect(found[0].value).toBe('../config.rst');
        });
    });

    describe('asciidoc format', () => {
        it('extracts AsciiDoc link references', () => {
            const found = extractFrom('link:./config.adoc[Configuration]', 'asciidoc');
            expect(found).toHaveLength(1);
            expect(found[0].value).toBe('./config.adoc');
            expect(found[0].linkText).toBe('Configuration');
        });
    });
});
|
|
73
|
+
//# sourceMappingURL=filePathExtractor.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"filePathExtractor.test.js","sourceRoot":"","sources":["../../../src/parsers/extractors/filePathExtractor.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAG3D,SAAS,OAAO,CAAC,OAAe,EAAE,SAAuD,UAAU;IACjG,OAAO;QACL,IAAI,EAAE,cAAc;QACpB,YAAY,EAAE,uBAAuB;QACrC,OAAO;QACP,MAAM;QACN,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC;QAC1B,UAAU,EAAE,EAAE;KACf,CAAC;AACJ,CAAC;AAED,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;IACjC,MAAM,SAAS,GAAG,IAAI,iBAAiB,EAAE,CAAC;IAE1C,EAAE,CAAC,sBAAsB,EAAE,GAAG,EAAE;QAC9B,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;QAC/B,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;YACjD,MAAM,GAAG,GAAG,OAAO,CAAC,6DAA6D,CAAC,CAAC;YACnF,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;YACjD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;YACxC,MAAM,GAAG,GAAG,OAAO,CAAC,mCAAmC,CAAC,CAAC;YACzD,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QACpD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wBAAwB,EAAE,GAAG,EAAE;YAChC,MAAM,GAAG,GAAG,OAAO,CAAC,gDAAgD,CAAC,CAAC;YACtE,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;YACxD,MAAM,GAAG,GAAG,OAAO,CAAC,8BAA8B,CAAC,CAAC;YACpD,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;YAC7C,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACxC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;YAC9C,MAAM,GAAG,GAAG,OAAO,CAAC,
6BAA6B,CAAC,CAAC;YACnD,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;YAC7C,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACtC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,oBAAoB,EAAE,GAAG,EAAE;YAC5B,MAAM,GAAG,GAAG,OAAO,CAAC,2BAA2B,CAAC,CAAC;YACjD,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAChD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACvC,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;YACtC,MAAM,GAAG,GAAG,OAAO,CAAC,kCAAkC,EAAE,kBAAkB,CAAC,CAAC;YAC5E,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAC9C,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;QAC/B,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;YAC3C,MAAM,GAAG,GAAG,OAAO,CAAC,mCAAmC,EAAE,UAAU,CAAC,CAAC;YACrE,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;YAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { BaseExtractor } from './baseExtractor.js';
|
|
2
|
+
import type { Document, DocFreshnessConfig, Reference } from '../../types.js';
|
|
3
|
+
/**
|
|
4
|
+
* Extracts version references
|
|
5
|
+
* Configurable technology list for any stack
|
|
6
|
+
*/
|
|
7
|
+
export declare class VersionExtractor extends BaseExtractor {
|
|
8
|
+
private technologies;
|
|
9
|
+
constructor(config?: Partial<DocFreshnessConfig>);
|
|
10
|
+
extract(document: Document): Reference[];
|
|
11
|
+
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import { BaseExtractor } from './baseExtractor.js';
|
|
2
|
+
/**
 * Extracts version references ("<technology> <version>" mentions).
 * Configurable technology list for any stack.
 *
 * Each entry of `technologies` is used as a regular-expression source
 * fragment (the entries are joined with "|" into one alternation), so an
 * entry may contain regex syntax such as `Next\\.?js`.
 */
export class VersionExtractor extends BaseExtractor {
    technologies;
    /**
     * @param config - optional settings; `config.technologies`, when truthy,
     *   replaces the built-in technology list entirely
     */
    constructor(config = {}) {
        super('version');
        // Default technologies - can be extended via config
        this.technologies = config.technologies || [
            // JavaScript/TypeScript ecosystem
            // "Node", "Nodejs" and "Node.js". The "js" suffix is optional as a
            // whole; the previous form `Node\.?js?` demanded a literal "j" and
            // therefore never matched the plain "Node 18" spelling.
            'Node(?:\\.?js)?',
            'npm',
            'yarn',
            'pnpm',
            'React',
            'Vue',
            'Angular',
            'TypeScript',
            'Express',
            'Vite',
            'webpack',
            'Next\\.?js',
            'Nuxt',
            // Python ecosystem
            'Python',
            'pip',
            'Django',
            'Flask',
            'FastAPI',
            // Go ecosystem
            'Go',
            'Golang',
            // Rust ecosystem
            'Rust',
            'Cargo',
            // Java ecosystem
            'Java',
            'Maven',
            'Gradle',
            'Spring',
            // Databases
            'PostgreSQL',
            'MySQL',
            'MongoDB',
            'Redis',
            // Other
            'Docker',
            'Kubernetes',
            'Terraform',
        ];
    }
    /**
     * Find every "<technology> <version>" mention in the document content.
     *
     * Matching is case-insensitive. The version may carry a leading "v" and
     * has one to three numeric components, optionally followed by ".x".
     *
     * @param document - object providing `content` and `path`
     * @returns one reference per match, in document order, with the fields
     *   `type`, `technology`, `version`, `value`, `lineNumber`, `raw` and
     *   `sourceFile`; empty when no technology is configured
     */
    extract(document) {
        const references = [];
        // An empty alternative would let the pattern match any number that
        // follows whitespace, so empty entries are dropped and an empty list
        // yields no references at all.
        const fragments = this.technologies.filter((fragment) => fragment !== '');
        if (fragments.length === 0) {
            return references;
        }
        const techPattern = fragments.join('|');
        // Pattern: "Technology 19.x" or "Technology 19.2.3"
        // Built per call: a `g` regex keeps `lastIndex` state between exec() calls.
        const pattern = new RegExp(`\\b(${techPattern})\\s+v?(\\d+(?:\\.\\d+)?(?:\\.\\d+)?(?:\\.x)?)\\b`, 'gi');
        let match;
        while ((match = pattern.exec(document.content)) !== null) {
            references.push({
                type: this.type,
                technology: match[1],
                version: match[2],
                value: match[0],
                // findLineNumber is not defined in this class; presumably
                // inherited from BaseExtractor.
                lineNumber: this.findLineNumber(document.content, match.index),
                raw: match[0],
                sourceFile: document.path,
            });
        }
        return references;
    }
}
|
|
74
|
+
//# sourceMappingURL=versionExtractor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"versionExtractor.js","sourceRoot":"","sources":["../../../src/parsers/extractors/versionExtractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAGnD;;;GAGG;AACH,MAAM,OAAO,gBAAiB,SAAQ,aAAa;IACzC,YAAY,CAAW;IAE/B,YAAY,SAAsC,EAAE;QAClD,KAAK,CAAC,SAAS,CAAC,CAAC;QACjB,oDAAoD;QACpD,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI;YACzC,kCAAkC;YAClC,aAAa;YACb,KAAK;YACL,MAAM;YACN,MAAM;YACN,OAAO;YACP,KAAK;YACL,SAAS;YACT,YAAY;YACZ,SAAS;YACT,MAAM;YACN,SAAS;YACT,YAAY;YACZ,MAAM;YACN,mBAAmB;YACnB,QAAQ;YACR,KAAK;YACL,QAAQ;YACR,OAAO;YACP,SAAS;YACT,eAAe;YACf,IAAI;YACJ,QAAQ;YACR,iBAAiB;YACjB,MAAM;YACN,OAAO;YACP,iBAAiB;YACjB,MAAM;YACN,OAAO;YACP,QAAQ;YACR,QAAQ;YACR,YAAY;YACZ,YAAY;YACZ,OAAO;YACP,SAAS;YACT,OAAO;YACP,QAAQ;YACR,QAAQ;YACR,YAAY;YACZ,WAAW;SACZ,CAAC;IACJ,CAAC;IAED,OAAO,CAAC,QAAkB;QACxB,MAAM,UAAU,GAAgB,EAAE,CAAC;QACnC,MAAM,WAAW,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEhD,oDAAoD;QACpD,MAAM,OAAO,GAAG,IAAI,MAAM,CAAC,OAAO,WAAW,mDAAmD,EAAE,IAAI,CAAC,CAAC;QAExG,IAAI,KAA6B,CAAC;QAClC,OAAO,CAAC,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACzD,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,UAAU,EAAE,KAAK,CAAC,CAAC,CAAC;gBACpB,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;gBACjB,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC;gBACf,UAAU,EAAE,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,CAAC;gBAC9D,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC;gBACb,UAAU,EAAE,QAAQ,CAAC,IAAI;aAC1B,CAAC,CAAC;QACL,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { VersionExtractor } from './versionExtractor.js';
|
|
2
|
+
/**
 * Build a markdown document fixture for the extractor tests.
 *
 * @param content - raw document text; also split on "\n" to fill `lines`
 * @returns a document object with fixed paths and an empty reference list
 */
function makeDoc(content) {
    const lines = content.split('\n');
    const fixture = {
        path: 'docs/test.md',
        absolutePath: '/project/docs/test.md',
        content,
        format: 'markdown',
        lines,
        references: [],
    };
    return fixture;
}
|
|
12
|
+
// Behavioural suite for VersionExtractor: default technology list, "v"
// prefixes, ".x" wildcards and a caller-supplied technology list.
describe('VersionExtractor', () => {
    const extractor = new VersionExtractor();
    // Runs the default-configured extractor on a fixture built from raw text.
    const extractFrom = (text) => extractor.extract(makeDoc(text));

    it('extracts technology version references', () => {
        const found = extractFrom('Requires Node.js 18.0.0 and TypeScript 5.0');
        expect(found).toHaveLength(2);
        expect(found[0]).toMatchObject({ technology: 'Node.js', version: '18.0.0' });
        expect(found[1]).toMatchObject({ technology: 'TypeScript', version: '5.0' });
    });

    it('handles v-prefixed versions', () => {
        const found = extractFrom('Uses React v18.2.0');
        expect(found).toHaveLength(1);
        expect(found[0].version).toBe('18.2.0');
    });

    it('handles .x wildcard versions', () => {
        const found = extractFrom('Supports Python 3.x');
        expect(found).toHaveLength(1);
        expect(found[0].version).toBe('3.x');
    });

    it('respects custom technologies config', () => {
        const custom = new VersionExtractor({ technologies: ['CustomTool'] });
        const found = custom.extract(makeDoc('Uses CustomTool 2.0 and React 18.0'));
        expect(found).toHaveLength(1);
        expect(found[0].technology).toBe('CustomTool');
    });

    it('does not extract versions without known technology prefix', () => {
        const found = extractFrom('Version 1.0.0 of something');
        expect(found).toHaveLength(0);
    });

    it.each([
        ['Docker 24.0', 'Docker', '24.0'],
        ['PostgreSQL 16', 'PostgreSQL', '16'],
        ['Go 1.21', 'Go', '1.21'],
        ['Redis 7.2.1', 'Redis', '7.2.1'],
    ])('extracts %s => tech=%s version=%s', (input, tech, version) => {
        const found = extractFrom(input);
        expect(found[0]).toMatchObject({ technology: tech, version });
    });
});
|
|
55
|
+
//# sourceMappingURL=versionExtractor.test.js.map
|