@vibe-agent-toolkit/resources 0.1.0-rc.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +646 -0
- package/dist/index.d.ts +33 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +37 -0
- package/dist/index.js.map +1 -0
- package/dist/link-parser.d.ts +37 -0
- package/dist/link-parser.d.ts.map +1 -0
- package/dist/link-parser.js +327 -0
- package/dist/link-parser.js.map +1 -0
- package/dist/link-validator.d.ts +30 -0
- package/dist/link-validator.d.ts.map +1 -0
- package/dist/link-validator.js +217 -0
- package/dist/link-validator.js.map +1 -0
- package/dist/resource-registry.d.ts +278 -0
- package/dist/resource-registry.d.ts.map +1 -0
- package/dist/resource-registry.js +468 -0
- package/dist/resource-registry.js.map +1 -0
- package/dist/schemas/resource-metadata.d.ts +137 -0
- package/dist/schemas/resource-metadata.d.ts.map +1 -0
- package/dist/schemas/resource-metadata.js +61 -0
- package/dist/schemas/resource-metadata.js.map +1 -0
- package/dist/schemas/validation-result.d.ts +124 -0
- package/dist/schemas/validation-result.d.ts.map +1 -0
- package/dist/schemas/validation-result.js +47 -0
- package/dist/schemas/validation-result.js.map +1 -0
- package/dist/types.d.ts +15 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +14 -0
- package/dist/types.js.map +1 -0
- package/dist/utils.d.ts +18 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +26 -0
- package/dist/utils.js.map +1 -0
- package/package.json +60 -0
- package/src/index.ts +66 -0
- package/src/link-parser.ts +371 -0
- package/src/link-validator.ts +275 -0
- package/src/resource-registry.ts +559 -0
- package/src/schemas/resource-metadata.ts +86 -0
- package/src/schemas/validation-result.ts +55 -0
- package/src/types.ts +27 -0
- package/src/utils.ts +27 -0
|
@@ -0,0 +1,559 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Resource registry for managing collections of markdown resources.
|
|
3
|
+
*
|
|
4
|
+
* The registry maintains a collection of parsed markdown resources and provides:
|
|
5
|
+
* - Resource addition and crawling
|
|
6
|
+
* - Link validation across the registry
|
|
7
|
+
* - Link resolution (setting resolvedId for local_file links)
|
|
8
|
+
* - Query capabilities (by path, ID, or glob pattern)
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import path from 'node:path';
|
|
12
|
+
|
|
13
|
+
import { crawlDirectory, toUnixPath, type CrawlOptions as UtilsCrawlOptions } from '@vibe-agent-toolkit/utils';
|
|
14
|
+
import picomatch from 'picomatch';
|
|
15
|
+
|
|
16
|
+
import { parseMarkdown } from './link-parser.js';
|
|
17
|
+
import { validateLink } from './link-validator.js';
|
|
18
|
+
import type { HeadingNode, ResourceMetadata } from './schemas/resource-metadata.js';
|
|
19
|
+
import type { ValidationIssue, ValidationResult } from './schemas/validation-result.js';
|
|
20
|
+
import { splitHrefAnchor } from './utils.js';
|
|
21
|
+
|
|
22
|
+
/**
 * Settings accepted by {@link ResourceRegistry.crawl} when discovering
 * markdown files under a directory.
 */
export interface CrawlOptions {
  /** Directory at which the crawl starts. */
  baseDir: string;

  /**
   * Glob patterns selecting the files to add.
   * When omitted, `crawl()` falls back to all `.md` files.
   */
  include?: string[];

  /**
   * Glob patterns selecting the files to skip.
   * When omitted, `crawl()` skips `node_modules`, `.git` and `dist`.
   */
  exclude?: string[];

  /**
   * Whether symbolic links are followed while crawling.
   * When omitted, `crawl()` does not follow them.
   */
  followSymlinks?: boolean;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Construction-time settings for {@link ResourceRegistry}.
 */
export interface ResourceRegistryOptions {
  /**
   * When `true`, every resource has its links validated as part of being
   * added. The constructor treats an omitted value as `false`.
   */
  validateOnAdd?: boolean;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Aggregate counts describing the current contents of a registry,
 * as returned by {@link ResourceRegistry.getStats}.
 */
export interface RegistryStats {
  /** Number of resources currently registered. */
  totalResources: number;

  /** Number of links summed over all registered resources. */
  totalLinks: number;

  /** Number of links per link type, keyed by the link's `type` value. */
  linksByType: Record<string, number>;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Resource registry for managing collections of markdown resources.
 *
 * Provides centralized management of markdown resources with:
 * - Automatic parsing and ID generation
 * - Link validation across the registry
 * - Link resolution between resources
 * - Query capabilities (by path, ID, or glob pattern)
 *
 * Every resource is indexed twice: by its absolute file path and by its
 * generated ID. Both indexes always refer to the same metadata objects.
 *
 * @example
 * ```typescript
 * const registry = new ResourceRegistry();
 *
 * // Add resources
 * await registry.addResource('/project/README.md');
 * await registry.crawl({ baseDir: '/project/docs' });
 *
 * // Validate all links
 * const result = await registry.validate();
 * console.log(`Found ${result.errorCount} errors`);
 *
 * // Resolve links between resources
 * registry.resolveLinks();
 *
 * // Query resources
 * const readme = registry.getResourceById('readme');
 * const docs = registry.getResourcesByPattern('docs/**');
 * ```
 */
export class ResourceRegistry {
  /**
   * Maximum number of trailing path segments that
   * {@link ResourceRegistry.getResourcesByPattern} tries to match a pattern against.
   */
  private static readonly MAX_PATTERN_TAIL_SEGMENTS = 10;

  /** Resources keyed by absolute file path. */
  private readonly resourcesByPath: Map<string, ResourceMetadata> = new Map();

  /** Resources keyed by generated ID. */
  private readonly resourcesById: Map<string, ResourceMetadata> = new Map();

  /** Whether `addResource` validates links right after registering a resource. */
  private readonly validateOnAdd: boolean;

  /**
   * @param options - Optional settings; `validateOnAdd` defaults to `false`
   */
  constructor(options?: ResourceRegistryOptions) {
    this.validateOnAdd = options?.validateOnAdd ?? false;
  }

  /**
   * Add a single resource to the registry.
   *
   * Parses the markdown file, reads its modification time, assigns an ID and
   * stores the resource under both its path and its ID.
   *
   * ID handling:
   * - The ID is chosen only after all file I/O has finished, in the same
   *   synchronous step that stores the resource. Concurrent calls (as issued by
   *   `addResources` / `crawl`) therefore cannot hand out the same ID to two
   *   files that share a basename.
   * - Adding a path that is already registered replaces its metadata and keeps
   *   its existing ID instead of minting a new one and leaving a stale entry.
   *
   * If `validateOnAdd` is enabled, each link of the resource is validated after
   * the resource has been stored, and the first error-severity issue is thrown.
   * Warning- and info-level issues do not throw, matching `validate()`, where
   * only errors make the result fail. The resource remains registered when
   * validation throws.
   *
   * @param filePath - Path to the markdown file (will be normalized to absolute)
   * @returns The parsed resource metadata
   * @throws Error if the file cannot be read or parsed, or if `validateOnAdd`
   *   is enabled and a link has an error-severity issue
   *
   * @example
   * ```typescript
   * const resource = await registry.addResource('./docs/README.md');
   * console.log(`Added ${resource.id} with ${resource.links.length} links`);
   * ```
   */
  async addResource(filePath: string): Promise<ResourceMetadata> {
    const absolutePath = path.resolve(filePath);

    // Perform all asynchronous work before touching the indexes.
    const parseResult = await parseMarkdown(absolutePath);
    const fs = await import('node:fs/promises');
    const stats = await fs.stat(absolutePath);

    // From here until both maps are written there must be no `await`:
    // ID selection and registration have to happen atomically with respect to
    // other in-flight addResource() calls.
    const id =
      this.resourcesByPath.get(absolutePath)?.id ?? this.generateUniqueId(absolutePath);

    const resource: ResourceMetadata = {
      id,
      filePath: absolutePath,
      links: parseResult.links,
      headings: parseResult.headings,
      sizeBytes: parseResult.sizeBytes,
      estimatedTokenCount: parseResult.estimatedTokenCount,
      modifiedAt: stats.mtime,
    };

    this.resourcesByPath.set(absolutePath, resource);
    this.resourcesById.set(id, resource);

    if (this.validateOnAdd) {
      // Built after registration so the new resource's own headings are included.
      const headingsByFile = this.buildHeadingsByFileMap();
      for (const link of resource.links) {
        const issue = await validateLink(link, absolutePath, headingsByFile);
        if (issue?.severity === 'error') {
          throw new Error(`Validation failed: ${issue.message}`);
        }
      }
    }

    return resource;
  }

  /**
   * Add multiple resources to the registry in parallel.
   *
   * Each path is passed to `addResource`; the returned array follows the order
   * of `filePaths`. The promise rejects as soon as any single add rejects.
   *
   * @param filePaths - Array of file paths to add
   * @returns Array of parsed resource metadata
   *
   * @example
   * ```typescript
   * const resources = await registry.addResources([
   *   './README.md',
   *   './docs/guide.md',
   *   './docs/api.md'
   * ]);
   * ```
   */
  async addResources(filePaths: string[]): Promise<ResourceMetadata[]> {
    return await Promise.all(filePaths.map((fp) => this.addResource(fp)));
  }

  /**
   * Crawl a directory and add all matching markdown files.
   *
   * Defaults: `include` is all `.md` files, `exclude` is `node_modules`,
   * `.git` and `dist`, and symbolic links are not followed.
   *
   * @param options - Crawl options (baseDir, include, exclude patterns)
   * @returns Array of all added resources
   *
   * @example
   * ```typescript
   * // Crawl docs directory, excluding node_modules
   * const resources = await registry.crawl({
   *   baseDir: './docs',
   *   include: ['**\/*.md'],
   *   exclude: ['**\/node_modules/**']
   * });
   * ```
   */
  async crawl(options: CrawlOptions): Promise<ResourceMetadata[]> {
    const {
      baseDir,
      include = ['**/*.md'],
      exclude = ['**/node_modules/**', '**/.git/**', '**/dist/**'],
      followSymlinks = false,
    } = options;

    // Delegate discovery to the shared file crawler; request absolute file paths only.
    const crawlOptions: UtilsCrawlOptions = {
      baseDir,
      include,
      exclude,
      followSymlinks,
      absolute: true,
      filesOnly: true,
    };

    const files = await crawlDirectory(crawlOptions);

    return await this.addResources(files);
  }

  /**
   * Validate all links in all resources in the registry.
   *
   * Each link is checked by `validateLink`, which receives the link, the path of
   * the resource containing it, and a map of every registered file's headings.
   * Any issue it reports is collected, whatever its severity.
   *
   * @returns Validation result with all issues, per-severity counts, link
   *   statistics, duration and timestamp; `passed` is `true` when there are no
   *   error-severity issues
   *
   * @example
   * ```typescript
   * const result = await registry.validate();
   * console.log(`Passed: ${result.passed}`);
   * console.log(`Errors: ${result.errorCount}`);
   * console.log(`Warnings: ${result.warningCount}`);
   * console.log(`Total resources: ${result.totalResources}`);
   * for (const issue of result.issues) {
   *   console.log(`${issue.severity}: ${issue.message}`);
   * }
   * ```
   */
  async validate(): Promise<ValidationResult> {
    const startTime = Date.now();

    const headingsByFile = this.buildHeadingsByFileMap();

    const issues: ValidationIssue[] = [];
    for (const resource of this.resourcesByPath.values()) {
      for (const link of resource.links) {
        const issue = await validateLink(link, resource.filePath, headingsByFile);
        if (issue) {
          issues.push(issue);
        }
      }
    }

    // Tally severities in a single pass.
    const severityCounts: Record<ValidationIssue['severity'], number> = {
      error: 0,
      warning: 0,
      info: 0,
    };
    for (const issue of issues) {
      severityCounts[issue.severity] += 1;
    }

    // Resource and link totals are exactly what getStats() reports.
    const { totalResources, totalLinks, linksByType } = this.getStats();

    return {
      totalResources,
      totalLinks,
      linksByType,
      issues,
      errorCount: severityCounts.error,
      warningCount: severityCounts.warning,
      infoCount: severityCounts.info,
      passed: severityCounts.error === 0,
      durationMs: Date.now() - startTime,
      timestamp: new Date(),
    };
  }

  /**
   * Resolve links between resources in the registry.
   *
   * For each `local_file` link, sets the `resolvedId` property to the ID
   * of the target resource if that target is registered. Links whose target is
   * not registered are left untouched.
   *
   * Mutates the link objects in place.
   *
   * @example
   * ```typescript
   * registry.resolveLinks();
   *
   * // Now local_file links have resolvedId set
   * const resource = registry.getResource('/project/README.md');
   * for (const link of resource.links) {
   *   if (link.type === 'local_file' && link.resolvedId) {
   *     console.log(`Link resolves to: ${link.resolvedId}`);
   *   }
   * }
   * ```
   */
  resolveLinks(): void {
    for (const resource of this.resourcesByPath.values()) {
      for (const link of resource.links) {
        if (link.type !== 'local_file') {
          continue;
        }

        const targetPath = this.resolveRelativeLinkPath(link.href, resource.filePath);
        const targetResource = this.resourcesByPath.get(targetPath);
        if (targetResource) {
          link.resolvedId = targetResource.id;
        }
      }
    }
  }

  /**
   * Get a resource by its file path.
   *
   * @param filePath - Path to the resource (will be normalized to absolute)
   * @returns Resource metadata or undefined if not found
   *
   * @example
   * ```typescript
   * const resource = registry.getResource('./docs/README.md');
   * if (resource) {
   *   console.log(`Found: ${resource.id}`);
   * }
   * ```
   */
  getResource(filePath: string): ResourceMetadata | undefined {
    return this.resourcesByPath.get(path.resolve(filePath));
  }

  /**
   * Get a resource by its ID.
   *
   * @param id - Resource ID
   * @returns Resource metadata or undefined if not found
   *
   * @example
   * ```typescript
   * const resource = registry.getResourceById('readme');
   * ```
   */
  getResourceById(id: string): ResourceMetadata | undefined {
    return this.resourcesById.get(id);
  }

  /**
   * Get all resources in the registry.
   *
   * @returns A new array containing every registered resource
   *
   * @example
   * ```typescript
   * const allResources = registry.getAllResources();
   * console.log(`Total: ${allResources.length}`);
   * ```
   */
  getAllResources(): ResourceMetadata[] {
    return [...this.resourcesByPath.values()];
  }

  /**
   * Get resources matching a glob pattern.
   *
   * Paths are converted with `toUnixPath` before matching so that patterns
   * written with forward slashes behave the same on every platform.
   *
   * A resource matches when either:
   * 1. the pattern, compiled with `matchBase: true`, matches its full path, or
   * 2. the pattern, compiled with default options, matches some suffix of the
   *    path made of its last 1 to 10 segments. This is what lets relative
   *    patterns such as `docs/**` match absolute stored paths.
   *
   * @param pattern - Glob pattern (e.g., 'docs/**', '**\/README.md')
   * @returns Array of matching resources
   *
   * @example
   * ```typescript
   * const docs = registry.getResourcesByPattern('docs/**');
   * const readmes = registry.getResourcesByPattern('**\/README.md');
   * ```
   */
  getResourcesByPattern(pattern: string): ResourceMetadata[] {
    // Compile both matchers once; they are reused for every resource.
    const matchesWithBase = picomatch(pattern, { matchBase: true });
    const matchesPlain = picomatch(pattern);

    return [...this.resourcesByPath.values()].filter((resource) => {
      const unixPath = toUnixPath(resource.filePath);

      // Strategy 1: full path with matchBase, for simple filename patterns.
      if (matchesWithBase(unixPath)) {
        return true;
      }

      // Strategy 2: try progressively shorter path tails, longest first.
      const segments = unixPath.split('/');
      const longestTail = Math.min(
        ResourceRegistry.MAX_PATTERN_TAIL_SEGMENTS,
        segments.length
      );
      for (let count = longestTail; count > 0; count--) {
        if (matchesPlain(segments.slice(-count).join('/'))) {
          return true;
        }
      }

      return false;
    });
  }

  /**
   * Clear all resources from the registry.
   *
   * @example
   * ```typescript
   * registry.clear();
   * console.log(registry.getAllResources().length); // 0
   * ```
   */
  clear(): void {
    this.resourcesByPath.clear();
    this.resourcesById.clear();
  }

  /**
   * Get statistics about the resources in the registry.
   *
   * @returns Resource count, total link count, and link count per link type
   *
   * @example
   * ```typescript
   * const stats = registry.getStats();
   * console.log(`Resources: ${stats.totalResources}`);
   * console.log(`Links: ${stats.totalLinks}`);
   * console.log(`Local file links: ${stats.linksByType.local_file}`);
   * ```
   */
  getStats(): RegistryStats {
    let totalLinks = 0;
    const linksByType: Record<string, number> = {};

    for (const resource of this.resourcesByPath.values()) {
      totalLinks += resource.links.length;
      for (const link of resource.links) {
        linksByType[link.type] = (linksByType[link.type] ?? 0) + 1;
      }
    }

    return {
      totalResources: this.resourcesByPath.size,
      totalLinks,
      linksByType,
    };
  }

  /**
   * Generate an ID for a file path that is not yet used in the registry.
   *
   * Starts from the ID produced by `generateIdFromPath`. If that ID is already
   * taken, appends `-2`, `-3`, ... until an unused ID is found.
   *
   * The result is only guaranteed unique if the caller stores it before
   * yielding to other asynchronous work.
   *
   * @param filePath - Absolute file path
   * @returns Unique ID
   */
  private generateUniqueId(filePath: string): string {
    const baseId = generateIdFromPath(filePath);

    if (!this.resourcesById.has(baseId)) {
      return baseId;
    }

    let suffix = 2;
    while (this.resourcesById.has(`${baseId}-${suffix}`)) {
      suffix++;
    }

    return `${baseId}-${suffix}`;
  }

  /**
   * Build a map from each registered file path to that resource's headings.
   *
   * Passed to `validateLink` during link validation.
   *
   * @returns A new map; later registry changes are not reflected in it
   */
  private buildHeadingsByFileMap(): Map<string, HeadingNode[]> {
    const map = new Map<string, HeadingNode[]>();
    for (const resource of this.resourcesByPath.values()) {
      map.set(resource.filePath, resource.headings);
    }
    return map;
  }

  /**
   * Resolve a link href to an absolute file path.
   *
   * The anchor part of the href (if any) is removed with `splitHrefAnchor`, and
   * the remaining path is resolved against the directory of the source file.
   *
   * @param linkHref - The href from the link (e.g., './file.md', '../dir/file.md#anchor')
   * @param sourceFilePath - Absolute path to the source file
   * @returns Absolute path to the target file
   */
  private resolveRelativeLinkPath(linkHref: string, sourceFilePath: string): string {
    const [filePath] = splitHrefAnchor(linkHref);
    return path.resolve(path.dirname(sourceFilePath), filePath);
  }
}
|
|
522
|
+
|
|
523
|
+
/**
 * Generate an ID from a file path.
 *
 * Process:
 * 1. Take the basename without its extension
 * 2. Replace runs of underscores and whitespace with a single hyphen
 * 3. Convert to lowercase
 * 4. Remove every character that is not `a-z`, `0-9` or a hyphen
 * 5. Collapse repeated hyphens and trim one leading/trailing hyphen
 *
 * If nothing is left after these steps (for example the basename consists
 * only of punctuation or non-ASCII characters), the ID `'resource'` is
 * returned so that callers never receive an empty ID.
 *
 * @param filePath - File path
 * @returns Generated, non-empty ID (not yet checked for uniqueness)
 *
 * @example
 * ```typescript
 * generateIdFromPath('/project/docs/User Guide.md')  // 'user-guide'
 * generateIdFromPath('/project/README.md')           // 'readme'
 * generateIdFromPath('/project/docs/API_v2.md')      // 'api-v2'
 * generateIdFromPath('/project/docs/___.md')         // 'resource'
 * ```
 */
function generateIdFromPath(filePath: string): string {
  const basename = path.basename(filePath, path.extname(filePath));

  const id = basename
    .replace(/[_\s]+/g, '-') // underscores / whitespace -> hyphen
    .toLowerCase()
    .replace(/[^\da-z-]/g, '') // drop everything except a-z, 0-9, hyphen
    .replace(/-{2,}/g, '-') // collapse runs of hyphens
    .replace(/^-/, '') // trim leading hyphen
    .replace(/-$/, ''); // trim trailing hyphen

  // An empty ID would be indistinguishable from "no ID" for callers.
  return id === '' ? 'resource' : id;
}
|
|
559
|
+
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
|
|
3
|
+
/**
 * The closed set of link classifications, in declaration order.
 *
 * - `local_file`: Link to a local file (relative or absolute path)
 * - `anchor`: Link to a heading anchor (e.g., #heading-slug)
 * - `external`: HTTP/HTTPS URL to external resource
 * - `email`: Mailto link
 * - `unknown`: Unclassified link type
 */
const LINK_TYPE_VALUES = ['local_file', 'anchor', 'external', 'email', 'unknown'] as const;

/**
 * Zod enum schema for the type of a link found in markdown resources.
 */
export const LinkTypeSchema = z
  .enum(LINK_TYPE_VALUES)
  .describe('Type of link found in markdown resources');

/** Union of the link type literals accepted by {@link LinkTypeSchema}. */
export type LinkType = z.infer<typeof LinkTypeSchema>;
|
|
21
|
+
|
|
22
|
+
/**
 * One heading in a document's table of contents.
 *
 * Nodes form a recursive tree: deeper headings are nested under the heading
 * that precedes them (e.g., h3 nodes are children of the preceding h2).
 */
export type HeadingNode = {
  /** Heading level. */
  level: number;
  /** Text content of the heading. */
  text: string;
  /** Slug used as the target of anchor links. */
  slug: string;
  /** Line number of the heading in the source file, when known. */
  line?: number | undefined;
  /** Headings nested under this one, when there are any. */
  children?: HeadingNode[] | undefined;
};
|
|
35
|
+
|
|
36
|
+
/**
 * Zod schema validating {@link HeadingNode} trees.
 *
 * The schema refers to itself for `children`, so it is wrapped in `z.lazy()`
 * to defer that self-reference until the schema is actually used. The
 * TypeScript type is declared by hand (rather than inferred) so that it works
 * with `exactOptionalPropertyTypes`.
 */
export const HeadingNodeSchema: z.ZodType<HeadingNode> = z.lazy(() => {
  const headingShape = {
    level: z.number().int().min(1).max(6).describe('Heading level (1-6)'),
    text: z.string().describe('Raw text content of the heading'),
    slug: z.string().describe('GitHub-style slug for anchor links (lowercase, hyphenated)'),
    line: z.number().int().positive().optional().describe('Line number in source file'),
    children: z.array(HeadingNodeSchema).optional().describe('Nested child headings'),
  };

  return z.object(headingShape).describe('Heading node in the document\'s table of contents');
});
|
|
51
|
+
|
|
52
|
+
/**
 * Field definitions for a single markdown link.
 *
 * Holds the raw data taken from the markdown (`text`, `href`, `line`), its
 * classification (`type`), and optional resolution results used for
 * validation and cross-referencing.
 */
const resourceLinkShape = {
  text: z.string().describe('Link text displayed to users'),
  href: z.string().describe('Raw href attribute from markdown'),
  type: LinkTypeSchema.describe('Classified link type'),
  line: z.number().int().positive().optional().describe('Line number in source file'),
  resolvedPath: z.string().optional().describe('Absolute file path (for local_file links)'),
  anchorTarget: z.string().optional().describe('Target heading slug (for anchor links)'),
  resolvedId: z
    .string()
    .optional()
    .describe('Resolved resource ID in the collection (for local_file links)'),
};

/**
 * Zod schema for a link found in a markdown resource.
 */
export const ResourceLinkSchema = z
  .object(resourceLinkShape)
  .describe('Link found in a markdown resource');

/** A link found in a markdown resource, as validated by {@link ResourceLinkSchema}. */
export type ResourceLink = z.infer<typeof ResourceLinkSchema>;
|
|
69
|
+
|
|
70
|
+
/** Base schema shared by the size-like fields: an integer that is zero or greater. */
const nonNegativeIntegerSchema = z.number().int().nonnegative();

/**
 * Zod schema for the complete metadata of a markdown resource.
 *
 * Covers everything recorded about a resource: its identifiers, the links it
 * contains, its heading structure, and file statistics. Designed to be
 * extensible for future front matter support.
 */
export const ResourceMetadataSchema = z
  .object({
    id: z
      .string()
      .describe('Unique identifier (inferred from filePath or overridden by frontmatter)'),
    filePath: z.string().describe('Absolute path to the resource file'),
    links: z.array(ResourceLinkSchema).describe('All links found in the resource'),
    headings: z
      .array(HeadingNodeSchema)
      .describe('Document table of contents (top-level headings only; children are nested)'),
    sizeBytes: nonNegativeIntegerSchema.describe('File size in bytes'),
    estimatedTokenCount: nonNegativeIntegerSchema.describe(
      'Estimated token count for LLM context (roughly 1 token per 4 chars)'
    ),
    modifiedAt: z.date().describe('Last modified timestamp'),
  })
  .describe('Complete metadata for a markdown resource');

/** Metadata of a markdown resource, as validated by {@link ResourceMetadataSchema}. */
export type ResourceMetadata = z.infer<typeof ResourceMetadataSchema>;
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
|
|
3
|
+
/**
 * The severity levels a validation issue can carry, in declaration order.
 *
 * - `error`: Critical issue that should block usage (e.g., broken file link)
 * - `warning`: Non-critical issue that should be addressed (e.g., questionable link format)
 * - `info`: Informational message (e.g., external URL not validated)
 */
const VALIDATION_SEVERITY_VALUES = ['error', 'warning', 'info'] as const;

/**
 * Zod enum schema for the severity level of a validation issue.
 */
export const ValidationSeveritySchema = z
  .enum(VALIDATION_SEVERITY_VALUES)
  .describe('Severity level for validation issues');

/** Union of the severity literals accepted by {@link ValidationSeveritySchema}. */
export type ValidationSeverity = z.infer<typeof ValidationSeveritySchema>;
|
|
17
|
+
|
|
18
|
+
/**
 * Field definitions for one validation issue: what went wrong, where it
 * occurred, and optionally how to fix it.
 */
const validationIssueShape = {
  severity: ValidationSeveritySchema.describe('Issue severity level'),
  resourcePath: z.string().describe('Absolute path to the resource containing the issue'),
  line: z.number().int().positive().optional().describe('Line number where the issue occurs'),
  type: z
    .string()
    .describe('Issue type identifier (e.g., "broken_file", "broken_anchor", "external_url")'),
  link: z.string().describe('The problematic link'),
  message: z.string().describe('Human-readable description of the issue'),
  suggestion: z.string().optional().describe('Optional suggestion for fixing the issue'),
};

/**
 * Zod schema for a single validation issue found during link validation.
 */
export const ValidationIssueSchema = z
  .object(validationIssueShape)
  .describe('A single validation issue found during link validation');

/** A single validation issue, as validated by {@link ValidationIssueSchema}. */
export type ValidationIssue = z.infer<typeof ValidationIssueSchema>;
|
|
35
|
+
|
|
36
|
+
/** Base schema shared by all counters: an integer that is zero or greater. */
const countSchema = z.number().int().nonnegative();

/**
 * Zod schema for the complete results of validating a collection of resources.
 *
 * Combines summary statistics, the detailed list of issues, and metadata about
 * the validation run. `passed` indicates whether validation succeeded, i.e.
 * produced no errors.
 */
export const ValidationResultSchema = z
  .object({
    totalResources: countSchema.describe('Total number of resources validated'),
    totalLinks: countSchema.describe('Total number of links found across all resources'),
    linksByType: z
      .record(z.string(), countSchema)
      .describe('Count of links by type (e.g., {"local_file": 10, "external": 5})'),
    issues: z.array(ValidationIssueSchema).describe('All validation issues found'),
    errorCount: countSchema.describe('Number of error-level issues'),
    warningCount: countSchema.describe('Number of warning-level issues'),
    infoCount: countSchema.describe('Number of info-level issues'),
    passed: z.boolean().describe('True if validation succeeded (errorCount === 0)'),
    durationMs: z.number().nonnegative().describe('Validation duration in milliseconds'),
    timestamp: z.date().describe('When validation was performed'),
  })
  .describe('Complete results from validating a collection of resources');

/** Result of a validation run, as validated by {@link ValidationResultSchema}. */
export type ValidationResult = z.infer<typeof ValidationResultSchema>;
|