@vibe-agent-toolkit/resources 0.1.12 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,395 @@
1
+ /**
2
+ * Content transform engine for rewriting markdown links.
3
+ *
4
+ * Provides a pure function for transforming markdown link references
5
+ * based on configurable rules. Used by both RAG (rewriting links before
6
+ * persistence) and agent-skills (rewriting links during skill packaging).
7
+ *
8
+ * @example
9
+ * ```typescript
10
+ * import { transformContent, type LinkRewriteRule } from '@vibe-agent-toolkit/resources';
11
+ *
12
+ * const rules: LinkRewriteRule[] = [
13
+ * {
14
+ * match: { type: 'local_file' },
15
+ * template: '{{link.text}} (see: {{link.resource.id}})',
16
+ * },
17
+ * ];
18
+ *
19
+ * const result = transformContent(content, links, { linkRewriteRules: rules, resourceRegistry: registry });
20
+ * ```
21
+ */
22
+
23
+ import path from 'node:path';
24
+
25
+ import { renderTemplate } from '@vibe-agent-toolkit/utils';
26
+
27
+ import type { LinkType, ResourceLink, ResourceMetadata } from './schemas/resource-metadata.js';
28
+ import { matchesGlobPattern, splitHrefAnchor } from './utils.js';
29
+
30
/**
 * MIME type reported for any extension that is absent from the lookup table.
 */
const DEFAULT_MIME_TYPE = 'application/octet-stream';

/**
 * Lookup table from lowercase file extension (leading dot included) to MIME type.
 */
const EXTENSION_MIME_MAP: Record<string, string> = {
  '.md': 'text/markdown',
  '.ts': 'text/typescript',
  '.js': 'text/javascript',
  '.json': 'application/json',
  '.yaml': 'text/yaml',
  '.yml': 'text/yaml',
  '.xml': 'application/xml',
  '.html': 'text/html',
  '.css': 'text/css',
  '.txt': 'text/plain',
};

/**
 * Derive a MIME type from the extension of a file path.
 *
 * The extension is lowercased before the lookup, so `README.MD` and
 * `readme.md` yield the same result. Paths without an extension, and
 * extensions missing from the table, fall back to the default MIME type.
 *
 * @param filePath - Path whose final extension selects the MIME type
 * @returns The mapped MIME type, or `application/octet-stream` when unmapped
 */
function inferMimeType(filePath: string): string {
  const normalizedExtension = path.extname(filePath).toLowerCase();
  const knownMimeType = EXTENSION_MIME_MAP[normalizedExtension];
  return knownMimeType ?? DEFAULT_MIME_TYPE;
}
61
+
62
/**
 * Minimal read-only view of a resource store, keyed by resource ID.
 *
 * The transform engine depends only on this one method rather than on the
 * full ResourceRegistry, so any object that exposes `getResourceById`
 * can be supplied.
 */
export interface ResourceLookup {
  /**
   * Fetch the metadata of the resource with the given ID.
   *
   * @param id - Unique resource ID
   * @returns The resource's metadata, or `undefined` if the lookup yields nothing
   */
  getResourceById(id: string): ResourceMetadata | undefined;
}
72
+
73
/**
 * Criteria deciding whether a link rewrite rule applies to a link.
 *
 * Every criterion that is present must hold for the rule to match;
 * criteria that are omitted impose no restriction:
 * - `type`: the link's type equals (or is contained in) the given type(s)
 * - `pattern`: the target resource's filePath matches at least one glob
 * - `excludeResourceIds`: the link's resolvedId is not in the list
 */
export interface LinkRewriteMatch {
  /**
   * Accepted link type, or list of accepted link types.
   * When omitted, links of every type are accepted.
   */
  type?: LinkType | LinkType[];

  /**
   * Glob, or list of globs, tested against the target resource's filePath.
   * One matching glob is enough. When omitted, any path is accepted.
   *
   * A link whose target resource cannot be looked up (no resolvedId, no
   * resource registry supplied, or an ID the registry does not know)
   * never matches a rule that specifies a pattern.
   */
  pattern?: string | string[];

  /**
   * Resource IDs that must not be rewritten by this rule.
   * A link whose resolvedId appears here does not match the rule.
   * Links without a resolvedId are never excluded by this list.
   */
  excludeResourceIds?: string[];
}
102
+
103
/**
 * One link rewrite rule: what to match, and what to render in its place.
 *
 * Rules are tried in list order and the first rule that matches a link is
 * applied. A link that no rule matches stays exactly as written.
 */
export interface LinkRewriteRule {
  /**
   * Conditions a link must meet; all conditions that are specified must hold.
   */
  match: LinkRewriteMatch;

  /**
   * Handlebars template whose rendered output replaces the whole
   * `[text](href)` occurrence.
   *
   * Variables exposed to the template:
   * - `link.text` - Display text of the link
   * - `link.href` - Original href with any fragment removed
   * - `link.fragment` - Fragment including the leading `#`, or an empty string
   * - `link.type` - Link type (local_file, anchor, external, email, unknown)
   * - `link.resource.id` - ID of the target resource (if resolved)
   * - `link.resource.filePath` - File path of the target resource (if resolved)
   * - `link.resource.extension` - File extension of the target resource (if resolved)
   * - `link.resource.mimeType` - MIME type inferred from the extension (if resolved)
   * - `link.resource.frontmatter.*` - Frontmatter fields of the target resource (if resolved)
   * - `link.resource.sizeBytes` - Size of the target resource in bytes (if resolved)
   * - `link.resource.estimatedTokenCount` - Estimated token count of the target resource (if resolved)
   * - Every variable supplied through `context`
   *
   * When the target is not resolved, `link.resource` itself is undefined.
   */
  template: string;
}
134
+
135
/**
 * Configuration accepted by `transformContent`.
 */
export interface ContentTransformOptions {
  /** Rewrite rules in priority order; the first rule matching a link is used. */
  linkRewriteRules: LinkRewriteRule[];

  /**
   * Source of target-resource metadata for the `link.resource.*` template
   * variables and for `pattern` matching.
   * When omitted, no target is resolved: `link.resource` is undefined in
   * templates and rules that specify a `pattern` never match.
   */
  resourceRegistry?: ResourceLookup;

  /**
   * Extra variables exposed to every template at the top level of the
   * template context. The built-in `link` object is applied after these,
   * so a `link` key supplied here is replaced.
   */
  context?: Record<string, unknown>;
}
154
+
155
/**
 * Assemble the variables handed to a rule's template for one link.
 *
 * The result contains every entry of `extraContext` plus a `link` object.
 * `link` is written last, so it replaces any `link` key in `extraContext`.
 *
 * @param link - Link being rewritten; supplies `text` and `type`
 * @param hrefWithoutFragment - The link's href with the fragment removed
 * @param fragment - Fragment including the leading '#', or an empty string
 * @param resource - Metadata of the link target, or undefined when unresolved
 * @param extraContext - Caller-supplied variables, if any
 * @returns Context object for template rendering
 */
function buildTemplateContext(
  link: ResourceLink,
  hrefWithoutFragment: string,
  fragment: string,
  resource: ResourceMetadata | undefined,
  extraContext: Record<string, unknown> | undefined,
): Record<string, unknown> {
  // Stays undefined for unresolved targets so `link.resource` is absent in templates.
  let targetDetails: Record<string, unknown> | undefined;
  if (resource !== undefined) {
    const { id, filePath, frontmatter, sizeBytes, estimatedTokenCount } = resource;
    targetDetails = {
      id,
      filePath,
      extension: path.extname(filePath),
      mimeType: inferMimeType(filePath),
      frontmatter,
      sizeBytes,
      estimatedTokenCount,
    };
  }

  const linkDetails = {
    text: link.text,
    href: hrefWithoutFragment,
    fragment,
    type: link.type,
    resource: targetDetails,
  };

  return { ...extraContext, link: linkDetails };
}
195
+
196
/**
 * Decide whether a link's type satisfies a rule's `type` criterion.
 *
 * @param linkType - Type of the link under test
 * @param matchType - Accepted type, list of accepted types, or undefined for "any type"
 * @returns True when no criterion is given or the link's type is accepted
 */
function matchesType(linkType: LinkType, matchType: LinkType | LinkType[] | undefined): boolean {
  if (matchType === undefined) {
    return true;
  }

  // Treat a single type as a one-element list so both forms share one check.
  const acceptedTypes = Array.isArray(matchType) ? matchType : [matchType];
  return acceptedTypes.includes(linkType);
}
212
+
213
/**
 * Decide whether a link's target satisfies a rule's `pattern` criterion.
 *
 * @param resource - Metadata of the link target, or undefined when unresolved
 * @param patterns - Glob, list of globs, or undefined for "any path"
 * @returns True when no pattern is given, or when the target's filePath
 *   matches at least one glob; false when a pattern is given but the
 *   target is unresolved
 */
function matchesPattern(
  resource: ResourceMetadata | undefined,
  patterns: string | string[] | undefined,
): boolean {
  if (patterns === undefined) {
    return true;
  }

  // Without a resolved target there is no filePath to test against the globs.
  if (resource === undefined) {
    return false;
  }

  const candidateGlobs = typeof patterns === 'string' ? [patterns] : patterns;
  for (const glob of candidateGlobs) {
    if (matchesGlobPattern(resource.filePath, glob)) {
      return true;
    }
  }
  return false;
}
236
+
237
/**
 * Decide whether a rule's exclusion list rules out a link.
 *
 * @param resolvedId - Resolved resource ID of the link, if it has one
 * @param excludeResourceIds - Resource IDs the rule must not apply to, if any
 * @returns True only when the link has a resolvedId and that ID is listed;
 *   false for unresolved links and for missing or empty lists
 */
function isExcluded(
  resolvedId: string | undefined,
  excludeResourceIds: string[] | undefined,
): boolean {
  // Nothing can be excluded without both an ID and a list to look it up in.
  if (resolvedId === undefined || excludeResourceIds === undefined) {
    return false;
  }
  return excludeResourceIds.includes(resolvedId);
}
256
+
257
/**
 * Select the rule to apply to a link.
 *
 * Rules are examined in list order. A rule is chosen when the link passes
 * its type check and its pattern check and is not on its exclusion list.
 *
 * @param link - Link to find a rule for
 * @param resource - Metadata of the link target, or undefined when unresolved
 * @param rules - Candidate rules in priority order
 * @returns The first rule whose criteria all hold, or undefined when none does
 */
function findMatchingRule(
  link: ResourceLink,
  resource: ResourceMetadata | undefined,
  rules: LinkRewriteRule[],
): LinkRewriteRule | undefined {
  return rules.find(
    ({ match }) =>
      matchesType(link.type, match.type) &&
      matchesPattern(resource, match.pattern) &&
      !isExcluded(link.resolvedId, match.excludeResourceIds),
  );
}
290
+
291
/**
 * Global regex that finds inline markdown links of the form `[text](href)`.
 *
 * Match groups:
 * - Group 0: the complete link, brackets and parentheses included
 * - Group 1: the link text (may be empty)
 * - Group 2: the link href (may be empty)
 *
 * Known limitations, all following from the two negated character classes:
 * - Link text cannot contain `]`, so `[text [with] brackets](href)` is not
 *   matched as a single link.
 * - The href ends at the first `)`, so an href that itself contains
 *   parentheses is captured only up to that character.
 * - A leading `!` is not part of the pattern, so the `[alt](src)` portion of
 *   an image reference `![alt](src)` matches as well.
 */
// eslint-disable-next-line sonarjs/slow-regex -- negated character classes [^\]] and [^)] are inherently non-backtracking
const MARKDOWN_LINK_REGEX = /\[([^\]]*)\]\(([^)]*)\)/g;
305
+
306
/**
 * Rewrite the markdown links of a document according to a list of rules.
 *
 * Every `[text](href)` occurrence in `content` is looked up among the parsed
 * `links` by its exact text and href. When a parsed link is found, the rules
 * are tried in order and the first matching rule's template is rendered in
 * place of the occurrence. Occurrences with no parsed counterpart, and links
 * that no rule matches, are copied through unchanged. The input string and
 * the `links` array are not modified.
 *
 * Matching is purely textual: every occurrence sharing a text+href pair is
 * handled with the first parsed link that has that pair.
 *
 * NOTE(review): whether rendered values are HTML-escaped depends on
 * `renderTemplate` — confirm before relying on templates whose link text
 * may contain characters such as `&`, `<` or quotes.
 *
 * @param content - Markdown text to transform
 * @param links - Links parsed from `content` (from ResourceMetadata.links)
 * @param options - Rules, optional resource registry, and optional extra template context
 * @returns `content` with every matched link replaced by its rendered template
 *
 * @example
 * ```typescript
 * const rules: LinkRewriteRule[] = [
 *   {
 *     match: { type: 'local_file' },
 *     template: '{{link.text}} (ref: {{link.resource.id}})',
 *   },
 *   {
 *     match: { type: 'external' },
 *     template: '[{{link.text}}]({{link.href}})',
 *   },
 * ];
 *
 * const result = transformContent(content, resource.links, {
 *   linkRewriteRules: rules,
 *   resourceRegistry: registry,
 * });
 * ```
 */
export function transformContent(
  content: string,
  links: ResourceLink[],
  options: ContentTransformOptions,
): string {
  const { linkRewriteRules: rules, resourceRegistry: registry, context: extraContext } = options;

  // Nothing to rewrite without at least one rule and one parsed link.
  if (rules.length === 0 || links.length === 0) {
    return content;
  }

  // Index parsed links by their "[text](href)" signature. Duplicates share
  // the same match criteria, so only the first link per signature is kept.
  const firstLinkBySignature = new Map<string, ResourceLink>();
  for (const parsedLink of links) {
    const key = `[${parsedLink.text}](${parsedLink.href})`;
    if (!firstLinkBySignature.has(key)) {
      firstLinkBySignature.set(key, parsedLink);
    }
  }

  // Computes the replacement text for one `[text](href)` occurrence.
  const rewriteOccurrence = (fullMatch: string, text: string, href: string): string => {
    const link = firstLinkBySignature.get(`[${text}](${href})`);
    if (link === undefined) {
      // The parser did not report this occurrence, so keep it verbatim.
      return fullMatch;
    }

    // The target can only be resolved with both a resolvedId and a registry.
    let resource: ResourceMetadata | undefined;
    if (link.resolvedId !== undefined && registry !== undefined) {
      resource = registry.getResourceById(link.resolvedId);
    }

    const rule = findMatchingRule(link, resource, rules);
    if (rule === undefined) {
      // No rule applies, so keep the occurrence verbatim.
      return fullMatch;
    }

    // Split the href so templates see the path and the '#fragment' separately.
    const [hrefWithoutFragment, anchor] = splitHrefAnchor(href);
    const fragment = anchor === undefined ? '' : `#${anchor}`;

    return renderTemplate(
      rule.template,
      buildTemplateContext(link, hrefWithoutFragment, fragment, resource, extraContext),
    );
  };

  return content.replaceAll(MARKDOWN_LINK_REGEX, rewriteOccurrence);
}
package/src/index.ts CHANGED
@@ -26,9 +26,10 @@
26
26
  * ```
27
27
  */
28
28
 
29
- // Export main ResourceRegistry class
29
+ // Export main ResourceRegistry class and ID generation utility
30
30
  export {
31
31
  ResourceRegistry,
32
+ generateIdFromPath,
32
33
  type CrawlOptions,
33
34
  type ResourceRegistryOptions,
34
35
  type RegistryStats,
@@ -79,5 +80,14 @@ export { parseMarkdown, type ParseResult } from './link-parser.js';
79
80
  // Export frontmatter validation
80
81
  export { validateFrontmatter } from './frontmatter-validator.js';
81
82
 
83
+ // Export content transform engine for link rewriting
84
+ export {
85
+ transformContent,
86
+ type ContentTransformOptions,
87
+ type LinkRewriteMatch,
88
+ type LinkRewriteRule,
89
+ type ResourceLookup,
90
+ } from './content-transform.js';
91
+
82
92
  // Note: link-parser and link-validator internals are NOT exported
83
93
  // They are implementation details. Users should use ResourceRegistry API.