agentmap 0.7.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +96 -0
- package/README.md +24 -0
- package/dist/cli.js +44 -12
- package/dist/cli.js.map +1 -1
- package/dist/extract/definitions.js +12 -12
- package/dist/extract/definitions.js.map +1 -1
- package/dist/extract/definitions.test.js +30 -259
- package/dist/extract/definitions.test.js.map +1 -1
- package/dist/extract/git-status.d.ts +11 -4
- package/dist/extract/git-status.d.ts.map +1 -1
- package/dist/extract/git-status.js +21 -16
- package/dist/extract/git-status.js.map +1 -1
- package/dist/extract/markdown.js +1 -1
- package/dist/extract/markdown.test.js +3 -3
- package/dist/extract/markdown.test.js.map +1 -1
- package/dist/extract/marker.js +1 -1
- package/dist/extract/marker.test.js +4 -4
- package/dist/extract/marker.test.js.map +1 -1
- package/dist/extract/submodules.d.ts +12 -0
- package/dist/extract/submodules.d.ts.map +1 -0
- package/dist/extract/submodules.js +234 -0
- package/dist/extract/submodules.js.map +1 -0
- package/dist/extract/submodules.test.d.ts +2 -0
- package/dist/extract/submodules.test.d.ts.map +1 -0
- package/dist/extract/submodules.test.js +84 -0
- package/dist/extract/submodules.test.js.map +1 -0
- package/dist/index.d.ts +4 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +10 -9
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts +10 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +41 -0
- package/dist/logger.js.map +1 -0
- package/dist/map/builder.d.ts +3 -3
- package/dist/map/builder.d.ts.map +1 -1
- package/dist/map/builder.js +59 -9
- package/dist/map/builder.js.map +1 -1
- package/dist/map/builder.test.d.ts +2 -0
- package/dist/map/builder.test.d.ts.map +1 -0
- package/dist/map/builder.test.js +66 -0
- package/dist/map/builder.test.js.map +1 -0
- package/dist/map/truncate.d.ts +7 -3
- package/dist/map/truncate.d.ts.map +1 -1
- package/dist/map/truncate.js +90 -9
- package/dist/map/truncate.js.map +1 -1
- package/dist/map/yaml.d.ts.map +1 -1
- package/dist/map/yaml.js +13 -3
- package/dist/map/yaml.js.map +1 -1
- package/dist/scanner.d.ts +9 -2
- package/dist/scanner.d.ts.map +1 -1
- package/dist/scanner.js +172 -49
- package/dist/scanner.js.map +1 -1
- package/dist/scanner.test.d.ts +2 -0
- package/dist/scanner.test.d.ts.map +1 -0
- package/dist/scanner.test.js +84 -0
- package/dist/scanner.test.js.map +1 -0
- package/dist/test-helpers/git-test-helpers.d.ts +13 -0
- package/dist/test-helpers/git-test-helpers.d.ts.map +1 -0
- package/dist/test-helpers/git-test-helpers.js +48 -0
- package/dist/test-helpers/git-test-helpers.js.map +1 -0
- package/dist/types.d.ts +42 -2
- package/dist/types.d.ts.map +1 -1
- package/package.json +15 -3
- package/src/cli.ts +164 -0
- package/src/extract/definitions.test.ts +2040 -0
- package/src/extract/definitions.ts +379 -0
- package/src/extract/git-status.test.ts +507 -0
- package/src/extract/git-status.ts +359 -0
- package/src/extract/markdown.test.ts +159 -0
- package/src/extract/markdown.ts +202 -0
- package/src/extract/marker.test.ts +566 -0
- package/src/extract/marker.ts +398 -0
- package/src/extract/submodules.test.ts +95 -0
- package/src/extract/submodules.ts +269 -0
- package/src/extract/utils.ts +27 -0
- package/src/index.ts +106 -0
- package/src/languages/cpp.ts +129 -0
- package/src/languages/go.ts +72 -0
- package/src/languages/index.ts +231 -0
- package/src/languages/javascript.ts +33 -0
- package/src/languages/python.ts +41 -0
- package/src/languages/rust.ts +72 -0
- package/src/languages/typescript.ts +74 -0
- package/src/languages/zig.ts +106 -0
- package/src/logger.ts +55 -0
- package/src/map/builder.test.ts +72 -0
- package/src/map/builder.ts +175 -0
- package/src/map/truncate.ts +188 -0
- package/src/map/yaml.ts +66 -0
- package/src/parser/index.ts +53 -0
- package/src/parser/languages.ts +64 -0
- package/src/scanner.test.ts +95 -0
- package/src/scanner.ts +364 -0
- package/src/test-helpers/git-test-helpers.ts +62 -0
- package/src/types.ts +191 -0
|
@@ -0,0 +1,359 @@
|
|
|
1
|
+
// Parse git diff output and calculate definition-level diff stats.
|
|
2
|
+
// Uses defensive git options for cross-platform reliability.
|
|
3
|
+
|
|
4
|
+
import { execSync } from 'child_process'
|
|
5
|
+
import { createConsoleLogger } from '../logger.js'
|
|
6
|
+
import type { Definition, DefinitionDiff, DiffHunk, FileDiff, FileDiffStats } from '../types.js'
|
|
7
|
+
import type { Logger } from '../logger.js'
|
|
8
|
+
|
|
9
|
+
/**
 * Defensive git options to ensure consistent output across platforms/configs
 *
 * Joined into one space-separated string because the callers below
 * interpolate it directly into the `git diff ...` command line.
 */
const GIT_DIFF_OPTIONS = [
  '--no-color', // No ANSI color codes
  '--no-ext-diff', // No external diff tools
  '--no-textconv', // No text conversion filters
  '--no-renames', // Don't detect renames (simpler parsing)
].join(' ')
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Normalize file path for cross-platform compatibility
|
|
21
|
+
* - Converts backslashes to forward slashes
|
|
22
|
+
* - Handles quoted paths from git (e.g., paths with spaces/unicode)
|
|
23
|
+
*/
|
|
24
|
+
function normalizePath(path: string): string {
|
|
25
|
+
// Git quotes paths with special characters: "path/with spaces/file.ts"
|
|
26
|
+
if (path.startsWith('"') && path.endsWith('"')) {
|
|
27
|
+
path = path.slice(1, -1)
|
|
28
|
+
// Handle escaped characters in quoted paths
|
|
29
|
+
path = path.replace(/\\"/g, '"').replace(/\\\\/g, '\\')
|
|
30
|
+
}
|
|
31
|
+
// Normalize to forward slashes
|
|
32
|
+
return path.replace(/\\/g, '/')
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Safely execute a git command, returning empty string on any error
|
|
37
|
+
*/
|
|
38
|
+
function safeExec(cmd: string, dir: string, logger: Logger): string {
|
|
39
|
+
try {
|
|
40
|
+
return execSync(cmd, {
|
|
41
|
+
cwd: dir,
|
|
42
|
+
encoding: 'utf8',
|
|
43
|
+
maxBuffer: 1024 * 1024 * 10, // 10MB
|
|
44
|
+
stdio: ['pipe', 'pipe', 'pipe'], // Capture stderr too
|
|
45
|
+
})
|
|
46
|
+
} catch (err) {
|
|
47
|
+
const message = err instanceof Error ? err.message : String(err)
|
|
48
|
+
logger.warn(`Warning: git diff failed: ${message}`)
|
|
49
|
+
return ''
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Parse git diff --numstat output for file-level stats
|
|
55
|
+
* Format: "added<TAB>deleted<TAB>path" or "-<TAB>-<TAB>path" for binary
|
|
56
|
+
*
|
|
57
|
+
* This is much more reliable than parsing full diff output.
|
|
58
|
+
*/
|
|
59
|
+
export function parseNumstat(numstatOutput: string): Map<string, FileDiffStats> {
|
|
60
|
+
const stats = new Map<string, FileDiffStats>()
|
|
61
|
+
|
|
62
|
+
if (!numstatOutput.trim()) {
|
|
63
|
+
return stats
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
const lines = numstatOutput.split('\n')
|
|
67
|
+
|
|
68
|
+
for (const line of lines) {
|
|
69
|
+
if (!line.trim()) continue
|
|
70
|
+
|
|
71
|
+
// Split by tab - format is: added<TAB>deleted<TAB>path
|
|
72
|
+
const parts = line.split('\t')
|
|
73
|
+
if (parts.length < 3) continue
|
|
74
|
+
|
|
75
|
+
const [addedStr, deletedStr, ...pathParts] = parts
|
|
76
|
+
const path = normalizePath(pathParts.join('\t')) // Path might contain tabs (rare but possible)
|
|
77
|
+
|
|
78
|
+
// Binary files show as "-" for both counts - skip them
|
|
79
|
+
if (addedStr === '-' || deletedStr === '-') {
|
|
80
|
+
continue
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
const added = parseInt(addedStr, 10)
|
|
84
|
+
const deleted = parseInt(deletedStr, 10)
|
|
85
|
+
|
|
86
|
+
// Skip if parsing failed or no changes
|
|
87
|
+
if (isNaN(added) || isNaN(deleted)) continue
|
|
88
|
+
if (added === 0 && deleted === 0) continue
|
|
89
|
+
|
|
90
|
+
stats.set(path, { added, deleted })
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
return stats
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* Parse a hunk header like "@@ -10,5 +12,7 @@" or "@@ -10 +12,7 @@"
|
|
98
|
+
*/
|
|
99
|
+
export function parseHunkHeader(line: string): DiffHunk | null {
|
|
100
|
+
// Match: @@ -oldStart[,oldCount] +newStart[,newCount] @@
|
|
101
|
+
const match = line.match(/@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/)
|
|
102
|
+
if (!match) return null
|
|
103
|
+
|
|
104
|
+
return {
|
|
105
|
+
oldStart: parseInt(match[1], 10),
|
|
106
|
+
oldCount: match[2] ? parseInt(match[2], 10) : 1,
|
|
107
|
+
newStart: parseInt(match[3], 10),
|
|
108
|
+
newCount: match[4] ? parseInt(match[4], 10) : 1,
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
/**
|
|
113
|
+
* Parse git diff output into structured file diffs (for definition-level analysis)
|
|
114
|
+
* Only extracts hunk positions, not content.
|
|
115
|
+
*/
|
|
116
|
+
export function parseDiff(diffOutput: string): Map<string, FileDiff> {
|
|
117
|
+
const files = new Map<string, FileDiff>()
|
|
118
|
+
|
|
119
|
+
if (!diffOutput.trim()) {
|
|
120
|
+
return files
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
const lines = diffOutput.split('\n')
|
|
124
|
+
|
|
125
|
+
let currentFile: string | null = null
|
|
126
|
+
let hunks: DiffHunk[] = []
|
|
127
|
+
|
|
128
|
+
for (const line of lines) {
|
|
129
|
+
// New file header: "diff --git a/path b/path"
|
|
130
|
+
if (line.startsWith('diff --git ')) {
|
|
131
|
+
// Save previous file
|
|
132
|
+
if (currentFile && hunks.length > 0) {
|
|
133
|
+
files.set(currentFile, { path: currentFile, hunks })
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// Extract path from "diff --git a/path b/path"
|
|
137
|
+
// Use the b/ path (destination) as the canonical path
|
|
138
|
+
const match = line.match(/diff --git a\/.+ b\/(.+)/)
|
|
139
|
+
if (match) {
|
|
140
|
+
currentFile = normalizePath(match[1])
|
|
141
|
+
} else {
|
|
142
|
+
currentFile = null
|
|
143
|
+
}
|
|
144
|
+
hunks = []
|
|
145
|
+
continue
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
// Skip binary files indicator
|
|
149
|
+
if (line.startsWith('Binary files ')) {
|
|
150
|
+
currentFile = null
|
|
151
|
+
hunks = []
|
|
152
|
+
continue
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
// Hunk header
|
|
156
|
+
if (line.startsWith('@@') && currentFile) {
|
|
157
|
+
try {
|
|
158
|
+
const hunk = parseHunkHeader(line)
|
|
159
|
+
if (hunk) {
|
|
160
|
+
hunks.push(hunk)
|
|
161
|
+
}
|
|
162
|
+
} catch {
|
|
163
|
+
// Skip malformed hunk headers
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
// Save last file
|
|
169
|
+
if (currentFile && hunks.length > 0) {
|
|
170
|
+
files.set(currentFile, { path: currentFile, hunks })
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
return files
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
/**
|
|
177
|
+
* Get file-level diff stats using --numstat (most reliable)
|
|
178
|
+
*/
|
|
179
|
+
export function getFileStats(dir: string, logger: Logger = createConsoleLogger()): Map<string, FileDiffStats> {
|
|
180
|
+
const cmd = `git diff ${GIT_DIFF_OPTIONS} --numstat HEAD`
|
|
181
|
+
const output = safeExec(cmd, dir, logger)
|
|
182
|
+
return parseNumstat(output)
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
/**
|
|
186
|
+
* Get hunk-level diff for definition analysis
|
|
187
|
+
*/
|
|
188
|
+
export function getHunkDiff(dir: string, logger: Logger = createConsoleLogger()): Map<string, FileDiff> {
|
|
189
|
+
const cmd = `git diff ${GIT_DIFF_OPTIONS} --unified=0 HEAD`
|
|
190
|
+
const output = safeExec(cmd, dir, logger)
|
|
191
|
+
return parseDiff(output)
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
/**
|
|
195
|
+
* Combined function to get all diff data needed
|
|
196
|
+
* Returns both file stats and hunk data, with error isolation.
|
|
197
|
+
* Filters out submodule paths to prevent misleading stats (submodule pointer
|
|
198
|
+
* changes show as 1/1 in numstat, and produce "Subproject commit" pseudo-diffs).
|
|
199
|
+
*/
|
|
200
|
+
export function getAllDiffData(dir: string, submodulePaths?: Set<string>): {
|
|
201
|
+
fileStats: Map<string, FileDiffStats>
|
|
202
|
+
fileDiffs: Map<string, FileDiff>
|
|
203
|
+
}
|
|
204
|
+
export function getAllDiffData(dir: string, submodulePaths: Set<string> | undefined, logger: Logger): {
|
|
205
|
+
fileStats: Map<string, FileDiffStats>
|
|
206
|
+
fileDiffs: Map<string, FileDiff>
|
|
207
|
+
}
|
|
208
|
+
export function getAllDiffData(
|
|
209
|
+
dir: string,
|
|
210
|
+
submodulePaths?: Set<string>,
|
|
211
|
+
logger: Logger = createConsoleLogger()
|
|
212
|
+
): {
|
|
213
|
+
fileStats: Map<string, FileDiffStats>
|
|
214
|
+
fileDiffs: Map<string, FileDiff>
|
|
215
|
+
} {
|
|
216
|
+
// Get file stats (for file-level +N-M display)
|
|
217
|
+
let fileStats: Map<string, FileDiffStats>
|
|
218
|
+
try {
|
|
219
|
+
fileStats = getFileStats(dir, logger)
|
|
220
|
+
} catch (err) {
|
|
221
|
+
const message = err instanceof Error ? err.message : String(err)
|
|
222
|
+
logger.warn(`Warning: failed to get file stats: ${message}`)
|
|
223
|
+
fileStats = new Map()
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
// Get hunk data (for definition-level analysis)
|
|
227
|
+
let fileDiffs: Map<string, FileDiff>
|
|
228
|
+
try {
|
|
229
|
+
fileDiffs = getHunkDiff(dir, logger)
|
|
230
|
+
} catch (err) {
|
|
231
|
+
const message = err instanceof Error ? err.message : String(err)
|
|
232
|
+
logger.warn(`Warning: failed to get hunk diff: ${message}`)
|
|
233
|
+
fileDiffs = new Map()
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
// Filter out submodule paths - their diff output is misleading:
|
|
237
|
+
// - numstat shows 1/1 for pointer changes (not real line counts)
|
|
238
|
+
// - unified diff shows "Subproject commit" pseudo-patches
|
|
239
|
+
if (submodulePaths && submodulePaths.size > 0) {
|
|
240
|
+
for (const subPath of submodulePaths) {
|
|
241
|
+
fileStats.delete(subPath)
|
|
242
|
+
fileDiffs.delete(subPath)
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
return { fileStats, fileDiffs }
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
/**
 * Calculate diff stats for a single definition based on file hunks
 *
 * A definition is "added" if all its lines are new additions.
 * Otherwise it's "updated" if any of its lines were changed.
 *
 * Hunk positions are interpreted in post-change ("new") coordinates, so
 * `def.line`/`def.endLine` are assumed to also be post-change line numbers.
 * Returns null when no hunk touches the definition, or on any internal error.
 */
export function calculateDefinitionDiff(
  def: Definition,
  hunks: DiffHunk[]
): DefinitionDiff | null {
  try {
    const defStart = def.line
    const defEnd = def.endLine
    const defLineCount = defEnd - defStart + 1

    let addedInDef = 0
    let deletedInDef = 0

    for (const hunk of hunks) {
      // Check if this hunk's NEW lines overlap with definition range
      const hunkNewStart = hunk.newStart
      const hunkNewEnd = hunk.newStart + hunk.newCount - 1
      // NOTE(review): for a pure deletion (newCount === 0) hunkNewEnd is
      // hunkNewStart - 1, i.e. an empty range just before the deletion
      // point, so such hunks never add lines here - confirm this matches
      // the intended attribution of deletions at a definition's boundary.

      // Calculate overlap between [defStart, defEnd] and [hunkNewStart, hunkNewEnd]
      const overlapStart = Math.max(defStart, hunkNewStart)
      const overlapEnd = Math.min(defEnd, hunkNewEnd)

      if (overlapStart <= overlapEnd) {
        // There's overlap - count the added lines in this overlap
        const addedLines = overlapEnd - overlapStart + 1
        addedInDef += addedLines
      }

      // For deleted lines, check if hunk's new position overlaps with definition
      if (hunk.oldCount > 0) {
        if (hunkNewStart <= defEnd && hunkNewEnd >= defStart) {
          // The hunk's entire deletion count is attributed to this
          // definition; deletions are not split across overlapping
          // definitions, so totals may double-count for nested defs.
          deletedInDef += hunk.oldCount
        }
      }
    }

    // No changes in this definition
    if (addedInDef === 0 && deletedInDef === 0) {
      return null
    }

    // Determine status
    // "added" = the entire definition consists of new lines AND nothing was deleted
    const status = addedInDef >= defLineCount && deletedInDef === 0 ? 'added' : 'updated'

    return {
      status,
      added: addedInDef,
      deleted: deletedInDef,
    }
  } catch {
    // Any calculation error - return null (no diff info)
    return null
  }
}
|
|
309
|
+
|
|
310
|
+
/**
|
|
311
|
+
* Calculate total diff stats for a file by summing all hunks
|
|
312
|
+
* @deprecated Use getFileStats() with --numstat instead for reliability
|
|
313
|
+
*/
|
|
314
|
+
export function calculateFileDiff(hunks: DiffHunk[]): FileDiffStats | null {
|
|
315
|
+
if (hunks.length === 0) {
|
|
316
|
+
return null
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
let added = 0
|
|
320
|
+
let deleted = 0
|
|
321
|
+
|
|
322
|
+
for (const hunk of hunks) {
|
|
323
|
+
added += hunk.newCount
|
|
324
|
+
deleted += hunk.oldCount
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
if (added === 0 && deleted === 0) {
|
|
328
|
+
return null
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
return { added, deleted }
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
/**
|
|
335
|
+
* Apply diff information to definitions for a file
|
|
336
|
+
*/
|
|
337
|
+
export function applyDiffToDefinitions(
|
|
338
|
+
definitions: Definition[],
|
|
339
|
+
fileDiff: FileDiff | undefined
|
|
340
|
+
): Definition[] {
|
|
341
|
+
if (!fileDiff || fileDiff.hunks.length === 0) {
|
|
342
|
+
return definitions
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
return definitions.map(def => {
|
|
346
|
+
try {
|
|
347
|
+
const diff = calculateDefinitionDiff(def, fileDiff.hunks)
|
|
348
|
+
if (diff) {
|
|
349
|
+
return { ...def, diff }
|
|
350
|
+
}
|
|
351
|
+
} catch {
|
|
352
|
+
// Skip diff for this definition on error
|
|
353
|
+
}
|
|
354
|
+
return def
|
|
355
|
+
})
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
// Legacy exports for backwards compatibility
// Older callers imported getGitDiffAll; keep the alias so they don't break.
export { getFileStats as getGitDiffAll }
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
// Tests for markdown description extraction.
|
|
2
|
+
|
|
3
|
+
import { describe, expect, test } from 'bun:test'
|
|
4
|
+
import { writeFile, unlink, mkdir } from 'fs/promises'
|
|
5
|
+
import { join } from 'path'
|
|
6
|
+
import { tmpdir } from 'os'
|
|
7
|
+
import { extractMarkdownDescription } from './markdown.js'
|
|
8
|
+
|
|
9
|
+
const TEST_DIR = join(tmpdir(), 'agentmap-markdown-test')
|
|
10
|
+
|
|
11
|
+
async function testMarkdown(content: string): Promise<string | null> {
|
|
12
|
+
await mkdir(TEST_DIR, { recursive: true })
|
|
13
|
+
const filepath = join(TEST_DIR, 'README.md')
|
|
14
|
+
await writeFile(filepath, content, 'utf8')
|
|
15
|
+
try {
|
|
16
|
+
return await extractMarkdownDescription(filepath)
|
|
17
|
+
} finally {
|
|
18
|
+
await unlink(filepath).catch(() => {})
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
describe('Markdown extraction', () => {
  test('simple heading and paragraph', async () => {
    const desc = await testMarkdown(`# My Project

This is a description of my project.
It does amazing things.
`)
    expect(desc).toMatchInlineSnapshot(`
      "My Project
      This is a description of my project.
      It does amazing things."
    `)
  })

  test('ignores HTML comments', async () => {
    const desc = await testMarkdown(`<!-- This is a comment -->
# Title

Some content here.
`)
    expect(desc).toMatchInlineSnapshot(`
      "Title
      Some content here."
    `)
  })

  test('ignores badge images', async () => {
    const desc = await testMarkdown(`
![Build Status](https://img.shields.io/badge/build-passing-green)

# My Library

A useful library.
`)
    expect(desc).toMatchInlineSnapshot(`
      "My Library
      A useful library."
    `)
  })

  test('handles lists', async () => {
    const desc = await testMarkdown(`# Features

- Feature one
- Feature two
- Feature three
`)
    expect(desc).toMatchInlineSnapshot(`
      "Features
      - Feature one
      - Feature two
      - Feature three"
    `)
  })

  test('handles code blocks', async () => {
    const desc = await testMarkdown(`# Usage

Install the package:

\`\`\`bash
npm install mypackage
\`\`\`
`)
    expect(desc).toMatchInlineSnapshot(`
      "Usage
      Install the package:
      \`\`\`bash
      npm install mypackage
      \`\`\`"
    `)
  })

  test('handles blockquotes', async () => {
    const desc = await testMarkdown(`# Quote Example

> This is a blockquote
> with multiple lines
`)
    // Note: only the first line of the quoted paragraph carries the '>'
    // prefix - the extractor prefixes per extracted line, and the
    // paragraph arrives as a single multi-line text token.
    expect(desc).toMatchInlineSnapshot(`
      "Quote Example
      > This is a blockquote
      with multiple lines"
    `)
  })

  test('truncates long content with indicator', async () => {
    // Create 40 lines without blank lines between (so all fit in first 50 lines read)
    const lines = Array.from({ length: 40 }, (_, i) => `- Item ${i + 1}`).join('\n')
    const desc = await testMarkdown(`# Title\n\n${lines}`)
    const descLines = desc?.split('\n') ?? []
    // 20 content lines + 1 truncation indicator (Title + 19 items, then indicator)
    expect(descLines.length).toBe(21)
    expect(descLines[20]).toBe('... and 21 more lines')
  })

  test('returns null for empty markdown', async () => {
    const desc = await testMarkdown(``)
    expect(desc).toBeNull()
  })

  test('returns null for only HTML comments', async () => {
    const desc = await testMarkdown(`<!-- Just a comment -->

<!-- Another comment -->
`)
    expect(desc).toBeNull()
  })

  test('handles mixed content', async () => {
    const desc = await testMarkdown(`<!-- Header comment -->
![badge](https://example.com/badge.svg)

# agentmap

A compact, YAML-based inventory of your codebase.

## Features

- Fast scanning
- Tree-sitter parsing

\`\`\`bash
npm install agentmap
\`\`\`
`)
    expect(desc).toMatchInlineSnapshot(`
      "agentmap
      A compact, YAML-based inventory of your codebase.
      Features
      - Fast scanning
      - Tree-sitter parsing
      \`\`\`bash
      npm install agentmap
      \`\`\`"
    `)
  })
})
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
// Extract description from markdown files using marked AST.
|
|
2
|
+
|
|
3
|
+
import { Lexer, type Token, type Tokens } from 'marked'
|
|
4
|
+
import { readFirstLines } from './utils.js'
|
|
5
|
+
|
|
6
|
+
// Read at most this many lines from the top of the markdown file.
const MAX_LINES = 50
// Cap the extracted description at this many text lines.
const MAX_DESC_LINES = 20
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* Truncate lines to MAX_DESC_LINES, adding indicator if truncated
|
|
11
|
+
*/
|
|
12
|
+
function truncateDescription(lines: string[]): string {
|
|
13
|
+
const trimmed = lines.join('\n').trim()
|
|
14
|
+
const trimmedLines = trimmed.split('\n')
|
|
15
|
+
|
|
16
|
+
if (trimmedLines.length <= MAX_DESC_LINES) {
|
|
17
|
+
return trimmed
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
const truncated = trimmedLines.slice(0, MAX_DESC_LINES)
|
|
21
|
+
const remaining = trimmedLines.length - MAX_DESC_LINES
|
|
22
|
+
truncated.push(`... and ${remaining} more lines`)
|
|
23
|
+
return truncated.join('\n')
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Extract plain text from inline tokens, skipping images.
|
|
28
|
+
*/
|
|
29
|
+
function extractInlineText(tokens: Token[] | undefined): string {
|
|
30
|
+
if (!tokens) return ''
|
|
31
|
+
|
|
32
|
+
const parts: string[] = []
|
|
33
|
+
for (const token of tokens) {
|
|
34
|
+
// Skip images
|
|
35
|
+
if (token.type === 'image') {
|
|
36
|
+
continue
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
// Handle text
|
|
40
|
+
if (token.type === 'text') {
|
|
41
|
+
const text = token as Tokens.Text
|
|
42
|
+
if (text.text) {
|
|
43
|
+
parts.push(text.text)
|
|
44
|
+
}
|
|
45
|
+
continue
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// Handle links - extract the text content
|
|
49
|
+
if (token.type === 'link') {
|
|
50
|
+
const link = token as Tokens.Link
|
|
51
|
+
if (link.text) {
|
|
52
|
+
parts.push(link.text)
|
|
53
|
+
}
|
|
54
|
+
continue
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// Handle strong/em - extract nested text
|
|
58
|
+
if (token.type === 'strong' || token.type === 'em') {
|
|
59
|
+
const styled = token as Tokens.Strong | Tokens.Em
|
|
60
|
+
const inner = extractInlineText(styled.tokens)
|
|
61
|
+
if (inner) {
|
|
62
|
+
parts.push(inner)
|
|
63
|
+
}
|
|
64
|
+
continue
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// Handle codespan (inline code)
|
|
68
|
+
if (token.type === 'codespan') {
|
|
69
|
+
const code = token as Tokens.Codespan
|
|
70
|
+
if (code.text) {
|
|
71
|
+
parts.push('`' + code.text + '`')
|
|
72
|
+
}
|
|
73
|
+
continue
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
return parts.join('')
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Extract text content from markdown tokens recursively.
|
|
82
|
+
* Skips HTML, comments, and images. Returns plain text lines.
|
|
83
|
+
*/
|
|
84
|
+
function extractTextFromTokens(tokens: Token[]): string[] {
|
|
85
|
+
const lines: string[] = []
|
|
86
|
+
|
|
87
|
+
for (const token of tokens) {
|
|
88
|
+
// Skip HTML (includes comments)
|
|
89
|
+
if (token.type === 'html') {
|
|
90
|
+
continue
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
// Skip spaces
|
|
94
|
+
if (token.type === 'space') {
|
|
95
|
+
continue
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// Handle headings - extract inline text
|
|
99
|
+
if (token.type === 'heading') {
|
|
100
|
+
const heading = token as Tokens.Heading
|
|
101
|
+
const text = extractInlineText(heading.tokens)
|
|
102
|
+
if (text) {
|
|
103
|
+
lines.push(text)
|
|
104
|
+
}
|
|
105
|
+
continue
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
// Handle paragraphs - extract inline text (skips images)
|
|
109
|
+
if (token.type === 'paragraph') {
|
|
110
|
+
const para = token as Tokens.Paragraph
|
|
111
|
+
const text = extractInlineText(para.tokens)
|
|
112
|
+
if (text) {
|
|
113
|
+
lines.push(text)
|
|
114
|
+
}
|
|
115
|
+
continue
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
// Handle lists - extract text from items
|
|
119
|
+
if (token.type === 'list') {
|
|
120
|
+
const list = token as Tokens.List
|
|
121
|
+
for (const item of list.items) {
|
|
122
|
+
const text = extractInlineText(item.tokens)
|
|
123
|
+
if (text) {
|
|
124
|
+
lines.push('- ' + text.split('\n')[0])
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
continue
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// Handle blockquotes - extract nested tokens
|
|
131
|
+
if (token.type === 'blockquote') {
|
|
132
|
+
const quote = token as Tokens.Blockquote
|
|
133
|
+
if (quote.tokens) {
|
|
134
|
+
const nestedLines = extractTextFromTokens(quote.tokens)
|
|
135
|
+
lines.push(...nestedLines.map(l => '> ' + l))
|
|
136
|
+
}
|
|
137
|
+
continue
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// Handle code blocks - include with fence
|
|
141
|
+
if (token.type === 'code') {
|
|
142
|
+
const code = token as Tokens.Code
|
|
143
|
+
if (code.lang) {
|
|
144
|
+
lines.push('```' + code.lang)
|
|
145
|
+
} else {
|
|
146
|
+
lines.push('```')
|
|
147
|
+
}
|
|
148
|
+
lines.push(...code.text.split('\n'))
|
|
149
|
+
lines.push('```')
|
|
150
|
+
continue
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
// Handle text tokens (inline)
|
|
154
|
+
if (token.type === 'text') {
|
|
155
|
+
const text = token as Tokens.Text
|
|
156
|
+
if (text.text) {
|
|
157
|
+
lines.push(text.text)
|
|
158
|
+
}
|
|
159
|
+
continue
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
return lines
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
/**
|
|
167
|
+
* Extract description from a markdown file using marked lexer.
|
|
168
|
+
* Parses first N lines, extracts plain text from AST nodes,
|
|
169
|
+
* ignoring HTML comments and images.
|
|
170
|
+
* Falls back to raw content if parsing fails.
|
|
171
|
+
*/
|
|
172
|
+
export async function extractMarkdownDescription(filepath: string): Promise<string | null> {
|
|
173
|
+
const head = await readFirstLines(filepath, MAX_LINES)
|
|
174
|
+
if (head === null) {
|
|
175
|
+
// File couldn't be read - skip silently
|
|
176
|
+
return null
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
try {
|
|
180
|
+
// Parse markdown to tokens using marked lexer
|
|
181
|
+
const lexer = new Lexer()
|
|
182
|
+
const tokens = lexer.lex(head)
|
|
183
|
+
|
|
184
|
+
// Extract text from tokens
|
|
185
|
+
const lines = extractTextFromTokens(tokens)
|
|
186
|
+
|
|
187
|
+
// Filter empty lines
|
|
188
|
+
const contentLines = lines.filter(l => l.trim() !== '')
|
|
189
|
+
if (contentLines.length === 0) {
|
|
190
|
+
return null
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
return truncateDescription(contentLines)
|
|
194
|
+
} catch {
|
|
195
|
+
// Fallback: return raw content if parsing fails
|
|
196
|
+
const lines = head.split('\n').filter(l => l.trim() !== '')
|
|
197
|
+
if (lines.length === 0) {
|
|
198
|
+
return null
|
|
199
|
+
}
|
|
200
|
+
return truncateDescription(lines)
|
|
201
|
+
}
|
|
202
|
+
}
|