agentmap 0.7.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/CHANGELOG.md +96 -0
  2. package/README.md +24 -0
  3. package/dist/cli.js +44 -12
  4. package/dist/cli.js.map +1 -1
  5. package/dist/extract/definitions.js +12 -12
  6. package/dist/extract/definitions.js.map +1 -1
  7. package/dist/extract/definitions.test.js +30 -259
  8. package/dist/extract/definitions.test.js.map +1 -1
  9. package/dist/extract/git-status.d.ts +11 -4
  10. package/dist/extract/git-status.d.ts.map +1 -1
  11. package/dist/extract/git-status.js +21 -16
  12. package/dist/extract/git-status.js.map +1 -1
  13. package/dist/extract/markdown.js +1 -1
  14. package/dist/extract/markdown.test.js +3 -3
  15. package/dist/extract/markdown.test.js.map +1 -1
  16. package/dist/extract/marker.js +1 -1
  17. package/dist/extract/marker.test.js +4 -4
  18. package/dist/extract/marker.test.js.map +1 -1
  19. package/dist/extract/submodules.d.ts +12 -0
  20. package/dist/extract/submodules.d.ts.map +1 -0
  21. package/dist/extract/submodules.js +234 -0
  22. package/dist/extract/submodules.js.map +1 -0
  23. package/dist/extract/submodules.test.d.ts +2 -0
  24. package/dist/extract/submodules.test.d.ts.map +1 -0
  25. package/dist/extract/submodules.test.js +84 -0
  26. package/dist/extract/submodules.test.js.map +1 -0
  27. package/dist/index.d.ts +4 -1
  28. package/dist/index.d.ts.map +1 -1
  29. package/dist/index.js +10 -9
  30. package/dist/index.js.map +1 -1
  31. package/dist/logger.d.ts +10 -0
  32. package/dist/logger.d.ts.map +1 -0
  33. package/dist/logger.js +41 -0
  34. package/dist/logger.js.map +1 -0
  35. package/dist/map/builder.d.ts +3 -3
  36. package/dist/map/builder.d.ts.map +1 -1
  37. package/dist/map/builder.js +59 -9
  38. package/dist/map/builder.js.map +1 -1
  39. package/dist/map/builder.test.d.ts +2 -0
  40. package/dist/map/builder.test.d.ts.map +1 -0
  41. package/dist/map/builder.test.js +66 -0
  42. package/dist/map/builder.test.js.map +1 -0
  43. package/dist/map/truncate.d.ts +7 -3
  44. package/dist/map/truncate.d.ts.map +1 -1
  45. package/dist/map/truncate.js +90 -9
  46. package/dist/map/truncate.js.map +1 -1
  47. package/dist/map/yaml.d.ts.map +1 -1
  48. package/dist/map/yaml.js +13 -3
  49. package/dist/map/yaml.js.map +1 -1
  50. package/dist/scanner.d.ts +9 -2
  51. package/dist/scanner.d.ts.map +1 -1
  52. package/dist/scanner.js +172 -49
  53. package/dist/scanner.js.map +1 -1
  54. package/dist/scanner.test.d.ts +2 -0
  55. package/dist/scanner.test.d.ts.map +1 -0
  56. package/dist/scanner.test.js +84 -0
  57. package/dist/scanner.test.js.map +1 -0
  58. package/dist/test-helpers/git-test-helpers.d.ts +13 -0
  59. package/dist/test-helpers/git-test-helpers.d.ts.map +1 -0
  60. package/dist/test-helpers/git-test-helpers.js +48 -0
  61. package/dist/test-helpers/git-test-helpers.js.map +1 -0
  62. package/dist/types.d.ts +42 -2
  63. package/dist/types.d.ts.map +1 -1
  64. package/package.json +15 -3
  65. package/src/cli.ts +164 -0
  66. package/src/extract/definitions.test.ts +2040 -0
  67. package/src/extract/definitions.ts +379 -0
  68. package/src/extract/git-status.test.ts +507 -0
  69. package/src/extract/git-status.ts +359 -0
  70. package/src/extract/markdown.test.ts +159 -0
  71. package/src/extract/markdown.ts +202 -0
  72. package/src/extract/marker.test.ts +566 -0
  73. package/src/extract/marker.ts +398 -0
  74. package/src/extract/submodules.test.ts +95 -0
  75. package/src/extract/submodules.ts +269 -0
  76. package/src/extract/utils.ts +27 -0
  77. package/src/index.ts +106 -0
  78. package/src/languages/cpp.ts +129 -0
  79. package/src/languages/go.ts +72 -0
  80. package/src/languages/index.ts +231 -0
  81. package/src/languages/javascript.ts +33 -0
  82. package/src/languages/python.ts +41 -0
  83. package/src/languages/rust.ts +72 -0
  84. package/src/languages/typescript.ts +74 -0
  85. package/src/languages/zig.ts +106 -0
  86. package/src/logger.ts +55 -0
  87. package/src/map/builder.test.ts +72 -0
  88. package/src/map/builder.ts +175 -0
  89. package/src/map/truncate.ts +188 -0
  90. package/src/map/yaml.ts +66 -0
  91. package/src/parser/index.ts +53 -0
  92. package/src/parser/languages.ts +64 -0
  93. package/src/scanner.test.ts +95 -0
  94. package/src/scanner.ts +364 -0
  95. package/src/test-helpers/git-test-helpers.ts +62 -0
  96. package/src/types.ts +191 -0
@@ -0,0 +1,359 @@
1
+ // Parse git diff output and calculate definition-level diff stats.
2
+ // Uses defensive git options for cross-platform reliability.
3
+
4
+ import { execSync } from 'child_process'
5
+ import { createConsoleLogger } from '../logger.js'
6
+ import type { Definition, DefinitionDiff, DiffHunk, FileDiff, FileDiffStats } from '../types.js'
7
+ import type { Logger } from '../logger.js'
8
+
9
/**
 * Flags appended to every `git diff` invocation so the output looks the same
 * regardless of platform or the user's git configuration:
 *
 * - `--no-color`    no ANSI color codes
 * - `--no-ext-diff` no external diff tools
 * - `--no-textconv` no text conversion filters
 * - `--no-renames`  no rename detection (keeps parsing simple)
 */
const GIT_DIFF_OPTIONS = '--no-color --no-ext-diff --no-textconv --no-renames'
18
+
19
/**
 * Single-character escapes git uses inside C-style quoted paths,
 * mapped to the character each one stands for.
 */
const GIT_PATH_ESCAPES: Record<string, string> = {
  a: '\x07',
  b: '\b',
  f: '\f',
  n: '\n',
  r: '\r',
  t: '\t',
  v: '\v',
  '"': '"',
  '\\': '\\',
}

/**
 * Decode the body of a git-quoted path (the text between the double quotes).
 *
 * Works in a single left-to-right pass so that an escaped backslash followed
 * by another escape is never decoded twice. Runs of three-digit octal escapes
 * are treated as raw bytes and decoded together as UTF-8, which is how git
 * writes non-ASCII file names (e.g. `\303\251` for "é").
 * Unknown escapes are left untouched.
 */
function unquoteGitPath(quoted: string): string {
  return quoted.replace(
    /((?:\\[0-3][0-7]{2})+)|\\(.)/g,
    (whole: string, octalRun: string | undefined, simple: string | undefined): string => {
      if (octalRun !== undefined) {
        const bytes = octalRun
          .split('\\')
          .filter(Boolean)
          .map(digits => parseInt(digits, 8))
        return new TextDecoder().decode(Uint8Array.from(bytes))
      }
      const replacement = simple !== undefined ? GIT_PATH_ESCAPES[simple] : undefined
      return replacement !== undefined ? replacement : whole
    }
  )
}

/**
 * Normalize file path for cross-platform compatibility
 * - Unwraps quoted paths from git (paths with quotes, control characters or
 *   non-ASCII bytes) and decodes their escape sequences, including octal
 *   byte escapes
 * - Converts backslashes to forward slashes
 *
 * @param path Path as printed by git (possibly wrapped in double quotes)
 * @returns The decoded path using forward slashes only
 */
function normalizePath(path: string): string {
  let result = path
  // Git quotes paths with special characters: "path/with \"quotes\"/file.ts"
  if (result.length >= 2 && result.startsWith('"') && result.endsWith('"')) {
    result = unquoteGitPath(result.slice(1, -1))
  }
  // Normalize to forward slashes
  return result.replace(/\\/g, '/')
}
34
+
35
/**
 * Run a shell command in `dir` and return its stdout as UTF-8 text.
 *
 * Never throws because of the command itself: when it fails, a warning with
 * the error message is sent to `logger` and an empty string is returned.
 *
 * @param cmd Command line to execute
 * @param dir Working directory for the command
 * @param logger Receives the warning when the command fails
 * @returns The command's stdout, or '' on any error
 */
function safeExec(cmd: string, dir: string, logger: Logger): string {
  let output = ''
  try {
    output = execSync(cmd, {
      encoding: 'utf8',
      cwd: dir,
      // stderr is piped as well so it is captured instead of leaking to the terminal
      stdio: ['pipe', 'pipe', 'pipe'],
      maxBuffer: 10 * 1024 * 1024, // 10MB
    })
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error)
    logger.warn(`Warning: git diff failed: ${reason}`)
  }
  return output
}
52
+
53
/**
 * Turn `git diff --numstat` output into per-file line counts.
 *
 * Each row has the shape "added<TAB>deleted<TAB>path". Rows are skipped when
 * they are blank, have fewer than three fields, describe a binary file
 * ("-" in either count), carry non-numeric counts, or report no changes.
 *
 * @param numstatOutput Raw stdout of `git diff --numstat`
 * @returns Map from normalized path to its added/deleted line counts
 */
export function parseNumstat(numstatOutput: string): Map<string, FileDiffStats> {
  const stats = new Map<string, FileDiffStats>()
  if (numstatOutput.trim() === '') {
    return stats
  }

  for (const row of numstatOutput.split('\n')) {
    if (row.trim().length === 0) continue

    const fields = row.split('\t')
    if (fields.length < 3) continue

    const addedField = fields[0]
    const deletedField = fields[1]

    // Binary files are reported with "-" instead of numbers
    if (addedField === '-' || deletedField === '-') continue

    const added = Number.parseInt(addedField, 10)
    const deleted = Number.parseInt(deletedField, 10)
    if (Number.isNaN(added) || Number.isNaN(deleted)) continue
    if (added === 0 && deleted === 0) continue

    // Everything after the second tab is the path (it may itself contain tabs)
    const path = normalizePath(fields.slice(2).join('\t'))
    stats.set(path, { added, deleted })
  }

  return stats
}
95
+
96
/**
 * Read the line ranges out of a unified-diff hunk header such as
 * "@@ -10,5 +12,7 @@" or "@@ -10 +12,7 @@".
 *
 * A count that is omitted in the header defaults to 1.
 *
 * @param line A single line of diff output
 * @returns The parsed ranges, or null when the line holds no hunk header
 */
export function parseHunkHeader(line: string): DiffHunk | null {
  // Shape: @@ -oldStart[,oldCount] +newStart[,newCount] @@
  const parsed = /@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/.exec(line)
  if (parsed === null) {
    return null
  }

  const [, oldStart, oldCount, newStart, newCount] = parsed
  const countOrOne = (digits: string | undefined): number => (digits ? parseInt(digits, 10) : 1)

  return {
    oldStart: parseInt(oldStart, 10),
    oldCount: countOrOne(oldCount),
    newStart: parseInt(newStart, 10),
    newCount: countOrOne(newCount),
  }
}
111
+
112
/**
 * Extract the destination path from a "diff --git a/<path> b/<path>" header.
 *
 * Handles three shapes:
 * - quoted headers (`"a/..." "b/..."`), which git emits when the path needs
 *   escaping; the quoted text is handed to normalizePath for unescaping
 * - unquoted headers where both sides are the same path (always the case
 *   with --no-renames); the line is split by symmetry so a path that itself
 *   contains " b/" is not cut in the wrong place
 * - anything else falls back to a greedy match on the last " b/"
 *
 * @returns The normalized path, or null when the header cannot be parsed
 */
function extractDiffHeaderPath(headerLine: string): string | null {
  const rest = headerLine.slice('diff --git '.length)

  const quoted = /^"a\/(?:[^"\\]|\\.)*" "b\/((?:[^"\\]|\\.)*)"$/.exec(rest)
  if (quoted) {
    // Re-wrap in quotes so normalizePath applies its unescaping rules
    return normalizePath(`"${quoted[1]}"`)
  }

  // "a/" + P + " b/" + P has length 2 * |P| + 5
  const pathLength = (rest.length - 5) / 2
  if (Number.isInteger(pathLength) && pathLength > 0 && rest.startsWith('a/')) {
    const left = rest.slice(2, 2 + pathLength)
    const separator = rest.slice(2 + pathLength, 5 + pathLength)
    const right = rest.slice(5 + pathLength)
    if (separator === ' b/' && left === right) {
      return normalizePath(right)
    }
  }

  const fallback = headerLine.match(/diff --git a\/.+ b\/(.+)/)
  return fallback ? normalizePath(fallback[1]) : null
}

/**
 * Parse git diff output into structured file diffs (for definition-level analysis)
 * Only extracts hunk positions, not content.
 *
 * Files without any hunk (and binary files) are left out of the result.
 *
 * @param diffOutput Raw stdout of `git diff`
 * @returns Map from normalized destination path to that file's hunks
 */
export function parseDiff(diffOutput: string): Map<string, FileDiff> {
  const files = new Map<string, FileDiff>()

  if (!diffOutput.trim()) {
    return files
  }

  let currentFile: string | null = null
  let hunks: DiffHunk[] = []

  // Store the file collected so far, if it has at least one hunk
  const saveCurrent = (): void => {
    if (currentFile && hunks.length > 0) {
      files.set(currentFile, { path: currentFile, hunks })
    }
  }

  for (const line of diffOutput.split('\n')) {
    // New file header: "diff --git a/path b/path"
    if (line.startsWith('diff --git ')) {
      saveCurrent()
      // Use the b/ path (destination) as the canonical path
      currentFile = extractDiffHeaderPath(line)
      hunks = []
      continue
    }

    // Binary files carry no hunks - drop the file entirely
    if (line.startsWith('Binary files ')) {
      currentFile = null
      hunks = []
      continue
    }

    // Hunk header; malformed ones yield null and are skipped
    if (currentFile && line.startsWith('@@')) {
      const hunk = parseHunkHeader(line)
      if (hunk) {
        hunks.push(hunk)
      }
    }
  }

  saveCurrent()

  return files
}
175
+
176
/**
 * Collect per-file added/deleted line counts for the working tree against
 * HEAD, using `git diff --numstat`.
 *
 * @param dir Directory in which git is run
 * @param logger Receives a warning when git fails (defaults to the console logger)
 * @returns Map from file path to its line counts; empty when git fails
 */
export function getFileStats(dir: string, logger: Logger = createConsoleLogger()): Map<string, FileDiffStats> {
  return parseNumstat(safeExec(`git diff ${GIT_DIFF_OPTIONS} --numstat HEAD`, dir, logger))
}
184
+
185
/**
 * Collect hunk positions for the working tree against HEAD, using a
 * zero-context (`--unified=0`) `git diff`, for definition-level analysis.
 *
 * @param dir Directory in which git is run
 * @param logger Receives a warning when git fails (defaults to the console logger)
 * @returns Map from file path to its parsed hunks; empty when git fails
 */
export function getHunkDiff(dir: string, logger: Logger = createConsoleLogger()): Map<string, FileDiff> {
  return parseDiff(safeExec(`git diff ${GIT_DIFF_OPTIONS} --unified=0 HEAD`, dir, logger))
}
193
+
194
/**
 * Gather everything diff-related in one call: file-level stats and hunk data.
 *
 * The two lookups are isolated from each other - if one throws, a warning is
 * logged and an empty map is used for it while the other still runs.
 *
 * Entries for submodule paths are removed from both maps because their diff
 * output is misleading: numstat reports a pointer change as 1/1 and the
 * unified diff contains a "Subproject commit" pseudo-patch.
 *
 * @param dir Directory in which git is run
 * @param submodulePaths Paths to drop from both results
 * @param logger Receives warnings (defaults to the console logger)
 */
export function getAllDiffData(dir: string, submodulePaths?: Set<string>): {
  fileStats: Map<string, FileDiffStats>
  fileDiffs: Map<string, FileDiff>
}
export function getAllDiffData(dir: string, submodulePaths: Set<string> | undefined, logger: Logger): {
  fileStats: Map<string, FileDiffStats>
  fileDiffs: Map<string, FileDiff>
}
export function getAllDiffData(
  dir: string,
  submodulePaths?: Set<string>,
  logger: Logger = createConsoleLogger()
): {
  fileStats: Map<string, FileDiffStats>
  fileDiffs: Map<string, FileDiff>
} {
  // Run one lookup; on failure warn and fall back to an empty map
  const loadOrEmpty = <T>(warningPrefix: string, load: () => Map<string, T>): Map<string, T> => {
    try {
      return load()
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error)
      logger.warn(warningPrefix + reason)
      return new Map<string, T>()
    }
  }

  // File stats drive the file-level +N-M display
  const fileStats = loadOrEmpty('Warning: failed to get file stats: ', () => getFileStats(dir, logger))
  // Hunk data drives the definition-level analysis
  const fileDiffs = loadOrEmpty('Warning: failed to get hunk diff: ', () => getHunkDiff(dir, logger))

  if (submodulePaths) {
    submodulePaths.forEach(subPath => {
      fileStats.delete(subPath)
      fileDiffs.delete(subPath)
    })
  }

  return { fileStats, fileDiffs }
}
248
+
249
/**
 * Calculate diff stats for a single definition based on file hunks
 *
 * A definition is "added" if all its lines are new additions and nothing
 * was deleted inside it. Otherwise it's "updated" if any hunk touches it.
 *
 * Hunk positions are compared in NEW-file line numbers against the
 * definition's [line, endLine] range:
 * - A hunk with new lines contributes the number of its new lines that fall
 *   inside the definition, plus its full old-line count as deletions.
 * - A pure deletion (newCount === 0) has no new lines; git reports its
 *   newStart as the line BEFORE the removed lines. It is attributed to the
 *   definition only when both that line and the following one belong to it.
 *
 * @param def Definition whose line range is inspected
 * @param hunks Hunks of the file containing the definition
 * @returns Status and counts, or null when no hunk touches the definition
 *          (or the calculation fails)
 */
export function calculateDefinitionDiff(
  def: Definition,
  hunks: DiffHunk[]
): DefinitionDiff | null {
  try {
    const defStart = def.line
    const defEnd = def.endLine
    const defLineCount = defEnd - defStart + 1

    let addedInDef = 0
    let deletedInDef = 0

    for (const hunk of hunks) {
      if (hunk.newCount === 0) {
        // Pure deletion: the removed lines sat between newStart and newStart + 1
        const insideDef = hunk.newStart >= defStart && hunk.newStart < defEnd
        if (insideDef && hunk.oldCount > 0) {
          deletedInDef += hunk.oldCount
        }
        continue
      }

      // Overlap between [defStart, defEnd] and the hunk's new-line range
      const hunkNewEnd = hunk.newStart + hunk.newCount - 1
      const overlapStart = Math.max(defStart, hunk.newStart)
      const overlapEnd = Math.min(defEnd, hunkNewEnd)
      if (overlapStart > overlapEnd) {
        continue
      }

      addedInDef += overlapEnd - overlapStart + 1
      // The replaced old lines are attributed in full to every definition the hunk touches
      deletedInDef += hunk.oldCount
    }

    // No changes in this definition
    if (addedInDef === 0 && deletedInDef === 0) {
      return null
    }

    // "added" = the entire definition consists of new lines AND nothing was deleted
    const status = addedInDef >= defLineCount && deletedInDef === 0 ? 'added' : 'updated'

    return {
      status,
      added: addedInDef,
      deleted: deletedInDef,
    }
  } catch {
    // Any calculation error - return null (no diff info)
    return null
  }
}
309
+
310
/**
 * Sum the new-line and old-line counts of all hunks into file-level totals.
 *
 * @deprecated Use getFileStats() with --numstat instead for reliability
 * @param hunks Hunks belonging to one file
 * @returns Totals, or null when there are no hunks or both totals are zero
 */
export function calculateFileDiff(hunks: DiffHunk[]): FileDiffStats | null {
  const totals = hunks.reduce(
    (sum, hunk) => ({
      added: sum.added + hunk.newCount,
      deleted: sum.deleted + hunk.oldCount,
    }),
    { added: 0, deleted: 0 }
  )

  // An empty hunk list also ends up here with both totals at zero
  const unchanged = totals.added === 0 && totals.deleted === 0
  return unchanged ? null : totals
}
333
+
334
/**
 * Attach per-definition diff info to the definitions of one file.
 *
 * When the file has no diff (or no hunks) the input array is returned as is.
 * Otherwise a new array is built: definitions touched by a hunk are copied
 * with a `diff` property, all others are passed through unchanged. A failure
 * while computing one definition's diff leaves that definition without diff.
 *
 * @param definitions Definitions extracted from the file
 * @param fileDiff Parsed hunks for the file, if it changed
 */
export function applyDiffToDefinitions(
  definitions: Definition[],
  fileDiff: FileDiff | undefined
): Definition[] {
  if (!fileDiff || fileDiff.hunks.length === 0) {
    return definitions
  }

  const annotated: Definition[] = []
  for (const def of definitions) {
    try {
      const diff = calculateDefinitionDiff(def, fileDiff.hunks)
      if (diff) {
        annotated.push({ ...def, diff })
        continue
      }
    } catch {
      // Skip diff for this definition on error
    }
    annotated.push(def)
  }
  return annotated
}
357
+
358
+ // Legacy exports for backwards compatibility
359
+ export { getFileStats as getGitDiffAll }
@@ -0,0 +1,159 @@
1
+ // Tests for markdown description extraction.
2
+
3
+ import { describe, expect, test } from 'bun:test'
4
+ import { writeFile, unlink, mkdir } from 'fs/promises'
5
+ import { join } from 'path'
6
+ import { tmpdir } from 'os'
7
+ import { extractMarkdownDescription } from './markdown.js'
8
+
9
+ const TEST_DIR = join(tmpdir(), 'agentmap-markdown-test')
10
+
11
/**
 * Write `content` to a README.md inside the shared temp directory, run
 * extractMarkdownDescription on it and return the result.
 * The file is removed afterwards, whether or not extraction succeeded;
 * a failure to remove it is ignored.
 */
async function testMarkdown(content: string): Promise<string | null> {
  const filepath = join(TEST_DIR, 'README.md')
  await mkdir(TEST_DIR, { recursive: true })
  await writeFile(filepath, content, 'utf8')

  try {
    const description = await extractMarkdownDescription(filepath)
    return description
  } finally {
    await unlink(filepath).catch(() => {})
  }
}
21
+
22
// Each test writes a markdown document via testMarkdown and checks the description extracted from it.
+ describe('Markdown extraction', () => {
23
+ test('simple heading and paragraph', async () => {
24
+ const desc = await testMarkdown(`# My Project
25
+
26
+ This is a description of my project.
27
+ It does amazing things.
28
+ `)
29
+ expect(desc).toMatchInlineSnapshot(`
30
+ "My Project
31
+ This is a description of my project.
32
+ It does amazing things."
33
+ `)
34
+ })
35
+
36
+ test('ignores HTML comments', async () => {
37
+ const desc = await testMarkdown(`<!-- This is a comment -->
38
+ # Title
39
+
40
+ Some content here.
41
+ `)
42
+ expect(desc).toMatchInlineSnapshot(`
43
+ "Title
44
+ Some content here."
45
+ `)
46
+ })
47
+
48
+ test('ignores badge images', async () => {
49
+ const desc = await testMarkdown(`![Build Status](https://shields.io/badge/build-passing)
50
+ ![Coverage](https://img.shields.io/coverage/80)
51
+
52
+ # My Library
53
+
54
+ A useful library.
55
+ `)
56
+ expect(desc).toMatchInlineSnapshot(`
57
+ "My Library
58
+ A useful library."
59
+ `)
60
+ })
61
+
62
+ test('handles lists', async () => {
63
+ const desc = await testMarkdown(`# Features
64
+
65
+ - Feature one
66
+ - Feature two
67
+ - Feature three
68
+ `)
69
+ expect(desc).toMatchInlineSnapshot(`
70
+ "Features
71
+ - Feature one
72
+ - Feature two
73
+ - Feature three"
74
+ `)
75
+ })
76
+
77
+ test('handles code blocks', async () => {
78
+ const desc = await testMarkdown(`# Usage
79
+
80
+ Install the package:
81
+
82
+ \`\`\`bash
83
+ npm install mypackage
84
+ \`\`\`
85
+ `)
86
+ expect(desc).toMatchInlineSnapshot(`
87
+ "Usage
88
+ Install the package:
89
+ \`\`\`bash
90
+ npm install mypackage
91
+ \`\`\`"
92
+ `)
93
+ })
94
+
95
+ test('handles blockquotes', async () => {
96
+ const desc = await testMarkdown(`# Quote Example
97
+
98
+ > This is a blockquote
99
+ > with multiple lines
100
+ `)
101
+ expect(desc).toMatchInlineSnapshot(`
102
+ "Quote Example
103
+ > This is a blockquote
104
+ with multiple lines"
105
+ `)
106
+ })
107
+
108
+ test('truncates long content with indicator', async () => {
109
+ // Title + blank line + 40 list items with no blank lines between them = 42 input lines (so all fit in first 50 lines read)
110
+ const lines = Array.from({ length: 40 }, (_, i) => `- Item ${i + 1}`).join('\n')
111
+ const desc = await testMarkdown(`# Title\n\n${lines}`)
112
+ const descLines = desc?.split('\n') ?? []
113
+ // Title + 40 items = 41 content lines: the first 20 are kept (Title + 19 items), then 1 indicator line stands in for the other 21
114
+ expect(descLines.length).toBe(21)
115
+ expect(descLines[20]).toBe('... and 21 more lines')
116
+ })
117
+
118
+ test('returns null for empty markdown', async () => {
119
+ const desc = await testMarkdown(``)
120
+ expect(desc).toBeNull()
121
+ })
122
+
123
+ test('returns null for only HTML comments', async () => {
124
+ const desc = await testMarkdown(`<!-- Just a comment -->
125
+
126
+ <!-- Another comment -->
127
+ `)
128
+ expect(desc).toBeNull()
129
+ })
130
+
131
+ test('handles mixed content', async () => {
132
+ const desc = await testMarkdown(`<!-- Header comment -->
133
+ ![Badge](https://example.com/badge.svg)
134
+
135
+ # agentmap
136
+
137
+ A compact, YAML-based inventory of your codebase.
138
+
139
+ ## Features
140
+
141
+ - Fast scanning
142
+ - Tree-sitter parsing
143
+
144
+ \`\`\`bash
145
+ npm install agentmap
146
+ \`\`\`
147
+ `)
148
+ expect(desc).toMatchInlineSnapshot(`
149
+ "agentmap
150
+ A compact, YAML-based inventory of your codebase.
151
+ Features
152
+ - Fast scanning
153
+ - Tree-sitter parsing
154
+ \`\`\`bash
155
+ npm install agentmap
156
+ \`\`\`"
157
+ `)
158
+ })
159
+ })
@@ -0,0 +1,202 @@
1
+ // Extract description from markdown files using marked AST.
2
+
3
+ import { Lexer, type Token, type Tokens } from 'marked'
4
+ import { readFirstLines } from './utils.js'
5
+
6
// Number of lines read from the start of a markdown file
const MAX_LINES = 50
// Maximum number of lines kept in an extracted description
const MAX_DESC_LINES = 20

/**
 * Join the given lines, trim the result, and cap it at MAX_DESC_LINES lines.
 * When lines are cut, a final "... and N more lines" line reports how many.
 * Lines that themselves contain newlines count as several lines.
 */
function truncateDescription(lines: string[]): string {
  const text = lines.join('\n').trim()
  const allLines = text.split('\n')
  const overflow = allLines.length - MAX_DESC_LINES

  if (overflow <= 0) {
    return text
  }

  return [...allLines.slice(0, MAX_DESC_LINES), `... and ${overflow} more lines`].join('\n')
}
25
+
26
/**
 * Extract plain text from inline tokens, skipping images.
 *
 * - text: uses the nested inline tokens when the token has any (as the text
 *   tokens wrapping list item content do), so images and emphasis markers
 *   inside them are handled too; otherwise the token's own text
 * - link: the link's visible content, taken from its nested tokens so that a
 *   linked image (the usual badge form) is dropped; falls back to `text`
 * - strong / em / del: the nested text without the markers
 * - codespan: the code wrapped in backticks
 * - escape: the escaped character itself
 * - br: a newline, so hard-wrapped lines are not glued together
 * - anything else (images, inline HTML, ...) contributes nothing
 *
 * @param tokens Inline tokens, or undefined
 * @returns The concatenated text ('' when there are no tokens)
 */
function extractInlineText(tokens: Token[] | undefined): string {
  if (!tokens) return ''

  const parts: string[] = []
  for (const token of tokens) {
    switch (token.type) {
      case 'image':
        // Skip images
        break

      case 'text': {
        const text = token as Tokens.Text
        const hasNested = Array.isArray(text.tokens) && text.tokens.length > 0
        const content = hasNested ? extractInlineText(text.tokens) : text.text
        if (content) {
          parts.push(content)
        }
        break
      }

      case 'link': {
        const link = token as Tokens.Link
        const hasNested = Array.isArray(link.tokens) && link.tokens.length > 0
        const content = hasNested ? extractInlineText(link.tokens) : link.text
        if (content) {
          parts.push(content)
        }
        break
      }

      case 'strong':
      case 'em':
      case 'del': {
        const styled = token as Tokens.Strong | Tokens.Em | Tokens.Del
        const inner = extractInlineText(styled.tokens)
        if (inner) {
          parts.push(inner)
        }
        break
      }

      case 'codespan': {
        const code = token as Tokens.Codespan
        if (code.text) {
          parts.push('`' + code.text + '`')
        }
        break
      }

      case 'escape': {
        const escaped = token as Tokens.Escape
        if (escaped.text) {
          parts.push(escaped.text)
        }
        break
      }

      case 'br':
        parts.push('\n')
        break

      default:
        break
    }
  }

  return parts.join('')
}
79
+
80
/**
 * Walk block-level markdown tokens and collect their text as output lines.
 *
 * - html (including comments) and space tokens are ignored
 * - headings and paragraphs contribute their inline text
 * - each list item contributes "- " plus the first line of its text
 * - blockquotes are processed recursively, each resulting entry prefixed with "> "
 * - code blocks are emitted with their fence (and language, if any)
 * - bare text tokens contribute their text
 * - every other token type is ignored
 *
 * Entries may themselves contain newlines.
 */
function extractTextFromTokens(tokens: Token[]): string[] {
  const collected: string[] = []
  const pushIfPresent = (value: string): void => {
    if (value) {
      collected.push(value)
    }
  }

  for (const token of tokens) {
    switch (token.type) {
      case 'html':
      case 'space':
        break

      case 'heading':
        pushIfPresent(extractInlineText((token as Tokens.Heading).tokens))
        break

      case 'paragraph':
        // Images inside the paragraph are dropped by extractInlineText
        pushIfPresent(extractInlineText((token as Tokens.Paragraph).tokens))
        break

      case 'list':
        for (const item of (token as Tokens.List).items) {
          const itemText = extractInlineText(item.tokens)
          if (itemText) {
            collected.push('- ' + itemText.split('\n')[0])
          }
        }
        break

      case 'blockquote': {
        const quoted = (token as Tokens.Blockquote).tokens
        if (quoted) {
          for (const nested of extractTextFromTokens(quoted)) {
            collected.push('> ' + nested)
          }
        }
        break
      }

      case 'code': {
        const block = token as Tokens.Code
        const openingFence = block.lang ? '```' + block.lang : '```'
        collected.push(openingFence, ...block.text.split('\n'), '```')
        break
      }

      case 'text':
        pushIfPresent((token as Tokens.Text).text)
        break

      default:
        break
    }
  }

  return collected
}
165
+
166
/**
 * Build a short plain-text description from the start of a markdown file.
 *
 * Reads the first MAX_LINES lines, tokenizes them with the marked lexer and
 * collects the text of the resulting tokens (HTML comments and images are
 * left out). If that fails for any reason, the raw lines are used instead.
 * Blank lines are removed and the result is capped by truncateDescription.
 *
 * @param filepath Path of the markdown file
 * @returns The description, or null when the file could not be read or
 *          contains no usable text
 */
export async function extractMarkdownDescription(filepath: string): Promise<string | null> {
  const head = await readFirstLines(filepath, MAX_LINES)
  if (head === null) {
    // Unreadable file - skip silently
    return null
  }

  const isNonBlank = (line: string): boolean => line.trim() !== ''

  try {
    const extracted = extractTextFromTokens(new Lexer().lex(head)).filter(isNonBlank)
    return extracted.length > 0 ? truncateDescription(extracted) : null
  } catch {
    // Fallback: use the raw content if parsing fails
    const rawLines = head.split('\n').filter(isNonBlank)
    return rawLines.length > 0 ? truncateDescription(rawLines) : null
  }
}