@mmnto/cli 1.9.0 → 1.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/dist/commands/add-lesson.d.ts.map +1 -1
  2. package/dist/commands/add-lesson.js +5 -1
  3. package/dist/commands/add-lesson.js.map +1 -1
  4. package/dist/commands/compile.d.ts.map +1 -1
  5. package/dist/commands/compile.js +4 -1
  6. package/dist/commands/compile.js.map +1 -1
  7. package/dist/commands/extract-local.d.ts +6 -0
  8. package/dist/commands/extract-local.d.ts.map +1 -0
  9. package/dist/commands/extract-local.js +134 -0
  10. package/dist/commands/extract-local.js.map +1 -0
  11. package/dist/commands/extract-pr.d.ts +19 -0
  12. package/dist/commands/extract-pr.d.ts.map +1 -0
  13. package/dist/commands/extract-pr.js +193 -0
  14. package/dist/commands/extract-pr.js.map +1 -0
  15. package/dist/commands/extract-scan.d.ts +6 -0
  16. package/dist/commands/extract-scan.d.ts.map +1 -0
  17. package/dist/commands/extract-scan.js +100 -0
  18. package/dist/commands/extract-scan.js.map +1 -0
  19. package/dist/commands/extract-shared.d.ts +29 -0
  20. package/dist/commands/extract-shared.d.ts.map +1 -0
  21. package/dist/commands/extract-shared.js +351 -0
  22. package/dist/commands/extract-shared.js.map +1 -0
  23. package/dist/commands/extract-templates.d.ts +1 -0
  24. package/dist/commands/extract-templates.d.ts.map +1 -1
  25. package/dist/commands/extract-templates.js +33 -0
  26. package/dist/commands/extract-templates.js.map +1 -1
  27. package/dist/commands/extract.d.ts +7 -36
  28. package/dist/commands/extract.d.ts.map +1 -1
  29. package/dist/commands/extract.js +44 -588
  30. package/dist/commands/extract.js.map +1 -1
  31. package/dist/commands/extract.test.js +103 -2
  32. package/dist/commands/extract.test.js.map +1 -1
  33. package/dist/commands/init.d.ts +5 -0
  34. package/dist/commands/init.d.ts.map +1 -1
  35. package/dist/commands/init.js +85 -2
  36. package/dist/commands/init.js.map +1 -1
  37. package/dist/commands/init.test.js +54 -1
  38. package/dist/commands/init.test.js.map +1 -1
  39. package/dist/commands/install-hooks.d.ts +17 -7
  40. package/dist/commands/install-hooks.d.ts.map +1 -1
  41. package/dist/commands/install-hooks.js +123 -34
  42. package/dist/commands/install-hooks.js.map +1 -1
  43. package/dist/commands/install-hooks.test.js +136 -24
  44. package/dist/commands/install-hooks.test.js.map +1 -1
  45. package/dist/commands/review-alias.test.js +9 -15
  46. package/dist/commands/review-alias.test.js.map +1 -1
  47. package/dist/commands/shield.d.ts.map +1 -1
  48. package/dist/commands/shield.js +12 -1
  49. package/dist/commands/shield.js.map +1 -1
  50. package/dist/commands/shield.test.js +37 -0
  51. package/dist/commands/shield.test.js.map +1 -1
  52. package/dist/commands/spec.d.ts +6 -0
  53. package/dist/commands/spec.d.ts.map +1 -1
  54. package/dist/commands/spec.js +11 -1
  55. package/dist/commands/spec.js.map +1 -1
  56. package/dist/commands/spec.test.js +25 -1
  57. package/dist/commands/spec.test.js.map +1 -1
  58. package/dist/exemptions/__tests__/exemption-engine.test.js +9 -0
  59. package/dist/exemptions/__tests__/exemption-engine.test.js.map +1 -1
  60. package/dist/exemptions/exemption-engine.js +1 -1
  61. package/dist/exemptions/exemption-engine.js.map +1 -1
  62. package/dist/exemptions/exemption-schema.d.ts +6 -6
  63. package/dist/hooks/auto-context.d.ts.map +1 -1
  64. package/dist/hooks/auto-context.js +2 -19
  65. package/dist/hooks/auto-context.js.map +1 -1
  66. package/dist/index.js +33 -9
  67. package/dist/index.js.map +1 -1
  68. package/dist/schemas/handoff-checkpoint.d.ts +2 -2
  69. package/dist/utils/pilot.d.ts +44 -0
  70. package/dist/utils/pilot.d.ts.map +1 -0
  71. package/dist/utils/pilot.js +141 -0
  72. package/dist/utils/pilot.js.map +1 -0
  73. package/dist/utils/pilot.test.d.ts +2 -0
  74. package/dist/utils/pilot.test.d.ts.map +1 -0
  75. package/dist/utils/pilot.test.js +166 -0
  76. package/dist/utils/pilot.test.js.map +1 -0
  77. package/dist/utils.d.ts +8 -1
  78. package/dist/utils.d.ts.map +1 -1
  79. package/dist/utils.js +27 -15
  80. package/dist/utils.js.map +1 -1
  81. package/dist/utils.test.js +88 -2
  82. package/dist/utils.test.js.map +1 -1
  83. package/package.json +5 -3
@@ -1,355 +1,37 @@
1
- import { createEmbedder, deduplicateLessons, flagSuspiciousLessons, generateLessonHeading, LanceStore, loadCustomSecrets, runSync, TotemConfigError, truncateHeading, writeLessonFile, } from '@mmnto/totem';
1
+ import { TotemConfigError } from '@mmnto/totem';
2
2
  import { log } from '../ui.js';
3
- import { formatResults, getSystemPrompt, GH_TIMEOUT_MS, loadConfig, loadEnv, requireEmbedding, resolveConfigPath, runOrchestrator, sanitize, wrapUntrustedXml, } from '../utils.js';
4
- import { MAX_EXISTING_LESSONS, MAX_INPUTS, MAX_REVIEW_BODY_CHARS, SCAN_EXTRACT_SYSTEM_PROMPT, SYSTEM_PROMPT, } from './extract-templates.js';
5
- // ─── Constants (re-exported from extract-templates) ─────
6
- export { EXTRACT_SYSTEM_PROMPT, MAX_EXISTING_LESSONS, MAX_INPUTS, MAX_REVIEW_BODY_CHARS, SCAN_EXTRACT_SYSTEM_PROMPT, SEMANTIC_DEDUP_THRESHOLD, SYSTEM_PROMPT, } from './extract-templates.js';
3
+ import { isGlobalConfigPath, loadConfig, loadEnv, resolveConfigPath } from '../utils.js';
4
+ import { sharedPipeline } from './extract-shared.js';
5
+ import { MAX_INPUTS } from './extract-templates.js';
6
+ // ─── Re-exports: extract-templates ─────────────────────
7
+ export { EXTRACT_SYSTEM_PROMPT, LOCAL_EXTRACT_SYSTEM_PROMPT, MAX_EXISTING_LESSONS, MAX_INPUTS, MAX_REVIEW_BODY_CHARS, SCAN_EXTRACT_SYSTEM_PROMPT, SEMANTIC_DEDUP_THRESHOLD, SYSTEM_PROMPT, } from './extract-templates.js';
7
8
  export { cosineSimilarity, deduplicateLessons, flagSuspiciousLessons, isInstructionalContext, } from '@mmnto/totem';
8
- const TAG = 'Extract';
9
- function groupIntoThreads(comments) {
10
- const byId = new Map();
11
- for (const c of comments)
12
- byId.set(c.id, c);
13
- const threadMap = new Map();
14
- for (const c of comments) {
15
- const rootId = c.inReplyToId ?? c.id;
16
- const thread = threadMap.get(rootId) ?? [];
17
- thread.push(c);
18
- threadMap.set(rootId, thread);
19
- }
20
- const threads = [];
21
- for (const [rootId, threadComments] of threadMap) {
22
- threadComments.sort((a, b) => {
23
- if (!a.createdAt || !b.createdAt)
24
- return 0;
25
- return a.createdAt.localeCompare(b.createdAt);
26
- });
27
- const root = byId.get(rootId) ?? threadComments[0];
28
- threads.push({
29
- path: root.path,
30
- diffHunk: root.diffHunk,
31
- comments: threadComments.map((c) => ({ id: c.id, author: c.author, body: c.body })),
32
- });
33
- }
34
- return threads;
35
- }
36
- // ─── LanceDB retrieval ─────────────────────────────────
37
- async function retrieveExistingLessons(store) {
38
- return store.search({
39
- query: 'lesson trap pattern decision',
40
- typeFilter: 'spec',
41
- maxResults: MAX_EXISTING_LESSONS,
42
- });
43
- }
44
- // ─── Prompt assembly ────────────────────────────────────
45
- const DEFAULT_BOT_MARKERS = ['Using Gemini Code Assist', 'Gemini Code Assist'];
46
- function isGcaBoilerplate(body, botMarkers) {
47
- return botMarkers.some((marker) => body.includes(marker));
48
- }
49
- export function assemblePrompt(pr, threads, existingLessons, systemPrompt, nits, botMarkers = DEFAULT_BOT_MARKERS, scopeGlobs) {
50
- const sections = [systemPrompt];
51
- // Scope context from PR diff analysis (#1014) — globs derived from PR filenames (untrusted)
52
- if (scopeGlobs && scopeGlobs.length > 0) {
53
- sections.push('\n=== SCOPE CONTEXT (from PR diff analysis) ===');
54
- sections.push(wrapUntrustedXml('scope_context', `Suggested file scope: ${scopeGlobs.join(', ')}`));
55
- sections.push('Use this scope as the default unless a lesson truly applies globally.');
56
- sections.push('Include a "scope" field in each lesson JSON with the appropriate glob pattern.');
57
- }
58
- // PR metadata — sanitize untrusted fields (title, state come from PR author)
59
- sections.push('=== PR METADATA ===');
60
- sections.push(`PR #${pr.number}: ${sanitize(pr.title)}`);
61
- sections.push(`State: ${sanitize(pr.state)}`);
62
- if (pr.body) {
63
- sections.push('');
64
- sections.push(wrapUntrustedXml('pr_body', pr.body));
65
- }
66
- // Review summaries (non-empty review bodies)
67
- const reviewBodies = pr.reviews.filter((r) => r.body.trim());
68
- if (reviewBodies.length > 0) {
69
- sections.push('\n=== REVIEW SUMMARIES ===');
70
- for (const r of reviewBodies) {
71
- sections.push(`[${sanitize(r.author)} — ${sanitize(r.state)}]`);
72
- sections.push(wrapUntrustedXml('review_body', r.body));
73
- sections.push('');
74
- }
75
- }
76
- // CodeRabbit nits (pre-parsed and passed in)
77
- if (nits && nits.length > 0) {
78
- sections.push('\n=== CODERABBIT NITS (extract valuable architectural insights) ===');
79
- for (const nit of nits) {
80
- sections.push(wrapUntrustedXml('nit_body', nit));
81
- }
82
- }
83
- // Regular PR comments (filter GCA boilerplate)
84
- const prComments = pr.comments.filter((c) => !isGcaBoilerplate(c.body, botMarkers));
85
- if (prComments.length > 0) {
86
- sections.push('\n=== PR COMMENTS ===');
87
- for (const c of prComments) {
88
- sections.push(`[${sanitize(c.author)}]`);
89
- sections.push(wrapUntrustedXml('comment_body', c.body));
90
- sections.push('');
91
- }
92
- }
93
- // Inline review comment threads
94
- if (threads.length > 0) {
95
- sections.push('\n=== INLINE REVIEW THREADS ===');
96
- for (const thread of threads) {
97
- sections.push(`--- ${sanitize(thread.path)} ---`); // totem-ignore — thread.path is untrusted PR data, not local git
98
- sections.push(wrapUntrustedXml('diff_hunk', thread.diffHunk));
99
- for (const c of thread.comments) {
100
- sections.push(`[${sanitize(c.author)}]:\n${wrapUntrustedXml('comment_body', c.body)}`);
101
- }
102
- sections.push('');
103
- }
104
- }
105
- // Existing lessons for dedup context
106
- const lessonSection = formatResults(existingLessons, 'EXISTING LESSONS (do NOT duplicate)');
107
- if (lessonSection) {
108
- sections.push('\n=== DEDUP CONTEXT ===');
109
- sections.push(lessonSection);
110
- }
111
- // Truncate if needed
112
- let prompt = sections.join('\n');
113
- if (prompt.length > MAX_REVIEW_BODY_CHARS) {
114
- prompt = prompt.slice(0, MAX_REVIEW_BODY_CHARS) + '\n\n... [content truncated] ...';
115
- }
116
- return prompt;
117
- }
118
- // ─── Scan prompt assembly ──────────────────────────────
119
- export function assembleFromScanPrompt(alerts, diff, existingLessons, systemPrompt) {
120
- const sections = [systemPrompt];
121
- sections.push('\n=== FIXED CODE SCANNING ALERTS ===');
122
- for (const alert of alerts) {
123
- sections.push(`\n--- Alert #${alert.number} ---`);
124
- sections.push(wrapUntrustedXml('alert_rule', sanitize(alert.rule_id)));
125
- sections.push(wrapUntrustedXml('alert_message', alert.most_recent_instance.message.text));
126
- sections.push(wrapUntrustedXml('alert_location', `${alert.most_recent_instance.location.path}:${alert.most_recent_instance.location.start_line}`));
127
- }
128
- sections.push('\n=== FIX DIFF ===');
129
- const truncatedDiff = diff.length > MAX_REVIEW_BODY_CHARS
130
- ? diff.slice(0, MAX_REVIEW_BODY_CHARS) + '\n... [diff truncated] ...'
131
- : diff;
132
- sections.push(wrapUntrustedXml('fix_diff', truncatedDiff));
133
- // Existing lessons for dedup context
134
- const lessonSection = formatResults(existingLessons, 'EXISTING LESSONS (do NOT duplicate)');
135
- if (lessonSection) {
136
- sections.push('\n=== DEDUP CONTEXT ===');
137
- sections.push(lessonSection);
138
- }
139
- return sections.join('\n');
140
- }
141
- // ─── Lesson parser ──────────────────────────────────────
142
- const LESSON_RE = /---LESSON---\s*\n(?:Heading:\s*(.+)\n)?Tags:\s*(.+)\n([\s\S]+?)---END---/g;
143
- /** Strip markdown heading markers and "Lesson —" prefixes, then enforce max length. */
144
- function sanitizeHeading(heading) {
145
- const cleaned = heading
146
- .replace(/^#+\s*/, '')
147
- .replace(/^Lesson\s*[-—:]\s*/i, '')
148
- .trim();
149
- return truncateHeading(cleaned);
150
- }
151
- /** Max allowed length for a single lesson's text to prevent corrupted/hallucinated output. */
152
- const MAX_LESSON_TEXT_LENGTH = 2000;
153
- /** Max allowed tags per lesson. */
154
- const MAX_TAGS_PER_LESSON = 10;
155
- /** Max allowed length for a single tag. */
156
- const MAX_TAG_LENGTH = 50;
157
- /** Extract a JSON array from LLM output, handling code fences and conversational wrapping. */
158
- function extractJsonArray(input) {
159
- const trimmed = input.trim();
160
- // Try markdown code fences (backtick or tilde)
161
- const fenced = trimmed.match(/(?:```|~~~)(?:json)?\s*\n?([\s\S]*?)(?:```|~~~)/i);
162
- if (fenced)
163
- return fenced[1].trim();
164
- // Look for `[` followed by optional whitespace then `{` — handles both compact and pretty-printed
165
- const arrayStart = trimmed.search(/\[\s*\{/);
166
- if (arrayStart !== -1) {
167
- // Find matching ] respecting JSON string literals (brackets inside strings don't count)
168
- let depth = 0;
169
- let inString = false;
170
- let escaped = false;
171
- for (let i = arrayStart; i < trimmed.length; i++) {
172
- const ch = trimmed[i];
173
- if (escaped) {
174
- escaped = false;
175
- continue;
176
- }
177
- if (ch === '\\' && inString) {
178
- escaped = true;
179
- continue;
180
- }
181
- if (ch === '"') {
182
- inString = !inString;
183
- continue;
184
- }
185
- if (inString)
186
- continue;
187
- if (ch === '[')
188
- depth++;
189
- else if (ch === ']') {
190
- depth--;
191
- if (depth === 0)
192
- return trimmed.slice(arrayStart, i + 1);
193
- }
194
- }
195
- }
196
- return null;
197
- }
198
- /** Validate a single parsed lesson object. Returns null if invalid. */
199
- function validateLesson(obj) {
200
- if (typeof obj !== 'object' || obj === null || Array.isArray(obj))
201
- return null;
202
- const rec = obj;
203
- // Normalize text
204
- const text = typeof rec.text === 'string' ? rec.text.trim() : null;
205
- if (!text || text.length > MAX_LESSON_TEXT_LENGTH)
206
- return null;
207
- // Normalize tags — trim and filter empty
208
- const tags = Array.isArray(rec.tags)
209
- ? rec.tags
210
- .filter((t) => typeof t === 'string')
211
- .map((t) => t.trim())
212
- .filter(Boolean)
213
- : null;
214
- if (!tags || tags.length === 0 || tags.length > MAX_TAGS_PER_LESSON)
215
- return null;
216
- if (tags.some((t) => t.length > MAX_TAG_LENGTH))
217
- return null;
218
- // Validate optional heading
219
- const heading = typeof rec.heading === 'string' ? sanitizeHeading(rec.heading) : undefined;
220
- // Validate optional scope (#1014) — reject newlines to prevent body injection
221
- const rawScope = typeof rec.scope === 'string' ? rec.scope.trim() : undefined;
222
- const scope = rawScope && !/[\n\r]/.test(rawScope) ? rawScope : undefined;
223
- return { ...(heading && { heading }), tags, text, ...(scope && { scope }) };
224
- }
225
- /** Try to parse JSON lessons with manual validation. Returns null on failure. */
226
- function tryParseJson(llmOutput) {
227
- try {
228
- const jsonStr = extractJsonArray(llmOutput);
229
- if (!jsonStr)
230
- return null;
231
- const parsed = JSON.parse(jsonStr);
232
- if (!Array.isArray(parsed))
233
- return null;
234
- const lessons = [];
235
- for (const item of parsed) {
236
- const validated = validateLesson(item);
237
- if (validated)
238
- lessons.push(validated);
9
+ export { appendLessons, assembleExtractPrompt, parseLessons, selectLessons, } from './extract-shared.js';
10
+ // ─── Re-exports: extract-pr ────────────────────────────
11
+ export { assemblePrompt } from './extract-pr.js';
12
+ // ─── Re-exports: extract-scan ──────────────────────────
13
+ export { assembleFromScanPrompt } from './extract-scan.js';
14
+ // ─── Re-exports: extract-local ─────────────────────────
15
+ export { assembleLocalPrompt } from './extract-local.js';
16
+ // ─── Main command ──────────────────────────────────────
17
+ export async function extractCommand(prNumbers, options) {
18
+ // ─── Local extraction mode (--local) ─────────────────
19
+ if (options.local) {
20
+ if (options.fromScan) {
21
+ throw new TotemConfigError('Cannot combine --local with --from-scan.', 'Use --local for local diffs or --from-scan with PR numbers for code scanning alerts.', 'CONFIG_INVALID');
239
22
  }
240
- // JSON was detected and parsed — return results even if empty.
241
- // Returning [] (not null) prevents regex fallback from accepting
242
- // injected ---LESSON--- content after JSON was already found.
243
- return lessons;
244
- }
245
- catch {
246
- return null;
247
- }
248
- }
249
- /** Fallback: parse lessons using the legacy ---LESSON---...---END--- regex format. */
250
- function parseWithRegex(llmOutput) {
251
- const lessons = [];
252
- let match;
253
- while ((match = LESSON_RE.exec(llmOutput)) !== null) {
254
- const rawHeading = match[1]; // undefined if Heading: line was absent
255
- const tags = match[2]
256
- .split(',')
257
- .map((t) => t.trim())
258
- .filter(Boolean);
259
- const text = match[3].trim();
260
- // Validate: reject malformed or hallucinated lessons before they reach disk
261
- if (!text)
262
- continue;
263
- if (text.length > MAX_LESSON_TEXT_LENGTH)
264
- continue;
265
- if (tags.length === 0 || tags.length > MAX_TAGS_PER_LESSON)
266
- continue;
267
- if (tags.some((t) => t.length > MAX_TAG_LENGTH))
268
- continue;
269
- const heading = rawHeading ? sanitizeHeading(rawHeading) : undefined;
270
- lessons.push({ ...(heading && { heading }), tags, text });
271
- }
272
- return lessons;
273
- }
274
- export function parseLessons(llmOutput) {
275
- if (llmOutput.trim() === 'NONE')
276
- return [];
277
- // Primary path: JSON + manual validation
278
- const jsonLessons = tryParseJson(llmOutput);
279
- if (jsonLessons !== null)
280
- return jsonLessons;
281
- // Fallback: regex parsing for models that don't produce clean JSON
282
- return parseWithRegex(llmOutput);
283
- }
284
- // ─── Lesson writer ──────────────────────────────────────
285
- export function appendLessons(lessons, lessonsDir) {
286
- for (const l of lessons) {
287
- const heading = l.heading || generateLessonHeading(l.text);
288
- const tags = l.tags.join(', ');
289
- const scopeLine = l.scope ? `\n**Scope:** ${l.scope}` : '';
290
- const entry = `## Lesson — ${heading}\n\n**Tags:** ${tags}${scopeLine}\n\n${l.text}\n`;
291
- writeLessonFile(lessonsDir, entry);
292
- }
293
- }
294
- // ─── Lesson selection ───────────────────────────────────
295
- const LABEL_MAX_CHARS = 70;
296
- function truncateLabel(text) {
297
- const oneLine = text.replace(/\n/g, ' ');
298
- if (oneLine.length <= LABEL_MAX_CHARS)
299
- return oneLine;
300
- return oneLine.slice(0, LABEL_MAX_CHARS - 1) + '…';
301
- }
302
- /**
303
- * Prompts the user to select which lessons to keep via multi-select.
304
- * In --yes mode, suspicious lessons are blocked (dropped with warnings).
305
- * Returns the selected lessons.
306
- * Throws in non-interactive environments without --yes.
307
- */
308
- export async function selectLessons(lessons, opts) {
309
- if (opts.yes) {
310
- // --yes mode: block suspicious lessons (#291)
311
- const clean = lessons.filter((l) => !l.suspiciousFlags?.length);
312
- const dropped = lessons.filter((l) => l.suspiciousFlags?.length);
313
- if (dropped.length > 0) {
314
- for (const l of dropped) {
315
- log.warn(TAG, `Blocked suspicious lesson: ${truncateLabel(sanitize(l.text))}`);
316
- for (const flag of l.suspiciousFlags) {
317
- log.warn(TAG, ` - ${flag}`);
318
- }
319
- }
23
+ if (prNumbers.length > 0) {
24
+ throw new TotemConfigError('Cannot combine --local with PR numbers.', 'Use either --local for local diffs or PR numbers for remote extraction.', 'CONFIG_INVALID');
320
25
  }
321
- return clean;
322
- }
323
- if (!opts.isTTY) {
324
- throw new TotemConfigError('Refusing to write lessons in non-interactive mode.', 'Use --yes to bypass confirmation, or run in an interactive terminal.', 'CONFIG_INVALID');
325
- }
326
- const { isCancel, multiselect } = await import('@clack/prompts');
327
- const result = await multiselect({
328
- message: `Select lessons to persist (${lessons.length} extracted):`,
329
- options: lessons.map((lesson, i) => ({
330
- value: i,
331
- label: lesson.suspiciousFlags?.length
332
- ? `[!] ${truncateLabel(sanitize(lesson.text))}`
333
- : truncateLabel(sanitize(lesson.text)),
334
- hint: lesson.suspiciousFlags?.length
335
- ? `${sanitize(lesson.tags.join(', '))} -- ${lesson.suspiciousFlags.join('; ')}`
336
- : sanitize(lesson.tags.join(', ')),
337
- })),
338
- // Pre-select only non-suspicious lessons
339
- initialValues: lessons
340
- .map((l, i) => (l.suspiciousFlags?.length ? null : i))
341
- .filter((i) => i !== null),
342
- required: false,
343
- });
344
- if (isCancel(result)) {
345
- return [];
26
+ const cwd = process.cwd();
27
+ const { extractFromLocal } = await import('./extract-local.js');
28
+ const lessons = await extractFromLocal(options, cwd);
29
+ if (lessons.length === 0)
30
+ return;
31
+ await sharedPipeline(lessons, options, cwd, 'local changes');
32
+ return;
346
33
  }
347
- return result.map((i) => lessons[i]);
348
- }
349
- export async function extractCommand(prNumbers, options) {
350
- const path = await import('node:path');
351
- const { GitHubCliPrAdapter } = await import('../adapters/github-cli-pr.js');
352
- // Validate and deduplicate PR numbers
34
+ // ─── PR number validation ────────────────────────────
353
35
  const unique = [...new Set(prNumbers)];
354
36
  if (unique.length > MAX_INPUTS) {
355
37
  throw new TotemConfigError(`Too many PR numbers (${unique.length}). Maximum is ${MAX_INPUTS}.`, `Pass at most ${MAX_INPUTS} PR numbers at a time.`, 'CONFIG_INVALID');
@@ -364,256 +46,30 @@ export async function extractCommand(prNumbers, options) {
364
46
  }
365
47
  const cwd = process.cwd();
366
48
  const configPath = resolveConfigPath(cwd);
49
+ if (isGlobalConfigPath(configPath)) {
50
+ throw new TotemConfigError('Cannot extract lessons without a local project.', "Run 'totem init' to create a local .totem/ directory first.", 'CONFIG_MISSING');
51
+ }
367
52
  loadEnv(cwd);
368
53
  const config = await loadConfig(configPath);
369
- // Load user-defined custom secrets for DLP (#921)
370
- const customSecrets = loadCustomSecrets(cwd, config.totemDir, (msg) => log.warn(TAG, msg));
371
- // Use project-configured bot markers if provided, otherwise keep defaults
372
- const botMarkers = config.botMarkers ?? DEFAULT_BOT_MARKERS;
373
- // Connect to LanceDB for dedup context
374
- const embedding = requireEmbedding(config);
375
- const embedder = createEmbedder(embedding);
376
- const store = new LanceStore(path.join(cwd, config.lanceDir), embedder);
377
- await store.connect();
378
- log.info(TAG, 'Querying existing lessons for dedup...');
379
- const existingLessons = await retrieveExistingLessons(store);
380
- log.info(TAG, `Found ${existingLessons.length} existing lessons for context`);
381
- // Resolve system prompt (allow .totem/prompts/extract.md override)
382
- const systemPrompt = getSystemPrompt('extract', SYSTEM_PROMPT, cwd, config.totemDir);
383
- // Process each PR sequentially, accumulating lessons
384
- const allLessons = [];
385
- const adapter = new GitHubCliPrAdapter(cwd);
386
- for (const num of nums) {
387
- // ─── Scan-based extraction (--from-scan) ─────────────
388
- if (options.fromScan) {
389
- if (!adapter.fetchCodeScanningAlerts) {
390
- throw new TotemConfigError('The current PR adapter does not support code scanning alerts.', 'Use the GitHub CLI adapter (default) to enable --from-scan.', 'CONFIG_INVALID');
391
- }
392
- // Fetch code scanning alerts for this PR
393
- const { safeExec: exec } = await import('@mmnto/totem');
394
- log.info(TAG, `Fetching code scanning alerts for PR #${num}...`);
395
- const allAlerts = adapter.fetchCodeScanningAlerts(num);
396
- const fixedAlerts = allAlerts.filter((a) => a.state === 'fixed');
397
- log.info(TAG, `Found ${allAlerts.length} alert(s), ${fixedAlerts.length} fixed`);
398
- if (fixedAlerts.length === 0) {
399
- log.dim(TAG, `No fixed code scanning alerts for PR #${num}. Skipping.`);
400
- continue;
401
- }
402
- // Fetch the PR diff filtered to affected files only (avoids truncation in large PRs)
403
- const affectedFiles = [
404
- ...new Set(fixedAlerts.map((a) => a.most_recent_instance.location.path)),
405
- ];
406
- log.info(TAG, `Fetching PR diff for ${affectedFiles.length} affected file(s)...`);
407
- const diffArgs = ['pr', 'diff', String(num), '--', ...affectedFiles];
408
- const diff = exec('gh', diffArgs, {
409
- cwd,
410
- timeout: GH_TIMEOUT_MS,
411
- maxBuffer: 10 * 1024 * 1024,
412
- env: { ...process.env, GH_PROMPT_DISABLED: '1' },
413
- });
414
- // Resolve system prompt (allow .totem/prompts/extract-scan.md override)
415
- const scanSystemPrompt = getSystemPrompt('extract-scan', SCAN_EXTRACT_SYSTEM_PROMPT, cwd, config.totemDir);
416
- // Assemble scan-specific prompt
417
- const prompt = assembleFromScanPrompt(fixedAlerts, diff, existingLessons, scanSystemPrompt);
418
- log.dim(TAG, `Prompt: ${(prompt.length / 1024).toFixed(0)}KB`);
419
- // Run orchestrator
420
- const content = await runOrchestrator({
421
- prompt,
422
- tag: TAG,
423
- options,
424
- config,
425
- cwd,
426
- temperature: 0.4,
427
- customSecrets,
428
- });
429
- if (content == null)
430
- continue; // --raw mode
431
- // Parse lessons from LLM output
432
- const lessons = parseLessons(content);
433
- if (lessons.length === 0) {
434
- log.dim(TAG, `No lessons extracted from scan alerts in PR #${num}.`);
435
- }
436
- else {
437
- log.success(TAG, `Extracted ${lessons.length} lesson(s) from scan alerts in PR #${num}`);
438
- allLessons.push(...lessons);
439
- }
440
- continue; // skip normal review-comment extraction
441
- }
442
- // Fetch PR data
443
- log.info(TAG, `Fetching PR #${num}...`);
444
- const pr = adapter.fetchPr(num);
445
- log.info(TAG, `Title: ${pr.title}`);
446
- // Fetch inline review comments
447
- log.info(TAG, 'Fetching review comments...');
448
- const reviewComments = adapter.fetchReviewComments(num);
449
- log.info(TAG, `Found ${reviewComments.length} inline review comments`);
450
- // Filter GCA boilerplate from inline comments
451
- const filteredComments = reviewComments.filter((c) => !isGcaBoilerplate(c.body, botMarkers));
452
- // Skip if no review content
453
- const hasReviewContent = pr.reviews.some((r) => r.body.trim()) ||
454
- pr.comments.some((c) => !isGcaBoilerplate(c.body, botMarkers)) ||
455
- filteredComments.length > 0;
456
- if (!hasReviewContent) {
457
- log.dim(TAG, `No review content found in PR #${num}. Skipping.`);
458
- continue;
459
- }
460
- // Group inline comments into threads
461
- const threads = groupIntoThreads(filteredComments);
462
- log.info(TAG, `Grouped into ${threads.length} review threads`);
463
- // Extract CodeRabbit nits from review bodies (lazy import)
464
- const { parseCodeRabbitNits } = await import('../parse-nits.js');
465
- const prNits = [];
466
- for (const r of pr.reviews) {
467
- if (r.author?.toLowerCase().includes('coderabbit')) {
468
- prNits.push(...parseCodeRabbitNits(r.body));
469
- }
470
- }
471
- // Scope inference (#1014): analyze PR changed files for scope suggestion
472
- let scopeGlobs = [];
473
- try {
474
- const { safeExec: exec, inferScopeFromFiles } = await import('@mmnto/totem');
475
- const diff = exec('gh', ['pr', 'diff', String(num), '--name-only'], {
476
- cwd,
477
- timeout: GH_TIMEOUT_MS,
478
- maxBuffer: 10 * 1024 * 1024, // 10MB for large PRs
479
- env: { ...process.env, GH_PROMPT_DISABLED: '1' },
480
- });
481
- const files = diff.trim().split(/\r?\n/).filter(Boolean);
482
- scopeGlobs = inferScopeFromFiles(files);
483
- if (scopeGlobs.length > 0) {
484
- log.dim(TAG, `Inferred scope: ${scopeGlobs.join(', ')}`);
485
- }
486
- }
487
- catch (err) {
488
- log.dim(TAG, `Skipping scope inference: ${err instanceof Error ? err.message : String(err)}`);
489
- }
490
- // Assemble prompt
491
- const prompt = assemblePrompt(pr, threads, existingLessons, systemPrompt, prNits, botMarkers, scopeGlobs);
492
- log.dim(TAG, `Prompt: ${(prompt.length / 1024).toFixed(0)}KB`);
493
- // Run orchestrator (handles --raw mode, validation, invocation, telemetry)
494
- const content = await runOrchestrator({
495
- prompt,
496
- tag: TAG,
497
- options,
498
- config,
499
- cwd,
500
- temperature: 0.4,
501
- customSecrets,
502
- });
503
- if (content == null)
504
- continue; // --raw mode — prompt already output, process next PR
505
- // Parse lessons from LLM output
506
- const lessons = parseLessons(content);
507
- if (lessons.length === 0) {
508
- log.dim(TAG, `No lessons extracted from PR #${num}.`);
509
- }
510
- else {
511
- log.success(TAG, `Extracted ${lessons.length} lesson(s) from PR #${num}`);
512
- allLessons.push(...lessons);
513
- }
54
+ // ─── Mode routing ────────────────────────────────────
55
+ let allLessons;
56
+ if (options.fromScan) {
57
+ const { extractFromScans } = await import('./extract-scan.js');
58
+ allLessons = await extractFromScans(nums, options, config, cwd, configPath);
59
+ }
60
+ else {
61
+ const { extractFromPrs } = await import('./extract-pr.js');
62
+ allLessons = await extractFromPrs(nums, options, config, cwd, configPath);
514
63
  }
515
64
  // In --raw mode, prompts were already output during the loop
516
65
  if (options.raw)
517
66
  return;
518
67
  if (allLessons.length === 0) {
519
- log.dim(TAG, 'No lessons extracted from any PR.');
520
- return;
521
- }
522
- // Semantic dedup against existing lessons and intra-batch (#347)
523
- log.info(TAG, 'Deduplicating against existing lessons...'); // totem-ignore — static string
524
- const { kept: novelLessons, dropped: dupLessons } = await deduplicateLessons(allLessons, store, embedder);
525
- if (dupLessons.length > 0) {
526
- log.dim(TAG, `Dropped ${dupLessons.length} semantically duplicate lesson(s)`); // totem-ignore — integer count
527
- }
528
- if (novelLessons.length === 0) {
529
- log.dim(TAG, 'All extracted lessons are duplicates of existing ones.'); // totem-ignore — static string
530
- return;
531
- }
532
- // Flag suspicious lessons before review (#290)
533
- const flaggedLessons = flagSuspiciousLessons(novelLessons);
534
- const suspiciousCount = flaggedLessons.filter((l) => l.suspiciousFlags?.length).length;
535
- if (suspiciousCount > 0) {
536
- log.warn(TAG, `${suspiciousCount} lesson(s) flagged as suspicious`); // totem-ignore — count only, no untrusted content
537
- }
538
- log.success(TAG, `Total: ${flaggedLessons.length} lesson(s) from ${nums.length} PR(s)`); // totem-ignore — count only, no untrusted content
539
- // --dry-run mode: preview lessons to stdout (pipeable) without writing
540
- if (options.dryRun) {
541
- log.dim(TAG, 'Dry run — lessons not written.');
542
- for (const lesson of flaggedLessons) {
543
- const prefix = lesson.suspiciousFlags?.length ? '[!] ' : '';
544
- console.log(`\n ${prefix}Tags: ${sanitize(lesson.tags.join(', ')).replace(/\n/g, ' ')}`); // totem-ignore — stdout for piping
545
- if (lesson.scope)
546
- console.log(` Scope: ${sanitize(lesson.scope)}`); // totem-ignore — stdout for piping
547
- console.log(` ${sanitize(lesson.text).replace(/\n/g, '\n ')}`); // totem-ignore — stdout for piping
548
- if (lesson.suspiciousFlags?.length) {
549
- for (const flag of lesson.suspiciousFlags) {
550
- console.log(` [!] ${flag}`); // totem-ignore — stdout for piping
551
- }
552
- }
553
- }
554
- // Exit non-zero if suspicious lessons detected in --yes mode (#291)
555
- if (options.yes && suspiciousCount > 0) {
556
- process.exitCode = 1;
557
- }
558
- return;
559
- }
560
- if (!options.yes) {
561
- // Display full text of each lesson for review before prompting
562
- console.error('');
563
- log.warn(TAG, 'WARNING: These lessons were extracted from PR comments, which may include content from untrusted contributors.');
564
- log.warn(TAG, 'Review each lesson carefully before accepting.\n');
565
- for (let i = 0; i < flaggedLessons.length; i++) {
566
- const lesson = flaggedLessons[i];
567
- const prefix = lesson.suspiciousFlags?.length ? `[!] ` : '';
568
- console.error(` [${i + 1}] ${prefix}Tags: ${sanitize(lesson.tags.join(', ')).replace(/\n/g, ' ')}`);
569
- if (lesson.scope)
570
- console.error(` Scope: ${sanitize(lesson.scope)}`);
571
- console.error(` ${sanitize(lesson.text).replace(/\n/g, '\n ')}`);
572
- if (lesson.suspiciousFlags?.length) {
573
- for (const flag of lesson.suspiciousFlags) {
574
- console.error(` [!] ${flag}`);
575
- }
576
- }
577
- console.error('');
578
- }
579
- }
580
- // Interactive multi-select (or --yes bypass with suspicious blocking)
581
- const selected = await selectLessons(flaggedLessons, {
582
- yes: options.yes,
583
- isTTY: !!process.stdin.isTTY,
584
- });
585
- if (selected.length === 0) {
586
- log.dim(TAG, 'No lessons selected — nothing written.');
68
+ log.dim('Extract', 'No lessons extracted from any PR.');
587
69
  return;
588
70
  }
589
- // Sanitize before persisting strip any terminal injection from stored lessons
590
- const sanitizedLessons = selected.map((l) => ({
591
- tags: l.tags.map((t) => sanitize(t)),
592
- text: sanitize(l.text),
593
- ...(l.scope && { scope: sanitize(l.scope) }),
594
- }));
595
- // Append lessons to .totem/lessons/
596
- const lessonsDir = path.join(cwd, config.totemDir, 'lessons');
597
- appendLessons(sanitizedLessons, lessonsDir);
598
- log.success(TAG, `Appended ${sanitizedLessons.length} lesson(s) to ${config.totemDir}/lessons/`); // totem-ignore
599
- // Run incremental sync so lessons are immediately searchable
600
- log.info(TAG, 'Running incremental sync...');
601
- const syncResult = await runSync(config, {
602
- projectRoot: cwd,
603
- incremental: true,
604
- onProgress: (msg) => log.dim(TAG, msg),
605
- });
606
- log.success(TAG, `Sync complete: ${syncResult.chunksProcessed} chunks from ${syncResult.filesProcessed} files`);
607
- // Print summary
71
+ // Shared pipeline: dedup, flag, select, persist, sync
608
72
  const prLabel = nums.length === 1 ? `PR #${nums[0]}` : `${nums.length} PRs`;
609
- console.log(`\nExtracted ${sanitizedLessons.length} lesson(s) from ${prLabel}:`);
610
- for (const lesson of sanitizedLessons) {
611
- console.log(`\n Tags: ${lesson.tags.join(', ').replace(/\n/g, ' ')}`);
612
- console.log(` ${lesson.text.replace(/\n/g, '\n ')}`);
613
- }
614
- // Exit non-zero if --yes mode dropped suspicious lessons (#291)
615
- if (options.yes && suspiciousCount > 0) {
616
- process.exitCode = 1;
617
- }
73
+ await sharedPipeline(allLessons, options, cwd, prLabel, config, configPath);
618
74
  }
619
75
  //# sourceMappingURL=extract.js.map