@mmnto/cli 1.9.0 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -190
- package/dist/commands/add-lesson.d.ts.map +1 -1
- package/dist/commands/add-lesson.js +5 -1
- package/dist/commands/add-lesson.js.map +1 -1
- package/dist/commands/anchor.d.ts +2 -0
- package/dist/commands/anchor.d.ts.map +1 -0
- package/dist/commands/anchor.js +84 -0
- package/dist/commands/anchor.js.map +1 -0
- package/dist/commands/compile.d.ts.map +1 -1
- package/dist/commands/compile.js +4 -1
- package/dist/commands/compile.js.map +1 -1
- package/dist/commands/config-cmd.d.ts +4 -0
- package/dist/commands/config-cmd.d.ts.map +1 -0
- package/dist/commands/config-cmd.js +60 -0
- package/dist/commands/config-cmd.js.map +1 -0
- package/dist/commands/extract-local.d.ts +6 -0
- package/dist/commands/extract-local.d.ts.map +1 -0
- package/dist/commands/extract-local.js +134 -0
- package/dist/commands/extract-local.js.map +1 -0
- package/dist/commands/extract-pr.d.ts +19 -0
- package/dist/commands/extract-pr.d.ts.map +1 -0
- package/dist/commands/extract-pr.js +193 -0
- package/dist/commands/extract-pr.js.map +1 -0
- package/dist/commands/extract-scan.d.ts +6 -0
- package/dist/commands/extract-scan.d.ts.map +1 -0
- package/dist/commands/extract-scan.js +100 -0
- package/dist/commands/extract-scan.js.map +1 -0
- package/dist/commands/extract-shared.d.ts +29 -0
- package/dist/commands/extract-shared.d.ts.map +1 -0
- package/dist/commands/extract-shared.js +351 -0
- package/dist/commands/extract-shared.js.map +1 -0
- package/dist/commands/extract-templates.d.ts +1 -0
- package/dist/commands/extract-templates.d.ts.map +1 -1
- package/dist/commands/extract-templates.js +33 -0
- package/dist/commands/extract-templates.js.map +1 -1
- package/dist/commands/extract.d.ts +7 -36
- package/dist/commands/extract.d.ts.map +1 -1
- package/dist/commands/extract.js +44 -588
- package/dist/commands/extract.js.map +1 -1
- package/dist/commands/extract.test.js +103 -2
- package/dist/commands/extract.test.js.map +1 -1
- package/dist/commands/init.d.ts +5 -0
- package/dist/commands/init.d.ts.map +1 -1
- package/dist/commands/init.js +85 -2
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/init.test.js +54 -1
- package/dist/commands/init.test.js.map +1 -1
- package/dist/commands/install-hooks.d.ts +17 -7
- package/dist/commands/install-hooks.d.ts.map +1 -1
- package/dist/commands/install-hooks.js +96 -16
- package/dist/commands/install-hooks.js.map +1 -1
- package/dist/commands/install-hooks.test.js +137 -22
- package/dist/commands/install-hooks.test.js.map +1 -1
- package/dist/commands/learn.d.ts +26 -0
- package/dist/commands/learn.d.ts.map +1 -0
- package/dist/commands/learn.js +325 -0
- package/dist/commands/learn.js.map +1 -0
- package/dist/commands/learn.test.d.ts +2 -0
- package/dist/commands/learn.test.d.ts.map +1 -0
- package/dist/commands/learn.test.js +169 -0
- package/dist/commands/learn.test.js.map +1 -0
- package/dist/commands/shield.d.ts.map +1 -1
- package/dist/commands/shield.js +12 -1
- package/dist/commands/shield.js.map +1 -1
- package/dist/commands/shield.test.js +37 -0
- package/dist/commands/shield.test.js.map +1 -1
- package/dist/commands/spec.d.ts +6 -0
- package/dist/commands/spec.d.ts.map +1 -1
- package/dist/commands/spec.js +11 -1
- package/dist/commands/spec.js.map +1 -1
- package/dist/commands/spec.test.js +25 -1
- package/dist/commands/spec.test.js.map +1 -1
- package/dist/exemptions/exemption-schema.d.ts +6 -6
- package/dist/hooks/auto-context.d.ts.map +1 -1
- package/dist/hooks/auto-context.js +2 -19
- package/dist/hooks/auto-context.js.map +1 -1
- package/dist/index.js +33 -9
- package/dist/index.js.map +1 -1
- package/dist/orchestrator.test.d.ts +2 -0
- package/dist/orchestrator.test.d.ts.map +1 -0
- package/dist/orchestrator.test.js +130 -0
- package/dist/orchestrator.test.js.map +1 -0
- package/dist/schemas/handoff-checkpoint.d.ts +2 -2
- package/dist/utils/pilot.d.ts +44 -0
- package/dist/utils/pilot.d.ts.map +1 -0
- package/dist/utils/pilot.js +141 -0
- package/dist/utils/pilot.js.map +1 -0
- package/dist/utils/pilot.test.d.ts +2 -0
- package/dist/utils/pilot.test.d.ts.map +1 -0
- package/dist/utils/pilot.test.js +166 -0
- package/dist/utils/pilot.test.js.map +1 -0
- package/dist/utils.d.ts +8 -1
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +27 -15
- package/dist/utils.js.map +1 -1
- package/dist/utils.test.js +81 -1
- package/dist/utils.test.js.map +1 -1
- package/package.json +3 -2
package/dist/commands/extract.js
CHANGED
|
@@ -1,355 +1,37 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { TotemConfigError } from '@mmnto/totem';
|
|
2
2
|
import { log } from '../ui.js';
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
|
|
6
|
-
|
|
3
|
+
import { isGlobalConfigPath, loadConfig, loadEnv, resolveConfigPath } from '../utils.js';
|
|
4
|
+
import { sharedPipeline } from './extract-shared.js';
|
|
5
|
+
import { MAX_INPUTS } from './extract-templates.js';
|
|
6
|
+
// ─── Re-exports: extract-templates ─────────────────────
|
|
7
|
+
export { EXTRACT_SYSTEM_PROMPT, LOCAL_EXTRACT_SYSTEM_PROMPT, MAX_EXISTING_LESSONS, MAX_INPUTS, MAX_REVIEW_BODY_CHARS, SCAN_EXTRACT_SYSTEM_PROMPT, SEMANTIC_DEDUP_THRESHOLD, SYSTEM_PROMPT, } from './extract-templates.js';
|
|
7
8
|
export { cosineSimilarity, deduplicateLessons, flagSuspiciousLessons, isInstructionalContext, } from '@mmnto/totem';
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
for (const [rootId, threadComments] of threadMap) {
|
|
22
|
-
threadComments.sort((a, b) => {
|
|
23
|
-
if (!a.createdAt || !b.createdAt)
|
|
24
|
-
return 0;
|
|
25
|
-
return a.createdAt.localeCompare(b.createdAt);
|
|
26
|
-
});
|
|
27
|
-
const root = byId.get(rootId) ?? threadComments[0];
|
|
28
|
-
threads.push({
|
|
29
|
-
path: root.path,
|
|
30
|
-
diffHunk: root.diffHunk,
|
|
31
|
-
comments: threadComments.map((c) => ({ id: c.id, author: c.author, body: c.body })),
|
|
32
|
-
});
|
|
33
|
-
}
|
|
34
|
-
return threads;
|
|
35
|
-
}
|
|
36
|
-
// ─── LanceDB retrieval ─────────────────────────────────
|
|
37
|
-
async function retrieveExistingLessons(store) {
|
|
38
|
-
return store.search({
|
|
39
|
-
query: 'lesson trap pattern decision',
|
|
40
|
-
typeFilter: 'spec',
|
|
41
|
-
maxResults: MAX_EXISTING_LESSONS,
|
|
42
|
-
});
|
|
43
|
-
}
|
|
44
|
-
// ─── Prompt assembly ────────────────────────────────────
|
|
45
|
-
const DEFAULT_BOT_MARKERS = ['Using Gemini Code Assist', 'Gemini Code Assist'];
|
|
46
|
-
function isGcaBoilerplate(body, botMarkers) {
|
|
47
|
-
return botMarkers.some((marker) => body.includes(marker));
|
|
48
|
-
}
|
|
49
|
-
export function assemblePrompt(pr, threads, existingLessons, systemPrompt, nits, botMarkers = DEFAULT_BOT_MARKERS, scopeGlobs) {
|
|
50
|
-
const sections = [systemPrompt];
|
|
51
|
-
// Scope context from PR diff analysis (#1014) — globs derived from PR filenames (untrusted)
|
|
52
|
-
if (scopeGlobs && scopeGlobs.length > 0) {
|
|
53
|
-
sections.push('\n=== SCOPE CONTEXT (from PR diff analysis) ===');
|
|
54
|
-
sections.push(wrapUntrustedXml('scope_context', `Suggested file scope: ${scopeGlobs.join(', ')}`));
|
|
55
|
-
sections.push('Use this scope as the default unless a lesson truly applies globally.');
|
|
56
|
-
sections.push('Include a "scope" field in each lesson JSON with the appropriate glob pattern.');
|
|
57
|
-
}
|
|
58
|
-
// PR metadata — sanitize untrusted fields (title, state come from PR author)
|
|
59
|
-
sections.push('=== PR METADATA ===');
|
|
60
|
-
sections.push(`PR #${pr.number}: ${sanitize(pr.title)}`);
|
|
61
|
-
sections.push(`State: ${sanitize(pr.state)}`);
|
|
62
|
-
if (pr.body) {
|
|
63
|
-
sections.push('');
|
|
64
|
-
sections.push(wrapUntrustedXml('pr_body', pr.body));
|
|
65
|
-
}
|
|
66
|
-
// Review summaries (non-empty review bodies)
|
|
67
|
-
const reviewBodies = pr.reviews.filter((r) => r.body.trim());
|
|
68
|
-
if (reviewBodies.length > 0) {
|
|
69
|
-
sections.push('\n=== REVIEW SUMMARIES ===');
|
|
70
|
-
for (const r of reviewBodies) {
|
|
71
|
-
sections.push(`[${sanitize(r.author)} — ${sanitize(r.state)}]`);
|
|
72
|
-
sections.push(wrapUntrustedXml('review_body', r.body));
|
|
73
|
-
sections.push('');
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
// CodeRabbit nits (pre-parsed and passed in)
|
|
77
|
-
if (nits && nits.length > 0) {
|
|
78
|
-
sections.push('\n=== CODERABBIT NITS (extract valuable architectural insights) ===');
|
|
79
|
-
for (const nit of nits) {
|
|
80
|
-
sections.push(wrapUntrustedXml('nit_body', nit));
|
|
81
|
-
}
|
|
82
|
-
}
|
|
83
|
-
// Regular PR comments (filter GCA boilerplate)
|
|
84
|
-
const prComments = pr.comments.filter((c) => !isGcaBoilerplate(c.body, botMarkers));
|
|
85
|
-
if (prComments.length > 0) {
|
|
86
|
-
sections.push('\n=== PR COMMENTS ===');
|
|
87
|
-
for (const c of prComments) {
|
|
88
|
-
sections.push(`[${sanitize(c.author)}]`);
|
|
89
|
-
sections.push(wrapUntrustedXml('comment_body', c.body));
|
|
90
|
-
sections.push('');
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
// Inline review comment threads
|
|
94
|
-
if (threads.length > 0) {
|
|
95
|
-
sections.push('\n=== INLINE REVIEW THREADS ===');
|
|
96
|
-
for (const thread of threads) {
|
|
97
|
-
sections.push(`--- ${sanitize(thread.path)} ---`); // totem-ignore — thread.path is untrusted PR data, not local git
|
|
98
|
-
sections.push(wrapUntrustedXml('diff_hunk', thread.diffHunk));
|
|
99
|
-
for (const c of thread.comments) {
|
|
100
|
-
sections.push(`[${sanitize(c.author)}]:\n${wrapUntrustedXml('comment_body', c.body)}`);
|
|
101
|
-
}
|
|
102
|
-
sections.push('');
|
|
103
|
-
}
|
|
104
|
-
}
|
|
105
|
-
// Existing lessons for dedup context
|
|
106
|
-
const lessonSection = formatResults(existingLessons, 'EXISTING LESSONS (do NOT duplicate)');
|
|
107
|
-
if (lessonSection) {
|
|
108
|
-
sections.push('\n=== DEDUP CONTEXT ===');
|
|
109
|
-
sections.push(lessonSection);
|
|
110
|
-
}
|
|
111
|
-
// Truncate if needed
|
|
112
|
-
let prompt = sections.join('\n');
|
|
113
|
-
if (prompt.length > MAX_REVIEW_BODY_CHARS) {
|
|
114
|
-
prompt = prompt.slice(0, MAX_REVIEW_BODY_CHARS) + '\n\n... [content truncated] ...';
|
|
115
|
-
}
|
|
116
|
-
return prompt;
|
|
117
|
-
}
|
|
118
|
-
// ─── Scan prompt assembly ──────────────────────────────
|
|
119
|
-
export function assembleFromScanPrompt(alerts, diff, existingLessons, systemPrompt) {
|
|
120
|
-
const sections = [systemPrompt];
|
|
121
|
-
sections.push('\n=== FIXED CODE SCANNING ALERTS ===');
|
|
122
|
-
for (const alert of alerts) {
|
|
123
|
-
sections.push(`\n--- Alert #${alert.number} ---`);
|
|
124
|
-
sections.push(wrapUntrustedXml('alert_rule', sanitize(alert.rule_id)));
|
|
125
|
-
sections.push(wrapUntrustedXml('alert_message', alert.most_recent_instance.message.text));
|
|
126
|
-
sections.push(wrapUntrustedXml('alert_location', `${alert.most_recent_instance.location.path}:${alert.most_recent_instance.location.start_line}`));
|
|
127
|
-
}
|
|
128
|
-
sections.push('\n=== FIX DIFF ===');
|
|
129
|
-
const truncatedDiff = diff.length > MAX_REVIEW_BODY_CHARS
|
|
130
|
-
? diff.slice(0, MAX_REVIEW_BODY_CHARS) + '\n... [diff truncated] ...'
|
|
131
|
-
: diff;
|
|
132
|
-
sections.push(wrapUntrustedXml('fix_diff', truncatedDiff));
|
|
133
|
-
// Existing lessons for dedup context
|
|
134
|
-
const lessonSection = formatResults(existingLessons, 'EXISTING LESSONS (do NOT duplicate)');
|
|
135
|
-
if (lessonSection) {
|
|
136
|
-
sections.push('\n=== DEDUP CONTEXT ===');
|
|
137
|
-
sections.push(lessonSection);
|
|
138
|
-
}
|
|
139
|
-
return sections.join('\n');
|
|
140
|
-
}
|
|
141
|
-
// ─── Lesson parser ──────────────────────────────────────
|
|
142
|
-
const LESSON_RE = /---LESSON---\s*\n(?:Heading:\s*(.+)\n)?Tags:\s*(.+)\n([\s\S]+?)---END---/g;
|
|
143
|
-
/** Strip markdown heading markers and "Lesson —" prefixes, then enforce max length. */
|
|
144
|
-
function sanitizeHeading(heading) {
|
|
145
|
-
const cleaned = heading
|
|
146
|
-
.replace(/^#+\s*/, '')
|
|
147
|
-
.replace(/^Lesson\s*[-—:]\s*/i, '')
|
|
148
|
-
.trim();
|
|
149
|
-
return truncateHeading(cleaned);
|
|
150
|
-
}
|
|
151
|
-
/** Max allowed length for a single lesson's text to prevent corrupted/hallucinated output. */
|
|
152
|
-
const MAX_LESSON_TEXT_LENGTH = 2000;
|
|
153
|
-
/** Max allowed tags per lesson. */
|
|
154
|
-
const MAX_TAGS_PER_LESSON = 10;
|
|
155
|
-
/** Max allowed length for a single tag. */
|
|
156
|
-
const MAX_TAG_LENGTH = 50;
|
|
157
|
-
/** Extract a JSON array from LLM output, handling code fences and conversational wrapping. */
|
|
158
|
-
function extractJsonArray(input) {
|
|
159
|
-
const trimmed = input.trim();
|
|
160
|
-
// Try markdown code fences (backtick or tilde)
|
|
161
|
-
const fenced = trimmed.match(/(?:```|~~~)(?:json)?\s*\n?([\s\S]*?)(?:```|~~~)/i);
|
|
162
|
-
if (fenced)
|
|
163
|
-
return fenced[1].trim();
|
|
164
|
-
// Look for `[` followed by optional whitespace then `{` — handles both compact and pretty-printed
|
|
165
|
-
const arrayStart = trimmed.search(/\[\s*\{/);
|
|
166
|
-
if (arrayStart !== -1) {
|
|
167
|
-
// Find matching ] respecting JSON string literals (brackets inside strings don't count)
|
|
168
|
-
let depth = 0;
|
|
169
|
-
let inString = false;
|
|
170
|
-
let escaped = false;
|
|
171
|
-
for (let i = arrayStart; i < trimmed.length; i++) {
|
|
172
|
-
const ch = trimmed[i];
|
|
173
|
-
if (escaped) {
|
|
174
|
-
escaped = false;
|
|
175
|
-
continue;
|
|
176
|
-
}
|
|
177
|
-
if (ch === '\\' && inString) {
|
|
178
|
-
escaped = true;
|
|
179
|
-
continue;
|
|
180
|
-
}
|
|
181
|
-
if (ch === '"') {
|
|
182
|
-
inString = !inString;
|
|
183
|
-
continue;
|
|
184
|
-
}
|
|
185
|
-
if (inString)
|
|
186
|
-
continue;
|
|
187
|
-
if (ch === '[')
|
|
188
|
-
depth++;
|
|
189
|
-
else if (ch === ']') {
|
|
190
|
-
depth--;
|
|
191
|
-
if (depth === 0)
|
|
192
|
-
return trimmed.slice(arrayStart, i + 1);
|
|
193
|
-
}
|
|
194
|
-
}
|
|
195
|
-
}
|
|
196
|
-
return null;
|
|
197
|
-
}
|
|
198
|
-
/** Validate a single parsed lesson object. Returns null if invalid. */
|
|
199
|
-
function validateLesson(obj) {
|
|
200
|
-
if (typeof obj !== 'object' || obj === null || Array.isArray(obj))
|
|
201
|
-
return null;
|
|
202
|
-
const rec = obj;
|
|
203
|
-
// Normalize text
|
|
204
|
-
const text = typeof rec.text === 'string' ? rec.text.trim() : null;
|
|
205
|
-
if (!text || text.length > MAX_LESSON_TEXT_LENGTH)
|
|
206
|
-
return null;
|
|
207
|
-
// Normalize tags — trim and filter empty
|
|
208
|
-
const tags = Array.isArray(rec.tags)
|
|
209
|
-
? rec.tags
|
|
210
|
-
.filter((t) => typeof t === 'string')
|
|
211
|
-
.map((t) => t.trim())
|
|
212
|
-
.filter(Boolean)
|
|
213
|
-
: null;
|
|
214
|
-
if (!tags || tags.length === 0 || tags.length > MAX_TAGS_PER_LESSON)
|
|
215
|
-
return null;
|
|
216
|
-
if (tags.some((t) => t.length > MAX_TAG_LENGTH))
|
|
217
|
-
return null;
|
|
218
|
-
// Validate optional heading
|
|
219
|
-
const heading = typeof rec.heading === 'string' ? sanitizeHeading(rec.heading) : undefined;
|
|
220
|
-
// Validate optional scope (#1014) — reject newlines to prevent body injection
|
|
221
|
-
const rawScope = typeof rec.scope === 'string' ? rec.scope.trim() : undefined;
|
|
222
|
-
const scope = rawScope && !/[\n\r]/.test(rawScope) ? rawScope : undefined;
|
|
223
|
-
return { ...(heading && { heading }), tags, text, ...(scope && { scope }) };
|
|
224
|
-
}
|
|
225
|
-
/** Try to parse JSON lessons with manual validation. Returns null on failure. */
|
|
226
|
-
function tryParseJson(llmOutput) {
|
|
227
|
-
try {
|
|
228
|
-
const jsonStr = extractJsonArray(llmOutput);
|
|
229
|
-
if (!jsonStr)
|
|
230
|
-
return null;
|
|
231
|
-
const parsed = JSON.parse(jsonStr);
|
|
232
|
-
if (!Array.isArray(parsed))
|
|
233
|
-
return null;
|
|
234
|
-
const lessons = [];
|
|
235
|
-
for (const item of parsed) {
|
|
236
|
-
const validated = validateLesson(item);
|
|
237
|
-
if (validated)
|
|
238
|
-
lessons.push(validated);
|
|
9
|
+
export { appendLessons, assembleExtractPrompt, parseLessons, selectLessons, } from './extract-shared.js';
|
|
10
|
+
// ─── Re-exports: extract-pr ────────────────────────────
|
|
11
|
+
export { assemblePrompt } from './extract-pr.js';
|
|
12
|
+
// ─── Re-exports: extract-scan ──────────────────────────
|
|
13
|
+
export { assembleFromScanPrompt } from './extract-scan.js';
|
|
14
|
+
// ─── Re-exports: extract-local ─────────────────────────
|
|
15
|
+
export { assembleLocalPrompt } from './extract-local.js';
|
|
16
|
+
// ─── Main command ──────────────────────────────────────
|
|
17
|
+
export async function extractCommand(prNumbers, options) {
|
|
18
|
+
// ─── Local extraction mode (--local) ─────────────────
|
|
19
|
+
if (options.local) {
|
|
20
|
+
if (options.fromScan) {
|
|
21
|
+
throw new TotemConfigError('Cannot combine --local with --from-scan.', 'Use --local for local diffs or --from-scan with PR numbers for code scanning alerts.', 'CONFIG_INVALID');
|
|
239
22
|
}
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
// injected ---LESSON--- content after JSON was already found.
|
|
243
|
-
return lessons;
|
|
244
|
-
}
|
|
245
|
-
catch {
|
|
246
|
-
return null;
|
|
247
|
-
}
|
|
248
|
-
}
|
|
249
|
-
/** Fallback: parse lessons using the legacy ---LESSON---...---END--- regex format. */
|
|
250
|
-
function parseWithRegex(llmOutput) {
|
|
251
|
-
const lessons = [];
|
|
252
|
-
let match;
|
|
253
|
-
while ((match = LESSON_RE.exec(llmOutput)) !== null) {
|
|
254
|
-
const rawHeading = match[1]; // undefined if Heading: line was absent
|
|
255
|
-
const tags = match[2]
|
|
256
|
-
.split(',')
|
|
257
|
-
.map((t) => t.trim())
|
|
258
|
-
.filter(Boolean);
|
|
259
|
-
const text = match[3].trim();
|
|
260
|
-
// Validate: reject malformed or hallucinated lessons before they reach disk
|
|
261
|
-
if (!text)
|
|
262
|
-
continue;
|
|
263
|
-
if (text.length > MAX_LESSON_TEXT_LENGTH)
|
|
264
|
-
continue;
|
|
265
|
-
if (tags.length === 0 || tags.length > MAX_TAGS_PER_LESSON)
|
|
266
|
-
continue;
|
|
267
|
-
if (tags.some((t) => t.length > MAX_TAG_LENGTH))
|
|
268
|
-
continue;
|
|
269
|
-
const heading = rawHeading ? sanitizeHeading(rawHeading) : undefined;
|
|
270
|
-
lessons.push({ ...(heading && { heading }), tags, text });
|
|
271
|
-
}
|
|
272
|
-
return lessons;
|
|
273
|
-
}
|
|
274
|
-
export function parseLessons(llmOutput) {
|
|
275
|
-
if (llmOutput.trim() === 'NONE')
|
|
276
|
-
return [];
|
|
277
|
-
// Primary path: JSON + manual validation
|
|
278
|
-
const jsonLessons = tryParseJson(llmOutput);
|
|
279
|
-
if (jsonLessons !== null)
|
|
280
|
-
return jsonLessons;
|
|
281
|
-
// Fallback: regex parsing for models that don't produce clean JSON
|
|
282
|
-
return parseWithRegex(llmOutput);
|
|
283
|
-
}
|
|
284
|
-
// ─── Lesson writer ──────────────────────────────────────
|
|
285
|
-
export function appendLessons(lessons, lessonsDir) {
|
|
286
|
-
for (const l of lessons) {
|
|
287
|
-
const heading = l.heading || generateLessonHeading(l.text);
|
|
288
|
-
const tags = l.tags.join(', ');
|
|
289
|
-
const scopeLine = l.scope ? `\n**Scope:** ${l.scope}` : '';
|
|
290
|
-
const entry = `## Lesson — ${heading}\n\n**Tags:** ${tags}${scopeLine}\n\n${l.text}\n`;
|
|
291
|
-
writeLessonFile(lessonsDir, entry);
|
|
292
|
-
}
|
|
293
|
-
}
|
|
294
|
-
// ─── Lesson selection ───────────────────────────────────
|
|
295
|
-
const LABEL_MAX_CHARS = 70;
|
|
296
|
-
function truncateLabel(text) {
|
|
297
|
-
const oneLine = text.replace(/\n/g, ' ');
|
|
298
|
-
if (oneLine.length <= LABEL_MAX_CHARS)
|
|
299
|
-
return oneLine;
|
|
300
|
-
return oneLine.slice(0, LABEL_MAX_CHARS - 1) + '…';
|
|
301
|
-
}
|
|
302
|
-
/**
|
|
303
|
-
* Prompts the user to select which lessons to keep via multi-select.
|
|
304
|
-
* In --yes mode, suspicious lessons are blocked (dropped with warnings).
|
|
305
|
-
* Returns the selected lessons.
|
|
306
|
-
* Throws in non-interactive environments without --yes.
|
|
307
|
-
*/
|
|
308
|
-
export async function selectLessons(lessons, opts) {
|
|
309
|
-
if (opts.yes) {
|
|
310
|
-
// --yes mode: block suspicious lessons (#291)
|
|
311
|
-
const clean = lessons.filter((l) => !l.suspiciousFlags?.length);
|
|
312
|
-
const dropped = lessons.filter((l) => l.suspiciousFlags?.length);
|
|
313
|
-
if (dropped.length > 0) {
|
|
314
|
-
for (const l of dropped) {
|
|
315
|
-
log.warn(TAG, `Blocked suspicious lesson: ${truncateLabel(sanitize(l.text))}`);
|
|
316
|
-
for (const flag of l.suspiciousFlags) {
|
|
317
|
-
log.warn(TAG, ` - ${flag}`);
|
|
318
|
-
}
|
|
319
|
-
}
|
|
23
|
+
if (prNumbers.length > 0) {
|
|
24
|
+
throw new TotemConfigError('Cannot combine --local with PR numbers.', 'Use either --local for local diffs or PR numbers for remote extraction.', 'CONFIG_INVALID');
|
|
320
25
|
}
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
message: `Select lessons to persist (${lessons.length} extracted):`,
|
|
329
|
-
options: lessons.map((lesson, i) => ({
|
|
330
|
-
value: i,
|
|
331
|
-
label: lesson.suspiciousFlags?.length
|
|
332
|
-
? `[!] ${truncateLabel(sanitize(lesson.text))}`
|
|
333
|
-
: truncateLabel(sanitize(lesson.text)),
|
|
334
|
-
hint: lesson.suspiciousFlags?.length
|
|
335
|
-
? `${sanitize(lesson.tags.join(', '))} -- ${lesson.suspiciousFlags.join('; ')}`
|
|
336
|
-
: sanitize(lesson.tags.join(', ')),
|
|
337
|
-
})),
|
|
338
|
-
// Pre-select only non-suspicious lessons
|
|
339
|
-
initialValues: lessons
|
|
340
|
-
.map((l, i) => (l.suspiciousFlags?.length ? null : i))
|
|
341
|
-
.filter((i) => i !== null),
|
|
342
|
-
required: false,
|
|
343
|
-
});
|
|
344
|
-
if (isCancel(result)) {
|
|
345
|
-
return [];
|
|
26
|
+
const cwd = process.cwd();
|
|
27
|
+
const { extractFromLocal } = await import('./extract-local.js');
|
|
28
|
+
const lessons = await extractFromLocal(options, cwd);
|
|
29
|
+
if (lessons.length === 0)
|
|
30
|
+
return;
|
|
31
|
+
await sharedPipeline(lessons, options, cwd, 'local changes');
|
|
32
|
+
return;
|
|
346
33
|
}
|
|
347
|
-
|
|
348
|
-
}
|
|
349
|
-
export async function extractCommand(prNumbers, options) {
|
|
350
|
-
const path = await import('node:path');
|
|
351
|
-
const { GitHubCliPrAdapter } = await import('../adapters/github-cli-pr.js');
|
|
352
|
-
// Validate and deduplicate PR numbers
|
|
34
|
+
// ─── PR number validation ────────────────────────────
|
|
353
35
|
const unique = [...new Set(prNumbers)];
|
|
354
36
|
if (unique.length > MAX_INPUTS) {
|
|
355
37
|
throw new TotemConfigError(`Too many PR numbers (${unique.length}). Maximum is ${MAX_INPUTS}.`, `Pass at most ${MAX_INPUTS} PR numbers at a time.`, 'CONFIG_INVALID');
|
|
@@ -364,256 +46,30 @@ export async function extractCommand(prNumbers, options) {
|
|
|
364
46
|
}
|
|
365
47
|
const cwd = process.cwd();
|
|
366
48
|
const configPath = resolveConfigPath(cwd);
|
|
49
|
+
if (isGlobalConfigPath(configPath)) {
|
|
50
|
+
throw new TotemConfigError('Cannot extract lessons without a local project.', "Run 'totem init' to create a local .totem/ directory first.", 'CONFIG_MISSING');
|
|
51
|
+
}
|
|
367
52
|
loadEnv(cwd);
|
|
368
53
|
const config = await loadConfig(configPath);
|
|
369
|
-
//
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
log.info(TAG, 'Querying existing lessons for dedup...');
|
|
379
|
-
const existingLessons = await retrieveExistingLessons(store);
|
|
380
|
-
log.info(TAG, `Found ${existingLessons.length} existing lessons for context`);
|
|
381
|
-
// Resolve system prompt (allow .totem/prompts/extract.md override)
|
|
382
|
-
const systemPrompt = getSystemPrompt('extract', SYSTEM_PROMPT, cwd, config.totemDir);
|
|
383
|
-
// Process each PR sequentially, accumulating lessons
|
|
384
|
-
const allLessons = [];
|
|
385
|
-
const adapter = new GitHubCliPrAdapter(cwd);
|
|
386
|
-
for (const num of nums) {
|
|
387
|
-
// ─── Scan-based extraction (--from-scan) ─────────────
|
|
388
|
-
if (options.fromScan) {
|
|
389
|
-
if (!adapter.fetchCodeScanningAlerts) {
|
|
390
|
-
throw new TotemConfigError('The current PR adapter does not support code scanning alerts.', 'Use the GitHub CLI adapter (default) to enable --from-scan.', 'CONFIG_INVALID');
|
|
391
|
-
}
|
|
392
|
-
// Fetch code scanning alerts for this PR
|
|
393
|
-
const { safeExec: exec } = await import('@mmnto/totem');
|
|
394
|
-
log.info(TAG, `Fetching code scanning alerts for PR #${num}...`);
|
|
395
|
-
const allAlerts = adapter.fetchCodeScanningAlerts(num);
|
|
396
|
-
const fixedAlerts = allAlerts.filter((a) => a.state === 'fixed');
|
|
397
|
-
log.info(TAG, `Found ${allAlerts.length} alert(s), ${fixedAlerts.length} fixed`);
|
|
398
|
-
if (fixedAlerts.length === 0) {
|
|
399
|
-
log.dim(TAG, `No fixed code scanning alerts for PR #${num}. Skipping.`);
|
|
400
|
-
continue;
|
|
401
|
-
}
|
|
402
|
-
// Fetch the PR diff filtered to affected files only (avoids truncation in large PRs)
|
|
403
|
-
const affectedFiles = [
|
|
404
|
-
...new Set(fixedAlerts.map((a) => a.most_recent_instance.location.path)),
|
|
405
|
-
];
|
|
406
|
-
log.info(TAG, `Fetching PR diff for ${affectedFiles.length} affected file(s)...`);
|
|
407
|
-
const diffArgs = ['pr', 'diff', String(num), '--', ...affectedFiles];
|
|
408
|
-
const diff = exec('gh', diffArgs, {
|
|
409
|
-
cwd,
|
|
410
|
-
timeout: GH_TIMEOUT_MS,
|
|
411
|
-
maxBuffer: 10 * 1024 * 1024,
|
|
412
|
-
env: { ...process.env, GH_PROMPT_DISABLED: '1' },
|
|
413
|
-
});
|
|
414
|
-
// Resolve system prompt (allow .totem/prompts/extract-scan.md override)
|
|
415
|
-
const scanSystemPrompt = getSystemPrompt('extract-scan', SCAN_EXTRACT_SYSTEM_PROMPT, cwd, config.totemDir);
|
|
416
|
-
// Assemble scan-specific prompt
|
|
417
|
-
const prompt = assembleFromScanPrompt(fixedAlerts, diff, existingLessons, scanSystemPrompt);
|
|
418
|
-
log.dim(TAG, `Prompt: ${(prompt.length / 1024).toFixed(0)}KB`);
|
|
419
|
-
// Run orchestrator
|
|
420
|
-
const content = await runOrchestrator({
|
|
421
|
-
prompt,
|
|
422
|
-
tag: TAG,
|
|
423
|
-
options,
|
|
424
|
-
config,
|
|
425
|
-
cwd,
|
|
426
|
-
temperature: 0.4,
|
|
427
|
-
customSecrets,
|
|
428
|
-
});
|
|
429
|
-
if (content == null)
|
|
430
|
-
continue; // --raw mode
|
|
431
|
-
// Parse lessons from LLM output
|
|
432
|
-
const lessons = parseLessons(content);
|
|
433
|
-
if (lessons.length === 0) {
|
|
434
|
-
log.dim(TAG, `No lessons extracted from scan alerts in PR #${num}.`);
|
|
435
|
-
}
|
|
436
|
-
else {
|
|
437
|
-
log.success(TAG, `Extracted ${lessons.length} lesson(s) from scan alerts in PR #${num}`);
|
|
438
|
-
allLessons.push(...lessons);
|
|
439
|
-
}
|
|
440
|
-
continue; // skip normal review-comment extraction
|
|
441
|
-
}
|
|
442
|
-
// Fetch PR data
|
|
443
|
-
log.info(TAG, `Fetching PR #${num}...`);
|
|
444
|
-
const pr = adapter.fetchPr(num);
|
|
445
|
-
log.info(TAG, `Title: ${pr.title}`);
|
|
446
|
-
// Fetch inline review comments
|
|
447
|
-
log.info(TAG, 'Fetching review comments...');
|
|
448
|
-
const reviewComments = adapter.fetchReviewComments(num);
|
|
449
|
-
log.info(TAG, `Found ${reviewComments.length} inline review comments`);
|
|
450
|
-
// Filter GCA boilerplate from inline comments
|
|
451
|
-
const filteredComments = reviewComments.filter((c) => !isGcaBoilerplate(c.body, botMarkers));
|
|
452
|
-
// Skip if no review content
|
|
453
|
-
const hasReviewContent = pr.reviews.some((r) => r.body.trim()) ||
|
|
454
|
-
pr.comments.some((c) => !isGcaBoilerplate(c.body, botMarkers)) ||
|
|
455
|
-
filteredComments.length > 0;
|
|
456
|
-
if (!hasReviewContent) {
|
|
457
|
-
log.dim(TAG, `No review content found in PR #${num}. Skipping.`);
|
|
458
|
-
continue;
|
|
459
|
-
}
|
|
460
|
-
// Group inline comments into threads
|
|
461
|
-
const threads = groupIntoThreads(filteredComments);
|
|
462
|
-
log.info(TAG, `Grouped into ${threads.length} review threads`);
|
|
463
|
-
// Extract CodeRabbit nits from review bodies (lazy import)
|
|
464
|
-
const { parseCodeRabbitNits } = await import('../parse-nits.js');
|
|
465
|
-
const prNits = [];
|
|
466
|
-
for (const r of pr.reviews) {
|
|
467
|
-
if (r.author?.toLowerCase().includes('coderabbit')) {
|
|
468
|
-
prNits.push(...parseCodeRabbitNits(r.body));
|
|
469
|
-
}
|
|
470
|
-
}
|
|
471
|
-
// Scope inference (#1014): analyze PR changed files for scope suggestion
|
|
472
|
-
let scopeGlobs = [];
|
|
473
|
-
try {
|
|
474
|
-
const { safeExec: exec, inferScopeFromFiles } = await import('@mmnto/totem');
|
|
475
|
-
const diff = exec('gh', ['pr', 'diff', String(num), '--name-only'], {
|
|
476
|
-
cwd,
|
|
477
|
-
timeout: GH_TIMEOUT_MS,
|
|
478
|
-
maxBuffer: 10 * 1024 * 1024, // 10MB for large PRs
|
|
479
|
-
env: { ...process.env, GH_PROMPT_DISABLED: '1' },
|
|
480
|
-
});
|
|
481
|
-
const files = diff.trim().split(/\r?\n/).filter(Boolean);
|
|
482
|
-
scopeGlobs = inferScopeFromFiles(files);
|
|
483
|
-
if (scopeGlobs.length > 0) {
|
|
484
|
-
log.dim(TAG, `Inferred scope: ${scopeGlobs.join(', ')}`);
|
|
485
|
-
}
|
|
486
|
-
}
|
|
487
|
-
catch (err) {
|
|
488
|
-
log.dim(TAG, `Skipping scope inference: ${err instanceof Error ? err.message : String(err)}`);
|
|
489
|
-
}
|
|
490
|
-
// Assemble prompt
|
|
491
|
-
const prompt = assemblePrompt(pr, threads, existingLessons, systemPrompt, prNits, botMarkers, scopeGlobs);
|
|
492
|
-
log.dim(TAG, `Prompt: ${(prompt.length / 1024).toFixed(0)}KB`);
|
|
493
|
-
// Run orchestrator (handles --raw mode, validation, invocation, telemetry)
|
|
494
|
-
const content = await runOrchestrator({
|
|
495
|
-
prompt,
|
|
496
|
-
tag: TAG,
|
|
497
|
-
options,
|
|
498
|
-
config,
|
|
499
|
-
cwd,
|
|
500
|
-
temperature: 0.4,
|
|
501
|
-
customSecrets,
|
|
502
|
-
});
|
|
503
|
-
if (content == null)
|
|
504
|
-
continue; // --raw mode — prompt already output, process next PR
|
|
505
|
-
// Parse lessons from LLM output
|
|
506
|
-
const lessons = parseLessons(content);
|
|
507
|
-
if (lessons.length === 0) {
|
|
508
|
-
log.dim(TAG, `No lessons extracted from PR #${num}.`);
|
|
509
|
-
}
|
|
510
|
-
else {
|
|
511
|
-
log.success(TAG, `Extracted ${lessons.length} lesson(s) from PR #${num}`);
|
|
512
|
-
allLessons.push(...lessons);
|
|
513
|
-
}
|
|
54
|
+
// ─── Mode routing ────────────────────────────────────
|
|
55
|
+
let allLessons;
|
|
56
|
+
if (options.fromScan) {
|
|
57
|
+
const { extractFromScans } = await import('./extract-scan.js');
|
|
58
|
+
allLessons = await extractFromScans(nums, options, config, cwd, configPath);
|
|
59
|
+
}
|
|
60
|
+
else {
|
|
61
|
+
const { extractFromPrs } = await import('./extract-pr.js');
|
|
62
|
+
allLessons = await extractFromPrs(nums, options, config, cwd, configPath);
|
|
514
63
|
}
|
|
515
64
|
// In --raw mode, prompts were already output during the loop
|
|
516
65
|
if (options.raw)
|
|
517
66
|
return;
|
|
518
67
|
if (allLessons.length === 0) {
|
|
519
|
-
log.dim(
|
|
520
|
-
return;
|
|
521
|
-
}
|
|
522
|
-
// Semantic dedup against existing lessons and intra-batch (#347)
|
|
523
|
-
log.info(TAG, 'Deduplicating against existing lessons...'); // totem-ignore — static string
|
|
524
|
-
const { kept: novelLessons, dropped: dupLessons } = await deduplicateLessons(allLessons, store, embedder);
|
|
525
|
-
if (dupLessons.length > 0) {
|
|
526
|
-
log.dim(TAG, `Dropped ${dupLessons.length} semantically duplicate lesson(s)`); // totem-ignore — integer count
|
|
527
|
-
}
|
|
528
|
-
if (novelLessons.length === 0) {
|
|
529
|
-
log.dim(TAG, 'All extracted lessons are duplicates of existing ones.'); // totem-ignore — static string
|
|
530
|
-
return;
|
|
531
|
-
}
|
|
532
|
-
// Flag suspicious lessons before review (#290)
|
|
533
|
-
const flaggedLessons = flagSuspiciousLessons(novelLessons);
|
|
534
|
-
const suspiciousCount = flaggedLessons.filter((l) => l.suspiciousFlags?.length).length;
|
|
535
|
-
if (suspiciousCount > 0) {
|
|
536
|
-
log.warn(TAG, `${suspiciousCount} lesson(s) flagged as suspicious`); // totem-ignore — count only, no untrusted content
|
|
537
|
-
}
|
|
538
|
-
log.success(TAG, `Total: ${flaggedLessons.length} lesson(s) from ${nums.length} PR(s)`); // totem-ignore — count only, no untrusted content
|
|
539
|
-
// --dry-run mode: preview lessons to stdout (pipeable) without writing
|
|
540
|
-
if (options.dryRun) {
|
|
541
|
-
log.dim(TAG, 'Dry run — lessons not written.');
|
|
542
|
-
for (const lesson of flaggedLessons) {
|
|
543
|
-
const prefix = lesson.suspiciousFlags?.length ? '[!] ' : '';
|
|
544
|
-
console.log(`\n ${prefix}Tags: ${sanitize(lesson.tags.join(', ')).replace(/\n/g, ' ')}`); // totem-ignore — stdout for piping
|
|
545
|
-
if (lesson.scope)
|
|
546
|
-
console.log(` Scope: ${sanitize(lesson.scope)}`); // totem-ignore — stdout for piping
|
|
547
|
-
console.log(` ${sanitize(lesson.text).replace(/\n/g, '\n ')}`); // totem-ignore — stdout for piping
|
|
548
|
-
if (lesson.suspiciousFlags?.length) {
|
|
549
|
-
for (const flag of lesson.suspiciousFlags) {
|
|
550
|
-
console.log(` [!] ${flag}`); // totem-ignore — stdout for piping
|
|
551
|
-
}
|
|
552
|
-
}
|
|
553
|
-
}
|
|
554
|
-
// Exit non-zero if suspicious lessons detected in --yes mode (#291)
|
|
555
|
-
if (options.yes && suspiciousCount > 0) {
|
|
556
|
-
process.exitCode = 1;
|
|
557
|
-
}
|
|
558
|
-
return;
|
|
559
|
-
}
|
|
560
|
-
if (!options.yes) {
|
|
561
|
-
// Display full text of each lesson for review before prompting
|
|
562
|
-
console.error('');
|
|
563
|
-
log.warn(TAG, 'WARNING: These lessons were extracted from PR comments, which may include content from untrusted contributors.');
|
|
564
|
-
log.warn(TAG, 'Review each lesson carefully before accepting.\n');
|
|
565
|
-
for (let i = 0; i < flaggedLessons.length; i++) {
|
|
566
|
-
const lesson = flaggedLessons[i];
|
|
567
|
-
const prefix = lesson.suspiciousFlags?.length ? `[!] ` : '';
|
|
568
|
-
console.error(` [${i + 1}] ${prefix}Tags: ${sanitize(lesson.tags.join(', ')).replace(/\n/g, ' ')}`);
|
|
569
|
-
if (lesson.scope)
|
|
570
|
-
console.error(` Scope: ${sanitize(lesson.scope)}`);
|
|
571
|
-
console.error(` ${sanitize(lesson.text).replace(/\n/g, '\n ')}`);
|
|
572
|
-
if (lesson.suspiciousFlags?.length) {
|
|
573
|
-
for (const flag of lesson.suspiciousFlags) {
|
|
574
|
-
console.error(` [!] ${flag}`);
|
|
575
|
-
}
|
|
576
|
-
}
|
|
577
|
-
console.error('');
|
|
578
|
-
}
|
|
579
|
-
}
|
|
580
|
-
// Interactive multi-select (or --yes bypass with suspicious blocking)
|
|
581
|
-
const selected = await selectLessons(flaggedLessons, {
|
|
582
|
-
yes: options.yes,
|
|
583
|
-
isTTY: !!process.stdin.isTTY,
|
|
584
|
-
});
|
|
585
|
-
if (selected.length === 0) {
|
|
586
|
-
log.dim(TAG, 'No lessons selected — nothing written.');
|
|
68
|
+
log.dim('Extract', 'No lessons extracted from any PR.');
|
|
587
69
|
return;
|
|
588
70
|
}
|
|
589
|
-
//
|
|
590
|
-
const sanitizedLessons = selected.map((l) => ({
|
|
591
|
-
tags: l.tags.map((t) => sanitize(t)),
|
|
592
|
-
text: sanitize(l.text),
|
|
593
|
-
...(l.scope && { scope: sanitize(l.scope) }),
|
|
594
|
-
}));
|
|
595
|
-
// Append lessons to .totem/lessons/
|
|
596
|
-
const lessonsDir = path.join(cwd, config.totemDir, 'lessons');
|
|
597
|
-
appendLessons(sanitizedLessons, lessonsDir);
|
|
598
|
-
log.success(TAG, `Appended ${sanitizedLessons.length} lesson(s) to ${config.totemDir}/lessons/`); // totem-ignore
|
|
599
|
-
// Run incremental sync so lessons are immediately searchable
|
|
600
|
-
log.info(TAG, 'Running incremental sync...');
|
|
601
|
-
const syncResult = await runSync(config, {
|
|
602
|
-
projectRoot: cwd,
|
|
603
|
-
incremental: true,
|
|
604
|
-
onProgress: (msg) => log.dim(TAG, msg),
|
|
605
|
-
});
|
|
606
|
-
log.success(TAG, `Sync complete: ${syncResult.chunksProcessed} chunks from ${syncResult.filesProcessed} files`);
|
|
607
|
-
// Print summary
|
|
71
|
+
// Shared pipeline: dedup, flag, select, persist, sync
|
|
608
72
|
const prLabel = nums.length === 1 ? `PR #${nums[0]}` : `${nums.length} PRs`;
|
|
609
|
-
|
|
610
|
-
for (const lesson of sanitizedLessons) {
|
|
611
|
-
console.log(`\n Tags: ${lesson.tags.join(', ').replace(/\n/g, ' ')}`);
|
|
612
|
-
console.log(` ${lesson.text.replace(/\n/g, '\n ')}`);
|
|
613
|
-
}
|
|
614
|
-
// Exit non-zero if --yes mode dropped suspicious lessons (#291)
|
|
615
|
-
if (options.yes && suspiciousCount > 0) {
|
|
616
|
-
process.exitCode = 1;
|
|
617
|
-
}
|
|
73
|
+
await sharedPipeline(allLessons, options, cwd, prLabel, config, configPath);
|
|
618
74
|
}
|
|
619
75
|
//# sourceMappingURL=extract.js.map
|