@poltergeist-ai/cli 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js ADDED
@@ -0,0 +1,1213 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/cli.ts
4
+ import { appendFileSync, existsSync, mkdirSync, readFileSync as readFileSync4, writeFileSync } from "fs";
5
+ import path4 from "path";
6
+ import { execFileSync as execFileSync2 } from "child_process";
7
+ import { parseArgs } from "util";
8
+
9
+ // src/extractors/git.ts
10
+ import path from "path";
11
+
12
+ // src/utils.ts
13
+ import { execFileSync } from "child_process";
14
/**
 * Turn a display name into a lowercase, dash-separated slug.
 *
 * Every run of characters outside [a-z0-9] (after lowercasing) acts as a
 * single separator; separators at the start or end produce no dash.
 *
 * @param {string} name - Free-form name to convert.
 * @returns {string} The slug; empty when the name has no ASCII alphanumerics.
 */
function slugify(name) {
  const words = name.toLowerCase().split(/[^a-z0-9]+/);
  return words.filter((word) => word !== "").join("-");
}
17
/**
 * Run a git sub-command inside a repository and return its stdout as text.
 *
 * @param {string} repoPath - Directory handed to `git -C`.
 * @param {string[]} args - Arguments appended after `-C <repoPath>`.
 * @returns {string} UTF-8 decoded stdout of the git process.
 */
function runGit(repoPath, args) {
  // 50 MiB of output is allowed before execFileSync gives up.
  const execOptions = { encoding: "utf-8", maxBuffer: 50 * 1024 * 1024 };
  const argv = ["-C", repoPath, ...args];
  return execFileSync("git", argv, execOptions);
}
23
/**
 * Print a tagged progress message, but only in verbose mode.
 *
 * @param {boolean} verbose - When falsy nothing is printed.
 * @param {string} tag - Short source label shown in square brackets.
 * @param {string} msg - Message text.
 */
function log(verbose, tag, msg) {
  if (!verbose) return;
  console.log(`[${tag}] ${msg}`);
}
28
/**
 * Add `amount` to the counter stored under `key`, creating it at 0 if needed.
 *
 * Only own properties count as existing counters. Reading `record[key]`
 * directly would pick up inherited members for keys such as "constructor"
 * or "toString" and turn the counter into string garbage.
 *
 * @param {Record<string, number>} record - Counter map, mutated in place.
 * @param {string} key - Counter name.
 * @param {number} [amount=1] - Value to add.
 */
function increment(record, key, amount = 1) {
  const current = Object.hasOwn(record, key) ? record[key] : undefined;
  record[key] = (current ?? 0) + amount;
}
31
/**
 * Return the `n` entries of a counter map with the highest counts.
 *
 * @param {Record<string, number>} record - Counter map.
 * @param {number} n - Maximum number of entries to return.
 * @returns {Array<[string, number]>} `[key, count]` pairs, highest count first.
 */
function topN(record, n) {
  const pairs = Object.entries(record);
  pairs.sort(([, countA], [, countB]) => countB - countA);
  return pairs.slice(0, n);
}
34
+
35
+ // src/extractors/git.ts
36
// Matches a conventional-commit style subject prefix such as `feat(scope)!: `.
// Group 1 is the type word, group 2 the optional scope inside parentheses.
var CONVENTIONAL_RE = /^(\w+)(?:\(([^)]+)\))?[!]?:\s/;
// Ordered [pattern, domain] pairs used by inferDomains, which stops at the
// first pattern that matches a path, so earlier rows take precedence.
// NOTE(review): because the frontend/backend rows come before the testing
// row, a path like `Button.test.tsx` is counted as "frontend" — confirm
// this precedence is intended.
var DOMAIN_RULES = [
  [/\.(vue|svelte|jsx|tsx|css|scss|sass|less)$/, "frontend"],
  [/\.(go|rs|py|java|rb|php|cs|ex|exs)$/, "backend"],
  [/(Dockerfile|\.tf|\.yaml|\.yml)/, "infrastructure"],
  [/\.(test|spec)\.[^.]+$|__tests__\//, "testing"],
  [/\.md$/, "documentation"],
  [/(config|rc\.|\.config\.|tsconfig|vite\.config|eslint|prettier|package\.json)/, "tooling"]
];
45
/**
 * Guess which engineering domains a contributor works in from touched files.
 *
 * Each path is attributed to the first DOMAIN_RULES row whose pattern matches;
 * its modification count is added to that domain. The four highest-scoring
 * domains are kept, and of those only the ones with a score of at least 3.
 *
 * @param {Record<string, number>} filesModified - Path to modification count.
 * @returns {string[]} Domain names, highest score first.
 */
function inferDomains(filesModified) {
  const tally = {};
  Object.entries(filesModified).forEach(([file, touches]) => {
    const rule = DOMAIN_RULES.find(([pattern]) => pattern.test(file));
    if (rule) increment(tally, rule[1], touches);
  });
  const ranked = topN(tally, 4);
  return ranked.filter((pair) => pair[1] >= 3).map((pair) => pair[0]);
}
57
/**
 * Bucket modification counts into tests / configs / components / docs / other.
 *
 * A path lands in the first bucket whose pattern matches, checked in the
 * order listed below; anything unmatched is counted as "other".
 *
 * @param {Record<string, number>} filesModified - Path to modification count.
 * @returns {{tests: number, configs: number, components: number, docs: number, other: number}}
 */
function profileFileTypes(filesModified) {
  const buckets = [
    [/\.(test|spec)\.[^.]+$|__tests__\//, "tests"],
    [/(config|rc\.|\.config\.|tsconfig|eslint|prettier)/, "configs"],
    [/\.(vue|svelte|jsx|tsx)$/, "components"],
    [/\.md$/, "docs"]
  ];
  const bucketFor = (file) => {
    for (const [pattern, bucket] of buckets) {
      if (pattern.test(file)) return bucket;
    }
    return "other";
  };
  const profile = { tests: 0, configs: 0, components: 0, docs: 0, other: 0 };
  for (const [file, touches] of Object.entries(filesModified)) {
    profile[bucketFor(file)] += touches;
  }
  return profile;
}
74
/**
 * Collect raw signals about one contributor from a repository's git history.
 *
 * Runs four independent `git log` queries filtered by author. A failure in
 * any one query is reported and leaves the corresponding fields at their
 * empty defaults instead of aborting the whole extraction.
 *
 * @param {string} repoPath - Path of the repository to inspect.
 * @param {string} contributor - Author name, used as the filter when no email is given.
 * @param {string|undefined|null} email - Author email; preferred over the name when present.
 * @param {boolean} verbose - Enables progress / diagnostic logging.
 * @returns {object} Signals: commitMessages, commitBodies, filesCreated,
 *   filesModified (path -> count), extensions (ext -> count of created files),
 *   namingPatterns, commitCount and, when the patch query succeeds, rawDiffOutput.
 */
function extractGitSignals(repoPath, contributor, email, verbose) {
  const signals = {
    commitMessages: [],
    commitBodies: [],
    filesCreated: [],
    filesModified: {},
    extensions: {},
    namingPatterns: [],
    commitCount: 0
  };
  const authorFilter = `--author=${email ?? contributor}`;

  // 1. Subjects and full messages of the 200 most recent commits.
  try {
    const output = runGit(repoPath, [
      "log",
      authorFilter,
      "--format=%s%x00%B%x01",
      "--max-count=200"
    ]);
    // The format ends every record with byte 0x01 and separates subject from
    // message with byte 0x00, so records are split on "\x01". Splitting on
    // the empty string would yield single characters.
    const entries = output.split("\x01").filter(Boolean);
    for (const entry of entries) {
      const [subject, ...bodyParts] = entry.split("\0");
      const trimmedSubject = subject.trim();
      if (trimmedSubject) {
        signals.commitMessages.push(trimmedSubject);
        const body = bodyParts.join("\0").trim();
        // A message identical to the subject carries no extra information.
        if (body && body !== trimmedSubject) {
          signals.commitBodies.push(body);
        }
      }
    }
    signals.commitCount = signals.commitMessages.length;
    log(verbose, "git", `Found ${signals.commitCount} commits`);
  } catch (e) {
    console.log(`[git] Warning: could not read commit messages \u2014 ${e}`);
  }

  // 2. Files the author added, plus a tally of their extensions.
  try {
    const output = runGit(repoPath, [
      "log",
      authorFilter,
      "--diff-filter=A",
      "--name-only",
      "--format="
    ]);
    signals.filesCreated = output.split("\n").map((l) => l.trim()).filter(Boolean);
    for (const f of signals.filesCreated) {
      const ext = path.extname(f);
      if (ext) increment(signals.extensions, ext);
    }
  } catch (e) {
    // Best effort: keep going, but do not hide the reason in verbose mode.
    log(verbose, "git", `Could not list created files \u2014 ${e}`);
  }

  // 3. How often each path appears in the author's commits.
  try {
    const output = runGit(repoPath, [
      "log",
      authorFilter,
      "--name-only",
      "--format="
    ]);
    for (const line of output.split("\n")) {
      const trimmed = line.trim();
      if (trimmed) increment(signals.filesModified, trimmed);
    }
  } catch (e) {
    log(verbose, "git", `Could not list modified files \u2014 ${e}`);
  }

  // 4. Patches of the 50 most recent non-merge commits; identifiers declared
  //    on added lines feed the naming-style statistics.
  try {
    const output = runGit(repoPath, [
      "log",
      authorFilter,
      "--max-count=50",
      "-p",
      "--no-merges"
    ]);
    signals.rawDiffOutput = output;
    // Keep added lines only; "+++" lines are file headers, not content.
    const addedLines = output.split("\n").filter((l) => l.startsWith("+") && !l.startsWith("+++")).map((l) => l.slice(1));
    const nameRe = /\b(const|function|let|var|def|class)\s+([a-zA-Z_][a-zA-Z0-9_]*)/g;
    for (const line of addedLines) {
      // exec() returning null resets lastIndex, so the /g regex can be
      // reused for the next line.
      let match;
      while ((match = nameRe.exec(line)) !== null) {
        signals.namingPatterns.push(match[2]);
      }
    }
  } catch (e) {
    log(verbose, "git", `Could not read patches \u2014 ${e}`);
  }
  return signals;
}
158
/**
 * Condense raw git signals into observations for the generated profile.
 *
 * Sections are only filled in when the matching signals are non-empty:
 *  - commit message statistics and, when more than 30% of subjects have a
 *    conventional-commit prefix, the prefix counts and up to 10 scopes;
 *  - the five most common extensions of created files;
 *  - top-level directories of the ten most modified files, inferred
 *    domains and a file-type profile;
 *  - counts of identifiers per naming style.
 *
 * @param {object} signals - Output of extractGitSignals.
 * @returns {object} Observation object; keys are absent when not applicable.
 */
function summariseGit(signals) {
  const obs = {};
  const msgs = signals.commitMessages;
  if (msgs.length > 0) {
    const avgLen = msgs.reduce((sum, m) => sum + m.length, 0) / msgs.length;
    // Heuristic: a subject whose first character equals its upper-cased form
    // is counted as imperative. NOTE(review): the startsWith checks are
    // lower-case and so never exclude anything the first test let through.
    const imperative = msgs.filter(
      (m) => m[0] === m[0].toUpperCase() && !m.startsWith("fix") && !m.startsWith("add")
    ).length;
    obs.commitMessageAvgLength = Math.round(avgLen);
    obs.commitMessageSample = msgs.slice(0, 5);
    obs.likelyUsesImperativeMood = imperative > msgs.length * 0.5;
    const prefixCounts = {};
    const scopes = /* @__PURE__ */ new Set();
    let conventionalCount = 0;
    for (const msg of msgs) {
      const match = msg.match(CONVENTIONAL_RE);
      if (match) {
        conventionalCount++;
        increment(prefixCounts, match[1].toLowerCase());
        if (match[2]) scopes.add(match[2]);
      }
    }
    if (conventionalCount > msgs.length * 0.3) {
      obs.conventionalCommitPrefixes = prefixCounts;
      if (scopes.size > 0) {
        obs.commitScopePatterns = [...scopes].slice(0, 10);
      }
    }
  }
  if (Object.keys(signals.extensions).length > 0) {
    obs.primaryExtensions = topN(signals.extensions, 5);
  }
  if (Object.keys(signals.filesModified).length > 0) {
    const topFiles = topN(signals.filesModified, 10);
    const dirs = {};
    for (const [filepath, count] of topFiles) {
      // git prints paths with "/" on every platform; splitting on the
      // OS-specific separator would find no directories on Windows.
      const parts = filepath.split("/");
      if (parts.length > 1) {
        increment(dirs, parts[0], count);
      }
    }
    obs.primaryDirectories = topN(dirs, 5);
    obs.inferredDomains = inferDomains(signals.filesModified);
    obs.fileTypeProfile = profileFileTypes(signals.filesModified);
  }
  const names = signals.namingPatterns;
  if (names.length > 0) {
    // Letters-only names starting lower-case count as camelCase (this
    // includes all-lower-case names); any name containing "_" is snake_case.
    const camel = names.filter((n) => /^[a-z][a-zA-Z]+$/.test(n)).length;
    const pascal = names.filter((n) => /^[A-Z][a-zA-Z]+$/.test(n)).length;
    const snake = names.filter((n) => n.includes("_")).length;
    obs.namingStyle = {
      camelCase: camel,
      PascalCase: pascal,
      snake_case: snake
    };
  }
  return obs;
}
216
+
217
+ // src/extractors/code-style.ts
218
// File extension (including the leading dot) -> language name used to decide
// which PATTERN_RULES apply. Extensions missing from this table are ignored
// by detectLanguages.
var LANG_MAP = {
  ".ts": "typescript",
  ".tsx": "typescript",
  ".mts": "typescript",
  ".cts": "typescript",
  ".js": "javascript",
  ".jsx": "javascript",
  ".mjs": "javascript",
  ".cjs": "javascript",
  // Single-file components are analysed with the TypeScript rule set.
  ".vue": "typescript",
  ".svelte": "typescript",
  ".py": "python",
  ".go": "go",
  ".rs": "rust",
  ".rb": "ruby",
  ".java": "java",
  ".php": "php"
};
233
var JS_TS = ["typescript", "javascript"];
var TS_ONLY = ["typescript"];
// One row per detectable coding habit. extractCodeStyleFromDiff tests each
// pattern against an added diff line AFTER trimming it, so patterns must not
// require leading whitespace. A row without `languages` applies regardless
// of which languages were detected.
var PATTERN_RULES = [
  // Imports
  { category: "import_style", choice: "named_import", pattern: /^import\s+\{/, languages: JS_TS },
  { category: "import_style", choice: "default_import", pattern: /^import\s+[A-Za-z_$][^\s{]*\s+from/, languages: JS_TS },
  { category: "import_style", choice: "path_alias", pattern: /from\s+['"][@~]\//, languages: JS_TS },
  { category: "import_style", choice: "relative_import", pattern: /from\s+['"]\.\.?\//, languages: JS_TS },
  // Exports
  { category: "export_style", choice: "named_export", pattern: /^export\s+(?:const|function|class|type|interface|enum)\s/, languages: JS_TS },
  { category: "export_style", choice: "default_export", pattern: /^export\s+default\b/, languages: JS_TS },
  { category: "export_style", choice: "re_export", pattern: /^export\s+\{[^}]*\}\s+from/, languages: JS_TS },
  // Functions
  { category: "function_style", choice: "arrow_function", pattern: /(?:const|let)\s+\w+\s*=\s*(?:async\s+)?\(/, languages: JS_TS },
  { category: "function_style", choice: "function_declaration", pattern: /^(?:export\s+)?(?:async\s+)?function\s+\w/, languages: JS_TS },
  // Async
  { category: "async_style", choice: "async_await", pattern: /\bawait\s/, languages: [...JS_TS, "python"] },
  { category: "async_style", choice: "then_chain", pattern: /\.then\s*\(/, languages: JS_TS },
  // Control flow. These use `^\s*`, not `^\s+`: the tested line is already
  // trimmed, so a mandatory leading space could never match.
  { category: "control_flow", choice: "early_return", pattern: /^\s*if\s*\(.*\)\s*return\b/, languages: JS_TS },
  { category: "control_flow", choice: "guard_clause", pattern: /^\s*if\s*\(!/, languages: JS_TS },
  // Strings
  { category: "string_style", choice: "template_literal", pattern: /`[^`]*\$\{/, languages: JS_TS },
  // Modern operators
  { category: "modern_operators", choice: "optional_chaining", pattern: /\?\.\w/, languages: JS_TS },
  { category: "modern_operators", choice: "nullish_coalescing", pattern: /\?\?/, languages: JS_TS },
  { category: "modern_operators", choice: "destructuring", pattern: /(?:const|let|var)\s+[\[{]/, languages: JS_TS },
  // TypeScript types
  { category: "type_definition", choice: "interface", pattern: /^(?:export\s+)?interface\s+\w/, languages: TS_ONLY },
  { category: "type_definition", choice: "type_alias", pattern: /^(?:export\s+)?type\s+\w+\s*=/, languages: TS_ONLY },
  { category: "enum_vs_union", choice: "enum", pattern: /^(?:export\s+)?(?:const\s+)?enum\s+\w/, languages: TS_ONLY },
  { category: "enum_vs_union", choice: "union_type", pattern: /type\s+\w+\s*=\s*['"\w]+(?:\s*\|\s*['"\w]+){2,}/, languages: TS_ONLY },
  { category: "type_features", choice: "as_const", pattern: /\bas\s+const\b/, languages: TS_ONLY },
  { category: "type_features", choice: "generic_usage", pattern: /<[A-Z]\w*(?:,\s*[A-Z]\w*)*>/, languages: TS_ONLY },
  { category: "type_features", choice: "explicit_return_type", pattern: /\)\s*:\s*(?:Promise<|void|string|number|boolean|\w+\[\])/, languages: TS_ONLY },
  // Error handling
  { category: "error_handling", choice: "try_catch", pattern: /^\s*(?:try\s*\{|\}\s*catch\s*\()/ },
  { category: "error_handling", choice: "custom_error", pattern: /class\s+\w+Error\s+extends/ },
  // Testing
  { category: "test_structure", choice: "describe_it", pattern: /\b(?:describe|it)\s*\(/, languages: JS_TS },
  { category: "test_structure", choice: "test_fn", pattern: /\btest\s*\(/, languages: JS_TS },
  { category: "test_assertion", choice: "expect", pattern: /\bexpect\s*\(/, languages: JS_TS },
  { category: "test_assertion", choice: "assert", pattern: /\bassert\.\w/, languages: [...JS_TS, "python"] },
  // Architecture
  { category: "composition_style", choice: "inheritance", pattern: /class\s+\w+\s+extends\s/, languages: JS_TS },
  { category: "composition_style", choice: "composition", pattern: /\buse[A-Z]\w+\s*\(/, languages: JS_TS },
  { category: "architecture", choice: "factory_function", pattern: /(?:create|make|build)[A-Z]\w+\s*\(/, languages: JS_TS },
  { category: "architecture", choice: "event_pattern", pattern: /\.(?:on|emit|addEventListener|subscribe)\s*\(/, languages: JS_TS },
  // Python-specific
  { category: "python_style", choice: "type_hints", pattern: /def\s+\w+\(.*:\s*\w+/, languages: ["python"] },
  { category: "python_style", choice: "list_comprehension", pattern: /\[.*\bfor\b.*\bin\b.*\]/, languages: ["python"] },
  { category: "python_style", choice: "dataclass", pattern: /@dataclass/, languages: ["python"] },
  { category: "python_style", choice: "f_string", pattern: /f['"].*\{/, languages: ["python"] }
];
287
// Display names for the `category` keys used in PATTERN_RULES.
// summariseCodeStyle falls back to the raw key when a category is missing here.
var CATEGORY_LABELS = {
  import_style: "Import Style",
  export_style: "Export Style",
  function_style: "Function Style",
  async_style: "Async Style",
  control_flow: "Control Flow",
  string_style: "String Style",
  modern_operators: "Modern Operators",
  type_definition: "Type Definition",
  enum_vs_union: "Enum vs Union",
  type_features: "TypeScript Features",
  error_handling: "Error Handling",
  test_structure: "Test Structure",
  test_assertion: "Test Assertions",
  composition_style: "Composition Style",
  architecture: "Architecture Patterns",
  python_style: "Python Style"
};
305
// Display phrases for the `choice` keys used in PATTERN_RULES, inserted into
// observation sentences by summariseCodeStyle (raw key is used when missing).
var CHOICE_LABELS = {
  named_import: "named imports",
  default_import: "default imports",
  // NOTE(review): the path_alias rule also matches "~/" prefixes, not only "@/".
  path_alias: "path aliases (@/)",
  relative_import: "relative imports",
  named_export: "named exports",
  default_export: "default exports",
  re_export: "re-exports",
  arrow_function: "arrow functions",
  function_declaration: "function declarations",
  async_await: "async/await",
  then_chain: ".then() chains",
  early_return: "early returns",
  guard_clause: "guard clauses",
  template_literal: "template literals",
  optional_chaining: "optional chaining (?.)",
  nullish_coalescing: "nullish coalescing (??)",
  destructuring: "destructuring",
  interface: "interfaces",
  type_alias: "type aliases",
  enum: "enums",
  union_type: "union types",
  as_const: "as const assertions",
  generic_usage: "generics",
  explicit_return_type: "explicit return types",
  try_catch: "try/catch",
  custom_error: "custom error classes",
  describe_it: "describe/it blocks",
  test_fn: "test() functions",
  expect: "expect()",
  assert: "assert",
  inheritance: "class inheritance",
  composition: "composables/hooks",
  factory_function: "factory functions",
  event_pattern: "event/pub-sub patterns",
  type_hints: "type hints",
  list_comprehension: "list comprehensions",
  dataclass: "dataclasses",
  f_string: "f-strings"
};
345
/**
 * List the languages that appear in a unified diff, most frequent first.
 *
 * Every `diff --git a/<path> b/` header contributes one vote for the language
 * that LANG_MAP assigns to the text after the last "." in `<path>`; headers
 * without a dot or with an unmapped extension are ignored.
 *
 * @param {string} diffOutput - Output of `git log -p` (or similar).
 * @returns {string[]} Language names ordered by number of file headers.
 */
function detectLanguages(diffOutput) {
  const tally = {};
  const headers = String(diffOutput).matchAll(/^diff --git a\/(.*?) b\//gm);
  for (const [, filepath] of headers) {
    const dot = filepath.lastIndexOf(".");
    if (dot < 0) continue;
    const language = LANG_MAP[filepath.slice(dot)];
    if (language) increment(tally, language);
  }
  const ranked = Object.entries(tally);
  ranked.sort(([, votesA], [, votesB]) => votesB - votesA);
  return ranked.map(([language]) => language);
}
359
/**
 * Count occurrences of known code-style patterns on the added lines of a diff.
 *
 * Only rules whose language list intersects the detected languages (or that
 * have no language list) are applied. Blank lines and lines starting with
 * "//" or "*" after trimming are skipped, although they still count towards
 * `totalLinesAnalyzed`.
 *
 * @param {string} diffOutput - Unified diff text; falsy yields empty signals.
 * @returns {{counters: Record<string, Record<string, number>>, detectedLanguages: string[], totalLinesAnalyzed: number}}
 */
function extractCodeStyleFromDiff(diffOutput) {
  const signals = {
    counters: {},
    detectedLanguages: detectLanguages(diffOutput),
    totalLinesAnalyzed: 0
  };
  if (!diffOutput) return signals;

  const detected = new Set(signals.detectedLanguages);
  const applicable = PATTERN_RULES.filter(({ languages }) => {
    if (!languages) return true;
    return languages.some((lang) => detected.has(lang));
  });

  // Added lines start with "+"; "+++" marks a file header, not content.
  const added = [];
  for (const raw of diffOutput.split("\n")) {
    if (raw.startsWith("+") && !raw.startsWith("+++")) added.push(raw.slice(1));
  }
  signals.totalLinesAnalyzed = added.length;

  for (const raw of added) {
    const code = raw.trim();
    const skippable = code === "" || code.startsWith("//") || code.startsWith("*");
    if (skippable) continue;
    for (const { pattern, category, choice } of applicable) {
      if (!pattern.test(code)) continue;
      if (!signals.counters[category]) {
        signals.counters[category] = {};
      }
      increment(signals.counters[category], choice);
    }
  }
  return signals;
}
386
/**
 * Turn pattern counters into human-readable style observations.
 *
 * Per category:
 *  - fewer than 3 hits in total: ignored;
 *  - a single choice seen: reported only from 5 hits ("strong" from 15);
 *  - several choices: reported when the leader holds at least 60% of the
 *    hits ("strong" from 80%), naming the runner-up for contrast.
 * Observations are ordered strong-first, then alphabetically by category.
 *
 * @param {{counters: object, detectedLanguages: string[], totalLinesAnalyzed: number}} signals
 * @returns {{observations: Array<{category: string, observation: string, confidence: string}>, primaryLanguage: (string|undefined), totalLinesAnalyzed: number}}
 */
function summariseCodeStyle(signals) {
  const categoryLabel = (key) => CATEGORY_LABELS[key] ?? key;
  const choiceLabel = (key) => CHOICE_LABELS[key] ?? key;
  const observations = [];

  for (const category of Object.keys(signals.counters)) {
    const ranked = Object.entries(signals.counters[category]);
    ranked.sort(([, a], [, b]) => b - a);
    let total = 0;
    for (const [, hits] of ranked) total += hits;
    if (total < 3) continue;

    const [[leader, leaderHits]] = ranked;
    if (ranked.length === 1) {
      if (leaderHits >= 5) {
        observations.push({
          category: categoryLabel(category),
          observation: `Frequently uses ${choiceLabel(leader)} (${leaderHits} occurrences)`,
          confidence: leaderHits >= 15 ? "strong" : "moderate"
        });
      }
      continue;
    }

    const share = leaderHits / total;
    if (share < 0.6) continue;
    const runnerUp = ranked[1][0];
    observations.push({
      category: categoryLabel(category),
      observation: `Prefers ${choiceLabel(leader)} over ${choiceLabel(runnerUp)} (${Math.round(share * 100)}% of ${total})`,
      confidence: share >= 0.8 ? "strong" : "moderate"
    });
  }

  observations.sort((left, right) => {
    if (left.confidence === right.confidence) {
      return left.category.localeCompare(right.category);
    }
    return left.confidence === "strong" ? -1 : 1;
  });

  return {
    observations,
    primaryLanguage: signals.detectedLanguages[0],
    totalLinesAnalyzed: signals.totalLinesAnalyzed
  };
}
425
+
426
+ // src/extractors/gitlab.ts
427
+ import { readFileSync } from "fs";
428
+
429
+ // src/extractors/review-common.ts
430
/**
 * Summarise review-comment signals (shared by the GitLab and GitHub paths).
 *
 * With no comments only `{ source }` is returned. Otherwise the summary holds
 * the comment total, average length (brief means under 120 chars), a copy of
 * the severity-prefix counts, the share of question comments rounded to two
 * decimals, and up to eight distinct sample comments picked at fixed
 * positions of the length-sorted list (shortest to longest).
 *
 * @param {object} signals - reviewComments, commentLengths, severityPrefixes,
 *   questionComments, totalComments and source.
 * @returns {object} Observation object.
 */
function summariseReview(signals) {
  const obs = { source: signals.source };
  const { reviewComments, commentLengths } = signals;
  if (reviewComments.length === 0) return obs;

  obs.totalReviewComments = signals.totalComments;

  let lengthSum = 0;
  for (const len of commentLengths) lengthSum += len;
  obs.avgCommentLength = commentLengths.length > 0 ? Math.round(lengthSum / commentLengths.length) : 0;
  obs.tendsToBeBrief = obs.avgCommentLength < 120;
  obs.usesSeverityPrefixes = { ...signals.severityPrefixes };
  obs.questionRatio = Math.round(signals.questionComments / reviewComments.length * 100) / 100;

  const byLength = [...reviewComments].sort((a, b) => a.length - b.length);
  const n = byLength.length;
  // Sample at 0, 1/6, 1/3, 1/2, 2/3 and 5/6 of the way through, plus the two
  // longest comments.
  const fractions = [[0, 1], [1, 6], [1, 3], [1, 2], [2, 3], [5, 6]];
  const picks = fractions.map(([num, den]) => Math.floor(num * n / den));
  picks.push(n - 2, n - 1);

  // A Set removes repeats (short lists revisit positions) while keeping order.
  const samples = new Set();
  for (const index of picks) {
    if (index >= 0 && index < n) samples.add(byLength[index]);
  }
  obs.sampleComments = Array.from(samples);
  return obs;
}
459
+
460
+ // src/extractors/gitlab.ts
461
/**
 * Read a GitLab export file and collect the contributor's review comments.
 *
 * The file must contain a JSON array. Each element is either an object with
 * a `notes` array (its notes are used) or a note itself (an object with a
 * string `body`). A note belongs to the contributor when `author.name`
 * contains the contributor name, compared case-insensitively.
 *
 * @param {string} exportPath - Path of the JSON export.
 * @param {string} contributor - Name to look for in `author.name`.
 * @param {boolean} verbose - Enables progress logging.
 * @returns {object} Review signals (reviewComments, commentLengths,
 *   severityPrefixes, questionComments, totalComments, source "gitlab");
 *   empty when the file cannot be read or parsed.
 */
function extractGitLabSignals(exportPath, contributor, verbose) {
  const signals = {
    reviewComments: [],
    commentLengths: [],
    severityPrefixes: {},
    questionComments: 0,
    totalComments: 0,
    source: "gitlab"
  };
  let data;
  try {
    data = JSON.parse(readFileSync(exportPath, "utf-8"));
  } catch (e) {
    console.log(`[gitlab] Could not read export \u2014 ${e}`);
    return signals;
  }
  const comments = [];
  if (Array.isArray(data)) {
    for (const item of data) {
      if (item && typeof item === "object") {
        const obj = item;
        if (Array.isArray(obj.notes)) {
          // Append one by one: spreading a very large array into push()
          // can exceed the engine's argument limit.
          for (const note of obj.notes) comments.push(note);
        } else if (typeof obj.body === "string") {
          comments.push(obj);
        }
      }
    }
  }
  const contributorLower = contributor.toLowerCase();
  const prefixRe = /^(nit|suggestion|blocking|question|thought|minor|major)[:\s]/i;
  for (const comment of comments) {
    // `notes` comes from an external file; skip null or non-object entries
    // rather than crashing on `comment.author`.
    if (!comment || typeof comment !== "object") continue;
    const author = comment.author;
    const authorName = String(author?.name ?? "");
    if (!authorName.toLowerCase().includes(contributorLower)) continue;
    const body = String(comment.body ?? "").trim();
    if (!body) continue;
    signals.reviewComments.push(body);
    signals.commentLengths.push(body.length);
    signals.totalComments += 1;
    const prefixMatch = body.match(prefixRe);
    if (prefixMatch) {
      increment(signals.severityPrefixes, prefixMatch[1].toLowerCase());
    }
    // A comment counts as a question when it ends with "?" or opens with a
    // typical question phrase.
    if (body.endsWith("?") || body.toLowerCase().startsWith("do we") || body.toLowerCase().startsWith("should we")) {
      signals.questionComments += 1;
    }
  }
  log(verbose, "gitlab", `Found ${signals.totalComments} comments by ${contributor}`);
  return signals;
}
512
/**
 * Summarise GitLab review signals; delegates directly to summariseReview.
 *
 * @param {object} signals - Output of extractGitLabSignals.
 * @returns {object} Whatever summariseReview returns for these signals.
 */
function summariseGitLab(signals) {
  return summariseReview(signals);
}
515
+
516
+ // src/extractors/github.ts
517
var USER_AGENT = "poltergeist-cli/0.1.0";
/**
 * Extract owner and repository name from a GitHub URL.
 *
 * Accepts https URLs (`https://github.com/owner/repo[.git][/...]`) and SSH
 * remotes (`git@github.com:owner/repo.git`). Dots inside the repository name
 * are kept (e.g. "socket.io"); only a trailing ".git" suffix is dropped.
 *
 * @param {string} url - Repository URL or remote string.
 * @returns {{owner: string, repo: string}|null} Parsed parts, or null when
 *   the string does not look like a GitHub repository reference.
 */
function parseGitHubUrl(url) {
  // Repo: shortest run up to an optional ".git" that is followed by "/",
  // "?", "#" or the end of the string.
  const match = url.match(/github\.com[/:]([^/]+)\/([^/?#]+?)(?:\.git)?(?=[/?#]|$)/);
  if (!match) return null;
  return { owner: match[1], repo: match[2] };
}
523
// Module-level request state, reset at the start of extractGitHubSignals.
var apiCallCount = 0;
// Once true, ghFetch short-circuits and returns null without any request.
var rateLimited = false;
/**
 * GET a GitHub REST API resource and return the parsed JSON body.
 *
 * @param {string} urlPath - Absolute https URL, or a path appended to
 *   `https://api.github.com`.
 * @param {{token?: string, verbose?: boolean}} opts - Optional bearer token
 *   and verbosity flag.
 * @returns {Promise<any|null>} Parsed JSON, or null when the call was skipped
 *   (rate limited) or the response status was not ok. Network errors raised
 *   by fetch are not caught here.
 */
async function ghFetch(urlPath, opts) {
  if (rateLimited) return null;
  const url = urlPath.startsWith("https://") ? urlPath : `https://api.github.com${urlPath}`;
  const headers = {
    Accept: "application/vnd.github+json",
    "User-Agent": USER_AGENT
  };
  if (opts.token) {
    headers.Authorization = `Bearer ${opts.token}`;
  }
  apiCallCount++;
  const res = await fetch(url, { headers });
  if (!res.ok) {
    if (res.status === 403 || res.status === 429) {
      // This disables every later request, so say so: a silent stop looks
      // like "no data found" to the user.
      rateLimited = true;
      console.log(`[github] API returned ${res.status} (rate limited or access refused) \u2014 stopping API calls`);
      return null;
    }
    log(opts.verbose, "github", `API ${res.status} for ${url}`);
    return null;
  }
  const remaining = res.headers.get("X-RateLimit-Remaining");
  // Stop early, while this response is still usable, when the quota is
  // about to run out.
  if (remaining && Number.parseInt(remaining, 10) <= 1) {
    rateLimited = true;
    console.log("[github] Rate limit exhausted \u2014 stopping API calls");
  }
  return res.json();
}
552
/**
 * Find pull requests in a repository on which the contributor commented.
 *
 * Issues one search request (first page of up to 100 results, most recently
 * updated first).
 *
 * @param {string} owner - Repository owner.
 * @param {string} repo - Repository name.
 * @param {string} contributor - GitHub login used in the `commenter:` qualifier.
 * @param {{token?: string, verbose?: boolean}} opts - Passed through to ghFetch.
 * @returns {Promise<number[]>} PR numbers; empty when the search yields no `items`.
 */
async function searchPRsWithComments(owner, repo, contributor, opts) {
  const searchTerms = `repo:${owner}/${repo} commenter:${contributor} type:pr`;
  const endpoint = `/search/issues?q=${encodeURIComponent(searchTerms)}&per_page=100&sort=updated&order=desc`;
  const result = await ghFetch(endpoint, opts);
  const items = result?.items;
  if (!items) return [];
  return items.map((item) => item.number);
}
563
/**
 * Fetch the contributor's inline review comments and review bodies for one PR.
 *
 * Requests the PR's `comments` endpoint first and, unless the rate-limit flag
 * was raised meanwhile, its `reviews` endpoint. Only entries whose
 * `user.login` equals the contributor (case-insensitive) and whose body is
 * non-blank are kept; bodies are trimmed.
 *
 * @param {string} owner - Repository owner.
 * @param {string} repo - Repository name.
 * @param {number} prNumber - Pull request number.
 * @param {string} contributor - GitHub login to match.
 * @param {{token?: string, verbose?: boolean}} opts - Passed through to ghFetch.
 * @returns {Promise<string[]>} Trimmed comment bodies, inline comments first.
 */
async function fetchPRComments(owner, repo, prNumber, contributor, opts) {
  const wantedLogin = contributor.toLowerCase();
  const collected = [];
  const collectFrom = (payload) => {
    if (!Array.isArray(payload)) return;
    for (const entry of payload) {
      const isAuthor = entry.user?.login?.toLowerCase() === wantedLogin;
      if (isAuthor && entry.body?.trim()) {
        collected.push(entry.body.trim());
      }
    }
  };

  collectFrom(await ghFetch(`/repos/${owner}/${repo}/pulls/${prNumber}/comments?per_page=100`, opts));
  if (rateLimited) return collected;
  collectFrom(await ghFetch(`/repos/${owner}/${repo}/pulls/${prNumber}/reviews?per_page=100`, opts));
  return collected;
}
591
/**
 * Collect the contributor's PR review comments from the GitHub API.
 *
 * Resets the module-level API counters, searches for PRs the contributor
 * commented on, then fetches comments for up to 50 PRs (25 without a token),
 * stopping early once the rate-limit flag is raised.
 *
 * @param {string} owner - Repository owner.
 * @param {string} repo - Repository name.
 * @param {string} contributor - GitHub login of the contributor.
 * @param {string|undefined} token - Optional API token.
 * @param {boolean} verbose - Enables progress logging.
 * @returns {Promise<object>} Review signals (reviewComments, commentLengths,
 *   severityPrefixes, questionComments, totalComments, source "github").
 */
async function extractGitHubSignals(owner, repo, contributor, token, verbose) {
  const signals = {
    reviewComments: [],
    commentLengths: [],
    severityPrefixes: {},
    questionComments: 0,
    totalComments: 0,
    source: "github"
  };
  const opts = { token, verbose };
  apiCallCount = 0;
  rateLimited = false;
  const prefixRe = /^(nit|suggestion|blocking|question|thought|minor|major)[:\s]/i;
  log(verbose, "github", "Searching for PRs with review comments...");
  const prNumbers = await searchPRsWithComments(owner, repo, contributor, opts);
  if (prNumbers.length === 0) {
    if (rateLimited) {
      // The search did not complete normally; blaming the username would
      // send the user in the wrong direction.
      console.log(
        "[github] GitHub API rate limit reached or access refused while searching PRs \u2014 no review comments collected."
      );
    } else {
      console.log(
        `[github] No PR review comments found for "${contributor}". Make sure --contributor matches the GitHub username.`
      );
    }
    return signals;
  }
  log(
    verbose,
    "github",
    `Found ${prNumbers.length} PRs with comments by ${contributor}`
  );
  const prCap = token ? 50 : 25;
  const prsToFetch = prNumbers.slice(0, prCap);
  if (prNumbers.length > prCap) {
    // Suggest the token only when none was supplied.
    const hint = token ? "" : " (use --github-token for more)";
    console.log(
      `[github] Sampling ${prCap} of ${prNumbers.length} PRs${hint}`
    );
  }
  for (const prNumber of prsToFetch) {
    if (rateLimited) break;
    const comments = await fetchPRComments(
      owner,
      repo,
      prNumber,
      contributor,
      opts
    );
    for (const body of comments) {
      signals.reviewComments.push(body);
      signals.commentLengths.push(body.length);
      signals.totalComments += 1;
      const prefixMatch = body.match(prefixRe);
      if (prefixMatch) {
        increment(signals.severityPrefixes, prefixMatch[1].toLowerCase());
      }
      // Same question heuristic as the GitLab extractor.
      if (body.endsWith("?") || body.toLowerCase().startsWith("do we") || body.toLowerCase().startsWith("should we")) {
        signals.questionComments += 1;
      }
    }
  }
  log(
    verbose,
    "github",
    `Collected ${signals.totalComments} review comments (${apiCallCount} API calls)`
  );
  return signals;
}
653
+
654
+ // src/extractors/slack.ts
655
+ import { readFileSync as readFileSync2, readdirSync } from "fs";
656
+ import path2 from "path";
657
/**
 * Collect the contributor's messages from a directory of Slack export files.
 *
 * Every `.json` file under `exportDir` (searched recursively) that parses to
 * an array is scanned. A message is kept when its `username` (or
 * `user_profile.display_name`) contains the contributor name
 * case-insensitively, its trimmed text has at least 20 characters and does
 * not start with ":". Kept messages containing a technical keyword are also
 * listed in `technicalMessages`.
 *
 * @param {string} exportDir - Root directory of the export.
 * @param {string} contributor - Name to match against the message author.
 * @param {boolean} verbose - Enables progress logging.
 * @returns {{messages: string[], technicalMessages: string[]}} Empty lists
 *   when the directory cannot be read.
 */
function extractSlackSignals(exportDir, contributor, verbose) {
  const signals = { messages: [], technicalMessages: [] };
  const techKeywords = /\b(PR|MR|merge|review|component|composable|API|endpoint|refactor|naming|test|pattern|abstraction|type|interface|performance|bug|breaking)\b/i;
  const wantedName = contributor.toLowerCase();

  let listing;
  try {
    listing = readdirSync(exportDir, { recursive: true, encoding: "utf-8" });
  } catch {
    return signals;
  }

  for (const relPath of listing) {
    if (!relPath.endsWith(".json")) continue;
    const fullPath = path2.join(exportDir, relPath);
    let parsed;
    try {
      parsed = JSON.parse(readFileSync2(fullPath, "utf-8"));
    } catch {
      // Unreadable or malformed file: skip it and carry on.
      continue;
    }
    if (!Array.isArray(parsed)) continue;

    for (const msg of parsed) {
      if (!msg || typeof msg !== "object") continue;
      const username = String(msg.username ?? msg.user_profile?.display_name ?? "");
      const text = String(msg.text ?? "").trim();
      const fromContributor = username.toLowerCase().includes(wantedName);
      if (!text || !fromContributor) continue;
      if (text.length < 20 || text.startsWith(":")) continue;
      signals.messages.push(text);
      if (techKeywords.test(text)) {
        signals.technicalMessages.push(text);
      }
    }
  }

  log(
    verbose,
    "slack",
    `Found ${signals.messages.length} messages, ${signals.technicalMessages.length} technical`
  );
  return signals;
}
702
/**
 * Summarise Slack signals: count of technical messages plus the first eight.
 *
 * @param {{technicalMessages: string[]}} signals - Output of extractSlackSignals.
 * @returns {object} Empty object when there are no technical messages.
 */
function summariseSlack(signals) {
  const { technicalMessages } = signals;
  if (technicalMessages.length === 0) return {};
  return {
    technicalMessageCount: technicalMessages.length,
    sampleTechnicalMessages: technicalMessages.slice(0, 8)
  };
}
710
+
711
+ // src/extractors/docs.ts
712
+ import { readFileSync as readFileSync3, readdirSync as readdirSync2 } from "fs";
713
+ import path3 from "path";
714
/**
 * Find markdown documents attributed to the contributor and sample their prose.
 *
 * A `.md` file under `docsDir` (searched recursively) is attributed when an
 * `author: ...` style line within its first 500 characters contains the
 * contributor name, case-insensitively. For each such file the first three
 * paragraphs longer than 100 characters are kept as excerpts.
 *
 * @param {string} docsDir - Root directory of the documentation.
 * @param {string} contributor - Name to look for on the author line.
 * @param {boolean} verbose - Enables progress logging.
 * @returns {{authoredDocs: string[], docExcerpts: string[]}} Empty lists when
 *   the directory cannot be read.
 */
function extractDocsSignals(docsDir, contributor, verbose) {
  const signals = { authoredDocs: [], docExcerpts: [] };
  const wantedName = contributor.toLowerCase();

  let listing;
  try {
    listing = readdirSync2(docsDir, { recursive: true, encoding: "utf-8" });
  } catch {
    return signals;
  }

  for (const relPath of listing) {
    if (!relPath.endsWith(".md")) continue;
    const fullPath = path3.join(docsDir, relPath);
    let content;
    try {
      content = readFileSync3(fullPath, "utf-8");
    } catch {
      // Unreadable entry: skip it.
      continue;
    }

    const authorLine = content.slice(0, 500).match(/author[:\s]+(.+)/i);
    if (!authorLine || !authorLine[1].toLowerCase().includes(wantedName)) continue;

    signals.authoredDocs.push(fullPath);
    const excerpts = [];
    for (const chunk of content.split("\n\n")) {
      if (excerpts.length === 3) break;
      const paragraph = chunk.trim();
      if (paragraph.length > 100) excerpts.push(paragraph);
    }
    signals.docExcerpts.push(...excerpts);
  }

  log(verbose, "docs", `Found ${signals.authoredDocs.length} docs attributed to ${contributor}`);
  return signals;
}
745
+
746
+ // src/generator.ts
747
/**
 * Render `[name, count]` pairs as "name<suffix> (count)" joined by ", ".
 *
 * @param {Array<[string, number]>} pairs - Pairs to render, in order.
 * @param {string} [suffix=""] - Text appended directly after each name.
 * @returns {string} The joined list; empty string for no pairs.
 */
function formatPairs(pairs, suffix = "") {
  const rendered = [];
  for (const [name, count] of pairs) {
    rendered.push(`${name}${suffix} (${count})`);
  }
  return rendered.join(", ");
}
750
/**
 * Describe the clearly dominant naming style, if there is one.
 *
 * Needs at least 10 classified identifiers; a style is dominant when it
 * accounts for more than 70% of them.
 *
 * @param {{camelCase: number, PascalCase: number, snake_case: number}} style
 * @returns {string|null} A "Strongly prefers ..." sentence, or null.
 */
function dominantNamingStyle(style) {
  const total = style.camelCase + style.PascalCase + style.snake_case;
  if (total < 10) return null;
  const verdicts = [
    ["camelCase", "Strongly prefers camelCase"],
    ["snake_case", "Strongly prefers snake_case"],
    ["PascalCase", "Strongly prefers PascalCase"]
  ];
  for (const [key, verdict] of verdicts) {
    if (style[key] / total > 0.7) return verdict;
  }
  return null;
}
758
+ function buildGhostMarkdown(input) {
759
+ const { contributor, slug, gitObs, codeStyleObs, reviewObs, slackObs, docsSignals, sourcesUsed } = input;
760
+ const today = (/* @__PURE__ */ new Date()).toISOString().slice(0, 10);
761
+ const domains = gitObs.inferredDomains?.length ? gitObs.inferredDomains.join(", ") : "_[fill in manually]_";
762
+ const lines = [
763
+ `# Contributor Soul: ${contributor}`,
764
+ "",
765
+ "## Identity",
766
+ `- **Slug**: ${slug}`,
767
+ "- **Role**: _[fill in manually]_",
768
+ `- **Primary domains**: ${domains}`,
769
+ `- **Soul last updated**: ${today}`,
770
+ `- **Sources used**: ${sourcesUsed.join(", ")}`,
771
+ "",
772
+ "---",
773
+ "",
774
+ "## Review Philosophy",
775
+ "",
776
+ "### What they care about most (ranked)",
777
+ "_[Fill in manually after reviewing the data below \u2014 re-order based on review comment patterns]_",
778
+ "1. Correctness",
779
+ "2. Naming",
780
+ "3. Component / module boundaries",
781
+ "4. Test coverage",
782
+ "5. Consistency with existing patterns",
783
+ "",
784
+ "### What they tend to ignore",
785
+ "_[Fill in manually]_",
786
+ "",
787
+ "### Dealbreakers",
788
+ "_[Fill in manually]_",
789
+ "",
790
+ "### Recurring questions / phrases"
791
+ ];
792
+ if (reviewObs.sampleComments && reviewObs.sampleComments.length > 0) {
793
+ const questions = reviewObs.sampleComments.filter((c) => c.endsWith("?"));
794
+ if (questions.length > 0) {
795
+ for (const q of questions.slice(0, 5)) {
796
+ const truncated = q.length > 150 ? q.slice(0, 150) + "..." : q;
797
+ lines.push(`- "${truncated}"`);
798
+ }
799
+ } else {
800
+ lines.push("_[Fill in from sample comments below]_");
801
+ }
802
+ } else {
803
+ lines.push("_[Fill in from sample comments below]_");
804
+ }
805
+ lines.push("", "---", "", "## Communication Style", "");
806
+ if (reviewObs.totalReviewComments != null) {
807
+ lines.push(
808
+ `- **Total review comments analysed**: ${reviewObs.totalReviewComments}`,
809
+ `- **Average comment length**: ${reviewObs.avgCommentLength ?? "N/A"} chars`,
810
+ `- **Tends to be brief**: ${reviewObs.tendsToBeBrief ?? "unknown"}`,
811
+ `- **Question ratio**: ${reviewObs.questionRatio ?? "N/A"} (proportion of comments phrased as questions)`
812
+ );
813
+ if (reviewObs.usesSeverityPrefixes && Object.keys(reviewObs.usesSeverityPrefixes).length > 0) {
814
+ const prefixes = Object.entries(reviewObs.usesSeverityPrefixes).map(([k, v]) => `${k} (${v})`).join(", ");
815
+ lines.push(`- **Severity prefixes used**: ${prefixes}`);
816
+ }
817
+ lines.push("");
818
+ }
819
+ lines.push(
820
+ "### Tone",
821
+ "_[Fill in manually \u2014 direct? warm? collaborative? terse?]_",
822
+ "",
823
+ "### Severity prefixes they use"
824
+ );
825
+ if (reviewObs.usesSeverityPrefixes && Object.keys(reviewObs.usesSeverityPrefixes).length > 0) {
826
+ lines.push(
827
+ Object.keys(reviewObs.usesSeverityPrefixes).join(", ")
828
+ );
829
+ } else {
830
+ lines.push("_[Derived from comments above \u2014 fill in which they actually use]_");
831
+ }
832
+ lines.push(
833
+ "",
834
+ "### Vocabulary / phrases they use",
835
+ "_[Fill in from sample comments below]_",
836
+ "",
837
+ "---",
838
+ "",
839
+ "## Code Patterns",
840
+ ""
841
+ );
842
+ if (gitObs.primaryExtensions) {
843
+ lines.push(
844
+ `**Primary file types**: ${formatPairs(gitObs.primaryExtensions)}`
845
+ );
846
+ }
847
+ if (gitObs.primaryDirectories) {
848
+ lines.push(
849
+ `**Primary codebase areas**: ${formatPairs(gitObs.primaryDirectories, "/")}`
850
+ );
851
+ }
852
+ if (gitObs.namingStyle) {
853
+ const ns = gitObs.namingStyle;
854
+ lines.push(
855
+ `**Naming style**: camelCase (${ns.camelCase}), PascalCase (${ns.PascalCase}), snake_case (${ns.snake_case})`
856
+ );
857
+ const dominant = dominantNamingStyle(ns);
858
+ if (dominant) lines.push(`> ${dominant}`);
859
+ }
860
+ if (gitObs.fileTypeProfile) {
861
+ const p = gitObs.fileTypeProfile;
862
+ const parts = [
863
+ p.components && `components (${p.components})`,
864
+ p.tests && `tests (${p.tests})`,
865
+ p.configs && `configs (${p.configs})`,
866
+ p.docs && `docs (${p.docs})`,
867
+ p.other && `other (${p.other})`
868
+ ].filter(Boolean);
869
+ if (parts.length > 0) {
870
+ lines.push(`**File type profile**: ${parts.join(", ")}`);
871
+ }
872
+ }
873
+ if (gitObs.primaryExtensions || gitObs.primaryDirectories || gitObs.namingStyle || gitObs.fileTypeProfile) {
874
+ lines.push("");
875
+ }
876
+ if (codeStyleObs.observations.length > 0) {
877
+ lines.push("### Detected Code Style Preferences");
878
+ if (codeStyleObs.primaryLanguage) {
879
+ lines.push(
880
+ `_Primary language: ${codeStyleObs.primaryLanguage} \xB7 ${codeStyleObs.totalLinesAnalyzed} added lines analyzed_`
881
+ );
882
+ }
883
+ lines.push("");
884
+ for (const obs of codeStyleObs.observations) {
885
+ if (obs.confidence === "strong") {
886
+ lines.push(`- **${obs.category}**: ${obs.observation}`);
887
+ } else {
888
+ lines.push(`- ${obs.category}: ${obs.observation}`);
889
+ }
890
+ }
891
+ lines.push("");
892
+ }
893
+ lines.push(
894
+ "### Patterns they introduce / prefer"
895
+ );
896
+ const namingInsight = gitObs.namingStyle ? dominantNamingStyle(gitObs.namingStyle) : null;
897
+ if (namingInsight) {
898
+ lines.push(`- ${namingInsight}`);
899
+ }
900
+ lines.push(
901
+ "_[Fill in manually from code inspection and review comment themes]_",
902
+ "",
903
+ "### Patterns they push back on",
904
+ "_[Fill in manually]_",
905
+ "",
906
+ "---",
907
+ "",
908
+ "## Known Blind Spots",
909
+ "_[Fill in manually \u2014 areas they historically under-comment on]_",
910
+ "",
911
+ "---",
912
+ "",
913
+ "## Example Review Comments",
914
+ "_These ground Claude's output in their actual voice. Aim for 5\u201310 verbatim examples._",
915
+ ""
916
+ );
917
+ if (reviewObs.sampleComments && reviewObs.sampleComments.length > 0) {
918
+ for (const comment of reviewObs.sampleComments) {
919
+ const truncated = comment.length > 300 ? comment.slice(0, 300) + "..." : comment;
920
+ lines.push(`> ${truncated}`, "");
921
+ }
922
+ }
923
+ lines.push("---", "", "## Commit Message Style", "");
924
+ if (gitObs.commitMessageAvgLength != null) {
925
+ lines.push(`- **Avg length**: ${gitObs.commitMessageAvgLength} chars`);
926
+ lines.push(
927
+ `- **Uses imperative mood**: ${gitObs.likelyUsesImperativeMood ?? "unknown"}`
928
+ );
929
+ if (gitObs.conventionalCommitPrefixes) {
930
+ const prefixStr = Object.entries(gitObs.conventionalCommitPrefixes).sort((a, b) => b[1] - a[1]).map(([k, v]) => `${k} (${v})`).join(", ");
931
+ lines.push(`- **Conventional commits**: yes \u2014 ${prefixStr}`);
932
+ if (gitObs.commitScopePatterns && gitObs.commitScopePatterns.length > 0) {
933
+ lines.push(
934
+ `- **Common scopes**: ${gitObs.commitScopePatterns.join(", ")}`
935
+ );
936
+ }
937
+ }
938
+ if (gitObs.commitMessageSample) {
939
+ lines.push("- **Sample messages**:");
940
+ for (const msg of gitObs.commitMessageSample) {
941
+ lines.push(` - \`${msg}\``);
942
+ }
943
+ }
944
+ lines.push("");
945
+ }
946
+ if (slackObs.sampleTechnicalMessages && slackObs.sampleTechnicalMessages.length > 0) {
947
+ lines.push("---", "", "## Slack / Chat Voice (technical messages)", "");
948
+ for (const msg of slackObs.sampleTechnicalMessages) {
949
+ const truncated = msg.length > 200 ? msg.slice(0, 200) + "..." : msg;
950
+ lines.push(`> ${truncated}`, "");
951
+ }
952
+ }
953
+ lines.push("---", "", "## Docs Authored", "");
954
+ if (docsSignals.authoredDocs.length > 0) {
955
+ for (const doc of docsSignals.authoredDocs) {
956
+ lines.push(`- \`${doc}\``);
957
+ }
958
+ } else {
959
+ lines.push("_None found or docs-dir not provided._");
960
+ }
961
+ lines.push(
962
+ "",
963
+ "---",
964
+ "",
965
+ "<!-- Generated by @poltergeist-ai/cli -->",
966
+ `<!-- Run date: ${today} -->`
967
+ );
968
+ return lines.join("\n");
969
+ }
970
+
971
+ // src/cli.ts
972
// Root folder for everything this CLI writes into the working tree.
const POLTERGEIST_DIR = ".poltergeist";
// Where clones of remote repositories are cached (see resolveGitRepo).
const CACHE_DIR = [POLTERGEIST_DIR, "repos"].join("/");
// Default folder for generated ghost profiles (see the --output default in run).
const GHOSTS_DIR = [POLTERGEIST_DIR, "ghosts"].join("/");
975
/**
 * Makes sure the clone cache directory is ignored by git.
 *
 * Works on the `.gitignore` in the current working directory:
 * - when the file is missing, it is created with the cache entry;
 * - when the file exists, the entry is appended unless an active rule already
 *   mentions `.poltergeist/repos` (with or without a trailing slash).
 *
 * Comment lines (`#`) and negated patterns (`!`) are not active ignore rules,
 * so they never count as "already present".
 *
 * @returns {void}
 */
function ensureReposGitignored() {
  const gitignorePath = ".gitignore";
  const entry = ".poltergeist/repos/";
  const block = `# Poltergeist cached clones\n${entry}\n`;

  if (!existsSync(gitignorePath)) {
    writeFileSync(gitignorePath, block);
    return;
  }

  const content = readFileSync4(gitignorePath, "utf-8");
  // Matches the cache directory itself or anything below it, but not a
  // sibling such as ".poltergeist/repos-old".
  const cacheRule = /\.poltergeist\/repos(?:\/|$)/;
  const alreadyIgnored = content.split(/\r?\n/).some((rawLine) => {
    const line = rawLine.trim();
    if (line === "" || line.startsWith("#") || line.startsWith("!")) {
      return false;
    }
    return cacheRule.test(line);
  });
  if (alreadyIgnored) return;

  // The leading newline keeps the block on its own line even when the
  // existing file does not end with one.
  appendFileSync(gitignorePath, `\n${block}`);
}
991
/**
 * Writes the command-line help text to stdout with a single `console.log` call.
 *
 * @returns {void}
 */
function printUsage() {
  const helpLines = [
    "Usage: poltergeist [extract] [options]",
    "",
    "Build a contributor ghost profile from data sources.",
    "",
    "Options:",
    "--contributor <name> Contributor name (required; use GitHub username for best results)",
    "--email <email> Contributor email (for git log filtering)",
    "--slug <slug> Output slug (default: derived from name)",
    "--git-repo <path|url> Path to local git repo or remote URL (cloned to .poltergeist/repos/)",
    "--gitlab-export <path> Path to GitLab MR comments JSON export",
    "--slack-export <path> Path to Slack export directory",
    "--docs-dir <path> Path to design docs / ADRs directory",
    "--github-token <token> GitHub personal access token (for higher API rate limits)",
    "--output <path> Output path (default: .poltergeist/ghosts/<slug>.md)",
    "--verbose Enable verbose logging",
    "--help Show this help message",
  ];
  console.log(helpLines.join("\n"));
}
1009
/**
 * Reports whether `value` names a remote git repository rather than a local path.
 *
 * Recognised forms are `https://`, `http://`, `ssh://` and `git://` URLs plus
 * the scp-like `git@host:path` shorthand. Anything else is treated as a local
 * filesystem path by the caller.
 *
 * @param {string} value - The raw `--git-repo` argument.
 * @returns {boolean} `true` when the value should be cloned instead of used in place.
 */
function isRemoteUrl(value) {
  const remotePrefixes = ["https://", "http://", "ssh://", "git://", "git@"];
  return remotePrefixes.some((prefix) => value.startsWith(prefix));
}
1012
/**
 * Derives a flat, dash-separated name from a remote repository URL; the
 * result is used as the cache directory name for that repository.
 *
 * @param {string} url - Remote repository URL (http(s) or `git@host:path`).
 * @returns {string} The URL with scheme/user prefix and `.git` suffix removed
 *   and every run of `/` or `:` collapsed into a single `-`.
 */
function repoSlug(url) {
  // Applied in order; each entry is [pattern, replacement].
  const rewrites = [
    [/\.git$/, ""], // trailing ".git"
    [/^https?:\/\//, ""], // http(s) scheme
    [/^git@/, ""], // scp-style "git@" prefix
    [/[/:]+/g, "-"], // path and port separators
    [/^-|-$/g, ""], // dash left over at either end
  ];
  let slug = url;
  for (const [pattern, replacement] of rewrites) {
    slug = slug.replace(pattern, replacement);
  }
  return slug;
}
1015
/**
 * Turns the `--git-repo` argument into a path that git commands can run in.
 *
 * Local paths are returned unchanged. Remote URLs are cloned once into
 * `CACHE_DIR/<repoSlug(url)>` (`--bare --filter=blob:none`) and reused on
 * later runs after a best-effort refresh.
 *
 * @param {string} value - Local path or remote repository URL.
 * @param {boolean} verbose - When true, git's own output is passed through.
 * @returns {string} Path of the repository to analyse.
 * @throws {Error} If the initial clone fails or exceeds its 120 s timeout.
 */
function resolveGitRepo(value, verbose) {
  if (!isRemoteUrl(value)) return value;

  const slug = repoSlug(value);
  const cloneDir = path4.join(CACHE_DIR, slug);
  const stdio = verbose ? "inherit" : "pipe";

  if (existsSync(cloneDir)) {
    console.log(`[extract] Using cached clone at ${cloneDir}`);
    try {
      // A `--bare` clone has no fetch refspec configured, so a plain
      // `git fetch` would only update FETCH_HEAD and leave the cached
      // branches stale. Name the refspec explicitly to refresh them.
      execFileSync2(
        "git",
        ["-C", cloneDir, "fetch", "--quiet", "origin", "+refs/heads/*:refs/heads/*"],
        {
          encoding: "utf-8",
          stdio,
          timeout: 6e4
        }
      );
    } catch {
      // Refreshing is best-effort: an offline run can still use the cache.
      console.log("[extract] Warning: fetch failed, using existing cache");
    }
    return cloneDir;
  }

  console.log(`[extract] Cloning ${value} into ${cloneDir}...`);
  mkdirSync(CACHE_DIR, { recursive: true });
  // Keep the cached clones out of the user's own repository.
  ensureReposGitignored();
  execFileSync2(
    "git",
    ["clone", "--bare", "--filter=blob:none", value, cloneDir],
    {
      encoding: "utf-8",
      stdio,
      timeout: 12e4
    }
  );
  return cloneDir;
}
1046
/**
 * CLI entry point: parses arguments, runs every extractor whose data source
 * was supplied, renders the ghost profile markdown and writes it to disk.
 *
 * Sources are processed in a fixed order: git history (plus GitHub PR review
 * comments when the repo argument parses as a GitHub URL), GitLab export,
 * Slack export, docs directory. A GitHub API failure is logged as a warning
 * and does not abort the run.
 *
 * @returns {Promise<number>} Process exit code: 0 on success or when help was
 *   shown, 1 when `--contributor` is missing or no data source was given.
 * @throws {Error} Propagates argument-parsing errors (strict mode) and any
 *   extractor or filesystem failure other than the GitHub fetch.
 */
async function run() {
  const rawArgs = process.argv.slice(2);
  if (rawArgs.length === 0 || rawArgs[0] === "--help" || rawArgs[0] === "-h") {
    printUsage();
    return 0;
  }
  // The leading "extract" subcommand word is optional.
  const args = rawArgs[0] === "extract" ? rawArgs.slice(1) : rawArgs;
  const { values } = parseArgs({
    args,
    options: {
      contributor: { type: "string" },
      email: { type: "string" },
      slug: { type: "string" },
      "git-repo": { type: "string" },
      "gitlab-export": { type: "string" },
      "slack-export": { type: "string" },
      "docs-dir": { type: "string" },
      "github-token": { type: "string" },
      output: { type: "string" },
      verbose: { type: "boolean", default: false },
      help: { type: "boolean", default: false }
    },
    strict: true
  });
  if (values.help) {
    printUsage();
    return 0;
  }
  if (!values.contributor) {
    console.error("Error: --contributor is required.");
    printUsage();
    return 1;
  }
  const contributor = values.contributor;
  const email = values.email;
  const slug = values.slug ?? slugify(contributor);
  const outputPath = values.output ?? `${GHOSTS_DIR}/${slug}.md`;
  const verbose = values.verbose ?? false;
  const githubToken = values["github-token"];

  // Empty defaults let the markdown builder run with any subset of sources.
  const sourcesUsed = [];
  let gitObs = {};
  let codeStyleObs = { observations: [], totalLinesAnalyzed: 0 };
  let reviewObs = {};
  let slackObs = {};
  let docsSignals = { authoredDocs: [], docExcerpts: [] };

  if (values["git-repo"]) {
    const repoPath = resolveGitRepo(values["git-repo"], verbose);
    console.log(`[extract] Mining git history in ${repoPath}...`);
    const gitSignals = extractGitSignals(
      repoPath,
      contributor,
      email,
      verbose
    );
    gitObs = summariseGit(gitSignals);
    if (gitSignals.rawDiffOutput) {
      const codeStyleSignals = extractCodeStyleFromDiff(gitSignals.rawDiffOutput);
      codeStyleObs = summariseCodeStyle(codeStyleSignals);
    }
    sourcesUsed.push("git-history");
    // Review comments are fetched only when the original argument (not the
    // resolved clone path) parses as a GitHub URL.
    const ghParsed = parseGitHubUrl(values["git-repo"]);
    if (ghParsed) {
      console.log(
        `[extract] Fetching GitHub PR review comments for ${contributor}...`
      );
      try {
        const ghSignals = await extractGitHubSignals(
          ghParsed.owner,
          ghParsed.repo,
          contributor,
          githubToken,
          verbose
        );
        if (ghSignals.totalComments > 0) {
          reviewObs = summariseReview(ghSignals);
          sourcesUsed.push("github-pr-comments");
        }
      } catch (e) {
        console.log(`[github] Warning: could not fetch PR comments \u2014 ${e}`);
      }
    }
  }

  if (values["gitlab-export"]) {
    console.log(
      `[extract] Parsing GitLab comment export ${values["gitlab-export"]}...`
    );
    const gitlabSignals = extractGitLabSignals(
      values["gitlab-export"],
      contributor,
      verbose
    );
    const gitlabObs = summariseGitLab(gitlabSignals);
    if (reviewObs.totalReviewComments && gitlabObs.totalReviewComments) {
      // Both sources produced comments: merge samples (capped at 10) and
      // add up the totals; other stats stay as computed from GitHub.
      reviewObs.sampleComments = [
        ...reviewObs.sampleComments ?? [],
        ...gitlabObs.sampleComments ?? []
      ].slice(0, 10);
      reviewObs.totalReviewComments += gitlabObs.totalReviewComments;
    } else if (gitlabObs.totalReviewComments) {
      reviewObs = gitlabObs;
    }
    sourcesUsed.push("gitlab-comments");
  }

  if (values["slack-export"]) {
    console.log(
      `[extract] Scanning Slack export ${values["slack-export"]}...`
    );
    const slackSignals = extractSlackSignals(
      values["slack-export"],
      contributor,
      verbose
    );
    slackObs = summariseSlack(slackSignals);
    sourcesUsed.push("slack-export");
  }

  if (values["docs-dir"]) {
    console.log(`[extract] Scanning docs in ${values["docs-dir"]}...`);
    docsSignals = extractDocsSignals(values["docs-dir"], contributor, verbose);
    sourcesUsed.push("docs");
  }

  if (sourcesUsed.length === 0) {
    console.error(
      "Error: no data sources provided. Use --git-repo, --gitlab-export, --slack-export, or --docs-dir."
    );
    return 1;
  }

  console.log(
    `[extract] Building ghost file for ${contributor} (slug: ${slug})...`
  );
  const ghostMd = buildGhostMarkdown({
    contributor,
    slug,
    gitObs,
    codeStyleObs,
    reviewObs,
    slackObs,
    docsSignals,
    sourcesUsed
  });
  const dir = path4.dirname(outputPath);
  if (dir && dir !== ".") {
    mkdirSync(dir, { recursive: true });
  }
  writeFileSync(outputPath, ghostMd);

  console.log(`\nGhost draft written to: ${outputPath}`);
  console.log("\nNext steps:");
  console.log(
    " 1. Open the file and fill in all [fill in manually] sections"
  );
  console.log(
    " 2. Review the sample comments \u2014 they're the most important voice signal"
  );
  console.log(
    " 3. Validate with a team member who knows this contributor"
  );
  // Pipe the diff in: `< git diff main` would redirect from a file named "git".
  console.log(
    ` 4. Run a test review: git diff main | claude 'Review as @${slug}'`
  );
  return 0;
}
1207
// Start the CLI. A resolved run() supplies the process exit code; a rejected
// one is logged to stderr and the process exits with status 1.
const exitWithCode = (code) => process.exit(code);
const exitWithFailure = (err) => {
  console.error(err);
  process.exit(1);
};
run().then(exitWithCode, exitWithFailure);