@poltergeist-ai/cli 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,978 @@
1
+ // src/extractors/git.ts
2
+ import path from "path";
3
+
4
+ // src/utils.ts
5
+ import { execFileSync } from "child_process";
6
/**
 * Turn a display name into a lowercase, dash-separated slug.
 *
 * Every run of characters outside [a-z0-9] collapses into one "-", and a
 * dash left at the very start or very end is removed.
 *
 * @param {string} name - Name to convert.
 * @returns {string} The slug (may be empty).
 */
function slugify(name) {
  const lowered = name.toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9]+/g, "-");
  return dashed.replace(/^-|-$/g, "");
}
9
/**
 * Run `git -C <repoPath> <args...>` synchronously and return its stdout.
 *
 * @param {string} repoPath - Directory passed to git's `-C` option.
 * @param {string[]} args - Remaining git arguments.
 * @returns {string} stdout decoded as UTF-8.
 * @throws Whatever `execFileSync` throws (non-zero exit, output above the
 *   50 MiB `maxBuffer`, git not found).
 */
function runGit(repoPath, args) {
  const argv = ["-C", repoPath, ...args];
  const options = {
    encoding: "utf-8",
    maxBuffer: 50 * 1024 * 1024
  };
  return execFileSync("git", argv, options);
}
15
/**
 * Print a tagged progress line to stdout, but only in verbose mode.
 *
 * @param {boolean} verbose - When falsy nothing is printed.
 * @param {string} tag - Short source label shown in square brackets.
 * @param {string} msg - Message text.
 */
function log(verbose, tag, msg) {
  if (!verbose) return;
  console.log(`[${tag}] ${msg}`);
}
20
/**
 * Add `amount` to the counter stored at `record[key]`, treating a missing
 * (null/undefined) entry as 0. Mutates `record` in place.
 *
 * @param {Object<string, number>} record - Counter table to update.
 * @param {string} key - Counter name.
 * @param {number} [amount=1] - Value to add.
 */
function increment(record, key, amount = 1) {
  const current = record[key] ?? 0;
  record[key] = current + amount;
}
23
/**
 * Return the `n` highest-valued entries of a counter table.
 *
 * @param {Object<string, number>} record - Counter table.
 * @param {number} n - Maximum number of entries to return.
 * @returns {Array<[string, number]>} `[key, count]` pairs, largest count first.
 */
function topN(record, n) {
  const pairs = Object.entries(record);
  pairs.sort((left, right) => right[1] - left[1]);
  return pairs.slice(0, n);
}
26
+
27
+ // src/extractors/git.ts
28
// Matches a Conventional Commits subject prefix: `type`, optional `(scope)`,
// optional `!`, then `: `. Group 1 is the type, group 2 the scope.
var CONVENTIONAL_RE = /^(\w+)(?:\(([^)]+)\))?[!]?:\s/;

// Ordered [pattern, domain] rules; consumers stop at the FIRST match, so
// order is significant.
//  - "testing" comes first so that `Button.test.tsx` / `api.spec.py` count as
//    tests instead of being claimed by the frontend/backend extension rules
//    (profileFileTypes also checks tests first).
//  - `.tf` / `.tfvars` are anchored to the end of the path so unrelated
//    extensions that merely start with ".tf" (e.g. `.tflite`) are not
//    classified as infrastructure.
var DOMAIN_RULES = [
  [/\.(test|spec)\.[^.]+$|__tests__\//, "testing"],
  [/\.(vue|svelte|jsx|tsx|css|scss|sass|less)$/, "frontend"],
  [/\.(go|rs|py|java|rb|php|cs|ex|exs)$/, "backend"],
  [/Dockerfile|\.(tf|tfvars)$|\.(yaml|yml)/, "infrastructure"],
  [/\.md$/, "documentation"],
  [/(config|rc\.|\.config\.|tsconfig|vite\.config|eslint|prettier|package\.json)/, "tooling"]
];
37
/**
 * Infer the contributor's main work domains from their per-file touch counts.
 *
 * Each path contributes its count to the first DOMAIN_RULES entry whose
 * pattern matches it; paths matching no rule are ignored. The four
 * highest-scoring domains are kept, and of those only the ones with a score
 * of at least 3 are returned.
 *
 * @param {Object<string, number>} filesModified - Path -> times touched.
 * @returns {string[]} Domain names, highest score first.
 */
function inferDomains(filesModified) {
  const tally = {};
  Object.entries(filesModified).forEach(([file, touches]) => {
    const rule = DOMAIN_RULES.find(([pattern]) => pattern.test(file));
    if (rule) increment(tally, rule[1], touches);
  });
  const domains = [];
  for (const [domain, score] of topN(tally, 4)) {
    if (score >= 3) domains.push(domain);
  }
  return domains;
}
49
/**
 * Bucket per-file touch counts into coarse file-type categories.
 *
 * A path is assigned to the first category whose pattern matches, checked in
 * the order tests -> configs -> components -> docs; anything else is "other".
 *
 * @param {Object<string, number>} filesModified - Path -> times touched.
 * @returns {{tests: number, configs: number, components: number, docs: number, other: number}}
 */
function profileFileTypes(filesModified) {
  const buckets = [
    ["tests", /\.(test|spec)\.[^.]+$|__tests__\//],
    ["configs", /(config|rc\.|\.config\.|tsconfig|eslint|prettier)/],
    ["components", /\.(vue|svelte|jsx|tsx)$/],
    ["docs", /\.md$/]
  ];
  const profile = { tests: 0, configs: 0, components: 0, docs: 0, other: 0 };
  for (const file of Object.keys(filesModified)) {
    const hit = buckets.find(([, pattern]) => pattern.test(file));
    const bucket = hit ? hit[0] : "other";
    profile[bucket] += filesModified[file];
  }
  return profile;
}
66
/**
 * Collect raw signals about one author from a local git repository.
 *
 * Four independent `git log` queries are run, each best-effort: a failing
 * query leaves its part of the result empty instead of aborting the others.
 *   1. Subjects and full messages of up to 200 commits.
 *   2. Paths of files the author added (also tallied by extension).
 *   3. Every path the author touched, tallied per path.
 *   4. Patches of up to 50 non-merge commits, kept verbatim in
 *      `rawDiffOutput` and scanned for declared identifier names.
 *
 * @param {string} repoPath - Repository directory.
 * @param {string} contributor - Author name; used for `--author` when `email` is null/undefined.
 * @param {string|null|undefined} email - Author e-mail; preferred for `--author` when given.
 * @param {boolean} verbose - Enables progress and failure diagnostics via `log`.
 * @returns {object} Signals: commitMessages, commitBodies, filesCreated,
 *   filesModified, extensions, namingPatterns, commitCount and (when the
 *   patch query succeeds) rawDiffOutput.
 */
function extractGitSignals(repoPath, contributor, email, verbose) {
  const signals = {
    commitMessages: [],
    commitBodies: [],
    filesCreated: [],
    filesModified: {},
    extensions: {},
    namingPatterns: [],
    commitCount: 0
  };
  const authorFilter = `--author=${email ?? contributor}`;
  // The log format below ends every commit with %x01 and separates the
  // subject from the full message with %x00; split on those same bytes.
  const RECORD_SEP = "\x01";
  const FIELD_SEP = "\0";
  try {
    const output = runGit(repoPath, [
      "log",
      authorFilter,
      "--format=%s%x00%B%x01",
      "--max-count=200"
    ]);
    for (const entry of output.split(RECORD_SEP)) {
      if (!entry) continue;
      const [subject, ...bodyParts] = entry.split(FIELD_SEP);
      const trimmedSubject = subject.trim();
      if (!trimmedSubject) continue;
      signals.commitMessages.push(trimmedSubject);
      // %B is the whole message (subject included), so a commit without a
      // body yields text equal to the subject; skip those.
      const body = bodyParts.join(FIELD_SEP).trim();
      if (body && body !== trimmedSubject) {
        signals.commitBodies.push(body);
      }
    }
    signals.commitCount = signals.commitMessages.length;
    log(verbose, "git", `Found ${signals.commitCount} commits`);
  } catch (e) {
    console.log(`[git] Warning: could not read commit messages \u2014 ${e}`);
  }
  try {
    const output = runGit(repoPath, [
      "log",
      authorFilter,
      "--diff-filter=A",
      "--name-only",
      "--format="
    ]);
    signals.filesCreated = output.split("\n").map((l) => l.trim()).filter(Boolean);
    for (const f of signals.filesCreated) {
      const ext = path.extname(f);
      if (ext) increment(signals.extensions, ext);
    }
  } catch (e) {
    // Still best-effort, but no longer invisible when diagnosing a run.
    log(verbose, "git", `Could not list created files \u2014 ${e}`);
  }
  try {
    const output = runGit(repoPath, [
      "log",
      authorFilter,
      "--name-only",
      "--format="
    ]);
    for (const line of output.split("\n")) {
      const trimmed = line.trim();
      if (trimmed) increment(signals.filesModified, trimmed);
    }
  } catch (e) {
    log(verbose, "git", `Could not list modified files \u2014 ${e}`);
  }
  try {
    const output = runGit(repoPath, [
      "log",
      authorFilter,
      "--max-count=50",
      "-p",
      "--no-merges"
    ]);
    signals.rawDiffOutput = output;
    // Added lines start with "+"; "+++" is the new-file header, not content.
    const addedLines = output.split("\n").filter((l) => l.startsWith("+") && !l.startsWith("+++")).map((l) => l.slice(1));
    const nameRe = /\b(const|function|let|var|def|class)\s+([a-zA-Z_][a-zA-Z0-9_]*)/g;
    for (const line of addedLines) {
      // matchAll works on a private copy of the regex, so no lastIndex state
      // carries over from one line to the next.
      for (const match of line.matchAll(nameRe)) {
        signals.namingPatterns.push(match[2]);
      }
    }
  } catch (e) {
    log(verbose, "git", `Could not read patches \u2014 ${e}`);
  }
  return signals;
}
150
/**
 * Condense raw git signals into summary observations.
 *
 * Only sections with data are populated:
 *  - commit messages: average subject length, first five subjects, a rough
 *    "starts with its own upper-case form" heuristic reported as
 *    `likelyUsesImperativeMood`, and Conventional Commit prefixes/scopes when
 *    more than 30% of subjects use that format;
 *  - `primaryExtensions`: top five extensions of created files;
 *  - `primaryDirectories`, `inferredDomains`, `fileTypeProfile`: derived from
 *    the modified-file tallies;
 *  - `namingStyle`: counts of camelCase / PascalCase / snake_case identifiers.
 *
 * @param {object} signals - Object with commitMessages, extensions,
 *   filesModified and namingPatterns, as built by extractGitSignals.
 * @returns {object} Observation object (possibly empty).
 */
function summariseGit(signals) {
  const obs = {};
  const msgs = signals.commitMessages;
  if (msgs.length > 0) {
    const avgLen = msgs.reduce((sum, m) => sum + m.length, 0) / msgs.length;
    const imperative = msgs.filter(
      (m) => m[0] === m[0].toUpperCase() && !m.startsWith("fix") && !m.startsWith("add")
    ).length;
    obs.commitMessageAvgLength = Math.round(avgLen);
    obs.commitMessageSample = msgs.slice(0, 5);
    obs.likelyUsesImperativeMood = imperative > msgs.length * 0.5;
    const prefixCounts = {};
    const scopes = /* @__PURE__ */ new Set();
    let conventionalCount = 0;
    for (const msg of msgs) {
      const match = msg.match(CONVENTIONAL_RE);
      if (match) {
        conventionalCount++;
        increment(prefixCounts, match[1].toLowerCase());
        if (match[2]) scopes.add(match[2]);
      }
    }
    if (conventionalCount > msgs.length * 0.3) {
      obs.conventionalCommitPrefixes = prefixCounts;
      if (scopes.size > 0) {
        obs.commitScopePatterns = [...scopes].slice(0, 10);
      }
    }
  }
  if (Object.keys(signals.extensions).length > 0) {
    obs.primaryExtensions = topN(signals.extensions, 5);
  }
  if (Object.keys(signals.filesModified).length > 0) {
    const topFiles = topN(signals.filesModified, 10);
    const dirs = {};
    for (const [filepath, count] of topFiles) {
      // git prints repository paths with "/" on every platform, so split on
      // "/" rather than the OS separator (which is "\\" on Windows and would
      // leave every path in one piece).
      const parts = filepath.split("/");
      if (parts.length > 1) {
        increment(dirs, parts[0], count);
      }
    }
    obs.primaryDirectories = topN(dirs, 5);
    obs.inferredDomains = inferDomains(signals.filesModified);
    obs.fileTypeProfile = profileFileTypes(signals.filesModified);
  }
  const names = signals.namingPatterns;
  if (names.length > 0) {
    const camel = names.filter((n) => /^[a-z][a-zA-Z]+$/.test(n)).length;
    const pascal = names.filter((n) => /^[A-Z][a-zA-Z]+$/.test(n)).length;
    const snake = names.filter((n) => n.includes("_")).length;
    obs.namingStyle = {
      camelCase: camel,
      PascalCase: pascal,
      snake_case: snake
    };
  }
  return obs;
}
208
+
209
+ // src/extractors/code-style.ts
210
// File extension (including the leading dot) -> language name used to decide
// which PATTERN_RULES apply. Vue/Svelte single-file components are counted as
// "typescript". `.cjs`, `.mts` and `.cts` are included so CommonJS and
// TypeScript module files are recognised like their `.mjs`/`.ts` siblings.
var LANG_MAP = {
  ".ts": "typescript",
  ".tsx": "typescript",
  ".mts": "typescript",
  ".cts": "typescript",
  ".js": "javascript",
  ".jsx": "javascript",
  ".mjs": "javascript",
  ".cjs": "javascript",
  ".vue": "typescript",
  ".svelte": "typescript",
  ".py": "python",
  ".go": "go",
  ".rs": "rust",
  ".rb": "ruby",
  ".java": "java",
  ".php": "php"
};
225
// Language groups referenced by the rules below.
var JS_TS = ["typescript", "javascript"];
var TS_ONLY = ["typescript"];

// Line-level style detectors. Each rule names a `category`, the `choice`
// within that category that a match counts toward, and the `pattern` to test
// against one added source line. `languages`, when present, limits the rule
// to diffs in which at least one of those languages was detected; rules
// without it always apply.
//
// Rules are tested against lines whose surrounding whitespace has already
// been trimmed, so no pattern may REQUIRE leading whitespace. The
// control-flow patterns therefore use `^\s*if` (previously `^\s+if`, which
// could never match a trimmed line).
var PATTERN_RULES = [
  // Imports
  { category: "import_style", choice: "named_import", pattern: /^import\s+\{/, languages: JS_TS },
  { category: "import_style", choice: "default_import", pattern: /^import\s+[A-Za-z_$][^\s{]*\s+from/, languages: JS_TS },
  { category: "import_style", choice: "path_alias", pattern: /from\s+['"][@~]\//, languages: JS_TS },
  { category: "import_style", choice: "relative_import", pattern: /from\s+['"]\.\.?\//, languages: JS_TS },
  // Exports
  { category: "export_style", choice: "named_export", pattern: /^export\s+(?:const|function|class|type|interface|enum)\s/, languages: JS_TS },
  { category: "export_style", choice: "default_export", pattern: /^export\s+default\b/, languages: JS_TS },
  { category: "export_style", choice: "re_export", pattern: /^export\s+\{[^}]*\}\s+from/, languages: JS_TS },
  // Functions
  { category: "function_style", choice: "arrow_function", pattern: /(?:const|let)\s+\w+\s*=\s*(?:async\s+)?\(/, languages: JS_TS },
  { category: "function_style", choice: "function_declaration", pattern: /^(?:export\s+)?(?:async\s+)?function\s+\w/, languages: JS_TS },
  // Async
  { category: "async_style", choice: "async_await", pattern: /\bawait\s/, languages: [...JS_TS, "python"] },
  { category: "async_style", choice: "then_chain", pattern: /\.then\s*\(/, languages: JS_TS },
  // Control flow
  { category: "control_flow", choice: "early_return", pattern: /^\s*if\s*\(.*\)\s*return\b/, languages: JS_TS },
  { category: "control_flow", choice: "guard_clause", pattern: /^\s*if\s*\(!/, languages: JS_TS },
  // Strings
  { category: "string_style", choice: "template_literal", pattern: /`[^`]*\$\{/, languages: JS_TS },
  // Modern operators
  { category: "modern_operators", choice: "optional_chaining", pattern: /\?\.\w/, languages: JS_TS },
  { category: "modern_operators", choice: "nullish_coalescing", pattern: /\?\?/, languages: JS_TS },
  { category: "modern_operators", choice: "destructuring", pattern: /(?:const|let|var)\s+[\[{]/, languages: JS_TS },
  // TypeScript types
  { category: "type_definition", choice: "interface", pattern: /^(?:export\s+)?interface\s+\w/, languages: TS_ONLY },
  { category: "type_definition", choice: "type_alias", pattern: /^(?:export\s+)?type\s+\w+\s*=/, languages: TS_ONLY },
  { category: "enum_vs_union", choice: "enum", pattern: /^(?:export\s+)?(?:const\s+)?enum\s+\w/, languages: TS_ONLY },
  { category: "enum_vs_union", choice: "union_type", pattern: /type\s+\w+\s*=\s*['"\w]+(?:\s*\|\s*['"\w]+){2,}/, languages: TS_ONLY },
  { category: "type_features", choice: "as_const", pattern: /\bas\s+const\b/, languages: TS_ONLY },
  { category: "type_features", choice: "generic_usage", pattern: /<[A-Z]\w*(?:,\s*[A-Z]\w*)*>/, languages: TS_ONLY },
  { category: "type_features", choice: "explicit_return_type", pattern: /\)\s*:\s*(?:Promise<|void|string|number|boolean|\w+\[\])/, languages: TS_ONLY },
  // Error handling
  { category: "error_handling", choice: "try_catch", pattern: /^\s*(?:try\s*\{|\}\s*catch\s*\()/ },
  { category: "error_handling", choice: "custom_error", pattern: /class\s+\w+Error\s+extends/ },
  // Testing
  { category: "test_structure", choice: "describe_it", pattern: /\b(?:describe|it)\s*\(/, languages: JS_TS },
  { category: "test_structure", choice: "test_fn", pattern: /\btest\s*\(/, languages: JS_TS },
  { category: "test_assertion", choice: "expect", pattern: /\bexpect\s*\(/, languages: JS_TS },
  { category: "test_assertion", choice: "assert", pattern: /\bassert\.\w/, languages: [...JS_TS, "python"] },
  // Architecture
  { category: "composition_style", choice: "inheritance", pattern: /class\s+\w+\s+extends\s/, languages: JS_TS },
  { category: "composition_style", choice: "composition", pattern: /\buse[A-Z]\w+\s*\(/, languages: JS_TS },
  { category: "architecture", choice: "factory_function", pattern: /(?:create|make|build)[A-Z]\w+\s*\(/, languages: JS_TS },
  { category: "architecture", choice: "event_pattern", pattern: /\.(?:on|emit|addEventListener|subscribe)\s*\(/, languages: JS_TS },
  // Python-specific
  { category: "python_style", choice: "type_hints", pattern: /def\s+\w+\(.*:\s*\w+/, languages: ["python"] },
  { category: "python_style", choice: "list_comprehension", pattern: /\[.*\bfor\b.*\bin\b.*\]/, languages: ["python"] },
  { category: "python_style", choice: "dataclass", pattern: /@dataclass/, languages: ["python"] },
  { category: "python_style", choice: "f_string", pattern: /f['"].*\{/, languages: ["python"] }
];
279
+ var CATEGORY_LABELS = { // Display headings keyed by rule `category`; summariseCodeStyle falls back to the raw key when an entry is missing.
280
+ import_style: "Import Style",
281
+ export_style: "Export Style",
282
+ function_style: "Function Style",
283
+ async_style: "Async Style",
284
+ control_flow: "Control Flow",
285
+ string_style: "String Style",
286
+ modern_operators: "Modern Operators",
287
+ type_definition: "Type Definition",
288
+ enum_vs_union: "Enum vs Union",
289
+ type_features: "TypeScript Features",
290
+ error_handling: "Error Handling",
291
+ test_structure: "Test Structure",
292
+ test_assertion: "Test Assertions",
293
+ composition_style: "Composition Style",
294
+ architecture: "Architecture Patterns",
295
+ python_style: "Python Style"
296
+ };
297
+ var CHOICE_LABELS = { // Display phrases keyed by rule `choice`; summariseCodeStyle falls back to the raw key when an entry is missing.
298
+ named_import: "named imports",
299
+ default_import: "default imports",
300
+ path_alias: "path aliases (@/)",
301
+ relative_import: "relative imports",
302
+ named_export: "named exports",
303
+ default_export: "default exports",
304
+ re_export: "re-exports",
305
+ arrow_function: "arrow functions",
306
+ function_declaration: "function declarations",
307
+ async_await: "async/await",
308
+ then_chain: ".then() chains",
309
+ early_return: "early returns",
310
+ guard_clause: "guard clauses",
311
+ template_literal: "template literals",
312
+ optional_chaining: "optional chaining (?.)",
313
+ nullish_coalescing: "nullish coalescing (??)",
314
+ destructuring: "destructuring",
315
+ interface: "interfaces",
316
+ type_alias: "type aliases",
317
+ enum: "enums",
318
+ union_type: "union types",
319
+ as_const: "as const assertions",
320
+ generic_usage: "generics",
321
+ explicit_return_type: "explicit return types",
322
+ try_catch: "try/catch",
323
+ custom_error: "custom error classes",
324
+ describe_it: "describe/it blocks",
325
+ test_fn: "test() functions",
326
+ expect: "expect()",
327
+ assert: "assert",
328
+ inheritance: "class inheritance",
329
+ composition: "composables/hooks",
330
+ factory_function: "factory functions",
331
+ event_pattern: "event/pub-sub patterns",
332
+ type_hints: "type hints",
333
+ list_comprehension: "list comprehensions",
334
+ dataclass: "dataclasses",
335
+ f_string: "f-strings"
336
+ };
337
/**
 * List the languages present in a unified diff, most frequent first.
 *
 * Every `diff --git a/<path> b/...` header contributes one count for the
 * language that LANG_MAP associates with the text from the path's last "."
 * onward; paths without a "." or with an unmapped extension are ignored.
 *
 * @param {string} diffOutput - Output of `git log -p` / `git diff`.
 * @returns {string[]} Language names ordered by descending file count.
 */
function detectLanguages(diffOutput) {
  const tally = {};
  const headerRe = /^diff --git a\/(.*?) b\//gm;
  for (let m = headerRe.exec(diffOutput); m !== null; m = headerRe.exec(diffOutput)) {
    const file = m[1];
    const dot = file.lastIndexOf(".");
    if (dot === -1) continue;
    const lang = LANG_MAP[file.slice(dot)];
    if (lang) increment(tally, lang);
  }
  const ranked = Object.entries(tally);
  ranked.sort((x, y) => y[1] - x[1]);
  return ranked.map((pair) => pair[0]);
}
351
/**
 * Count code-style pattern hits in the added lines of a unified diff.
 *
 * Added lines are those starting with "+" but not "+++". A rule is active
 * when it has no `languages` list or when one of its languages was detected
 * in the diff. Each added line is trimmed; blank lines and lines starting
 * with "//" or "*" are skipped; every active rule that matches adds one to
 * `counters[category][choice]`. `totalLinesAnalyzed` counts all added lines,
 * including the skipped ones.
 *
 * @param {string} diffOutput - Unified diff text; a falsy value yields empty counters.
 * @returns {{counters: object, detectedLanguages: string[], totalLinesAnalyzed: number}}
 */
function extractCodeStyleFromDiff(diffOutput) {
  const signals = {
    counters: {},
    detectedLanguages: detectLanguages(diffOutput),
    totalLinesAnalyzed: 0
  };
  if (!diffOutput) return signals;

  const langs = new Set(signals.detectedLanguages);
  const added = [];
  for (const raw of diffOutput.split("\n")) {
    if (raw.startsWith("+") && !raw.startsWith("+++")) added.push(raw.slice(1));
  }
  signals.totalLinesAnalyzed = added.length;

  const activeRules = PATTERN_RULES.filter((rule) => {
    if (!rule.languages) return true;
    return rule.languages.some((lang) => langs.has(lang));
  });

  for (const line of added) {
    const text = line.trim();
    const skip = !text || text.startsWith("//") || text.startsWith("*");
    if (skip) continue;
    for (const rule of activeRules) {
      if (!rule.pattern.test(text)) continue;
      const bucket = signals.counters[rule.category] || (signals.counters[rule.category] = {});
      increment(bucket, rule.choice);
    }
  }
  return signals;
}
378
/**
 * Turn raw pattern counters into human-readable style observations.
 *
 * Per category (ignored when it has fewer than 3 hits in total):
 *  - one choice only: reported as "Frequently uses ..." when it has at least
 *    5 hits ("strong" from 15 hits, otherwise "moderate");
 *  - several choices: reported as "Prefers A over B" when the leading choice
 *    holds at least 60% of the hits ("strong" from 80%).
 * Observations are ordered strong-before-moderate, then by category label.
 *
 * @param {{counters: object, detectedLanguages: string[], totalLinesAnalyzed: number}} signals
 * @returns {{observations: object[], primaryLanguage: (string|undefined), totalLinesAnalyzed: number}}
 */
function summariseCodeStyle(signals) {
  const categoryLabel = (key) => CATEGORY_LABELS[key] ?? key;
  const choiceLabel = (key) => CHOICE_LABELS[key] ?? key;
  const observations = [];

  Object.keys(signals.counters).forEach((category) => {
    const ranked = Object.entries(signals.counters[category]).sort((a, b) => b[1] - a[1]);
    let total = 0;
    for (const [, hits] of ranked) total += hits;
    if (total < 3) return;

    const [topChoice, topCount] = ranked[0];
    if (ranked.length === 1) {
      if (topCount < 5) return;
      observations.push({
        category: categoryLabel(category),
        observation: `Frequently uses ${choiceLabel(topChoice)} (${topCount} occurrences)`,
        confidence: topCount >= 15 ? "strong" : "moderate"
      });
      return;
    }

    const share = topCount / total;
    if (share < 0.6) return;
    let confidence = "moderate";
    if (share >= 0.8) confidence = "strong";
    const pct = Math.round(share * 100);
    const topLabel = choiceLabel(topChoice);
    const runnerLabel = choiceLabel(ranked[1][0]);
    observations.push({
      category: categoryLabel(category),
      observation: `Prefers ${topLabel} over ${runnerLabel} (${pct}% of ${total})`,
      confidence
    });
  });

  observations.sort((a, b) => {
    if (a.confidence === b.confidence) return a.category.localeCompare(b.category);
    return a.confidence === "strong" ? -1 : 1;
  });

  return {
    observations,
    primaryLanguage: signals.detectedLanguages[0],
    totalLinesAnalyzed: signals.totalLinesAnalyzed
  };
}
417
+
418
+ // src/extractors/gitlab.ts
419
+ import { readFileSync } from "fs";
420
+
421
+ // src/extractors/review-common.ts
422
/**
 * Summarise collected review comments into headline statistics and a sample.
 *
 * With no comments only `{ source }` is returned. Otherwise the result also
 * carries the total count, the rounded average length, whether that average
 * is under 120 characters, a copy of the severity-prefix tallies, the share
 * of question comments (two decimals) and a de-duplicated sample drawn at
 * evenly spread positions of the comments ordered by length.
 *
 * @param {object} signals - Review signals: source, reviewComments,
 *   commentLengths, severityPrefixes, questionComments, totalComments.
 * @returns {object} Observation object.
 */
function summariseReview(signals) {
  const obs = { source: signals.source };
  const comments = signals.reviewComments;
  if (comments.length === 0) return obs;

  obs.totalReviewComments = signals.totalComments;

  const lengths = signals.commentLengths;
  let lengthSum = 0;
  for (const len of lengths) lengthSum += len;
  obs.avgCommentLength = lengths.length > 0 ? Math.round(lengthSum / lengths.length) : 0;
  obs.tendsToBeBrief = obs.avgCommentLength < 120;
  obs.usesSeverityPrefixes = { ...signals.severityPrefixes };
  obs.questionRatio = Math.round(signals.questionComments / comments.length * 100) / 100;

  // Shortest first, then pick at fixed fractions of the list plus the two
  // longest; out-of-range positions (tiny lists) are dropped.
  const byLength = comments.slice().sort((a, b) => a.length - b.length);
  const count = byLength.length;
  const positions = [
    0,
    Math.floor(count / 6),
    Math.floor(count / 3),
    Math.floor(count / 2),
    Math.floor(2 * count / 3),
    Math.floor(5 * count / 6),
    count - 2,
    count - 1
  ];
  const picked = new Set();
  for (const pos of positions) {
    if (pos >= 0 && pos < count) picked.add(byLength[pos]);
  }
  obs.sampleComments = Array.from(picked);
  return obs;
}
451
+
452
+ // src/extractors/gitlab.ts
453
/**
 * Extract one contributor's review comments from a GitLab JSON export file.
 *
 * The file must contain a JSON array. Each array item either carries a
 * `notes` array (every note is treated as a comment) or is itself a comment
 * with a string `body`. A comment is kept when its `author.name` contains
 * `contributor` (case-insensitive) and its trimmed body is non-empty. Kept
 * comments are also tallied by severity prefix and counted as questions when
 * they end in "?" or start with "do we" / "should we".
 *
 * @param {string} exportPath - Path of the JSON export.
 * @param {string} contributor - Name (or part of it) to match against author names.
 * @param {boolean} verbose - Enables the summary line via `log`.
 * @returns {object} Review signals with `source: "gitlab"`; empty when the
 *   file cannot be read or parsed (a message is printed in that case).
 */
function extractGitLabSignals(exportPath, contributor, verbose) {
  const signals = {
    reviewComments: [],
    commentLengths: [],
    severityPrefixes: {},
    questionComments: 0,
    totalComments: 0,
    source: "gitlab"
  };
  let data;
  try {
    data = JSON.parse(readFileSync(exportPath, "utf-8"));
  } catch (e) {
    console.log(`[gitlab] Could not read export \u2014 ${e}`);
    return signals;
  }
  const comments = [];
  if (Array.isArray(data)) {
    for (const item of data) {
      if (!item || typeof item !== "object") continue;
      if (Array.isArray(item.notes)) {
        // Append one by one: spreading a very large notes array into push()
        // can exceed the engine's argument limit.
        for (const note of item.notes) comments.push(note);
      } else if (typeof item.body === "string") {
        comments.push(item);
      }
    }
  }
  const contributorLower = contributor.toLowerCase();
  const prefixRe = /^(nit|suggestion|blocking|question|thought|minor|major)[:\s]/i;
  for (const comment of comments) {
    // Notes come straight from the export; skip null / non-object entries
    // instead of throwing on `comment.author`.
    if (!comment || typeof comment !== "object") continue;
    const authorName = String(comment.author?.name ?? "");
    if (!authorName.toLowerCase().includes(contributorLower)) continue;
    const body = String(comment.body ?? "").trim();
    if (!body) continue;
    signals.reviewComments.push(body);
    signals.commentLengths.push(body.length);
    signals.totalComments += 1;
    const prefixMatch = body.match(prefixRe);
    if (prefixMatch) {
      increment(signals.severityPrefixes, prefixMatch[1].toLowerCase());
    }
    const bodyLower = body.toLowerCase();
    if (body.endsWith("?") || bodyLower.startsWith("do we") || bodyLower.startsWith("should we")) {
      signals.questionComments += 1;
    }
  }
  log(verbose, "gitlab", `Found ${signals.totalComments} comments by ${contributor}`);
  return signals;
}
504
+ function summariseGitLab(signals) { // GitLab-named entry point; passes `signals` unchanged to summariseReview and returns its result.
505
+ return summariseReview(signals);
506
+ }
507
+
508
+ // src/extractors/github.ts
509
// Value sent in the User-Agent header of GitHub API requests.
var USER_AGENT = "poltergeist-cli/0.1.0";

/**
 * Extract the owner and repository name from a GitHub URL.
 *
 * Accepts both `https://github.com/<owner>/<repo>` and
 * `git@github.com:<owner>/<repo>` forms. A trailing `.git` is dropped, and
 * anything after the repository segment (`/tree/...`, `?query`, `#hash`) is
 * ignored. Dots inside the repository name (e.g. `next.js`) are kept; the
 * name is no longer cut at the first ".".
 *
 * @param {string} url - Repository URL.
 * @returns {{owner: string, repo: string}|null} Parsed parts, or null when
 *   the string is not a recognisable GitHub repository URL.
 */
function parseGitHubUrl(url) {
  // The repo capture is lazy so the optional ".git" suffix is peeled off, and
  // the lookahead forces it to run to the end of the path segment.
  const match = url.match(/github\.com[/:]([^/]+)\/([^/?#\s]+?)(?:\.git)?(?=[/?#\s]|$)/);
  if (!match) return null;
  return { owner: match[1], repo: match[2] };
}
515
// Module-level request state shared by the GitHub helpers: number of requests
// issued, and a latch that stops further requests once set.
var apiCallCount = 0;
var rateLimited = false;

/**
 * GET a GitHub REST API resource and return its parsed JSON body.
 *
 * Best-effort: every failure yields `null` instead of throwing.
 *  - Once `rateLimited` is set, no request is made.
 *  - A transport failure (rejected `fetch`) is reported and yields null.
 *  - HTTP 403 / 429 set `rateLimited` (reported once, since every later call
 *    short-circuits) and yield null.
 *  - Any other non-OK status is logged in verbose mode and yields null.
 *  - A successful response whose `X-RateLimit-Remaining` is 1 or less is
 *    still returned, but sets `rateLimited` for later calls.
 *
 * @param {string} urlPath - Absolute `https://` URL, or a path appended to `https://api.github.com`.
 * @param {{token?: string, verbose?: boolean}} opts - Optional bearer token and verbosity.
 * @returns {Promise<any|null>} Parsed JSON, or null on any failure.
 */
async function ghFetch(urlPath, opts) {
  if (rateLimited) return null;
  const url = urlPath.startsWith("https://") ? urlPath : `https://api.github.com${urlPath}`;
  const headers = {
    Accept: "application/vnd.github+json",
    "User-Agent": USER_AGENT
  };
  if (opts.token) {
    headers.Authorization = `Bearer ${opts.token}`;
  }
  apiCallCount++;
  let res;
  try {
    res = await fetch(url, { headers });
  } catch (e) {
    // DNS/connection errors reject the promise; treat them like any other
    // failed request so callers keep their "null means no data" contract.
    console.log(`[github] Request failed for ${url} \u2014 ${e}`);
    return null;
  }
  if (!res.ok) {
    if (res.status === 403 || res.status === 429) {
      rateLimited = true;
      console.log(`[github] API returned ${res.status} (rate limited or access denied) \u2014 stopping API calls`);
      return null;
    }
    log(opts.verbose, "github", `API ${res.status} for ${url}`);
    return null;
  }
  const remaining = res.headers.get("X-RateLimit-Remaining");
  if (remaining && Number.parseInt(remaining, 10) <= 1) {
    rateLimited = true;
    console.log("[github] Rate limit exhausted \u2014 stopping API calls");
  }
  return res.json();
}
544
/**
 * Find pull requests in a repository that a user has commented on.
 *
 * Issues one search request (`repo:<owner>/<repo> commenter:<user> type:pr`,
 * up to 100 results, most recently updated first).
 *
 * @param {string} owner - Repository owner.
 * @param {string} repo - Repository name.
 * @param {string} contributor - GitHub login of the commenter.
 * @param {{token?: string, verbose?: boolean}} opts - Passed through to ghFetch.
 * @returns {Promise<number[]>} PR numbers; empty when the request fails or
 *   returns no `items`.
 */
async function searchPRsWithComments(owner, repo, contributor, opts) {
  const rawQuery = `repo:${owner}/${repo} commenter:${contributor} type:pr`;
  const endpoint = `/search/issues?q=${encodeURIComponent(rawQuery)}&per_page=100&sort=updated&order=desc`;
  const data = await ghFetch(endpoint, opts);
  const items = data?.items;
  if (!items) return [];
  return items.map((item) => item.number);
}
555
/**
 * Fetch the review text one user left on a pull request.
 *
 * Reads the PR's inline review comments first and then, unless the
 * `rateLimited` latch was set in the meantime, its review summaries. Only
 * entries whose `user.login` equals `contributor` (case-insensitive) and
 * whose body is non-blank are kept. At most 100 entries are requested per
 * endpoint.
 *
 * @param {string} owner - Repository owner.
 * @param {string} repo - Repository name.
 * @param {number} prNumber - Pull request number.
 * @param {string} contributor - GitHub login to match.
 * @param {{token?: string, verbose?: boolean}} opts - Passed through to ghFetch.
 * @returns {Promise<string[]>} Trimmed comment bodies, inline comments first.
 */
async function fetchPRComments(owner, repo, prNumber, contributor, opts) {
  const collected = [];
  const wanted = contributor.toLowerCase();
  const keepMatching = (payload) => {
    if (!Array.isArray(payload)) return;
    for (const entry of payload) {
      const byContributor = entry.user?.login?.toLowerCase() === wanted;
      if (byContributor && entry.body?.trim()) {
        collected.push(entry.body.trim());
      }
    }
  };
  const base = `/repos/${owner}/${repo}/pulls/${prNumber}`;

  keepMatching(await ghFetch(`${base}/comments?per_page=100`, opts));
  if (rateLimited) return collected;
  keepMatching(await ghFetch(`${base}/reviews?per_page=100`, opts));
  return collected;
}
583
/**
 * Collect one user's pull-request review comments from the GitHub API.
 *
 * Resets the module-level API counters, searches for PRs the user commented
 * on, then reads comments from at most 50 of them (25 without a token),
 * stopping early once the rate-limit latch is set. Each comment is tallied by
 * severity prefix and counted as a question when it ends in "?" or starts
 * with "do we" / "should we".
 *
 * @param {string} owner - Repository owner.
 * @param {string} repo - Repository name.
 * @param {string} contributor - GitHub login of the reviewer.
 * @param {string|undefined} token - Optional API token.
 * @param {boolean} verbose - Enables progress output via `log`.
 * @returns {Promise<object>} Review signals with `source: "github"`.
 */
async function extractGitHubSignals(owner, repo, contributor, token, verbose) {
  const signals = {
    reviewComments: [],
    commentLengths: [],
    severityPrefixes: {},
    questionComments: 0,
    totalComments: 0,
    source: "github"
  };
  const opts = { token, verbose };
  apiCallCount = 0;
  rateLimited = false;
  const prefixRe = /^(nit|suggestion|blocking|question|thought|minor|major)[:\s]/i;
  log(verbose, "github", "Searching for PRs with review comments...");
  const prNumbers = await searchPRsWithComments(owner, repo, contributor, opts);
  if (prNumbers.length === 0) {
    console.log(
      `[github] No PR review comments found for "${contributor}". Make sure --contributor matches the GitHub username.`
    );
    return signals;
  }
  log(
    verbose,
    "github",
    `Found ${prNumbers.length} PRs with comments by ${contributor}`
  );
  const prCap = token ? 50 : 25;
  const prsToFetch = prNumbers.slice(0, prCap);
  if (prNumbers.length > prCap) {
    // Only suggest supplying a token when one was not already supplied.
    const hint = token ? "" : " (use --github-token for more)";
    console.log(
      `[github] Sampling ${prCap} of ${prNumbers.length} PRs${hint}`
    );
  }
  for (const prNumber of prsToFetch) {
    if (rateLimited) break;
    const comments = await fetchPRComments(
      owner,
      repo,
      prNumber,
      contributor,
      opts
    );
    for (const body of comments) {
      signals.reviewComments.push(body);
      signals.commentLengths.push(body.length);
      signals.totalComments += 1;
      const prefixMatch = body.match(prefixRe);
      if (prefixMatch) {
        increment(signals.severityPrefixes, prefixMatch[1].toLowerCase());
      }
      const bodyLower = body.toLowerCase();
      if (body.endsWith("?") || bodyLower.startsWith("do we") || bodyLower.startsWith("should we")) {
        signals.questionComments += 1;
      }
    }
  }
  log(
    verbose,
    "github",
    `Collected ${signals.totalComments} review comments (${apiCallCount} API calls)`
  );
  return signals;
}
645
+
646
+ // src/extractors/slack.ts
647
+ import { readFileSync as readFileSync2, readdirSync } from "fs";
648
+ import path2 from "path";
649
/**
 * Collect one person's messages from a Slack export directory.
 *
 * Every `.json` file under `exportDir` (listed recursively) that parses to an
 * array is scanned. A message is kept when its `username` (or, failing that,
 * `user_profile.display_name`) contains `contributor` case-insensitively and
 * its trimmed text is at least 20 characters long and does not start with
 * ":". Kept messages that mention a technical keyword are additionally
 * listed in `technicalMessages`. Unreadable directories or files are skipped
 * silently.
 *
 * @param {string} exportDir - Root of the Slack export.
 * @param {string} contributor - Name (or part of it) to match.
 * @param {boolean} verbose - Enables the summary line via `log`.
 * @returns {{messages: string[], technicalMessages: string[]}}
 */
function extractSlackSignals(exportDir, contributor, verbose) {
  const signals = { messages: [], technicalMessages: [] };
  const techKeywords = /\b(PR|MR|merge|review|component|composable|API|endpoint|refactor|naming|test|pattern|abstraction|type|interface|performance|bug|breaking)\b/i;
  const wanted = contributor.toLowerCase();

  let listing;
  try {
    listing = readdirSync(exportDir, { recursive: true, encoding: "utf-8" });
  } catch {
    return signals;
  }

  for (const relPath of listing) {
    if (!relPath.endsWith(".json")) continue;
    let parsed;
    try {
      parsed = JSON.parse(readFileSync2(path2.join(exportDir, relPath), "utf-8"));
    } catch {
      continue;
    }
    if (!Array.isArray(parsed)) continue;

    for (const msg of parsed) {
      if (!msg || typeof msg !== "object") continue;
      const username = String(msg.username ?? msg.user_profile?.display_name ?? "");
      const text = String(msg.text ?? "").trim();
      const fromContributor = username.toLowerCase().includes(wanted);
      if (!text || !fromContributor) continue;
      const tooShortOrEmoji = text.length < 20 || text.startsWith(":");
      if (tooShortOrEmoji) continue;
      signals.messages.push(text);
      if (techKeywords.test(text)) signals.technicalMessages.push(text);
    }
  }

  log(
    verbose,
    "slack",
    `Found ${signals.messages.length} messages, ${signals.technicalMessages.length} technical`
  );
  return signals;
}
694
/**
 * Summarise Slack signals: the number of technical messages and the first
 * eight of them. Returns an empty object when there are none.
 *
 * @param {{technicalMessages: string[]}} signals
 * @returns {{technicalMessageCount?: number, sampleTechnicalMessages?: string[]}}
 */
function summariseSlack(signals) {
  const { technicalMessages } = signals;
  if (technicalMessages.length === 0) return {};
  return {
    technicalMessageCount: technicalMessages.length,
    sampleTechnicalMessages: technicalMessages.slice(0, 8)
  };
}
702
+
703
+ // src/extractors/docs.ts
704
+ import { readFileSync as readFileSync3, readdirSync as readdirSync2 } from "fs";
705
+ import path3 from "path";
706
/**
 * Find Markdown documents attributed to a contributor and sample their prose.
 *
 * Every `.md` file under `docsDir` (listed recursively) is read. A document
 * counts as authored when an `author:`-style line within its first 500
 * characters contains `contributor` (case-insensitive). For each such
 * document its full path is recorded and up to three blank-line-separated
 * paragraphs longer than 100 characters are kept as excerpts. Unreadable
 * directories or files are skipped silently.
 *
 * @param {string} docsDir - Root directory of the documentation.
 * @param {string} contributor - Name (or part of it) to match.
 * @param {boolean} verbose - Enables the summary line via `log`.
 * @returns {{authoredDocs: string[], docExcerpts: string[]}}
 */
function extractDocsSignals(docsDir, contributor, verbose) {
  const signals = { authoredDocs: [], docExcerpts: [] };
  const wanted = contributor.toLowerCase();

  let listing;
  try {
    listing = readdirSync2(docsDir, { recursive: true, encoding: "utf-8" });
  } catch {
    return signals;
  }

  for (const relPath of listing) {
    if (!relPath.endsWith(".md")) continue;
    const fullPath = path3.join(docsDir, relPath);
    let content;
    try {
      content = readFileSync3(fullPath, "utf-8");
    } catch {
      continue;
    }

    const authorLine = content.slice(0, 500).match(/author[:\s]+(.+)/i);
    if (!authorLine) continue;
    if (!authorLine[1].toLowerCase().includes(wanted)) continue;

    signals.authoredDocs.push(fullPath);
    const longParas = [];
    for (const chunk of content.split("\n\n")) {
      const para = chunk.trim();
      if (para.length > 100) longParas.push(para);
    }
    signals.docExcerpts.push(...longParas.slice(0, 3));
  }

  log(verbose, "docs", `Found ${signals.authoredDocs.length} docs attributed to ${contributor}`);
  return signals;
}
737
+
738
+ // src/generator.ts
739
/**
 * Render `[name, count]` pairs as "name (count)" items joined by ", ".
 *
 * @param {Array<[string, number]>} pairs - Pairs to render.
 * @param {string} [suffix=""] - Text appended directly after each name.
 * @returns {string} The joined list ("" for no pairs).
 */
function formatPairs(pairs, suffix = "") {
  const rendered = [];
  for (const [name, count] of pairs) {
    rendered.push(`${name}${suffix} (${count})`);
  }
  return rendered.join(", ");
}
742
/**
 * Name the identifier style that clearly dominates, if any.
 *
 * Needs at least 10 identifiers in total; a style dominates when it accounts
 * for more than 70% of them. Styles are checked in the order camelCase,
 * snake_case, PascalCase.
 *
 * @param {{camelCase: number, PascalCase: number, snake_case: number}} style
 * @returns {string|null} A "Strongly prefers ..." sentence, or null.
 */
function dominantNamingStyle(style) {
  const total = style.camelCase + style.PascalCase + style.snake_case;
  if (total < 10) return null;
  const verdicts = [
    [style.camelCase, "Strongly prefers camelCase"],
    [style.snake_case, "Strongly prefers snake_case"],
    [style.PascalCase, "Strongly prefers PascalCase"]
  ];
  for (const [count, verdict] of verdicts) {
    if (count / total > 0.7) return verdict;
  }
  return null;
}
750
+ function buildGhostMarkdown(input) {
751
+ const { contributor, slug, gitObs, codeStyleObs, reviewObs, slackObs, docsSignals, sourcesUsed } = input;
752
+ const today = (/* @__PURE__ */ new Date()).toISOString().slice(0, 10);
753
+ const domains = gitObs.inferredDomains?.length ? gitObs.inferredDomains.join(", ") : "_[fill in manually]_";
754
+ const lines = [
755
+ `# Contributor Soul: ${contributor}`,
756
+ "",
757
+ "## Identity",
758
+ `- **Slug**: ${slug}`,
759
+ "- **Role**: _[fill in manually]_",
760
+ `- **Primary domains**: ${domains}`,
761
+ `- **Soul last updated**: ${today}`,
762
+ `- **Sources used**: ${sourcesUsed.join(", ")}`,
763
+ "",
764
+ "---",
765
+ "",
766
+ "## Review Philosophy",
767
+ "",
768
+ "### What they care about most (ranked)",
769
+ "_[Fill in manually after reviewing the data below \u2014 re-order based on review comment patterns]_",
770
+ "1. Correctness",
771
+ "2. Naming",
772
+ "3. Component / module boundaries",
773
+ "4. Test coverage",
774
+ "5. Consistency with existing patterns",
775
+ "",
776
+ "### What they tend to ignore",
777
+ "_[Fill in manually]_",
778
+ "",
779
+ "### Dealbreakers",
780
+ "_[Fill in manually]_",
781
+ "",
782
+ "### Recurring questions / phrases"
783
+ ];
784
+ if (reviewObs.sampleComments && reviewObs.sampleComments.length > 0) {
785
+ const questions = reviewObs.sampleComments.filter((c) => c.endsWith("?"));
786
+ if (questions.length > 0) {
787
+ for (const q of questions.slice(0, 5)) {
788
+ const truncated = q.length > 150 ? q.slice(0, 150) + "..." : q;
789
+ lines.push(`- "${truncated}"`);
790
+ }
791
+ } else {
792
+ lines.push("_[Fill in from sample comments below]_");
793
+ }
794
+ } else {
795
+ lines.push("_[Fill in from sample comments below]_");
796
+ }
797
+ lines.push("", "---", "", "## Communication Style", "");
798
+ if (reviewObs.totalReviewComments != null) {
799
+ lines.push(
800
+ `- **Total review comments analysed**: ${reviewObs.totalReviewComments}`,
801
+ `- **Average comment length**: ${reviewObs.avgCommentLength ?? "N/A"} chars`,
802
+ `- **Tends to be brief**: ${reviewObs.tendsToBeBrief ?? "unknown"}`,
803
+ `- **Question ratio**: ${reviewObs.questionRatio ?? "N/A"} (proportion of comments phrased as questions)`
804
+ );
805
+ if (reviewObs.usesSeverityPrefixes && Object.keys(reviewObs.usesSeverityPrefixes).length > 0) {
806
+ const prefixes = Object.entries(reviewObs.usesSeverityPrefixes).map(([k, v]) => `${k} (${v})`).join(", ");
807
+ lines.push(`- **Severity prefixes used**: ${prefixes}`);
808
+ }
809
+ lines.push("");
810
+ }
811
+ lines.push(
812
+ "### Tone",
813
+ "_[Fill in manually \u2014 direct? warm? collaborative? terse?]_",
814
+ "",
815
+ "### Severity prefixes they use"
816
+ );
817
+ if (reviewObs.usesSeverityPrefixes && Object.keys(reviewObs.usesSeverityPrefixes).length > 0) {
818
+ lines.push(
819
+ Object.keys(reviewObs.usesSeverityPrefixes).join(", ")
820
+ );
821
+ } else {
822
+ lines.push("_[Derived from comments above \u2014 fill in which they actually use]_");
823
+ }
824
+ lines.push(
825
+ "",
826
+ "### Vocabulary / phrases they use",
827
+ "_[Fill in from sample comments below]_",
828
+ "",
829
+ "---",
830
+ "",
831
+ "## Code Patterns",
832
+ ""
833
+ );
834
+ if (gitObs.primaryExtensions) {
835
+ lines.push(
836
+ `**Primary file types**: ${formatPairs(gitObs.primaryExtensions)}`
837
+ );
838
+ }
839
+ if (gitObs.primaryDirectories) {
840
+ lines.push(
841
+ `**Primary codebase areas**: ${formatPairs(gitObs.primaryDirectories, "/")}`
842
+ );
843
+ }
844
+ if (gitObs.namingStyle) {
845
+ const ns = gitObs.namingStyle;
846
+ lines.push(
847
+ `**Naming style**: camelCase (${ns.camelCase}), PascalCase (${ns.PascalCase}), snake_case (${ns.snake_case})`
848
+ );
849
+ const dominant = dominantNamingStyle(ns);
850
+ if (dominant) lines.push(`> ${dominant}`);
851
+ }
852
+ if (gitObs.fileTypeProfile) {
853
+ const p = gitObs.fileTypeProfile;
854
+ const parts = [
855
+ p.components && `components (${p.components})`,
856
+ p.tests && `tests (${p.tests})`,
857
+ p.configs && `configs (${p.configs})`,
858
+ p.docs && `docs (${p.docs})`,
859
+ p.other && `other (${p.other})`
860
+ ].filter(Boolean);
861
+ if (parts.length > 0) {
862
+ lines.push(`**File type profile**: ${parts.join(", ")}`);
863
+ }
864
+ }
865
+ if (gitObs.primaryExtensions || gitObs.primaryDirectories || gitObs.namingStyle || gitObs.fileTypeProfile) {
866
+ lines.push("");
867
+ }
868
+ if (codeStyleObs.observations.length > 0) {
869
+ lines.push("### Detected Code Style Preferences");
870
+ if (codeStyleObs.primaryLanguage) {
871
+ lines.push(
872
+ `_Primary language: ${codeStyleObs.primaryLanguage} \xB7 ${codeStyleObs.totalLinesAnalyzed} added lines analyzed_`
873
+ );
874
+ }
875
+ lines.push("");
876
+ for (const obs of codeStyleObs.observations) {
877
+ if (obs.confidence === "strong") {
878
+ lines.push(`- **${obs.category}**: ${obs.observation}`);
879
+ } else {
880
+ lines.push(`- ${obs.category}: ${obs.observation}`);
881
+ }
882
+ }
883
+ lines.push("");
884
+ }
885
+ lines.push(
886
+ "### Patterns they introduce / prefer"
887
+ );
888
+ const namingInsight = gitObs.namingStyle ? dominantNamingStyle(gitObs.namingStyle) : null;
889
+ if (namingInsight) {
890
+ lines.push(`- ${namingInsight}`);
891
+ }
892
+ lines.push(
893
+ "_[Fill in manually from code inspection and review comment themes]_",
894
+ "",
895
+ "### Patterns they push back on",
896
+ "_[Fill in manually]_",
897
+ "",
898
+ "---",
899
+ "",
900
+ "## Known Blind Spots",
901
+ "_[Fill in manually \u2014 areas they historically under-comment on]_",
902
+ "",
903
+ "---",
904
+ "",
905
+ "## Example Review Comments",
906
+ "_These ground Claude's output in their actual voice. Aim for 5\u201310 verbatim examples._",
907
+ ""
908
+ );
909
+ if (reviewObs.sampleComments && reviewObs.sampleComments.length > 0) {
910
+ for (const comment of reviewObs.sampleComments) {
911
+ const truncated = comment.length > 300 ? comment.slice(0, 300) + "..." : comment;
912
+ lines.push(`> ${truncated}`, "");
913
+ }
914
+ }
915
+ lines.push("---", "", "## Commit Message Style", "");
916
+ if (gitObs.commitMessageAvgLength != null) {
917
+ lines.push(`- **Avg length**: ${gitObs.commitMessageAvgLength} chars`);
918
+ lines.push(
919
+ `- **Uses imperative mood**: ${gitObs.likelyUsesImperativeMood ?? "unknown"}`
920
+ );
921
+ if (gitObs.conventionalCommitPrefixes) {
922
+ const prefixStr = Object.entries(gitObs.conventionalCommitPrefixes).sort((a, b) => b[1] - a[1]).map(([k, v]) => `${k} (${v})`).join(", ");
923
+ lines.push(`- **Conventional commits**: yes \u2014 ${prefixStr}`);
924
+ if (gitObs.commitScopePatterns && gitObs.commitScopePatterns.length > 0) {
925
+ lines.push(
926
+ `- **Common scopes**: ${gitObs.commitScopePatterns.join(", ")}`
927
+ );
928
+ }
929
+ }
930
+ if (gitObs.commitMessageSample) {
931
+ lines.push("- **Sample messages**:");
932
+ for (const msg of gitObs.commitMessageSample) {
933
+ lines.push(` - \`${msg}\``);
934
+ }
935
+ }
936
+ lines.push("");
937
+ }
938
+ if (slackObs.sampleTechnicalMessages && slackObs.sampleTechnicalMessages.length > 0) {
939
+ lines.push("---", "", "## Slack / Chat Voice (technical messages)", "");
940
+ for (const msg of slackObs.sampleTechnicalMessages) {
941
+ const truncated = msg.length > 200 ? msg.slice(0, 200) + "..." : msg;
942
+ lines.push(`> ${truncated}`, "");
943
+ }
944
+ }
945
+ lines.push("---", "", "## Docs Authored", "");
946
+ if (docsSignals.authoredDocs.length > 0) {
947
+ for (const doc of docsSignals.authoredDocs) {
948
+ lines.push(`- \`${doc}\``);
949
+ }
950
+ } else {
951
+ lines.push("_None found or docs-dir not provided._");
952
+ }
953
+ lines.push(
954
+ "",
955
+ "---",
956
+ "",
957
+ "<!-- Generated by @poltergeist-ai/cli -->",
958
+ `<!-- Run date: ${today} -->`
959
+ );
960
+ return lines.join("\n");
961
+ }
962
+ export { // Named ES-module exports of this file.
963
+ buildGhostMarkdown,
964
+ extractCodeStyleFromDiff,
965
+ extractDocsSignals,
966
+ extractGitHubSignals,
967
+ extractGitLabSignals,
968
+ extractGitSignals,
969
+ extractSlackSignals,
970
+ parseGitHubUrl,
971
+ slugify,
972
+ summariseCodeStyle,
973
+ summariseGit,
974
+ summariseReview as summariseGitHub, // alias: the same local binding as `summariseReview`, exported under a second name
975
+ summariseGitLab,
976
+ summariseReview, // also exported above as `summariseGitHub`
977
+ summariseSlack
978
+ };