tryassay 0.29.0 → 0.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/dist/cli.js +21 -0
  2. package/dist/cli.js.map +1 -1
  3. package/dist/commands/catalog-push.d.ts +7 -0
  4. package/dist/commands/catalog-push.js +47 -0
  5. package/dist/commands/catalog-push.js.map +1 -0
  6. package/dist/commands/generate.js +1 -0
  7. package/dist/commands/generate.js.map +1 -1
  8. package/dist/commands/harvest.d.ts +9 -0
  9. package/dist/commands/harvest.js +76 -0
  10. package/dist/commands/harvest.js.map +1 -0
  11. package/dist/lib/__tests__/learned-rules.test.d.ts +1 -0
  12. package/dist/lib/__tests__/learned-rules.test.js +260 -0
  13. package/dist/lib/__tests__/learned-rules.test.js.map +1 -0
  14. package/dist/lib/__tests__/pr-harvester-types.test.d.ts +1 -0
  15. package/dist/lib/__tests__/pr-harvester-types.test.js +43 -0
  16. package/dist/lib/__tests__/pr-harvester-types.test.js.map +1 -0
  17. package/dist/lib/__tests__/pr-harvester.test.d.ts +1 -0
  18. package/dist/lib/__tests__/pr-harvester.test.js +341 -0
  19. package/dist/lib/__tests__/pr-harvester.test.js.map +1 -0
  20. package/dist/lib/__tests__/rule-harvester.test.d.ts +1 -0
  21. package/dist/lib/__tests__/rule-harvester.test.js +526 -0
  22. package/dist/lib/__tests__/rule-harvester.test.js.map +1 -0
  23. package/dist/lib/learned-rules/category-map.d.ts +28 -0
  24. package/dist/lib/learned-rules/category-map.js +110 -0
  25. package/dist/lib/learned-rules/category-map.js.map +1 -0
  26. package/dist/lib/learned-rules/index.d.ts +107 -0
  27. package/dist/lib/learned-rules/index.js +198 -0
  28. package/dist/lib/learned-rules/index.js.map +1 -0
  29. package/dist/lib/learned-rules/learned-catalog.d.ts +62 -0
  30. package/dist/lib/learned-rules/learned-catalog.js +161 -0
  31. package/dist/lib/learned-rules/learned-catalog.js.map +1 -0
  32. package/dist/lib/learned-rules/pattern-extractor.d.ts +25 -0
  33. package/dist/lib/learned-rules/pattern-extractor.js +351 -0
  34. package/dist/lib/learned-rules/pattern-extractor.js.map +1 -0
  35. package/dist/lib/learned-rules/rule-codifier.d.ts +41 -0
  36. package/dist/lib/learned-rules/rule-codifier.js +138 -0
  37. package/dist/lib/learned-rules/rule-codifier.js.map +1 -0
  38. package/dist/lib/learned-rules/starter-catalog.d.ts +16 -0
  39. package/dist/lib/learned-rules/starter-catalog.js +402 -0
  40. package/dist/lib/learned-rules/starter-catalog.js.map +1 -0
  41. package/dist/lib/learned-rules/types.d.ts +196 -0
  42. package/dist/lib/learned-rules/types.js +9 -0
  43. package/dist/lib/learned-rules/types.js.map +1 -0
  44. package/dist/lib/learned-rules/validation-harness.d.ts +26 -0
  45. package/dist/lib/learned-rules/validation-harness.js +260 -0
  46. package/dist/lib/learned-rules/validation-harness.js.map +1 -0
  47. package/dist/lib/rule-harvester/diff-parser.d.ts +9 -0
  48. package/dist/lib/rule-harvester/diff-parser.js +77 -0
  49. package/dist/lib/rule-harvester/diff-parser.js.map +1 -0
  50. package/dist/lib/rule-harvester/file-selector.d.ts +10 -0
  51. package/dist/lib/rule-harvester/file-selector.js +59 -0
  52. package/dist/lib/rule-harvester/file-selector.js.map +1 -0
  53. package/dist/lib/rule-harvester/ground-truth.d.ts +19 -0
  54. package/dist/lib/rule-harvester/ground-truth.js +156 -0
  55. package/dist/lib/rule-harvester/ground-truth.js.map +1 -0
  56. package/dist/lib/rule-harvester/harvest.d.ts +26 -0
  57. package/dist/lib/rule-harvester/harvest.js +307 -0
  58. package/dist/lib/rule-harvester/harvest.js.map +1 -0
  59. package/dist/lib/rule-harvester/pr-discovery.d.ts +49 -0
  60. package/dist/lib/rule-harvester/pr-discovery.js +168 -0
  61. package/dist/lib/rule-harvester/pr-discovery.js.map +1 -0
  62. package/dist/lib/rule-harvester/pr-harvest.d.ts +53 -0
  63. package/dist/lib/rule-harvester/pr-harvest.js +326 -0
  64. package/dist/lib/rule-harvester/pr-harvest.js.map +1 -0
  65. package/dist/lib/rule-harvester/progress.d.ts +13 -0
  66. package/dist/lib/rule-harvester/progress.js +50 -0
  67. package/dist/lib/rule-harvester/progress.js.map +1 -0
  68. package/dist/lib/rule-harvester/reporter.d.ts +35 -0
  69. package/dist/lib/rule-harvester/reporter.js +46 -0
  70. package/dist/lib/rule-harvester/reporter.js.map +1 -0
  71. package/dist/lib/rule-harvester/rule-generalizer.d.ts +25 -0
  72. package/dist/lib/rule-harvester/rule-generalizer.js +135 -0
  73. package/dist/lib/rule-harvester/rule-generalizer.js.map +1 -0
  74. package/dist/lib/rule-harvester/scanner.d.ts +20 -0
  75. package/dist/lib/rule-harvester/scanner.js +37 -0
  76. package/dist/lib/rule-harvester/scanner.js.map +1 -0
  77. package/dist/sdk/api-client.d.ts +65 -0
  78. package/dist/sdk/api-client.js +41 -0
  79. package/dist/sdk/api-client.js.map +1 -0
  80. package/dist/sdk/forward-verify.d.ts +3 -1
  81. package/dist/sdk/forward-verify.js +138 -5
  82. package/dist/sdk/forward-verify.js.map +1 -1
  83. package/dist/sdk/index.d.ts +1 -1
  84. package/dist/sdk/types.d.ts +21 -0
  85. package/package.json +1 -1
@@ -0,0 +1,307 @@
1
import { execFileSync, execSync } from 'node:child_process';
import { readFile, rm } from 'node:fs/promises';
import { readdirSync, readFileSync, statSync } from 'node:fs';
import { join, relative } from 'node:path';
import { selectFiles } from './file-selector.js';
import { scanFile } from './scanner.js';
import { parseConfirmation, runTscCheck } from './ground-truth.js';
import { loadProgress, saveProgress, getFileState, setFileState, initRepo, } from './progress.js';
import { createRunReport, saveRunReport } from './reporter.js';
import { learnFromFinding, getLearnedRulesSummary } from '../learned-rules/index.js';
11
+ // ── Helpers ───────────────────────────────────────────────────
12
/**
 * Build the "owner/name" identifier for a repository.
 *
 * @param repo - Object exposing `owner` and `name`.
 * @returns The two fields joined by a forward slash.
 */
function repoKey(repo) {
    const { owner, name } = repo;
    return `${owner}/${name}`;
}
15
+ function log(config, msg) {
16
+ if (config.onLog) {
17
+ config.onLog(msg);
18
+ }
19
+ else {
20
+ console.log(msg);
21
+ }
22
+ }
23
/**
 * Shallow-clone a GitHub repository into `targetDir`.
 *
 * git is invoked through an argument vector rather than a shell command string,
 * so characters in the owner, repo name, or target path (quotes, `$`, backticks,
 * spaces) are passed to git literally and are never interpreted by a shell.
 *
 * @param repo - Object exposing `owner` and `name`.
 * @param targetDir - Directory the clone is written to.
 * @returns `true` when git exits successfully; `false` on any failure, including
 *          the 120-second timeout.
 */
function cloneRepo(repo, targetDir) {
    const url = `https://github.com/${repo.owner}/${repo.name}.git`;
    try {
        // `--` ends option parsing so a target path starting with '-' is not read as a flag.
        execFileSync('git', ['clone', '--depth', '1', '--', url, targetDir], {
            timeout: 120_000,
            stdio: 'pipe',
        });
        return true;
    }
    catch {
        return false;
    }
}
36
/**
 * Run `npm install --ignore-scripts` inside a cloned repository.
 *
 * @param repoDir - Working directory for the npm process.
 * @returns `true` when the command exits successfully, `false` when it fails or
 *          exceeds the 180-second timeout.
 */
function installDeps(repoDir) {
    let succeeded = true;
    try {
        execSync('npm install --ignore-scripts', {
            cwd: repoDir,
            timeout: 180_000,
            stdio: 'pipe',
        });
    }
    catch {
        succeeded = false;
    }
    return succeeded;
}
49
+ function listTsFiles(dir, base) {
50
+ const root = base ?? dir;
51
+ const results = [];
52
+ const TS_EXTENSIONS = new Set(['.ts', '.tsx', '.js', '.jsx']);
53
+ const SKIP_DIRS = new Set(['node_modules', '.git', 'dist']);
54
+ let entries;
55
+ try {
56
+ entries = readdirSync(dir);
57
+ }
58
+ catch {
59
+ return results;
60
+ }
61
+ for (const entry of entries) {
62
+ if (SKIP_DIRS.has(entry))
63
+ continue;
64
+ const fullPath = join(dir, entry);
65
+ let stat;
66
+ try {
67
+ stat = statSync(fullPath);
68
+ }
69
+ catch {
70
+ continue;
71
+ }
72
+ if (stat.isDirectory()) {
73
+ results.push(...listTsFiles(fullPath, root));
74
+ }
75
+ else if (stat.isFile()) {
76
+ const ext = entry.slice(entry.lastIndexOf('.'));
77
+ if (!TS_EXTENSIONS.has(ext))
78
+ continue;
79
+ let content = '';
80
+ try {
81
+ content = readFileSync(fullPath, 'utf-8');
82
+ }
83
+ catch {
84
+ // If unreadable, estimate 0 lines
85
+ }
86
+ const lineCount = content ? content.split('\n').length : 0;
87
+ const relPath = relative(root, fullPath);
88
+ results.push({ path: relPath, lineCount });
89
+ }
90
+ }
91
+ return results;
92
+ }
93
+ // ── Main Pipeline ─────────────────────────────────────────────
94
/** Upper bound, in milliseconds, for a single scanFile call. */
const SCAN_TIMEOUT_MS = 1_200_000;
/**
 * Run scanFile on `code`, rejecting with 'scanFile timeout' if it has not settled
 * within SCAN_TIMEOUT_MS. The timer is always cleared so finished scans do not
 * leave pending timers behind.
 */
async function scanWithTimeout(code) {
    let timer;
    try {
        const scanPromise = scanFile(code, 'typescript');
        const timeoutPromise = new Promise((_, reject) => {
            timer = setTimeout(() => reject(new Error('scanFile timeout')), SCAN_TIMEOUT_MS);
        });
        return await Promise.race([scanPromise, timeoutPromise]);
    }
    finally {
        if (timer)
            clearTimeout(timer);
    }
}
/** Keep 'critical' | 'high' | 'medium'; map every other severity (e.g. 'low') to 'medium'. */
function coerceSeverity(rawSeverity) {
    return rawSeverity === 'critical' || rawSeverity === 'high' || rawSeverity === 'medium'
        ? rawSeverity
        : 'medium';
}
/**
 * Delete a clone directory left behind by an earlier, interrupted run so that
 * `git clone` does not fail on an existing non-empty destination. Does nothing
 * when `repoDir` resolves to the work directory itself or to a path outside it.
 */
async function removeStaleClone(config, key, repoDir) {
    const rel = relative(config.workDir, repoDir);
    if (rel === '' || rel.startsWith('..'))
        return;
    try {
        await rm(repoDir, { recursive: true, force: true });
    }
    catch (err) {
        log(config, `[WARN] ${key} — rm failed: ${String(err)}`);
    }
}
/**
 * Harvest learned rules from a list of GitHub repositories.
 *
 * For each repository: clone it, install dependencies (best-effort), run the tsc
 * baseline, select files, scan each selected file, confirm every reported failure
 * against ground truth, and feed confirmed findings to the learned-rules catalog.
 * Progress is saved after every file, the clone is always removed afterwards, and
 * a run report is written and summarised at the end.
 *
 * Reads from `config`: `resume`, `progressDir`, `repoList`, `repoFilter`, `limit`,
 * `workDir`, `catalogPath`, `model`, `runsDir`, and the optional `onLog` sink.
 *
 * @param config - Harvest configuration.
 * @returns A promise that resolves once the report has been saved and the summary logged.
 */
export async function harvestRepos(config) {
    const startTime = Date.now();
    // 1. Load progress if resume=true
    let progress = {};
    if (config.resume) {
        progress = await loadProgress(config.progressDir);
        log(config, `Resumed progress: ${Object.keys(progress).length} repos tracked`);
    }
    // 2. Filter repos by repoFilter and limit
    let repos = config.repoList;
    if (config.repoFilter && config.repoFilter.length > 0) {
        const filterSet = new Set(config.repoFilter);
        repos = repos.filter((r) => filterSet.has(repoKey(r)));
    }
    if (config.limit !== undefined && config.limit > 0) {
        repos = repos.slice(0, config.limit);
    }
    log(config, `Processing ${repos.length} repos`);
    const allFileResults = [];
    for (const repo of repos) {
        const key = repoKey(repo);
        // 3. Skip completed repos on resume
        if (config.resume) {
            const repoProgress = progress[key];
            if (repoProgress?.status === 'complete') {
                log(config, `[SKIP] ${key} — already complete`);
                continue;
            }
        }
        log(config, `[START] ${key}`);
        const repoDir = join(config.workDir, repo.name);
        // 4a. Clone. A directory left by an interrupted run would make the clone fail
        // on every retry, so clear it first.
        await removeStaleClone(config, key, repoDir);
        const cloneOk = cloneRepo(repo, repoDir);
        if (!cloneOk) {
            log(config, `[FAIL] ${key} — clone failed`);
            continue;
        }
        log(config, `[CLONED] ${key}`);
        try {
            // 4b. Install deps (best-effort)
            const installOk = installDeps(repoDir);
            if (!installOk) {
                log(config, `[WARN] ${key} — npm install failed (continuing)`);
            }
            // 4c. Run tsc baseline
            const tscResult = runTscCheck(repoDir);
            const tscOutput = tscResult?.errors ?? null;
            log(config, `[TSC] ${key} — exit ${tscResult?.exitCode ?? 'null'}, ${tscOutput?.length ?? 0} chars`);
            // 4d. List all .ts/.tsx/.js/.jsx files
            const allFiles = listTsFiles(repoDir);
            log(config, `[FILES] ${key} — ${allFiles.length} files found`);
            // 4e. Select files
            const selectedFiles = selectFiles(allFiles);
            log(config, `[SELECTED] ${key} — ${selectedFiles.length} files to scan`);
            // Initialize repo in progress
            progress = initRepo(progress, key);
            // 4f. Scan each selected file
            for (const selectedFile of selectedFiles) {
                const absFilePath = join(repoDir, selectedFile.path);
                const fileState = getFileState(progress, key, selectedFile.path);
                // Skip already-learned/confirmed files on resume
                if (config.resume && (fileState === 'learned' || fileState === 'confirmed')) {
                    log(config, ` [SKIP] ${selectedFile.path} — ${fileState}`);
                    continue;
                }
                const fileStart = Date.now();
                const result = {
                    repo: key,
                    filePath: selectedFile.path,
                    claimsExtracted: 0,
                    confirmed: 0,
                    rulesLearned: 0,
                    rulesRejected: 0,
                    rulesDuplicate: 0,
                    unmatchedCategories: [],
                    durationMs: 0,
                };
                // Read file content
                let code;
                try {
                    code = await readFile(absFilePath, 'utf-8');
                }
                catch (err) {
                    log(config, ` [WARN] ${selectedFile.path} — unreadable: ${String(err)}`);
                    continue;
                }
                // Scan with timeout; a failed or timed-out scan is still recorded as 'scanned'
                let scanOutput;
                try {
                    scanOutput = await scanWithTimeout(code);
                }
                catch (err) {
                    log(config, ` [SCAN_ERR] ${selectedFile.path} — ${String(err)}`);
                    progress = setFileState(progress, key, selectedFile.path, 'scanned');
                    await saveProgress(config.progressDir, progress);
                    result.durationMs = Date.now() - fileStart;
                    allFileResults.push(result);
                    continue;
                }
                result.claimsExtracted = scanOutput.result.totalClaims;
                // Update progress to scanned
                progress = setFileState(progress, key, selectedFile.path, 'scanned');
                // For each FAIL/PARTIAL finding, confirm and learn
                for (const failure of scanOutput.result.failures) {
                    const claimForConfirmation = {
                        claimId: failure.claimId,
                        category: failure.category,
                        description: failure.description,
                        verdict: 'FAIL',
                    };
                    const groundTruth = parseConfirmation(code, claimForConfirmation, tscOutput, selectedFile.path);
                    if (!groundTruth.confirmed)
                        continue;
                    result.confirmed += 1;
                    progress = setFileState(progress, key, selectedFile.path, 'confirmed');
                    const extractionInput = {
                        claim: {
                            id: failure.claimId,
                            category: failure.category,
                            // low → medium (ExtractedPattern.severity doesn't include 'low')
                            severity: coerceSeverity(failure.severity),
                            description: failure.description,
                            assertion: failure.assertion,
                        },
                        verification: {
                            verdict: 'FAIL',
                            reasoning: failure.reasoning,
                        },
                        code,
                        language: 'typescript',
                        filePath: selectedFile.path,
                    };
                    let learnResult;
                    try {
                        learnResult = await learnFromFinding(config.catalogPath, extractionInput);
                    }
                    catch (err) {
                        log(config, ` [LEARN_ERR] ${selectedFile.path}:${failure.claimId} — ${String(err)}`);
                        result.rulesRejected += 1;
                        continue;
                    }
                    if (learnResult.learned) {
                        result.rulesLearned += 1;
                        progress = setFileState(progress, key, selectedFile.path, 'learned');
                        continue;
                    }
                    // Not learned: bucket by the reason text returned by the learner.
                    const reason = learnResult.reason ?? '';
                    if (reason.includes('No extraction strategy')) {
                        result.unmatchedCategories.push(failure.category);
                    }
                    else if (reason.includes('duplicate')) {
                        result.rulesDuplicate += 1;
                    }
                    else {
                        result.rulesRejected += 1;
                    }
                }
                result.durationMs = Date.now() - fileStart;
                allFileResults.push(result);
                // Save after each file (Ctrl+C safe)
                await saveProgress(config.progressDir, progress);
                log(config, ` [FILE] ${selectedFile.path} — ${result.claimsExtracted} claims, ${result.confirmed} confirmed, ${result.rulesLearned} learned (${result.durationMs}ms)`);
            }
        }
        finally {
            // 4g. Delete clone — also when a step above throws, so clones never accumulate.
            try {
                await rm(repoDir, { recursive: true, force: true });
                log(config, `[CLEANED] ${key}`);
            }
            catch (err) {
                log(config, `[WARN] ${key} — rm failed: ${String(err)}`);
            }
        }
        log(config, `[DONE] ${key}`);
    }
    // 5. Generate and save run report
    const catalogSummary = await getLearnedRulesSummary(config.catalogPath);
    const catalogSize = {
        total: catalogSummary.total,
        promoted: catalogSummary.promoted,
        rejected: catalogSummary.rejected,
    };
    const report = createRunReport(allFileResults, config.model, catalogSize);
    const reportPath = await saveRunReport(config.runsDir, report);
    // 6. Print summary
    const totalDurationMin = ((Date.now() - startTime) / 60_000).toFixed(1);
    log(config, '');
    log(config, '══════════════════════════════════════');
    log(config, ' HARVEST COMPLETE');
    log(config, '══════════════════════════════════════');
    log(config, ` Repos scanned: ${report.reposScanned}`);
    log(config, ` Files scanned: ${report.filesScanned}`);
    log(config, ` Claims extracted: ${report.claimsExtracted}`);
    log(config, ` Findings confirmed: ${report.findingsConfirmed}`);
    log(config, ` Rules learned: ${report.rulesLearned}`);
    log(config, ` Rules rejected: ${report.rulesRejected}`);
    log(config, ` Rules duplicate: ${report.rulesDuplicate}`);
    log(config, ` Catalog total: ${catalogSize.total} (${catalogSize.promoted} promoted)`);
    log(config, ` Duration: ${totalDurationMin}m`);
    log(config, ` Report: ${reportPath}`);
    log(config, '══════════════════════════════════════');
}
307
+ //# sourceMappingURL=harvest.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"harvest.js","sourceRoot":"","sources":["../../../src/lib/rule-harvester/harvest.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,kBAAkB,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAC9D,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAE3C,OAAO,EAAE,WAAW,EAAsB,MAAM,oBAAoB,CAAC;AACrE,OAAO,EAAE,QAAQ,EAAE,MAAM,cAAc,CAAC;AACxC,OAAO,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACnE,OAAO,EACL,YAAY,EACZ,YAAY,EACZ,YAAY,EACZ,YAAY,EACZ,QAAQ,GAET,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,eAAe,EAAE,aAAa,EAAuB,MAAM,eAAe,CAAC;AACpF,OAAO,EAAE,gBAAgB,EAAE,sBAAsB,EAAE,MAAM,2BAA2B,CAAC;AA0BrF,iEAAiE;AAEjE,SAAS,OAAO,CAAC,IAAqC;IACpD,OAAO,GAAG,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;AACtC,CAAC;AAED,SAAS,GAAG,CAAC,MAAqB,EAAE,GAAW;IAC7C,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;QACjB,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACpB,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACnB,CAAC;AACH,CAAC;AAED,SAAS,SAAS,CAChB,IAAqC,EACrC,SAAiB;IAEjB,MAAM,GAAG,GAAG,sBAAsB,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,IAAI,MAAM,CAAC;IAChE,IAAI,CAAC;QACH,QAAQ,CAAC,wBAAwB,GAAG,MAAM,SAAS,GAAG,EAAE;YACtD,OAAO,EAAE,OAAO;YAChB,KAAK,EAAE,MAAM;SACd,CAAC,CAAC;QACH,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAAC,OAAe;IAClC,IAAI,CAAC;QACH,QAAQ,CAAC,8BAA8B,EAAE;YACvC,GAAG,EAAE,OAAO;YACZ,OAAO,EAAE,OAAO;YAChB,KAAK,EAAE,MAAM;SACd,CAAC,CAAC;QACH,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAAC,GAAW,EAAE,IAAa;IAC7C,MAAM,IAAI,GAAG,IAAI,IAAI,GAAG,CAAC;IACzB,MAAM,OAAO,GAAoB,EAAE,CAAC;IACpC,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC;IAC9D,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,CAAC,cAAc,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IAE5D,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACH,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,IAAI,SAAS,CAAC,G
AAG,CAAC,KAAK,CAAC;YAAE,SAAS;QAEnC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAClC,IAAI,IAAI,CAAC;QACT,IAAI,CAAC;YACH,IAAI,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;QAED,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;YACvB,OAAO,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC;QAC/C,CAAC;aAAM,IAAI,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;YACzB,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;YAChD,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,SAAS;YAEtC,IAAI,OAAO,GAAG,EAAE,CAAC;YACjB,IAAI,CAAC;gBACH,OAAO,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAC5C,CAAC;YAAC,MAAM,CAAC;gBACP,kCAAkC;YACpC,CAAC;YAED,MAAM,SAAS,GAAG,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;YAC3D,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;YAEzC,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,iEAAiE;AAEjE,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,MAAqB;IACtD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,kCAAkC;IAClC,IAAI,QAAQ,GAAoB,EAAE,CAAC;IACnC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;QAClB,QAAQ,GAAG,MAAM,YAAY,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;QAClD,GAAG,CAAC,MAAM,EAAE,qBAAqB,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,MAAM,gBAAgB,CAAC,CAAC;IACjF,CAAC;IAED,0CAA0C;IAC1C,IAAI,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC;IAE5B,IAAI,MAAM,CAAC,UAAU,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtD,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QAC7C,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACzD,CAAC;IAED,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC;QACnD,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;IACvC,CAAC;IAED,GAAG,CAAC,MAAM,EAAE,cAAc,KAAK,CAAC,MAAM,QAAQ,CAAC,CAAC;IAEhD,MAAM,cAAc,GAAqB,EAAE,CAAC;IAE5C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;QAE1B,oCAAoC;QACpC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAClB,MAAM,YAAY,GAAG,
QAAQ,CAAC,GAAG,CAAC,CAAC;YACnC,IAAI,YAAY,EAAE,MAAM,KAAK,UAAU,EAAE,CAAC;gBACxC,GAAG,CAAC,MAAM,EAAE,UAAU,GAAG,qBAAqB,CAAC,CAAC;gBAChD,SAAS;YACX,CAAC;QACH,CAAC;QAED,GAAG,CAAC,MAAM,EAAE,WAAW,GAAG,EAAE,CAAC,CAAC;QAE9B,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;QAEhD,YAAY;QACZ,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACzC,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,GAAG,CAAC,MAAM,EAAE,UAAU,GAAG,iBAAiB,CAAC,CAAC;YAC5C,SAAS;QACX,CAAC;QAED,GAAG,CAAC,MAAM,EAAE,YAAY,GAAG,EAAE,CAAC,CAAC;QAE/B,iCAAiC;QACjC,MAAM,SAAS,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;QACvC,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,GAAG,CAAC,MAAM,EAAE,UAAU,GAAG,oCAAoC,CAAC,CAAC;QACjE,CAAC;QAED,uBAAuB;QACvB,MAAM,SAAS,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;QACvC,MAAM,SAAS,GAAG,SAAS,EAAE,MAAM,IAAI,IAAI,CAAC;QAC5C,GAAG,CACD,MAAM,EACN,SAAS,GAAG,WAAW,SAAS,EAAE,QAAQ,IAAI,MAAM,KAAK,SAAS,EAAE,MAAM,IAAI,CAAC,QAAQ,CACxF,CAAC;QAEF,uCAAuC;QACvC,MAAM,QAAQ,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;QACtC,GAAG,CAAC,MAAM,EAAE,WAAW,GAAG,MAAM,QAAQ,CAAC,MAAM,cAAc,CAAC,CAAC;QAE/D,mBAAmB;QACnB,MAAM,aAAa,GAAG,WAAW,CAAC,QAAQ,CAAC,CAAC;QAC5C,GAAG,CAAC,MAAM,EAAE,cAAc,GAAG,MAAM,aAAa,CAAC,MAAM,gBAAgB,CAAC,CAAC;QAEzE,8BAA8B;QAC9B,QAAQ,GAAG,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QAEnC,8BAA8B;QAC9B,KAAK,MAAM,YAAY,IAAI,aAAa,EAAE,CAAC;YACzC,MAAM,WAAW,GAAG,IAAI,CAAC,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC,CAAC;YACrD,MAAM,SAAS,GAAG,YAAY,CAAC,QAAQ,EAAE,GAAG,EAAE,YAAY,CAAC,IAAI,CAAC,CAAC;YAEjE,iDAAiD;YACjD,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,SAAS,KAAK,SAAS,IAAI,SAAS,KAAK,WAAW,CAAC,EAAE,CAAC;gBAC5E,GAAG,CAAC,MAAM,EAAE,YAAY,YAAY,CAAC,IAAI,MAAM,SAAS,EAAE,CAAC,CAAC;gBAC5D,SAAS;YACX,CAAC;YAED,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC7B,MAAM,MAAM,GAAmB;gBAC7B,IAAI,EAAE,GAAG;gBACT,QAAQ,EAAE,YAAY,CAAC,IAAI;gBAC3B,eAAe,EAAE,CAAC;gBAClB,SAAS,EAAE,CAAC;gBACZ,YAAY,EAAE,CAAC;gBACf,aAAa,EAAE,CAAC;gBAChB,cAAc,EAAE,CAAC;gBACjB,mBAAmB,EAAE,EAAE;gBACvB,UAAU,EAAE,CAAC;aACd,CAAC;YAEF,oBAAoB;YACpB,IAAI,IAAY,CAAC;YACjB,IAAI,CAAC;gBACH,IAAI,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;YAC9C,CAAC;YAAC,OAAO
,GAAG,EAAE,CAAC;gBACb,GAAG,CAAC,MAAM,EAAE,YAAY,YAAY,CAAC,IAAI,kBAAkB,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBAC1E,SAAS;YACX,CAAC;YAED,kFAAkF;YAClF,IAAI,UAAU,GAAgD,IAAI,CAAC;YACnE,IAAI,KAAgD,CAAC;YACrD,IAAI,CAAC;gBACH,MAAM,WAAW,GAAG,QAAQ,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;gBACjD,MAAM,cAAc,GAAG,IAAI,OAAO,CAAQ,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE;oBACtD,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,kBAAkB,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;gBAC7E,CAAC,CAAC,CAAC;gBACH,UAAU,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC,CAAC;YACjE,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,GAAG,CAAC,MAAM,EAAE,gBAAgB,YAAY,CAAC,IAAI,MAAM,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBAClE,QAAQ,GAAG,YAAY,CAAC,QAAQ,EAAE,GAAG,EAAE,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;gBACrE,MAAM,YAAY,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;gBACjD,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;gBAC3C,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAC5B,SAAS;YACX,CAAC;oBAAS,CAAC;gBACT,IAAI,KAAK;oBAAE,YAAY,CAAC,KAAK,CAAC,CAAC;YACjC,CAAC;YAED,MAAM,CAAC,eAAe,GAAG,UAAU,CAAC,MAAM,CAAC,WAAW,CAAC;YAEvD,6BAA6B;YAC7B,QAAQ,GAAG,YAAY,CAAC,QAAQ,EAAE,GAAG,EAAE,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAErE,mDAAmD;YACnD,KAAK,MAAM,OAAO,IAAI,UAAU,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;gBACjD,MAAM,oBAAoB,GAAG;oBAC3B,OAAO,EAAE,OAAO,CAAC,OAAO;oBACxB,QAAQ,EAAE,OAAO,CAAC,QAAQ;oBAC1B,WAAW,EAAE,OAAO,CAAC,WAAW;oBAChC,OAAO,EAAE,MAAM;iBAChB,CAAC;gBAEF,MAAM,WAAW,GAAG,iBAAiB,CACnC,IAAI,EACJ,oBAAoB,EACpB,SAAS,EACT,YAAY,CAAC,IAAI,CAClB,CAAC;gBAEF,IAAI,CAAC,WAAW,CAAC,SAAS;oBAAE,SAAS;gBAErC,MAAM,CAAC,SAAS,IAAI,CAAC,CAAC;gBACtB,QAAQ,GAAG,YAAY,CAAC,QAAQ,EAAE,GAAG,EAAE,YAAY,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;gBAEvE,kFAAkF;gBAClF,MAAM,WAAW,GAAG,OAAO,CAAC,QAAkB,CAAC;gBAC/C,MAAM,eAAe,GACnB,WAAW,KAAK,UAAU,IAAI,WAAW,KAAK,MAAM,IAAI,WAAW,KAAK,QAAQ;oBAC9E,CAAC,CAAC,WAAW;oBACb,CAAC,CAAC,QAAQ,CAAC;gBAEf,MAAM,eAAe,GAAG;oBACtB,KAAK,EAAE;wBACL,EAAE,EAAE,OAAO,CAAC,OAAO;wBACnB,QAAQ,EAAE,OAAO,CAAC,QAAQ;wBAC1B,QAAQ,EAAE,eAAe;wBACzB,WAAW,EAAE,OAAO,CAAC,WAAW;wBAChC,SAAS,EAAE,OAAO,CAAC,SAAS;qBAC7B;o
BACD,YAAY,EAAE;wBACZ,OAAO,EAAE,MAAe;wBACxB,SAAS,EAAE,OAAO,CAAC,SAAS;qBAC7B;oBACD,IAAI;oBACJ,QAAQ,EAAE,YAAY;oBACtB,QAAQ,EAAE,YAAY,CAAC,IAAI;iBAC5B,CAAC;gBAEF,IAAI,WAAW,CAAC;gBAChB,IAAI,CAAC;oBACH,WAAW,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,WAAW,EAAE,eAAe,CAAC,CAAC;gBAC5E,CAAC;gBAAC,OAAO,GAAG,EAAE,CAAC;oBACb,GAAG,CAAC,MAAM,EAAE,iBAAiB,YAAY,CAAC,IAAI,IAAI,OAAO,CAAC,OAAO,MAAM,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;oBACtF,MAAM,CAAC,aAAa,IAAI,CAAC,CAAC;oBAC1B,SAAS;gBACX,CAAC;gBAED,IAAI,WAAW,CAAC,OAAO,EAAE,CAAC;oBACxB,MAAM,CAAC,YAAY,IAAI,CAAC,CAAC;oBACzB,QAAQ,GAAG,YAAY,CAAC,QAAQ,EAAE,GAAG,EAAE,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;gBACvE,CAAC;qBAAM,CAAC;oBACN,MAAM,MAAM,GAAG,WAAW,CAAC,MAAM,IAAI,EAAE,CAAC;oBACxC,IAAI,MAAM,CAAC,QAAQ,CAAC,wBAAwB,CAAC,EAAE,CAAC;wBAC9C,MAAM,CAAC,mBAAmB,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;oBACpD,CAAC;yBAAM,IAAI,MAAM,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;wBACxC,MAAM,CAAC,cAAc,IAAI,CAAC,CAAC;oBAC7B,CAAC;yBAAM,CAAC;wBACN,MAAM,CAAC,aAAa,IAAI,CAAC,CAAC;oBAC5B,CAAC;gBACH,CAAC;YACH,CAAC;YAED,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YAC3C,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAE5B,qCAAqC;YACrC,MAAM,YAAY,CAAC,MAAM,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;YAEjD,GAAG,CACD,MAAM,EACN,YAAY,YAAY,CAAC,IAAI,MAAM,MAAM,CAAC,eAAe,YAAY,MAAM,CAAC,SAAS,eAAe,MAAM,CAAC,YAAY,aAAa,MAAM,CAAC,UAAU,KAAK,CAC3J,CAAC;QACJ,CAAC;QAED,mBAAmB;QACnB,IAAI,CAAC;YACH,MAAM,EAAE,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;YACpD,GAAG,CAAC,MAAM,EAAE,aAAa,GAAG,EAAE,CAAC,CAAC;QAClC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,MAAM,EAAE,UAAU,GAAG,iBAAiB,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,GAAG,CAAC,MAAM,EAAE,UAAU,GAAG,EAAE,CAAC,CAAC;IAC/B,CAAC;IAED,kCAAkC;IAClC,MAAM,cAAc,GAAG,MAAM,sBAAsB,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;IACxE,MAAM,WAAW,GAAG;QAClB,KAAK,EAAE,cAAc,CAAC,KAAK;QAC3B,QAAQ,EAAE,cAAc,CAAC,QAAQ;QACjC,QAAQ,EAAE,cAAc,CAAC,QAAQ;KAClC,CAAC;IAEF,MAAM,MAAM,GAAG,eAAe,CAAC,cAAc,EAAE,MAAM,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;IAC1E,MAAM,UAAU,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,OAAO,E
AAE,MAAM,CAAC,CAAC;IAE/D,mBAAmB;IACnB,MAAM,gBAAgB,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,GAAG,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAExE,GAAG,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAChB,GAAG,CAAC,MAAM,EAAE,wCAAwC,CAAC,CAAC;IACtD,GAAG,CAAC,MAAM,EAAE,oBAAoB,CAAC,CAAC;IAClC,GAAG,CAAC,MAAM,EAAE,wCAAwC,CAAC,CAAC;IACtD,GAAG,CAAC,MAAM,EAAE,yBAAyB,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;IAC5D,GAAG,CAAC,MAAM,EAAE,yBAAyB,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;IAC5D,GAAG,CAAC,MAAM,EAAE,yBAAyB,MAAM,CAAC,eAAe,EAAE,CAAC,CAAC;IAC/D,GAAG,CAAC,MAAM,EAAE,yBAAyB,MAAM,CAAC,iBAAiB,EAAE,CAAC,CAAC;IACjE,GAAG,CAAC,MAAM,EAAE,yBAAyB,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;IAC5D,GAAG,CAAC,MAAM,EAAE,yBAAyB,MAAM,CAAC,aAAa,EAAE,CAAC,CAAC;IAC7D,GAAG,CAAC,MAAM,EAAE,yBAAyB,MAAM,CAAC,cAAc,EAAE,CAAC,CAAC;IAC9D,GAAG,CAAC,MAAM,EAAE,yBAAyB,WAAW,CAAC,KAAK,KAAK,WAAW,CAAC,QAAQ,YAAY,CAAC,CAAC;IAC7F,GAAG,CAAC,MAAM,EAAE,yBAAyB,gBAAgB,GAAG,CAAC,CAAC;IAC1D,GAAG,CAAC,MAAM,EAAE,yBAAyB,UAAU,EAAE,CAAC,CAAC;IACnD,GAAG,CAAC,MAAM,EAAE,wCAAwC,CAAC,CAAC;AACxD,CAAC"}
@@ -0,0 +1,49 @@
1
+ import type { DiscoveredPR, ParsedPRDiff } from '../learned-rules/types.js';
2
+ /** Default labels that indicate a bug-fix PR. Matched case-insensitively as substrings. */
3
+ export declare const DEFAULT_BUG_LABELS: readonly string[];
4
+ /**
5
+ * Returns true if any label in the array contains a bug-related keyword.
6
+ * Matches case-insensitively. Accepts optional custom bug label list.
7
+ */
8
+ export declare function matchesBugLabels(labels: string[], bugLabels?: readonly string[]): boolean;
9
+ /**
10
+ * Returns true if the PR title contains a common bug-fix keyword.
11
+ * Looks for fix/bug/resolve/hotfix/patch as a whole word anywhere in the title, case-insensitively.
12
+ */
13
+ export declare function matchesBugTitle(title: string): boolean;
14
+ /**
15
+ * Returns true if the file path is a TypeScript, JavaScript, or Python source file
16
+ * that is NOT a test file or declaration file.
17
+ */
18
+ export declare function isTargetFile(path: string): boolean;
19
+ interface DiscoveryConfig {
20
+ /** Max PRs to fetch per page (GitHub max: 100). */
21
+ readonly perPage?: number;
22
+ /** Max pages to paginate through. */
23
+ readonly maxPages?: number;
24
+ /** Custom bug labels to match. Defaults to DEFAULT_BUG_LABELS. */
25
+ readonly bugLabels?: readonly string[];
26
+ /** Whether to fall back to title matching if no labels match. */
27
+ readonly titleFallback?: boolean;
28
+ /** Whether to fetch review comments for each PR. */
29
+ readonly fetchComments?: boolean;
30
+ }
31
+ /**
32
+ * Discover merged bug-fix PRs from a GitHub repo.
33
+ *
34
+ * Flow:
35
+ * 1. Fetch closed PRs (paginated)
36
+ * 2. Filter to merged-only (merged_at !== null)
37
+ * 3. Filter by bug labels (with optional title fallback)
38
+ * 4. Optionally enrich with review comments
39
+ * 5. Collect changed file paths
40
+ */
41
+ export declare function discoverBugFixPRs(owner: string, name: string, config?: DiscoveryConfig): DiscoveredPR[];
42
+ /**
43
+ * Fetch and parse diffs for a discovered PR.
44
+ *
45
+ * Calls the files endpoint, filters to target files,
46
+ * and parses each file's patch into typed DiffHunks.
47
+ */
48
+ export declare function fetchPRDiffs(owner: string, name: string, pr: DiscoveredPR): ParsedPRDiff;
49
+ export {};
@@ -0,0 +1,168 @@
1
import { execFileSync, execSync } from 'node:child_process';
import { parsePatch } from './diff-parser.js';
3
+ // ── Constants ──────────────────────────────────────────────────
4
/** Default labels that indicate a bug-fix PR. Matched case-insensitively as substrings. */
export const DEFAULT_BUG_LABELS = [
    'bug',
    'fix',
    'bugfix',
    'hotfix',
    'security',
    'patch',
    'defect',
    'regression',
];
/** File extensions we mine patterns from (TypeScript, JavaScript, Python). */
const TARGET_EXTENSIONS = /\.(tsx?|jsx?|py)$/;
/**
 * Paths that indicate test files — excluded from mining.
 *
 * The `test/` / `tests/` directory alternative and the `test_*.py` file
 * alternative are anchored to the start of a path segment (start of string
 * or right after a `/` or `\`). Without that anchor, ordinary source paths
 * such as `src/latest/index.ts`, `src/contest/rank.js`, or
 * `pkg/latest_news.py` were misclassified as tests because they merely
 * END in "test/" or contain "test_".
 */
const TEST_PATH_PATTERNS = /\.(test|spec)\.[tj]sx?$|__tests__[/\\]|(?:^|[/\\])tests?[/\\]|_test\.py$|(?:^|[/\\])test_[^/\\]+\.py$/;
/** Declaration files — excluded from mining. */
const DECLARATION_PATTERN = /\.d\.ts$|\.pyi$/;
21
+ // ── Pure Filter Functions (unit-testable) ──────────────────────
22
/**
 * Returns true if any label contains one of the bug-related keywords as a substring.
 *
 * Both sides are lower-cased before comparison, so a custom keyword list
 * such as `['Bug']` behaves the same as `['bug']`. Empty keywords are
 * ignored: the empty string is a substring of every label and would
 * otherwise make every labelled PR count as a bug fix.
 *
 * @param labels - Label names attached to the PR.
 * @param bugLabels - Keywords to look for; defaults to DEFAULT_BUG_LABELS.
 * @returns true when at least one label contains at least one keyword.
 */
export function matchesBugLabels(labels, bugLabels = DEFAULT_BUG_LABELS) {
    const keywords = bugLabels
        .map((keyword) => keyword.toLowerCase())
        .filter((keyword) => keyword.length > 0);
    if (keywords.length === 0)
        return false;
    return labels.some((label) => {
        const lowered = label.toLowerCase();
        return keywords.some((keyword) => lowered.includes(keyword));
    });
}
30
/**
 * Returns true if the PR title contains a bug-fix keyword.
 *
 * The keyword may appear anywhere in the title (not only at the start or
 * after a conventional-commit prefix) but must be a whole word, so
 * "prefix", "debug", and "fixtures" do not match. Matching is
 * case-insensitive and covers the common inflections seen in PR titles:
 * fix/fixes/fixed/fixing, bug/bugs/bugfix/bugfixes,
 * resolve/resolves/resolved/resolving, hotfix/hotfixes,
 * patch/patches/patched/patching.
 *
 * @param title - The pull request title.
 * @returns true when the title looks like a bug fix.
 */
export function matchesBugTitle(title) {
    return /\b(?:fix(?:e[sd]|ing)?|bug(?:s|fix(?:es)?)?|resolv(?:e[sd]?|ing)|hotfix(?:es)?|patch(?:e[sd]|ing)?)\b/i.test(title);
}
37
/**
 * Decides whether a changed file should be mined for patterns.
 *
 * A path qualifies when it ends in one of the extensions accepted by
 * TARGET_EXTENSIONS (TypeScript, JavaScript, or Python) and is matched by
 * neither TEST_PATH_PATTERNS nor DECLARATION_PATTERN.
 *
 * @param path - File path to classify.
 * @returns true for a minable source file, false otherwise.
 */
export function isTargetFile(path) {
    return (TARGET_EXTENSIONS.test(path) &&
        !TEST_PATH_PATTERNS.test(path) &&
        !DECLARATION_PATTERN.test(path));
}
50
/**
 * Call the `gh api` CLI with `--paginate` and return the parsed JSON output.
 * Relies on gh's built-in auth and rate-limit handling.
 *
 * The endpoint is handed to `gh` as a single argv entry through
 * execFileSync, so no shell parses it. Endpoints are built from the
 * caller-supplied owner and repo name; with the previous shell command
 * string, a `"`, `$`, or backtick in those values would have been
 * interpreted as shell syntax.
 *
 * @param endpoint - GitHub REST endpoint path, e.g. `/repos/o/r/pulls/1/files`.
 * @returns The value produced by JSON.parse on gh's stdout.
 * @throws If `gh` cannot be run, exits non-zero, exceeds the 60 s timeout
 *   or the 50 MB output buffer, or prints output that is not valid JSON.
 */
function ghApi(endpoint) {
    const raw = execFileSync('gh', ['api', endpoint, '--paginate'], {
        encoding: 'utf-8',
        maxBuffer: 50 * 1024 * 1024, // 50 MB — large repos can have big diffs
        timeout: 60_000,
    });
    return JSON.parse(raw);
}
62
/**
 * Fetch a single page from the GitHub API (no --paginate) and return the parsed JSON.
 *
 * The endpoint is handed to `gh` as a single argv entry through
 * execFileSync, so no shell parses it. Endpoints are built from the
 * caller-supplied owner and repo name; with the previous shell command
 * string, a `"`, `$`, or backtick in those values would have been
 * interpreted as shell syntax.
 *
 * @param endpoint - GitHub REST endpoint path including any query string.
 * @returns The value produced by JSON.parse on gh's stdout.
 * @throws If `gh` cannot be run, exits non-zero, exceeds the 60 s timeout
 *   or the 50 MB output buffer, or prints output that is not valid JSON.
 */
function ghApiPage(endpoint) {
    const raw = execFileSync('gh', ['api', endpoint], {
        encoding: 'utf-8',
        maxBuffer: 50 * 1024 * 1024,
        timeout: 60_000,
    });
    return JSON.parse(raw);
}
73
/**
 * Find merged pull requests in `owner/name` that look like bug fixes.
 *
 * For each page of closed PRs (most recently updated first):
 *   1. Request the page; stop paginating if the request throws or the page is empty.
 *   2. Skip PRs that lack `merged_at` or `merge_commit_sha`.
 *   3. Keep a PR when its labels match `bugLabels`, or — when `titleFallback`
 *      is enabled — when its title matches matchesBugTitle.
 *   4. Collect the changed file names (left empty if that request fails).
 *   5. When `fetchComments` is enabled, attach review comments (left empty on failure).
 *
 * @param owner - Repository owner.
 * @param name - Repository name.
 * @param config - Optional settings: `perPage` (default 30), `maxPages`
 *   (default 3), `bugLabels` (default DEFAULT_BUG_LABELS), `titleFallback`
 *   (default true), `fetchComments` (default true).
 * @returns The discovered PRs, in the order they were encountered.
 */
export function discoverBugFixPRs(owner, name, config = {}) {
    const {
        perPage = 30,
        maxPages = 3,
        bugLabels = DEFAULT_BUG_LABELS,
        titleFallback = true,
        fetchComments = true,
    } = config;
    const repoApiBase = `/repos/${owner}/${name}`;
    const results = [];
    let pageNo = 1;
    while (pageNo <= maxPages) {
        let batch;
        try {
            batch = ghApiPage(`${repoApiBase}/pulls?state=closed&sort=updated&direction=desc&per_page=${perPage}&page=${pageNo}`);
        }
        catch {
            // Auth, network, or any other failure ends pagination; PRs found so far are kept.
            break;
        }
        if (!batch || batch.length === 0)
            break;
        for (const pull of batch) {
            // Skip PRs lacking a merge timestamp or merge commit (treated as not merged).
            const isMerged = Boolean(pull.merged_at) && Boolean(pull.merge_commit_sha);
            if (!isMerged)
                continue;
            const labelNames = pull.labels.map((label) => label.name);
            const labelHit = matchesBugLabels(labelNames, bugLabels);
            const titleHit = titleFallback && matchesBugTitle(pull.title);
            if (!(labelHit || titleHit))
                continue;
            const pullApiBase = `${repoApiBase}/pulls/${pull.number}`;
            // Changed file names; the PR is kept even when this lookup fails.
            let files = [];
            try {
                files = ghApi(`${pullApiBase}/files`).map((entry) => entry.filename);
            }
            catch {
                // Leave `files` empty.
            }
            // Review comments are best-effort enrichment.
            let reviewComments = [];
            if (fetchComments) {
                try {
                    reviewComments = ghApi(`${pullApiBase}/comments`).map((comment) => ({
                        body: comment.body,
                        path: comment.path,
                        line: comment.line ?? comment.original_line ?? null,
                    }));
                }
                catch {
                    // Leave `reviewComments` empty.
                }
            }
            results.push({
                repo: `${owner}/${name}`,
                prNumber: pull.number,
                title: pull.title,
                labels: labelNames,
                mergeCommit: pull.merge_commit_sha,
                reviewComments,
                files,
            });
        }
        pageNo += 1;
    }
    return results;
}
144
/**
 * Download the changed files of a discovered PR and turn their patches into hunks.
 *
 * Files rejected by isTargetFile and files without a `patch` field are
 * skipped. If the API call or the parsing throws, the hunks collected up
 * to that point (possibly none) are returned instead of propagating the error.
 *
 * @param owner - Repository owner.
 * @param name - Repository name.
 * @param pr - The discovered PR; only `prNumber` is read here.
 * @returns An object holding the same `pr` and the collected `hunks`.
 */
export function fetchPRDiffs(owner, name, pr) {
    let collected = [];
    try {
        const changedFiles = ghApi(`/repos/${owner}/${name}/pulls/${pr.prNumber}/files`);
        for (const entry of changedFiles) {
            if (isTargetFile(entry.filename) && entry.patch) {
                collected = collected.concat(parsePatch(entry.filename, entry.patch));
            }
        }
    }
    catch {
        // Best effort: fall through with whatever was collected before the failure.
    }
    return { pr, hunks: collected };
}
168
+ //# sourceMappingURL=pr-discovery.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pr-discovery.js","sourceRoot":"","sources":["../../../src/lib/rule-harvester/pr-discovery.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAE9C,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAE9C,kEAAkE;AAElE,2FAA2F;AAC3F,MAAM,CAAC,MAAM,kBAAkB,GAAsB;IACnD,KAAK;IACL,KAAK;IACL,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,OAAO;IACP,QAAQ;IACR,YAAY;CACJ,CAAC;AAEX,6CAA6C;AAC7C,MAAM,iBAAiB,GAAG,mBAAmB,CAAC;AAE9C,6DAA6D;AAC7D,MAAM,kBAAkB,GAAG,iFAAiF,CAAC;AAE7G,gDAAgD;AAChD,MAAM,mBAAmB,GAAG,iBAAiB,CAAC;AAE9C,kEAAkE;AAElE;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAC9B,MAAgB,EAChB,YAA+B,kBAAkB;IAEjD,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;IACjD,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAC1B,SAAS,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CACrD,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,KAAa;IAC3C,OAAO,qCAAqC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AAC3D,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAChD,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAChD,IAAI,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IACjD,OAAO,IAAI,CAAC;AACd,CAAC;AAyCD;;;GAGG;AACH,SAAS,KAAK,CAAI,QAAgB;IAChC,MAAM,GAAG,GAAG,QAAQ,CAAC,WAAW,QAAQ,cAAc,EAAE;QACtD,QAAQ,EAAE,OAAO;QACjB,SAAS,EAAE,EAAE,GAAG,IAAI,GAAG,IAAI,EAAE,yCAAyC;QACtE,OAAO,EAAE,MAAM;KAChB,CAAC,CAAC;IACH,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAM,CAAC;AAC9B,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAI,QAAgB;IACpC,MAAM,GAAG,GAAG,QAAQ,CAAC,WAAW,QAAQ,GAAG,EAAE;QAC3C,QAAQ,EAAE,OAAO;QACjB,SAAS,EAAE,EAAE,GAAG,IAAI,GAAG,IAAI;QAC3B,OAAO,EAAE,MAAM;KAChB,CAAC,CAAC;IACH,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAM,CAAC;AAC9B,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,iBAAiB,CAC/B,KAAa,EACb,IAAY,EACZ,SAA0B,EAAE;IAE5B,MAAM,EACJ,OAAO,GAAG,EAAE,EACZ,QAAQ,GAAG,CAAC,EACZ,SAAS,GAAG,kBAAkB,EAC9B,aAAa,GAAG,IAAI,EACpB,aAAa,GAAG,IAAI,GACrB,GAAG,MAAM,CAAC;IAEX,MAAM,UAAU,GAAmB,EAAE,CAAC;IAEtC,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE
,IAAI,IAAI,QAAQ,EAAE,IAAI,EAAE,EAAE,CAAC;QAC5C,MAAM,QAAQ,GACZ,UAAU,KAAK,IAAI,IAAI,4DAA4D,OAAO,SAAS,IAAI,EAAE,CAAC;QAE5G,IAAI,GAAe,CAAC;QACpB,IAAI,CAAC;YACH,GAAG,GAAG,SAAS,CAAa,QAAQ,CAAC,CAAC;QACxC,CAAC;QAAC,MAAM,CAAC;YACP,+DAA+D;YAC/D,MAAM;QACR,CAAC;QAED,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC;YAAE,MAAM;QAEpC,KAAK,MAAM,EAAE,IAAI,GAAG,EAAE,CAAC;YACrB,kBAAkB;YAClB,IAAI,CAAC,EAAE,CAAC,SAAS,IAAI,CAAC,EAAE,CAAC,gBAAgB;gBAAE,SAAS;YAEpD,MAAM,UAAU,GAAG,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAChD,MAAM,WAAW,GAAG,gBAAgB,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC;YAC5D,MAAM,WAAW,GAAG,aAAa,IAAI,eAAe,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC;YAE/D,IAAI,CAAC,WAAW,IAAI,CAAC,WAAW;gBAAE,SAAS;YAE3C,8BAA8B;YAC9B,IAAI,KAAK,GAAa,EAAE,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,KAAK,CACnB,UAAU,KAAK,IAAI,IAAI,UAAU,EAAE,CAAC,MAAM,QAAQ,CACnD,CAAC;gBACF,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;YACzC,CAAC;YAAC,MAAM,CAAC;gBACP,+DAA+D;YACjE,CAAC;YAED,qCAAqC;YACrC,IAAI,cAAc,GAAsB,EAAE,CAAC;YAC3C,IAAI,aAAa,EAAE,CAAC;gBAClB,IAAI,CAAC;oBACH,MAAM,QAAQ,GAAG,KAAK,CACpB,UAAU,KAAK,IAAI,IAAI,UAAU,EAAE,CAAC,MAAM,WAAW,CACtD,CAAC;oBACF,cAAc,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;wBACpC,IAAI,EAAE,CAAC,CAAC,IAAI;wBACZ,IAAI,EAAE,CAAC,CAAC,IAAI;wBACZ,IAAI,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,aAAa,IAAI,IAAI;qBACxC,CAAC,CAAC,CAAC;gBACN,CAAC;gBAAC,MAAM,CAAC;oBACP,0CAA0C;gBAC5C,CAAC;YACH,CAAC;YAED,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI,EAAE,GAAG,KAAK,IAAI,IAAI,EAAE;gBACxB,QAAQ,EAAE,EAAE,CAAC,MAAM;gBACnB,KAAK,EAAE,EAAE,CAAC,KAAK;gBACf,MAAM,EAAE,UAAU;gBAClB,WAAW,EAAE,EAAE,CAAC,gBAAgB;gBAChC,cAAc;gBACd,KAAK;aACN,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,YAAY,CAC1B,KAAa,EACb,IAAY,EACZ,EAAgB;IAEhB,IAAI,QAAQ,GAAe,EAAE,CAAC;IAE9B,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,KAAK,CACjB,UAAU,KAAK,IAAI,IAAI,UAAU,EAAE,CAAC,QAAQ,QAAQ,CACrD,CAAC;QAEF,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC;gB
AAE,SAAS;YAC3C,IAAI,CAAC,IAAI,CAAC,KAAK;gBAAE,SAAS;YAE1B,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;YACpD,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACpC,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,uCAAuC;IACzC,CAAC;IAED,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACjC,CAAC"}
@@ -0,0 +1,53 @@
1
+ /**
2
+ * PR Harvest Orchestrator — ties discovery -> extraction -> generalization -> catalog
3
+ * into a complete pipeline for mining bug-fix PRs from public GitHub repos.
4
+ *
5
+ * PR-sourced rules are validated through the validation harness before promotion.
6
+ * Fuzzy deduplication via Dice coefficient prevents near-duplicate rules.
7
+ */
8
+ import type { LearnedRule } from '../learned-rules/types.js';
9
+ /** Per-repo set of processed PR numbers. */
10
+ export type PRProgress = Record<string, number[]>;
11
+ export declare function createPRProgress(): PRProgress;
12
+ export declare function markPRProcessed(progress: PRProgress, repo: string, prNumber: number): PRProgress;
13
+ export declare function isPRProcessed(progress: PRProgress, repo: string, prNumber: number): boolean;
14
+ export declare function getPRProgressStats(progress: PRProgress, repo: string): {
15
+ processedPRs: number;
16
+ };
17
+ export declare function loadPRProgress(dir: string): Promise<PRProgress>;
18
+ export declare function savePRProgress(dir: string, progress: PRProgress): Promise<void>;
19
+ /**
20
+ * Dice coefficient (bigram overlap) for fuzzy string similarity.
21
+ * Returns a value between 0 (no overlap) and 1 (identical).
22
+ */
23
+ export declare function diceCoefficient(a: string, b: string): number;
24
+ /**
25
+ * Find the most similar existing rule by description, if above threshold.
26
+ */
27
+ export declare function findFuzzyDuplicate(existingRules: LearnedRule[], description: string, category: string): LearnedRule | null;
28
+ export interface PRHarvestConfig {
29
+ /** List of repos to mine PRs from. */
30
+ repoList: Array<{
31
+ owner: string;
32
+ name: string;
33
+ }>;
34
+ /** Assay project root where rules go. */
35
+ catalogPath: string;
36
+ /** Where pr-progress.json lives. */
37
+ progressDir: string;
38
+ /** Where run reports go. */
39
+ runsDir: string;
40
+ /** Model name for reporting. */
41
+ model: string;
42
+ /** Max repos to process. */
43
+ limit?: number;
44
+ /** Only scan these repos (owner/name format). */
45
+ repoFilter?: string[];
46
+ /** Skip already-processed PRs. */
47
+ resume?: boolean;
48
+ /** Max PRs per repo. */
49
+ maxPRsPerRepo?: number;
50
+ /** Logging callback. */
51
+ onLog?: (msg: string) => void;
52
+ }
53
+ export declare function harvestPRs(config: PRHarvestConfig): Promise<void>;