@oss-autopilot/core 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,125 @@
1
+ import { paginateAll } from './pagination.js';
2
+ import { isBotAuthor } from './comment-utils.js';
3
+ import { parseGitHubUrl } from './urls.js';
4
+ import { ValidationError, errorMessage } from './errors.js';
5
+ import { debug, warn } from './logger.js';
6
+ const MODULE = 'pr-comments-fetcher';
7
+ /** Default concurrency for {@link fetchPRCommentBundlesBatch}. */
8
+ const DEFAULT_BATCH_CONCURRENCY = 3;
9
/**
 * Collect the reviewer-authored comments of one pull request.
 *
 * Loads the PR itself plus its reviews, inline review comments, and
 * issue-thread comments, then discards every entry written by
 * `githubUsername` (compared case-insensitively), by an author that
 * `isBotAuthor` flags, or by an author GitHub reports without a login.
 *
 * @param octokit - Client exposing `pulls.get`, `pulls.listReviews`,
 *   `pulls.listReviewComments` and `issues.listComments`.
 * @param prUrl - URL of the pull request to read.
 * @param githubUsername - Login whose own comments are excluded.
 * @returns An object with `prUrl`, `prTitle`, `repo` (`owner/repo`),
 *   `mergedAt` (merge timestamp, else close timestamp, else `''`) and the
 *   filtered `reviews`, `reviewComments` and `issueComments` lists.
 * @throws {ValidationError} When `prUrl` does not parse as a pull-request URL.
 */
export async function fetchPRCommentBundle(octokit, prUrl, githubUsername) {
    const target = parseGitHubUrl(prUrl);
    if (!(target && target.type === 'pull')) {
        throw new ValidationError(`Invalid PR URL: ${prUrl}`);
    }
    const { owner, repo } = target;
    const pullNumber = target.number;
    // Start all four requests before awaiting any of them so they run in
    // parallel. Every page of each comment stream is requested: the corpus
    // needs every reviewer voice, not only the first 100 entries.
    const prRequest = octokit.pulls.get({ owner, repo, pull_number: pullNumber });
    const reviewsRequest = paginateAll((page) => octokit.pulls.listReviews({
        owner,
        repo,
        pull_number: pullNumber,
        per_page: 100,
        page,
    }));
    const reviewCommentsRequest = paginateAll((page) => octokit.pulls.listReviewComments({
        owner,
        repo,
        pull_number: pullNumber,
        per_page: 100,
        page,
    }));
    const issueCommentsRequest = paginateAll((page) => octokit.issues.listComments({
        owner,
        repo,
        issue_number: pullNumber,
        per_page: 100,
        page,
    }));
    const [prResponse, allReviews, allReviewComments, allIssueComments] = await Promise.all([
        prRequest,
        reviewsRequest,
        reviewCommentsRequest,
        issueCommentsRequest,
    ]);
    const pr = prResponse.data;
    const selfLogin = githubUsername.toLowerCase();
    // An entry is corpus-worthy only when it has an author login, that login
    // is not the user's own, and the author is not a bot.
    const isWorthKeeping = (login) => Boolean(login) && login.toLowerCase() !== selfLogin && !isBotAuthor(login);
    // Shared pipeline for the three streams: drop unwanted authors, then
    // project each surviving entry into its output shape.
    const keepAndShape = (entries, shape) => entries
        .filter((entry) => isWorthKeeping(entry.user?.login))
        .map((entry) => shape(entry));
    const toReview = (review) => ({
        author: review.user?.login ?? '',
        authorAssociation: review.author_association ?? 'NONE',
        body: review.body ?? '',
        submittedAt: review.submitted_at ?? '',
    });
    const toReviewComment = (comment) => ({
        author: comment.user?.login ?? '',
        authorAssociation: comment.author_association ?? 'NONE',
        body: comment.body ?? '',
        path: comment.path ?? '',
        createdAt: comment.created_at ?? '',
    });
    const toIssueComment = (comment) => ({
        author: comment.user?.login ?? '',
        authorAssociation: comment.author_association ?? 'NONE',
        body: comment.body ?? '',
        createdAt: comment.created_at ?? '',
    });
    return {
        prUrl,
        prTitle: pr.title,
        repo: `${owner}/${repo}`,
        mergedAt: pr.merged_at ?? pr.closed_at ?? '',
        reviews: keepAndShape(allReviews, toReview),
        reviewComments: keepAndShape(allReviewComments, toReviewComment),
        issueComments: keepAndShape(allIssueComments, toIssueComment),
    };
}
95
/**
 * Fetch comment bundles for many PRs with a small concurrency cap (default 3).
 *
 * Failures on individual PRs are logged and skipped — the batch returns a
 * shorter array rather than aborting, so a single 404 / rate limit / invalid
 * URL never costs the caller the bundles of the remaining PRs.
 *
 * Guarantees:
 * - Every entry of `prUrls` is attempted. Empty or otherwise invalid entries
 *   are passed to {@link fetchPRCommentBundle}, whose error is logged, rather
 *   than silently stopping a worker.
 * - Returned bundles follow the order of `prUrls` (minus the skipped ones),
 *   independent of which request finishes first.
 * - A non-positive, fractional or NaN `concurrency` is normalised to a whole
 *   number of at least 1, so a bad value cannot silently produce an empty
 *   result.
 *
 * @param octokit - Client forwarded to {@link fetchPRCommentBundle}.
 * @param prUrls - Pull-request URLs to fetch; snapshotted on entry.
 * @param githubUsername - Login whose own comments are excluded.
 * @param concurrency - Maximum number of PRs fetched at the same time.
 * @returns The bundles that were fetched successfully, in input order.
 */
export async function fetchPRCommentBundlesBatch(octokit, prUrls, githubUsername, concurrency = DEFAULT_BATCH_CONCURRENCY) {
    // Snapshot the input so a caller mutating its array mid-flight cannot
    // change which URLs are processed.
    const urls = [...prUrls];
    const total = urls.length;
    // One slot per input URL keeps the output in input order; slots of failed
    // PRs stay undefined and are dropped at the end.
    const slots = new Array(total);
    let cursor = 0;
    async function worker() {
        // The bounds check and the increment run without an intervening await,
        // so two workers can never claim the same index.
        while (cursor < total) {
            const index = cursor++;
            const url = urls[index];
            try {
                slots[index] = await fetchPRCommentBundle(octokit, url, githubUsername);
            }
            catch (err) {
                warn(MODULE, `Skipping ${url}: ${errorMessage(err)}`);
            }
        }
    }
    // `|| 1` covers 0 and NaN, `Math.max` covers negatives; Infinity is capped
    // by `total`.
    const workerCount = Math.min(total, Math.max(1, Math.floor(concurrency) || 1));
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    const results = slots.filter((bundle) => bundle !== undefined);
    debug(MODULE, `Fetched ${results.length}/${total} comment bundles`);
    return results;
}
@@ -34,6 +34,12 @@ export declare function migrateV1ToV2(rawState: Record<string, unknown>): Record
34
34
  * New optional fields are handled by Zod defaults (undefined/optional).
35
35
  */
36
36
  export declare function migrateV2ToV3(rawState: Record<string, unknown>): Record<string, unknown>;
37
+ /**
38
+ * Migrate state from v3 to v4 (#867).
39
+ * Adds: commentsFetchedAt on StoredMergedPR / StoredClosedPR. The new field is
40
+ * optional, so no data transformation is needed — only the version bump.
41
+ */
42
+ export declare function migrateV3ToV4(rawState: Record<string, unknown>): Record<string, unknown>;
37
43
  /**
38
44
  * Create a fresh state (v4).
39
45
  * Leverages Zod schema defaults to produce a complete state.
@@ -158,12 +158,23 @@ export function migrateV2ToV3(rawState) {
158
158
  debug(MODULE, 'v2 to v3 migration complete.');
159
159
  return rawState;
160
160
  }
161
/**
 * Upgrade a raw v3 state object to schema v4 (#867).
 *
 * v4 introduces the optional `commentsFetchedAt` field on StoredMergedPR /
 * StoredClosedPR. Because the field is optional, existing records need no
 * rewriting — only the version marker changes.
 *
 * @param rawState - Parsed state object; it is mutated in place.
 * @returns The same `rawState` object, now carrying `version: 4`.
 */
export function migrateV3ToV4(rawState) {
    debug(MODULE, 'Migrating state from v3 to v4 (add commentsFetchedAt to stored PR records)...');
    const upgraded = Object.assign(rawState, { version: 4 });
    debug(MODULE, 'v3 to v4 migration complete (no data transformation required).');
    return upgraded;
}
161
172
  /**
162
173
  * Create a fresh state (v4).
163
174
  * Leverages Zod schema defaults to produce a complete state.
164
175
  */
165
176
  export function createFreshState() {
166
- return AgentStateSchema.parse({ version: 3 });
177
+ return AgentStateSchema.parse({ version: 4 });
167
178
  }
168
179
  /**
169
180
  * Migrate state from legacy ./data/ location to ~/.oss-autopilot/.
@@ -273,6 +284,9 @@ function tryRestoreFromBackup() {
273
284
  if (raw.version === 2) {
274
285
  raw = migrateV2ToV3(raw);
275
286
  }
287
+ if (raw.version === 3) {
288
+ raw = migrateV3ToV4(raw);
289
+ }
276
290
  }
277
291
  const parsed = AgentStateSchema.safeParse(raw);
278
292
  if (parsed.success) {
@@ -313,7 +327,7 @@ export function loadState() {
313
327
  if (fs.existsSync(statePath)) {
314
328
  const data = fs.readFileSync(statePath, 'utf-8');
315
329
  let raw = JSON.parse(data);
316
- // Chain migrations: v1 → v2 → v3
330
+ // Chain migrations: v1 → v2 → v3 → v4
317
331
  let wasMigrated = false;
318
332
  if (typeof raw === 'object' && raw !== null) {
319
333
  const rawObj = raw;
@@ -325,6 +339,10 @@ export function loadState() {
325
339
  raw = migrateV2ToV3(raw);
326
340
  wasMigrated = true;
327
341
  }
342
+ if (raw.version === 3) {
343
+ raw = migrateV3ToV4(raw);
344
+ wasMigrated = true;
345
+ }
328
346
  }
329
347
  // Validate through Zod schema (strips unknown keys in memory; stale keys persist on disk until next save)
330
348
  const parsed = AgentStateSchema.safeParse(raw);
@@ -63,12 +63,14 @@ export declare const StoredMergedPRSchema: z.ZodObject<{
63
63
  url: z.ZodString;
64
64
  title: z.ZodString;
65
65
  mergedAt: z.ZodString;
66
+ commentsFetchedAt: z.ZodOptional<z.ZodString>;
66
67
  learningsExtractedAt: z.ZodOptional<z.ZodString>;
67
68
  }, z.core.$strip>;
68
69
  export declare const StoredClosedPRSchema: z.ZodObject<{
69
70
  url: z.ZodString;
70
71
  title: z.ZodString;
71
72
  closedAt: z.ZodString;
73
+ commentsFetchedAt: z.ZodOptional<z.ZodString>;
72
74
  learningsExtractedAt: z.ZodOptional<z.ZodString>;
73
75
  }, z.core.$strip>;
74
76
  export declare const AnalyzedIssueConversationSchema: z.ZodObject<{
@@ -324,7 +326,7 @@ export declare const DailyDigestSchema: z.ZodObject<{
324
326
  }, z.core.$strip>;
325
327
  }, z.core.$strip>;
326
328
  export declare const AgentStateSchema: z.ZodObject<{
327
- version: z.ZodLiteral<3>;
329
+ version: z.ZodLiteral<4>;
328
330
  gistId: z.ZodOptional<z.ZodString>;
329
331
  repoScores: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodObject<{
330
332
  repo: z.ZodString;
@@ -471,12 +473,14 @@ export declare const AgentStateSchema: z.ZodObject<{
471
473
  url: z.ZodString;
472
474
  title: z.ZodString;
473
475
  mergedAt: z.ZodString;
476
+ commentsFetchedAt: z.ZodOptional<z.ZodString>;
474
477
  learningsExtractedAt: z.ZodOptional<z.ZodString>;
475
478
  }, z.core.$strip>>>;
476
479
  closedPRs: z.ZodOptional<z.ZodArray<z.ZodObject<{
477
480
  url: z.ZodString;
478
481
  title: z.ZodString;
479
482
  closedAt: z.ZodString;
483
+ commentsFetchedAt: z.ZodOptional<z.ZodString>;
480
484
  learningsExtractedAt: z.ZodOptional<z.ZodString>;
481
485
  }, z.core.$strip>>>;
482
486
  analyzedIssueConversations: z.ZodOptional<z.ZodArray<z.ZodObject<{
@@ -44,12 +44,18 @@ export const StoredMergedPRSchema = z.object({
44
44
  url: z.string(),
45
45
  title: z.string(),
46
46
  mergedAt: z.string(),
47
+ /** When the raw review-comment bundle for this PR was last fetched (#867). */
48
+ commentsFetchedAt: z.string().optional(),
49
+ /** When the host last ran LLM extraction over this PR's comment bundle (#867). */
47
50
  learningsExtractedAt: z.string().optional(),
48
51
  });
49
52
  export const StoredClosedPRSchema = z.object({
50
53
  url: z.string(),
51
54
  title: z.string(),
52
55
  closedAt: z.string(),
56
+ /** When the raw review-comment bundle for this PR was last fetched (#867). */
57
+ commentsFetchedAt: z.string().optional(),
58
+ /** When the host last ran LLM extraction over this PR's comment bundle (#867). */
53
59
  learningsExtractedAt: z.string().optional(),
54
60
  });
55
61
  export const AnalyzedIssueConversationSchema = z.object({
@@ -211,7 +217,7 @@ export const DailyDigestSchema = z.object({
211
217
  });
212
218
  // ── 8. Root schema ───────────────────────────────────────────────────
213
219
  export const AgentStateSchema = z.object({
214
- version: z.literal(3),
220
+ version: z.literal(4),
215
221
  gistId: z.string().optional(),
216
222
  repoScores: z.record(z.string(), RepoScoreSchema).default({}),
217
223
  config: AgentConfigSchema.default(() => AgentConfigSchema.parse({})),
@@ -105,6 +105,29 @@ export declare class StateManager {
105
105
  isGistMode(): boolean;
106
106
  /** Whether the Gist is in degraded mode (using local cache fallback). */
107
107
  isGistDegraded(): boolean;
108
+ /**
109
+ * Whether per-repo guidelines (#867) are available. True iff the Gist store
110
+ * is initialized — in local-only mode, guidelines are unavailable and
111
+ * write operations would throw {@link GuidelinesNotAvailableError}.
112
+ */
113
+ isGuidelinesAvailable(): boolean;
114
+ /**
115
+ * Read the per-repo guidelines for `repo` (#867). Returns null when in
116
+ * local mode, when no file exists, or when the file is empty (tombstoned).
117
+ */
118
+ getGuidelines(repo: string): string | null;
119
+ /**
120
+ * Persist per-repo guidelines for `repo`. Throws when not in Gist mode or
121
+ * when content exceeds the byte budget.
122
+ */
123
+ setGuidelines(repo: string, content: string): void;
124
+ /**
125
+ * Tombstone the guidelines file for `repo` so subsequent reads return null.
126
+ * Throws when not in Gist mode.
127
+ */
128
+ deleteGuidelines(repo: string): void;
129
+ /** List repos with non-empty guidelines stored in the Gist. */
130
+ listGuidelinesRepos(): string[];
108
131
  /**
109
132
  * Get the current state as a read-only snapshot.
110
133
  */
@@ -176,6 +199,16 @@ export declare class StateManager {
176
199
  };
177
200
  /** Returns the most recent close date, used as a watermark for incremental fetching. */
178
201
  getClosedPRWatermark(): string | undefined;
202
+ /**
203
+ * Stamp `commentsFetchedAt` on the merged or closed PR matching `url` (#867).
204
+ * No-op when no PR with that URL is stored.
205
+ */
206
+ markPRCommentsFetched(url: string, fetchedAt: string): void;
207
+ /**
208
+ * Stamp `learningsExtractedAt` on the merged or closed PR matching `url` (#867).
209
+ * No-op when no PR with that URL is stored.
210
+ */
211
+ markPRLearningsExtracted(url: string, extractedAt: string): void;
179
212
  /**
180
213
  * Merge partial config updates into the current configuration.
181
214
  * @param config - Partial config object to merge
@@ -10,6 +10,7 @@ import * as repoScoring from './repo-score-manager.js';
10
10
  import { debug, warn } from './logger.js';
11
11
  import { errorMessage, ConfigurationError, ConcurrencyError } from './errors.js';
12
12
  import { GistStateStore } from './gist-state-store.js';
13
+ import * as guidelinesStoreModule from './guidelines-store.js';
13
14
  import { getStatePath, getStateCachePath } from './paths.js';
14
15
  import { parseGitHubUrl } from './urls.js';
15
16
  export { acquireLock, releaseLock, atomicWriteFileSync } from './state-persistence.js';
@@ -274,6 +275,41 @@ export class StateManager {
274
275
  isGistDegraded() {
275
276
  return this.gistDegraded;
276
277
  }
278
+ /**
279
+ * Whether per-repo guidelines (#867) are available. True iff the Gist store
280
+ * is initialized — in local-only mode, guidelines are unavailable and
281
+ * write operations would throw {@link GuidelinesNotAvailableError}.
282
+ */
283
+ isGuidelinesAvailable() {
284
+ return this.gistStore !== null;
285
+ }
286
+ /**
287
+ * Read the per-repo guidelines for `repo` (#867). Returns null when in
288
+ * local mode, when no file exists, or when the file is empty (tombstoned).
289
+ */
290
+ getGuidelines(repo) {
291
+ return guidelinesStoreModule.getGuidelines(this.gistStore, repo);
292
+ }
293
+ /**
294
+ * Persist per-repo guidelines for `repo`. Throws when not in Gist mode or
295
+ * when content exceeds the byte budget.
296
+ */
297
+ setGuidelines(repo, content) {
298
+ guidelinesStoreModule.setGuidelines(this.gistStore, repo, content);
299
+ this.autoSave();
300
+ }
301
+ /**
302
+ * Tombstone the guidelines file for `repo` so subsequent reads return null.
303
+ * Throws when not in Gist mode.
304
+ */
305
+ deleteGuidelines(repo) {
306
+ guidelinesStoreModule.deleteGuidelines(this.gistStore, repo);
307
+ this.autoSave();
308
+ }
309
+ /** List repos with non-empty guidelines stored in the Gist. */
310
+ listGuidelinesRepos() {
311
+ return guidelinesStoreModule.listGuidelinesRepos(this.gistStore);
312
+ }
277
313
  /**
278
314
  * Get the current state as a read-only snapshot.
279
315
  */
@@ -434,6 +470,40 @@ export class StateManager {
434
470
  getClosedPRWatermark() {
435
471
  return this.state.closedPRs?.[0]?.closedAt || undefined;
436
472
  }
473
+ /**
474
+ * Stamp `commentsFetchedAt` on the merged or closed PR matching `url` (#867).
475
+ * No-op when no PR with that URL is stored.
476
+ */
477
+ markPRCommentsFetched(url, fetchedAt) {
478
+ const merged = this.state.mergedPRs?.find((pr) => pr.url === url);
479
+ if (merged) {
480
+ merged.commentsFetchedAt = fetchedAt;
481
+ this.autoSave();
482
+ return;
483
+ }
484
+ const closed = this.state.closedPRs?.find((pr) => pr.url === url);
485
+ if (closed) {
486
+ closed.commentsFetchedAt = fetchedAt;
487
+ this.autoSave();
488
+ }
489
+ }
490
+ /**
491
+ * Stamp `learningsExtractedAt` on the merged or closed PR matching `url` (#867).
492
+ * No-op when no PR with that URL is stored.
493
+ */
494
+ markPRLearningsExtracted(url, extractedAt) {
495
+ const merged = this.state.mergedPRs?.find((pr) => pr.url === url);
496
+ if (merged) {
497
+ merged.learningsExtractedAt = extractedAt;
498
+ this.autoSave();
499
+ return;
500
+ }
501
+ const closed = this.state.closedPRs?.find((pr) => pr.url === url);
502
+ if (closed) {
503
+ closed.learningsExtractedAt = extractedAt;
504
+ this.autoSave();
505
+ }
506
+ }
437
507
  // === Configuration ===
438
508
  /**
439
509
  * Merge partial config updates into the current configuration.
@@ -233,7 +233,7 @@ interface CommentedIssueWithoutResponse extends CommentedIssueBase {
233
233
  export type CommentedIssue = CommentedIssueWithResponse | CommentedIssueWithoutResponse;
234
234
  /** Default configuration applied to new state files. All fields can be overridden via `/setup-oss`. */
235
235
  export declare const DEFAULT_CONFIG: AgentConfig;
236
- /** Initial state written to `~/.oss-autopilot/state.json` on first run. Uses v3 architecture. */
236
+ /** Initial state written to `~/.oss-autopilot/state.json` on first run. Uses v4 architecture. */
237
237
  export declare const INITIAL_STATE: AgentState;
238
238
  export declare const PROJECT_CATEGORIES: ("nonprofit" | "devtools" | "infrastructure" | "web-frameworks" | "data-ml" | "education")[];
239
239
  export declare const ISSUE_SCOPES: ("advanced" | "beginner" | "intermediate")[];
@@ -12,8 +12,8 @@ export function isBelowMinStars(stargazersCount, minStars) {
12
12
  // ── Schema-derived constants ─────────────────────────────────────────
13
13
  /** Default configuration applied to new state files. All fields can be overridden via `/setup-oss`. */
14
14
  export const DEFAULT_CONFIG = AgentConfigSchema.parse({});
15
- /** Initial state written to `~/.oss-autopilot/state.json` on first run. Uses v3 architecture. */
16
- export const INITIAL_STATE = AgentStateSchema.parse({ version: 3 });
15
+ /** Initial state written to `~/.oss-autopilot/state.json` on first run. Uses v4 architecture. */
16
+ export const INITIAL_STATE = AgentStateSchema.parse({ version: 4 });
17
17
  // ── Const arrays (derived from Zod schemas for runtime iteration) ────
18
18
  export const PROJECT_CATEGORIES = ProjectCategorySchema.options;
19
19
  export const ISSUE_SCOPES = IssueScopeSchema.options;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oss-autopilot/core",
3
- "version": "3.1.0",
3
+ "version": "3.2.0",
4
4
  "description": "CLI and core library for managing open source contributions",
5
5
  "type": "module",
6
6
  "bin": {