@oss-autopilot/core 3.1.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-registry.js +113 -3
- package/dist/cli.bundle.cjs +96 -92
- package/dist/commands/check-integration.js +8 -8
- package/dist/commands/comments.js +3 -0
- package/dist/commands/config.js +14 -7
- package/dist/commands/daily-render.js +10 -5
- package/dist/commands/daily.js +6 -1
- package/dist/commands/dashboard-lifecycle.js +1 -1
- package/dist/commands/dashboard-process.js +4 -4
- package/dist/commands/dashboard-server.js +7 -6
- package/dist/commands/dashboard.js +2 -2
- package/dist/commands/detect-formatters.js +3 -3
- package/dist/commands/doctor.js +5 -5
- package/dist/commands/guidelines.d.ts +67 -0
- package/dist/commands/guidelines.js +159 -0
- package/dist/commands/index.d.ts +9 -0
- package/dist/commands/index.js +9 -0
- package/dist/commands/list-move-tier.js +5 -5
- package/dist/commands/local-repos.js +9 -9
- package/dist/commands/parse-list.js +10 -10
- package/dist/commands/scout-bridge.js +2 -2
- package/dist/commands/setup.js +24 -13
- package/dist/commands/skip-add.js +6 -3
- package/dist/commands/skip-file-parser.js +3 -3
- package/dist/commands/startup.js +11 -8
- package/dist/commands/state-cmd.js +1 -1
- package/dist/commands/status.js +7 -0
- package/dist/commands/validation.js +3 -3
- package/dist/commands/vet-list.js +12 -8
- package/dist/commands/vet.js +1 -2
- package/dist/core/__fixtures__/prompt-injection-payloads.d.ts +22 -0
- package/dist/core/__fixtures__/prompt-injection-payloads.js +109 -0
- package/dist/core/anti-llm-policy.js +5 -5
- package/dist/core/auth.js +5 -5
- package/dist/core/daily-logic.js +8 -4
- package/dist/core/dates.js +3 -3
- package/dist/core/errors.d.ts +29 -0
- package/dist/core/errors.js +63 -0
- package/dist/core/formatter-detection.js +9 -9
- package/dist/core/gist-state-store.d.ts +19 -3
- package/dist/core/gist-state-store.js +81 -15
- package/dist/core/guidelines-store.d.ts +74 -0
- package/dist/core/guidelines-store.js +130 -0
- package/dist/core/http-cache.js +6 -6
- package/dist/core/index.d.ts +2 -0
- package/dist/core/index.js +2 -0
- package/dist/core/issue-conversation.js +3 -1
- package/dist/core/paths.js +4 -4
- package/dist/core/pr-comments-fetcher.d.ts +67 -0
- package/dist/core/pr-comments-fetcher.js +125 -0
- package/dist/core/pr-monitor.js +1 -2
- package/dist/core/pr-template.js +1 -1
- package/dist/core/state-persistence.d.ts +6 -0
- package/dist/core/state-persistence.js +27 -9
- package/dist/core/state-schema.d.ts +5 -1
- package/dist/core/state-schema.js +7 -1
- package/dist/core/state.d.ts +60 -0
- package/dist/core/state.js +136 -13
- package/dist/core/types.d.ts +1 -1
- package/dist/core/types.js +2 -2
- package/dist/core/untrusted-content.d.ts +48 -0
- package/dist/core/untrusted-content.js +106 -0
- package/dist/core/urls.js +2 -2
- package/dist/formatters/json.d.ts +53 -3
- package/dist/formatters/json.js +49 -14
- package/package.json +1 -1
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import { OssAutopilotError } from './errors.js';
|
|
2
|
+
/** Filename prefix shared by every guidelines file in the Gist. */
|
|
3
|
+
export const GUIDELINES_FILE_PREFIX = 'guidelines--';
|
|
4
|
+
/** Hard byte budget for a single guidelines file (#867 design log §1). */
|
|
5
|
+
export const GUIDELINES_MAX_BYTES = 8192;
|
|
6
|
+
/** Suffix appended to the filename so it renders as markdown in Gist. */
|
|
7
|
+
const GUIDELINES_FILE_SUFFIX = '.md';
|
|
8
|
+
/**
|
|
9
|
+
* Convert an `owner/repo` pair into the filename used inside the Gist.
|
|
10
|
+
* Slashes are escaped as `--` so the filename is filesystem-safe and
|
|
11
|
+
* unambiguous when parsing back to a repo string.
|
|
12
|
+
*/
|
|
13
|
+
export function guidelinesFilename(repo) {
|
|
14
|
+
if (!repo.includes('/')) {
|
|
15
|
+
throw new OssAutopilotError(`Invalid repo identifier "${repo}". Expected "owner/repo" format.`, 'INVALID_REPO_ID');
|
|
16
|
+
}
|
|
17
|
+
// GitHub forbids `/` in owner and repo, so the only `/` is the separator.
|
|
18
|
+
const [owner, name] = repo.split('/');
|
|
19
|
+
return `${GUIDELINES_FILE_PREFIX}${owner}--${name}${GUIDELINES_FILE_SUFFIX}`;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Inverse of {@link guidelinesFilename}. Returns null when the filename
|
|
23
|
+
* doesn't match the guidelines convention.
|
|
24
|
+
*/
|
|
25
|
+
export function repoFromGuidelinesFilename(filename) {
|
|
26
|
+
if (!filename.startsWith(GUIDELINES_FILE_PREFIX))
|
|
27
|
+
return null;
|
|
28
|
+
if (!filename.endsWith(GUIDELINES_FILE_SUFFIX))
|
|
29
|
+
return null;
|
|
30
|
+
const middle = filename.slice(GUIDELINES_FILE_PREFIX.length, filename.length - GUIDELINES_FILE_SUFFIX.length);
|
|
31
|
+
// Only split on the FIRST `--` separator. Repo names with `--` are rare
|
|
32
|
+
// but legal; owner names cannot contain `--` per GitHub username rules.
|
|
33
|
+
const sep = middle.indexOf('--');
|
|
34
|
+
if (sep === -1)
|
|
35
|
+
return null;
|
|
36
|
+
const owner = middle.slice(0, sep);
|
|
37
|
+
const name = middle.slice(sep + 2);
|
|
38
|
+
if (!owner || !name)
|
|
39
|
+
return null;
|
|
40
|
+
return `${owner}/${name}`;
|
|
41
|
+
}
|
|
42
|
+
/**
 * Thrown by {@link setGuidelines} / {@link deleteGuidelines} when the
 * StateManager is not in Gist mode. Catch + degrade gracefully when surfacing
 * to user-facing flows: per-repo guidelines simply aren't available without a
 * Gist to store them in.
 */
export class GuidelinesNotAvailableError extends OssAutopilotError {
    constructor(message) {
        // `??` (not a default parameter) so an explicit null also gets the fallback.
        const fallback = 'Per-repo guidelines require Gist persistence. Run `oss-autopilot setup` to enable Gist sync, then retry.';
        super(message ?? fallback, 'GUIDELINES_NOT_AVAILABLE');
        this.name = 'GuidelinesNotAvailableError';
    }
}
|
|
55
|
+
/**
 * Thrown by {@link setGuidelines} when content exceeds {@link GUIDELINES_MAX_BYTES}.
 * Surfaced separately from generic validation errors so consumers can prompt the
 * user with a "trim or split" UX rather than a generic shape rejection.
 */
export class GuidelinesTooLargeError extends OssAutopilotError {
    constructor(byteSize, max = GUIDELINES_MAX_BYTES) {
        const detail = `Guidelines content is ${byteSize} bytes, exceeding the ${max}-byte cap. Trim or split across categories.`;
        super(detail, 'GUIDELINES_TOO_LARGE');
        this.name = 'GuidelinesTooLargeError';
    }
}
|
|
66
|
+
/**
|
|
67
|
+
* Read the guidelines file for a repo from the Gist cache. Returns null when
|
|
68
|
+
* the store is not in Gist mode, the file does not exist, or the file is
|
|
69
|
+
* present but empty (treated as a tombstone left by {@link deleteGuidelines}).
|
|
70
|
+
*/
|
|
71
|
+
export function getGuidelines(store, repo) {
|
|
72
|
+
if (!store)
|
|
73
|
+
return null;
|
|
74
|
+
const content = store.getDocument(guidelinesFilename(repo));
|
|
75
|
+
if (content === null || content === '')
|
|
76
|
+
return null;
|
|
77
|
+
return content;
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Write or replace the guidelines file for a repo. Throws if the store is not
|
|
81
|
+
* in Gist mode or the content exceeds the byte budget.
|
|
82
|
+
*/
|
|
83
|
+
export function setGuidelines(store, repo, content) {
|
|
84
|
+
if (!store)
|
|
85
|
+
throw new GuidelinesNotAvailableError();
|
|
86
|
+
const byteSize = Buffer.byteLength(content, 'utf8');
|
|
87
|
+
if (byteSize > GUIDELINES_MAX_BYTES) {
|
|
88
|
+
throw new GuidelinesTooLargeError(byteSize);
|
|
89
|
+
}
|
|
90
|
+
store.setDocument(guidelinesFilename(repo), content);
|
|
91
|
+
}
|
|
92
|
+
/**
|
|
93
|
+
* Delete the guidelines file for a repo. No-op if the file doesn't exist.
|
|
94
|
+
* Implementation: write an empty string. The Gist API treats files with
|
|
95
|
+
* empty content as deletions on the next push, matching the existing
|
|
96
|
+
* single-source-of-truth model.
|
|
97
|
+
*/
|
|
98
|
+
export function deleteGuidelines(store, repo) {
|
|
99
|
+
if (!store)
|
|
100
|
+
throw new GuidelinesNotAvailableError();
|
|
101
|
+
// setDocument('') is interpreted as deletion; the GistStateStore push path
|
|
102
|
+
// already strips empty-content files before sending to the Gist API.
|
|
103
|
+
store.setDocument(guidelinesFilename(repo), '');
|
|
104
|
+
}
|
|
105
|
+
/**
|
|
106
|
+
* List every repo (as `owner/repo`) that has a non-empty guidelines file in
|
|
107
|
+
* the cache. Tombstoned (empty-content) files are excluded so the result
|
|
108
|
+
* matches what {@link getGuidelines} would actually return.
|
|
109
|
+
*
|
|
110
|
+
* Returns an empty array when the store is null or no files exist.
|
|
111
|
+
*/
|
|
112
|
+
export function listGuidelinesRepos(store) {
|
|
113
|
+
if (!store)
|
|
114
|
+
return [];
|
|
115
|
+
const filenames = store.listDocuments(GUIDELINES_FILE_PREFIX);
|
|
116
|
+
const repos = [];
|
|
117
|
+
for (const filename of filenames) {
|
|
118
|
+
const repo = repoFromGuidelinesFilename(filename);
|
|
119
|
+
// Skip files we can't decode (e.g. older formats, hand-edited) — better
|
|
120
|
+
// than throwing and breaking listGuidelinesRepos for everyone.
|
|
121
|
+
if (!repo)
|
|
122
|
+
continue;
|
|
123
|
+
// Skip tombstones — empty content means the user deleted these guidelines.
|
|
124
|
+
const content = store.getDocument(filename);
|
|
125
|
+
if (content === null || content === '')
|
|
126
|
+
continue;
|
|
127
|
+
repos.push(repo);
|
|
128
|
+
}
|
|
129
|
+
return repos.sort();
|
|
130
|
+
}
|
package/dist/core/http-cache.js
CHANGED
|
@@ -9,9 +9,9 @@
|
|
|
9
9
|
* for the same endpoint (e.g., star counts for two PRs in the same repo)
|
|
10
10
|
* share a single HTTP round-trip.
|
|
11
11
|
*/
|
|
12
|
-
import * as fs from 'fs';
|
|
13
|
-
import * as path from 'path';
|
|
14
|
-
import * as crypto from 'crypto';
|
|
12
|
+
import * as fs from 'node:fs';
|
|
13
|
+
import * as path from 'node:path';
|
|
14
|
+
import * as crypto from 'node:crypto';
|
|
15
15
|
import { getCacheDir } from './paths.js';
|
|
16
16
|
import { debug } from './logger.js';
|
|
17
17
|
import { getHttpStatusCode } from './errors.js';
|
|
@@ -76,7 +76,7 @@ export class HttpCache {
|
|
|
76
76
|
get(url) {
|
|
77
77
|
const filePath = this.pathFor(url);
|
|
78
78
|
try {
|
|
79
|
-
const raw = fs.readFileSync(filePath, '
|
|
79
|
+
const raw = fs.readFileSync(filePath, 'utf8');
|
|
80
80
|
const entry = JSON.parse(raw);
|
|
81
81
|
// Sanity-check: the file should contain the URL we asked for
|
|
82
82
|
if (entry.url !== url) {
|
|
@@ -100,7 +100,7 @@ export class HttpCache {
|
|
|
100
100
|
cachedAt: new Date().toISOString(),
|
|
101
101
|
};
|
|
102
102
|
try {
|
|
103
|
-
fs.writeFileSync(this.pathFor(url), JSON.stringify(entry), { encoding: '
|
|
103
|
+
fs.writeFileSync(this.pathFor(url), JSON.stringify(entry), { encoding: 'utf8', mode: 0o600 });
|
|
104
104
|
debug(MODULE, `Cached response for ${url}`);
|
|
105
105
|
// Best-effort size cap (#1057 M27). Runs after each write rather than on
|
|
106
106
|
// a schedule so long-lived sessions can't accumulate past the cap.
|
|
@@ -191,7 +191,7 @@ export class HttpCache {
|
|
|
191
191
|
continue;
|
|
192
192
|
const filePath = path.join(this.cacheDir, file);
|
|
193
193
|
try {
|
|
194
|
-
const raw = fs.readFileSync(filePath, '
|
|
194
|
+
const raw = fs.readFileSync(filePath, 'utf8');
|
|
195
195
|
const entry = JSON.parse(raw);
|
|
196
196
|
const age = now - new Date(entry.cachedAt).getTime();
|
|
197
197
|
if (age > maxAgeMs) {
|
package/dist/core/index.d.ts
CHANGED
|
@@ -4,9 +4,11 @@
|
|
|
4
4
|
*/
|
|
5
5
|
export { StateManager, getStateManager, getStateManagerAsync, ensureGistPersistence, maybeCheckpoint, resetStateManager, type Stats, } from './state.js';
|
|
6
6
|
export { GistStateStore } from './gist-state-store.js';
|
|
7
|
+
export { guidelinesFilename, repoFromGuidelinesFilename, GUIDELINES_FILE_PREFIX, GUIDELINES_MAX_BYTES, GuidelinesNotAvailableError, GuidelinesTooLargeError, } from './guidelines-store.js';
|
|
7
8
|
export { PRMonitor, type PRCheckFailure, type FetchPRsResult, computeDisplayLabel, classifyCICheck, classifyFailingChecks, } from './pr-monitor.js';
|
|
8
9
|
export { IssueConversationMonitor } from './issue-conversation.js';
|
|
9
10
|
export { isBotAuthor, isAcknowledgmentComment } from './comment-utils.js';
|
|
11
|
+
export { wrapUntrustedContent, extractFromFence, UNTRUSTED_OPEN_TAG_NAME, UNTRUSTED_CLOSE_TAG, type UntrustedContentMeta, } from './untrusted-content.js';
|
|
10
12
|
export { getOctokit, checkRateLimit, type RateLimitInfo } from './github.js';
|
|
11
13
|
export { parseGitHubUrl, splitRepo, isOwnRepo } from './urls.js';
|
|
12
14
|
export { daysBetween, formatRelativeTime, byDateDescending } from './dates.js';
|
package/dist/core/index.js
CHANGED
|
@@ -4,10 +4,12 @@
|
|
|
4
4
|
*/
|
|
5
5
|
export { StateManager, getStateManager, getStateManagerAsync, ensureGistPersistence, maybeCheckpoint, resetStateManager, } from './state.js';
|
|
6
6
|
export { GistStateStore } from './gist-state-store.js';
|
|
7
|
+
export { guidelinesFilename, repoFromGuidelinesFilename, GUIDELINES_FILE_PREFIX, GUIDELINES_MAX_BYTES, GuidelinesNotAvailableError, GuidelinesTooLargeError, } from './guidelines-store.js';
|
|
7
8
|
export { PRMonitor, computeDisplayLabel, classifyCICheck, classifyFailingChecks, } from './pr-monitor.js';
|
|
8
9
|
// Search/vetting now delegated to @oss-scout/core via commands/scout-bridge.ts
|
|
9
10
|
export { IssueConversationMonitor } from './issue-conversation.js';
|
|
10
11
|
export { isBotAuthor, isAcknowledgmentComment } from './comment-utils.js';
|
|
12
|
+
export { wrapUntrustedContent, extractFromFence, UNTRUSTED_OPEN_TAG_NAME, UNTRUSTED_CLOSE_TAG, } from './untrusted-content.js';
|
|
11
13
|
export { getOctokit, checkRateLimit } from './github.js';
|
|
12
14
|
export { parseGitHubUrl, splitRepo, isOwnRepo } from './urls.js';
|
|
13
15
|
export { daysBetween, formatRelativeTime, byDateDescending } from './dates.js';
|
|
@@ -151,7 +151,9 @@ export class IssueConversationMonitor {
|
|
|
151
151
|
body: comment.body || '',
|
|
152
152
|
createdAt: comment.created_at,
|
|
153
153
|
isUser: author.toLowerCase() === username.toLowerCase(),
|
|
154
|
-
authorAssociation:
|
|
154
|
+
authorAssociation: typeof comment.author_association === 'string'
|
|
155
|
+
? comment.author_association
|
|
156
|
+
: '',
|
|
155
157
|
});
|
|
156
158
|
}
|
|
157
159
|
timeline.sort((a, b) => new Date(a.createdAt).getTime() - new Date(b.createdAt).getTime());
|
package/dist/core/paths.js
CHANGED
|
@@ -6,9 +6,9 @@
|
|
|
6
6
|
*
|
|
7
7
|
* Extracted from utils.ts under #1116.
|
|
8
8
|
*/
|
|
9
|
-
import * as fs from 'fs';
|
|
10
|
-
import * as path from 'path';
|
|
11
|
-
import * as os from 'os';
|
|
9
|
+
import * as fs from 'node:fs';
|
|
10
|
+
import * as path from 'node:path';
|
|
11
|
+
import * as os from 'node:os';
|
|
12
12
|
/**
|
|
13
13
|
* Returns the oss-autopilot data directory path, creating it if it does not exist.
|
|
14
14
|
*
|
|
@@ -98,7 +98,7 @@ export function stateFileExists() {
|
|
|
98
98
|
export function getCLIVersion() {
|
|
99
99
|
try {
|
|
100
100
|
const pkgPath = path.join(path.dirname(process.argv[1]), '..', 'package.json');
|
|
101
|
-
return JSON.parse(fs.readFileSync(pkgPath, '
|
|
101
|
+
return JSON.parse(fs.readFileSync(pkgPath, 'utf8')).version;
|
|
102
102
|
}
|
|
103
103
|
catch {
|
|
104
104
|
return '0.0.0';
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
 * Fetch the raw review-comment bundle for a PR (#867 PR 3).
 *
 * Returns reviews, inline review comments, and issue-level comments for a
 * single PR with the contributor's own comments + bots filtered out. The
 * `authorAssociation` field is preserved on every entry so the host's
 * extraction prompt can weight maintainer voices (OWNER/MEMBER/COLLABORATOR)
 * differently from community feedback (CONTRIBUTOR/NONE).
 *
 * No LLM calls happen here — this is the data layer feeding the host's
 * `extract-learnings` prompt. The bundle structure is the contract; the
 * extraction is the host's responsibility.
 */
import type { Octokit } from '@octokit/rest';
/** A single review (top-level) on a PR. */
export interface PRReviewEntry {
    /** GitHub login of the reviewer. */
    author: string;
    /** GitHub author-association value (e.g. OWNER, MEMBER, CONTRIBUTOR, NONE). */
    authorAssociation: string;
    /** Review body text; empty string when the review carried no body. */
    body: string;
    /** Submission timestamp as reported by the REST API; empty string when absent. */
    submittedAt: string;
}
/** An inline review comment (anchored to a file/line) on a PR. */
export interface PRReviewCommentEntry {
    /** GitHub login of the commenter. */
    author: string;
    /** GitHub author-association value (e.g. OWNER, MEMBER, CONTRIBUTOR, NONE). */
    authorAssociation: string;
    /** Comment body text; empty string when absent. */
    body: string;
    /** Repository-relative path of the file the comment is anchored to. */
    path: string;
    /** Creation timestamp as reported by the REST API; empty string when absent. */
    createdAt: string;
}
/** An issue-level comment posted on the PR thread. */
export interface PRIssueCommentEntry {
    /** GitHub login of the commenter. */
    author: string;
    /** GitHub author-association value (e.g. OWNER, MEMBER, CONTRIBUTOR, NONE). */
    authorAssociation: string;
    /** Comment body text; empty string when absent. */
    body: string;
    /** Creation timestamp as reported by the REST API; empty string when absent. */
    createdAt: string;
}
/**
 * The full comment bundle returned for a single PR. Field order matches
 * the typical narrative arc of a PR review (top-level reviews → inline
 * comments → general thread chatter), so the host's extraction prompt can
 * walk the bundle linearly.
 */
export interface PRCommentBundle {
    /** Canonical URL of the pull request the bundle was fetched from. */
    prUrl: string;
    /** Title of the pull request. */
    prTitle: string;
    /** `owner/repo` identifier of the repository. */
    repo: string;
    /** ISO-8601 timestamp the PR was merged or closed; whichever applies. */
    mergedAt: string;
    /** Top-level reviews, bots and the user's own entries already removed. */
    reviews: PRReviewEntry[];
    /** Inline (file-anchored) review comments, filtered the same way. */
    reviewComments: PRReviewCommentEntry[];
    /** General PR-thread comments, filtered the same way. */
    issueComments: PRIssueCommentEntry[];
}
/**
 * Fetch a single PR's comment bundle. Filters out the authenticated user's
 * own comments and bots. Throws {@link ValidationError} on a non-PR URL.
 */
export declare function fetchPRCommentBundle(octokit: Octokit, prUrl: string, githubUsername: string): Promise<PRCommentBundle>;
/**
 * Fetch comment bundles for many PRs with a small concurrency cap (default 3).
 *
 * Failures on individual PRs are logged and skipped — the batch returns a
 * shorter array rather than aborting. Rationale: extraction quality is
 * already a partial-information problem (users contribute to many repos and
 * many PRs), so a single 404 / rate limit on one PR should not deny the
 * host the corpus from the other 4.
 */
export declare function fetchPRCommentBundlesBatch(octokit: Octokit, prUrls: string[], githubUsername: string, concurrency?: number): Promise<PRCommentBundle[]>;
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import { paginateAll } from './pagination.js';
|
|
2
|
+
import { isBotAuthor } from './comment-utils.js';
|
|
3
|
+
import { parseGitHubUrl } from './urls.js';
|
|
4
|
+
import { ValidationError, errorMessage } from './errors.js';
|
|
5
|
+
import { debug, warn } from './logger.js';
|
|
6
|
+
const MODULE = 'pr-comments-fetcher';
|
|
7
|
+
/** Default concurrency for {@link fetchPRCommentBundlesBatch}. */
|
|
8
|
+
const DEFAULT_BATCH_CONCURRENCY = 3;
|
|
9
|
+
/**
|
|
10
|
+
* Fetch a single PR's comment bundle. Filters out the authenticated user's
|
|
11
|
+
* own comments and bots. Throws {@link ValidationError} on a non-PR URL.
|
|
12
|
+
*/
|
|
13
|
+
export async function fetchPRCommentBundle(octokit, prUrl, githubUsername) {
|
|
14
|
+
const parsed = parseGitHubUrl(prUrl);
|
|
15
|
+
if (!parsed || parsed.type !== 'pull') {
|
|
16
|
+
throw new ValidationError(`Invalid PR URL: ${prUrl}`);
|
|
17
|
+
}
|
|
18
|
+
const { owner, repo, number: pull_number } = parsed;
|
|
19
|
+
const repoFull = `${owner}/${repo}`;
|
|
20
|
+
// Fetch the PR + all three comment streams in parallel. We always fetch
|
|
21
|
+
// every page — corpus quality depends on having every reviewer voice, not
|
|
22
|
+
// just the first 100 comments.
|
|
23
|
+
const [{ data: pr }, reviews, reviewComments, issueComments] = await Promise.all([
|
|
24
|
+
octokit.pulls.get({ owner, repo, pull_number }),
|
|
25
|
+
paginateAll((page) => octokit.pulls.listReviews({
|
|
26
|
+
owner,
|
|
27
|
+
repo,
|
|
28
|
+
pull_number,
|
|
29
|
+
per_page: 100,
|
|
30
|
+
page,
|
|
31
|
+
})),
|
|
32
|
+
paginateAll((page) => octokit.pulls.listReviewComments({
|
|
33
|
+
owner,
|
|
34
|
+
repo,
|
|
35
|
+
pull_number,
|
|
36
|
+
per_page: 100,
|
|
37
|
+
page,
|
|
38
|
+
})),
|
|
39
|
+
paginateAll((page) => octokit.issues.listComments({
|
|
40
|
+
owner,
|
|
41
|
+
repo,
|
|
42
|
+
issue_number: pull_number,
|
|
43
|
+
per_page: 100,
|
|
44
|
+
page,
|
|
45
|
+
})),
|
|
46
|
+
]);
|
|
47
|
+
const ownLogin = githubUsername.toLowerCase();
|
|
48
|
+
/**
|
|
49
|
+
* Drop entries that aren't useful corpus: the user's own comments, bots,
|
|
50
|
+
* and entries with no author at all (deleted accounts surface as null
|
|
51
|
+
* user from GitHub's REST API).
|
|
52
|
+
*/
|
|
53
|
+
const isWorthKeeping = (login) => {
|
|
54
|
+
if (!login)
|
|
55
|
+
return false;
|
|
56
|
+
if (login.toLowerCase() === ownLogin)
|
|
57
|
+
return false;
|
|
58
|
+
if (isBotAuthor(login))
|
|
59
|
+
return false;
|
|
60
|
+
return true;
|
|
61
|
+
};
|
|
62
|
+
const mergedAt = pr.merged_at ?? pr.closed_at ?? '';
|
|
63
|
+
return {
|
|
64
|
+
prUrl,
|
|
65
|
+
prTitle: pr.title,
|
|
66
|
+
repo: repoFull,
|
|
67
|
+
mergedAt,
|
|
68
|
+
reviews: reviews
|
|
69
|
+
.filter((r) => isWorthKeeping(r.user?.login))
|
|
70
|
+
.map((r) => ({
|
|
71
|
+
author: r.user?.login ?? '',
|
|
72
|
+
authorAssociation: r.author_association ?? 'NONE',
|
|
73
|
+
body: r.body ?? '',
|
|
74
|
+
submittedAt: r.submitted_at ?? '',
|
|
75
|
+
})),
|
|
76
|
+
reviewComments: reviewComments
|
|
77
|
+
.filter((c) => isWorthKeeping(c.user?.login))
|
|
78
|
+
.map((c) => ({
|
|
79
|
+
author: c.user?.login ?? '',
|
|
80
|
+
authorAssociation: c.author_association ?? 'NONE',
|
|
81
|
+
body: c.body ?? '',
|
|
82
|
+
path: c.path ?? '',
|
|
83
|
+
createdAt: c.created_at ?? '',
|
|
84
|
+
})),
|
|
85
|
+
issueComments: issueComments
|
|
86
|
+
.filter((c) => isWorthKeeping(c.user?.login))
|
|
87
|
+
.map((c) => ({
|
|
88
|
+
author: c.user?.login ?? '',
|
|
89
|
+
authorAssociation: c.author_association ?? 'NONE',
|
|
90
|
+
body: c.body ?? '',
|
|
91
|
+
createdAt: c.created_at ?? '',
|
|
92
|
+
})),
|
|
93
|
+
};
|
|
94
|
+
}
|
|
95
|
+
/**
|
|
96
|
+
* Fetch comment bundles for many PRs with a small concurrency cap (default 3).
|
|
97
|
+
*
|
|
98
|
+
* Failures on individual PRs are logged and skipped — the batch returns a
|
|
99
|
+
* shorter array rather than aborting. Rationale: extraction quality is
|
|
100
|
+
* already a partial-information problem (users contribute to many repos and
|
|
101
|
+
* many PRs), so a single 404 / rate limit on one PR should not deny the
|
|
102
|
+
* host the corpus from the other 4.
|
|
103
|
+
*/
|
|
104
|
+
export async function fetchPRCommentBundlesBatch(octokit, prUrls, githubUsername, concurrency = DEFAULT_BATCH_CONCURRENCY) {
|
|
105
|
+
const results = [];
|
|
106
|
+
const queue = [...prUrls];
|
|
107
|
+
async function worker() {
|
|
108
|
+
while (queue.length > 0) {
|
|
109
|
+
const url = queue.shift();
|
|
110
|
+
if (!url)
|
|
111
|
+
return;
|
|
112
|
+
try {
|
|
113
|
+
const bundle = await fetchPRCommentBundle(octokit, url, githubUsername);
|
|
114
|
+
results.push(bundle);
|
|
115
|
+
}
|
|
116
|
+
catch (err) {
|
|
117
|
+
warn(MODULE, `Skipping ${url}: ${errorMessage(err)}`);
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
const workers = Array.from({ length: Math.min(concurrency, prUrls.length) }, worker);
|
|
122
|
+
await Promise.all(workers);
|
|
123
|
+
debug(MODULE, `Fetched ${results.length}/${prUrls.length} comment bundles`);
|
|
124
|
+
return results;
|
|
125
|
+
}
|
package/dist/core/pr-monitor.js
CHANGED
|
@@ -16,9 +16,8 @@ import { getOctokit } from './github.js';
|
|
|
16
16
|
import { getStateManager } from './state.js';
|
|
17
17
|
import { daysBetween } from './dates.js';
|
|
18
18
|
import { parseGitHubUrl, extractOwnerRepo, isOwnRepo } from './urls.js';
|
|
19
|
-
import { DEFAULT_CONCURRENCY } from './concurrency.js';
|
|
19
|
+
import { DEFAULT_CONCURRENCY, runWorkerPool } from './concurrency.js';
|
|
20
20
|
import { determineStatus } from './status-determination.js';
|
|
21
|
-
import { runWorkerPool } from './concurrency.js';
|
|
22
21
|
import { ConfigurationError, ValidationError, errorMessage, getHttpStatusCode, isRateLimitOrAuthError, } from './errors.js';
|
|
23
22
|
import { paginateAll } from './pagination.js';
|
|
24
23
|
import { debug, warn, timed } from './logger.js';
|
package/dist/core/pr-template.js
CHANGED
|
@@ -43,7 +43,7 @@ export async function fetchPRTemplate(octokit, owner, repo) {
|
|
|
43
43
|
debug(MODULE, `${path} has no content, skipping`);
|
|
44
44
|
continue;
|
|
45
45
|
}
|
|
46
|
-
const template = Buffer.from(data.content, 'base64').toString('
|
|
46
|
+
const template = Buffer.from(data.content, 'base64').toString('utf8');
|
|
47
47
|
debug(MODULE, `Found PR template at ${path} (${template.length} chars)`);
|
|
48
48
|
return { template, source: path };
|
|
49
49
|
}
|
|
@@ -34,6 +34,12 @@ export declare function migrateV1ToV2(rawState: Record<string, unknown>): Record
|
|
|
34
34
|
* New optional fields are handled by Zod defaults (undefined/optional).
|
|
35
35
|
*/
|
|
36
36
|
export declare function migrateV2ToV3(rawState: Record<string, unknown>): Record<string, unknown>;
|
|
37
|
+
/**
|
|
38
|
+
* Migrate state from v3 to v4 (#867).
|
|
39
|
+
* Adds: commentsFetchedAt on StoredMergedPR / StoredClosedPR. The new field is
|
|
40
|
+
* optional, so no data transformation is needed — only the version bump.
|
|
41
|
+
*/
|
|
42
|
+
export declare function migrateV3ToV4(rawState: Record<string, unknown>): Record<string, unknown>;
|
|
37
43
|
/**
|
|
38
44
|
* Create a fresh state (v3).
|
|
39
45
|
* Leverages Zod schema defaults to produce a complete state.
|
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
* Handles file I/O, locking, backup/restore, and schema migration (v1→v2→v3→v4).
|
|
4
4
|
* No module-level mutable state — functions accept/return AgentState objects.
|
|
5
5
|
*/
|
|
6
|
-
import * as fs from 'fs';
|
|
7
|
-
import * as path from 'path';
|
|
6
|
+
import * as fs from 'node:fs';
|
|
7
|
+
import * as path from 'node:path';
|
|
8
8
|
import { AgentStateSchema } from './state-schema.js';
|
|
9
9
|
import { getStatePath, getBackupDir, getDataDir } from './paths.js';
|
|
10
10
|
import { errorMessage, ConcurrencyError } from './errors.js';
|
|
@@ -21,7 +21,7 @@ const LEGACY_BACKUP_DIR = path.join(process.cwd(), 'data', 'backups');
|
|
|
21
21
|
*/
|
|
22
22
|
function isLockStale(lockPath) {
|
|
23
23
|
try {
|
|
24
|
-
const existing = JSON.parse(fs.readFileSync(lockPath, '
|
|
24
|
+
const existing = JSON.parse(fs.readFileSync(lockPath, 'utf8'));
|
|
25
25
|
return Date.now() - existing.timestamp > LOCK_TIMEOUT_MS;
|
|
26
26
|
}
|
|
27
27
|
catch (err) {
|
|
@@ -72,7 +72,7 @@ export function acquireLock(lockPath) {
|
|
|
72
72
|
*/
|
|
73
73
|
export function releaseLock(lockPath) {
|
|
74
74
|
try {
|
|
75
|
-
const data = JSON.parse(fs.readFileSync(lockPath, '
|
|
75
|
+
const data = JSON.parse(fs.readFileSync(lockPath, 'utf8'));
|
|
76
76
|
if (data.pid === process.pid) {
|
|
77
77
|
fs.unlinkSync(lockPath);
|
|
78
78
|
}
|
|
@@ -158,12 +158,23 @@ export function migrateV2ToV3(rawState) {
|
|
|
158
158
|
debug(MODULE, 'v2 to v3 migration complete.');
|
|
159
159
|
return rawState;
|
|
160
160
|
}
|
|
161
|
+
/**
 * Migrate state from v3 to v4 (#867).
 * Adds: commentsFetchedAt on StoredMergedPR / StoredClosedPR. The new field is
 * optional, so no data transformation is needed — only the version bump.
 *
 * @param rawState - Parsed state object; mutated in place (version is set to 4).
 * @returns The same rawState object, now stamped as version 4.
 */
export function migrateV3ToV4(rawState) {
    debug(MODULE, 'Migrating state from v3 to v4 (add commentsFetchedAt to stored PR records)...');
    // commentsFetchedAt is optional in the v4 schema, so existing records
    // validate unchanged; bumping the version marker is sufficient.
    rawState.version = 4;
    debug(MODULE, 'v3 to v4 migration complete (no data transformation required).');
    return rawState;
}
|
|
161
172
|
/**
 * Create a fresh state (v4 — the current schema version).
 * Leverages Zod schema defaults to produce a complete state.
 */
export function createFreshState() {
    return AgentStateSchema.parse({ version: 4 });
}
|
|
168
179
|
/**
|
|
169
180
|
* Migrate state from legacy ./data/ location to ~/.oss-autopilot/.
|
|
@@ -262,7 +273,7 @@ function tryRestoreFromBackup() {
|
|
|
262
273
|
for (const backupFile of backupFiles) {
|
|
263
274
|
const backupPath = path.join(backupDir, backupFile);
|
|
264
275
|
try {
|
|
265
|
-
const data = fs.readFileSync(backupPath, '
|
|
276
|
+
const data = fs.readFileSync(backupPath, 'utf8');
|
|
266
277
|
let raw = JSON.parse(data);
|
|
267
278
|
// Chain migrations: v1 → v2 → v3 → v4
|
|
268
279
|
if (typeof raw === 'object' && raw !== null) {
|
|
@@ -273,6 +284,9 @@ function tryRestoreFromBackup() {
|
|
|
273
284
|
if (raw.version === 2) {
|
|
274
285
|
raw = migrateV2ToV3(raw);
|
|
275
286
|
}
|
|
287
|
+
if (raw.version === 3) {
|
|
288
|
+
raw = migrateV3ToV4(raw);
|
|
289
|
+
}
|
|
276
290
|
}
|
|
277
291
|
const parsed = AgentStateSchema.safeParse(raw);
|
|
278
292
|
if (parsed.success) {
|
|
@@ -311,9 +325,9 @@ export function loadState() {
|
|
|
311
325
|
const statePath = getStatePath();
|
|
312
326
|
try {
|
|
313
327
|
if (fs.existsSync(statePath)) {
|
|
314
|
-
const data = fs.readFileSync(statePath, '
|
|
328
|
+
const data = fs.readFileSync(statePath, 'utf8');
|
|
315
329
|
let raw = JSON.parse(data);
|
|
316
|
-
// Chain migrations: v1 → v2 → v3
|
|
330
|
+
// Chain migrations: v1 → v2 → v3 → v4
|
|
317
331
|
let wasMigrated = false;
|
|
318
332
|
if (typeof raw === 'object' && raw !== null) {
|
|
319
333
|
const rawObj = raw;
|
|
@@ -325,6 +339,10 @@ export function loadState() {
|
|
|
325
339
|
raw = migrateV2ToV3(raw);
|
|
326
340
|
wasMigrated = true;
|
|
327
341
|
}
|
|
342
|
+
if (raw.version === 3) {
|
|
343
|
+
raw = migrateV3ToV4(raw);
|
|
344
|
+
wasMigrated = true;
|
|
345
|
+
}
|
|
328
346
|
}
|
|
329
347
|
// Validate through Zod schema (strips unknown keys in memory; stale keys persist on disk until next save)
|
|
330
348
|
const parsed = AgentStateSchema.safeParse(raw);
|
|
@@ -473,7 +491,7 @@ export function saveState(state, expectedMtimeMs = null) {
|
|
|
473
491
|
// Create backup of existing state (best-effort, non-fatal)
|
|
474
492
|
try {
|
|
475
493
|
if (fs.existsSync(statePath)) {
|
|
476
|
-
const timestamp = new Date().toISOString().replace(/[
|
|
494
|
+
const timestamp = new Date().toISOString().replace(/[.:]/g, '-');
|
|
477
495
|
const randomSuffix = Math.random().toString(36).slice(2, 8).padEnd(6, '0');
|
|
478
496
|
const backupFile = path.join(backupDir, `state-${timestamp}-${randomSuffix}.json`);
|
|
479
497
|
fs.copyFileSync(statePath, backupFile);
|
|
@@ -63,12 +63,14 @@ export declare const StoredMergedPRSchema: z.ZodObject<{
|
|
|
63
63
|
url: z.ZodString;
|
|
64
64
|
title: z.ZodString;
|
|
65
65
|
mergedAt: z.ZodString;
|
|
66
|
+
commentsFetchedAt: z.ZodOptional<z.ZodString>;
|
|
66
67
|
learningsExtractedAt: z.ZodOptional<z.ZodString>;
|
|
67
68
|
}, z.core.$strip>;
|
|
68
69
|
export declare const StoredClosedPRSchema: z.ZodObject<{
|
|
69
70
|
url: z.ZodString;
|
|
70
71
|
title: z.ZodString;
|
|
71
72
|
closedAt: z.ZodString;
|
|
73
|
+
commentsFetchedAt: z.ZodOptional<z.ZodString>;
|
|
72
74
|
learningsExtractedAt: z.ZodOptional<z.ZodString>;
|
|
73
75
|
}, z.core.$strip>;
|
|
74
76
|
export declare const AnalyzedIssueConversationSchema: z.ZodObject<{
|
|
@@ -324,7 +326,7 @@ export declare const DailyDigestSchema: z.ZodObject<{
|
|
|
324
326
|
}, z.core.$strip>;
|
|
325
327
|
}, z.core.$strip>;
|
|
326
328
|
export declare const AgentStateSchema: z.ZodObject<{
|
|
327
|
-
version: z.ZodLiteral<
|
|
329
|
+
version: z.ZodLiteral<4>;
|
|
328
330
|
gistId: z.ZodOptional<z.ZodString>;
|
|
329
331
|
repoScores: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodObject<{
|
|
330
332
|
repo: z.ZodString;
|
|
@@ -471,12 +473,14 @@ export declare const AgentStateSchema: z.ZodObject<{
|
|
|
471
473
|
url: z.ZodString;
|
|
472
474
|
title: z.ZodString;
|
|
473
475
|
mergedAt: z.ZodString;
|
|
476
|
+
commentsFetchedAt: z.ZodOptional<z.ZodString>;
|
|
474
477
|
learningsExtractedAt: z.ZodOptional<z.ZodString>;
|
|
475
478
|
}, z.core.$strip>>>;
|
|
476
479
|
closedPRs: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
477
480
|
url: z.ZodString;
|
|
478
481
|
title: z.ZodString;
|
|
479
482
|
closedAt: z.ZodString;
|
|
483
|
+
commentsFetchedAt: z.ZodOptional<z.ZodString>;
|
|
480
484
|
learningsExtractedAt: z.ZodOptional<z.ZodString>;
|
|
481
485
|
}, z.core.$strip>>>;
|
|
482
486
|
analyzedIssueConversations: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
@@ -44,12 +44,18 @@ export const StoredMergedPRSchema = z.object({
|
|
|
44
44
|
url: z.string(),
|
|
45
45
|
title: z.string(),
|
|
46
46
|
mergedAt: z.string(),
|
|
47
|
+
/** When the raw review-comment bundle for this PR was last fetched (#867). */
|
|
48
|
+
commentsFetchedAt: z.string().optional(),
|
|
49
|
+
/** When the host last ran LLM extraction over this PR's comment bundle (#867). */
|
|
47
50
|
learningsExtractedAt: z.string().optional(),
|
|
48
51
|
});
|
|
49
52
|
export const StoredClosedPRSchema = z.object({
|
|
50
53
|
url: z.string(),
|
|
51
54
|
title: z.string(),
|
|
52
55
|
closedAt: z.string(),
|
|
56
|
+
/** When the raw review-comment bundle for this PR was last fetched (#867). */
|
|
57
|
+
commentsFetchedAt: z.string().optional(),
|
|
58
|
+
/** When the host last ran LLM extraction over this PR's comment bundle (#867). */
|
|
53
59
|
learningsExtractedAt: z.string().optional(),
|
|
54
60
|
});
|
|
55
61
|
export const AnalyzedIssueConversationSchema = z.object({
|
|
@@ -211,7 +217,7 @@ export const DailyDigestSchema = z.object({
|
|
|
211
217
|
});
|
|
212
218
|
// ── 8. Root schema ───────────────────────────────────────────────────
|
|
213
219
|
export const AgentStateSchema = z.object({
|
|
214
|
-
version: z.literal(
|
|
220
|
+
version: z.literal(4),
|
|
215
221
|
gistId: z.string().optional(),
|
|
216
222
|
repoScores: z.record(z.string(), RepoScoreSchema).default({}),
|
|
217
223
|
config: AgentConfigSchema.default(() => AgentConfigSchema.parse({})),
|