pierre-review 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config.js +9 -0
- package/dist/sync/commit-files.js +15 -9
- package/dist/sync/sync-manager.js +53 -4
- package/dist/sync/sync-repo.js +77 -25
- package/package.json +1 -1
- package/public/assets/index-C8dTw5tB.js +1368 -0
- package/public/assets/{index-Bb1RfTQk.css → index-D3qeT16k.css} +1 -1
- package/public/index.html +2 -2
- package/public-landing/assets/index-BtpOo-9R.css +1 -0
- package/public-landing/assets/index-DyEMCLrl.js +40 -0
- package/public-landing/index.html +47 -3
- package/public/assets/index-CV5B7nVN.js +0 -1368
- package/public-landing/assets/index-BUTW1ViM.css +0 -1
- package/public-landing/assets/index-BVL5_Nfr.js +0 -40
package/dist/config.js
CHANGED
|
@@ -58,6 +58,15 @@ export const config = {
|
|
|
58
58
|
host: process.env.HOST ?? (isCloud ? '0.0.0.0' : '127.0.0.1'),
|
|
59
59
|
dbPath: isCloud || isAbsolute(rawDbUrl) ? rawDbUrl : resolve(backendRoot, rawDbUrl),
|
|
60
60
|
backfillDays: intFromEnv('BACKFILL_DAYS', 90),
|
|
61
|
+
// First sync runs in two phases: a fast "foreground" window (matching the
|
|
62
|
+
// default timeline range) so the board is usable in seconds, then the rest of
|
|
63
|
+
// backfillDays is fetched in the background. Two-phase only kicks in when
|
|
64
|
+
// backfillDays exceeds this.
|
|
65
|
+
foregroundSyncDays: intFromEnv('FOREGROUND_SYNC_DAYS', 14),
|
|
66
|
+
// How many commit-file REST fetches to keep in flight at once (one pool per
|
|
67
|
+
// page). These draw from the REST quota (disjoint from the GraphQL points
|
|
68
|
+
// pool), so a modest pool safely cuts the dominant sync stage.
|
|
69
|
+
commitFileConcurrency: intFromEnv('COMMIT_FILE_CONCURRENCY', 10),
|
|
61
70
|
syncCron: process.env.SYNC_CRON ?? '*/5 * * * *',
|
|
62
71
|
syncOverlapMinutes: intFromEnv('SYNC_OVERLAP_MINUTES', 20),
|
|
63
72
|
stallThresholdDays: intFromEnv('STALL_THRESHOLD_DAYS', 3),
|
package/dist/sync/commit-files.js
CHANGED
|
@@ -7,7 +7,7 @@ const { commitFiles } = schema;
|
|
|
7
7
|
* `commit_files` cache and filling misses via REST. SHAs are immutable, so the
|
|
8
8
|
* cache never expires — re-syncs are free.
|
|
9
9
|
*/
|
|
10
|
-
export async function ensureCommitFiles(owner, name, shas, token) {
|
|
10
|
+
export async function ensureCommitFiles(owner, name, shas, token, concurrency = 10) {
|
|
11
11
|
const result = new Map();
|
|
12
12
|
const unique = [...new Set(shas)];
|
|
13
13
|
if (unique.length === 0)
|
|
@@ -20,12 +20,13 @@ export async function ensureCommitFiles(owner, name, shas, token) {
|
|
|
20
20
|
.execute();
|
|
21
21
|
for (const row of cached)
|
|
22
22
|
result.set(row.sha, row.paths);
|
|
23
|
-
// Fetch cache misses
|
|
24
|
-
// sync latency
|
|
25
|
-
//
|
|
26
|
-
// the
|
|
23
|
+
// Fetch cache misses through one continuously-saturated worker pool. These
|
|
24
|
+
// REST calls dominate sync latency; the caller now hands us a whole page's
|
|
25
|
+
// worth of misses at once (rather than per-PR waves), so a fixed set of
|
|
26
|
+
// workers each pull the next SHA the instant they finish — keeping `concurrency`
|
|
27
|
+
// requests in flight the whole time. SHAs are immutable and the cache upsert is
|
|
28
|
+
// idempotent, so order doesn't matter.
|
|
27
29
|
const missing = unique.filter((sha) => !result.has(sha));
|
|
28
|
-
const CONCURRENCY = 5;
|
|
29
30
|
const fetchOne = async (sha) => {
|
|
30
31
|
try {
|
|
31
32
|
const commit = await ghRestGetFor(token, `/repos/${owner}/${name}/commits/${sha}`);
|
|
@@ -43,9 +44,14 @@ export async function ensureCommitFiles(owner, name, shas, token) {
|
|
|
43
44
|
result.set(sha, []);
|
|
44
45
|
}
|
|
45
46
|
};
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
47
|
+
let next = 0;
|
|
48
|
+
const worker = async () => {
|
|
49
|
+
while (next < missing.length) {
|
|
50
|
+
const sha = missing[next++];
|
|
51
|
+
await fetchOne(sha);
|
|
52
|
+
}
|
|
53
|
+
};
|
|
54
|
+
await Promise.all(Array.from({ length: Math.min(Math.max(1, concurrency), missing.length) }, worker));
|
|
49
55
|
return result;
|
|
50
56
|
}
|
|
51
57
|
//# sourceMappingURL=commit-files.js.map
|
package/dist/sync/sync-manager.js
CHANGED
|
@@ -142,16 +142,63 @@ export async function runSyncForRepo(repoId, log, opts = {}) {
|
|
|
142
142
|
log.error(`sync ${repo.owner}/${repo.name}: no access token for account ${repo.accountId}: ${err instanceof Error ? err.message : err}`);
|
|
143
143
|
return false;
|
|
144
144
|
}
|
|
145
|
-
const task = syncRepo({
|
|
145
|
+
const common = {
|
|
146
146
|
owner: repo.owner,
|
|
147
147
|
name: repo.name,
|
|
148
148
|
accountId: repo.accountId,
|
|
149
149
|
token,
|
|
150
|
-
...plan,
|
|
151
150
|
log,
|
|
152
|
-
|
|
151
|
+
commitFileConcurrency: config.commitFileConcurrency,
|
|
153
152
|
shouldCancel: () => cancelRequested.has(repoId),
|
|
154
|
-
})
|
|
153
|
+
};
|
|
154
|
+
// Two-phase only for a first full backfill (never-synced, not a forced "deep"
|
|
155
|
+
// re-sync) when the backfill window is wider than the foreground window. A deep
|
|
156
|
+
// re-sync stays single-pass — its board is already populated, so there's no
|
|
157
|
+
// blank-board wait to shorten.
|
|
158
|
+
const twoPhase = !opts.forceFull && plan.mode === 'full' && config.backfillDays > config.foregroundSyncDays;
|
|
159
|
+
const runWalk = async () => {
|
|
160
|
+
if (!twoPhase) {
|
|
161
|
+
await syncRepo({
|
|
162
|
+
...common,
|
|
163
|
+
mode: plan.mode,
|
|
164
|
+
since: plan.since,
|
|
165
|
+
commitState: true,
|
|
166
|
+
onProgress: (p) => setSyncProgress(repoId, { ...p, mode: plan.mode }),
|
|
167
|
+
});
|
|
168
|
+
return;
|
|
169
|
+
}
|
|
170
|
+
// Phase 1 — the fast foreground window (the default timeline range). Committed
|
|
171
|
+
// per-PR so the recent board is usable in seconds, but does NOT stamp
|
|
172
|
+
// syncState, so the repo stays an "initial backfill" until phase 2 finishes.
|
|
173
|
+
const foregroundSince = new Date(Date.now() - config.foregroundSyncDays * DAY_MS);
|
|
174
|
+
const p1 = await syncRepo({
|
|
175
|
+
...common,
|
|
176
|
+
mode: 'full',
|
|
177
|
+
since: foregroundSince,
|
|
178
|
+
commitState: false,
|
|
179
|
+
onProgress: (p) => setSyncProgress(repoId, { ...p, mode: 'full', foregroundComplete: false }),
|
|
180
|
+
});
|
|
181
|
+
if (p1.cancelled)
|
|
182
|
+
return;
|
|
183
|
+
// Foreground done — flip the flag so the UI drops the user into the recent
|
|
184
|
+
// view, then continue the SAME cursor walk back to the full backfill window.
|
|
185
|
+
setSyncProgress(repoId, {
|
|
186
|
+
percent: 1,
|
|
187
|
+
prsProcessed: p1.prCount,
|
|
188
|
+
pages: p1.pages,
|
|
189
|
+
mode: 'full',
|
|
190
|
+
foregroundComplete: true,
|
|
191
|
+
});
|
|
192
|
+
await syncRepo({
|
|
193
|
+
...common,
|
|
194
|
+
mode: 'full',
|
|
195
|
+
since: plan.since, // now − backfillDays
|
|
196
|
+
startCursor: p1.endCursor,
|
|
197
|
+
commitState: true,
|
|
198
|
+
onProgress: (p) => setSyncProgress(repoId, { ...p, mode: 'full', foregroundComplete: true }),
|
|
199
|
+
});
|
|
200
|
+
};
|
|
201
|
+
const task = runWalk()
|
|
155
202
|
.catch((err) => {
|
|
156
203
|
log.error(`background sync ${repo.owner}/${repo.name} failed: ${err instanceof Error ? err.message : err}`);
|
|
157
204
|
})
|
|
@@ -193,6 +240,8 @@ export async function syncAllRepos(log) {
|
|
|
193
240
|
accountId: repo.accountId,
|
|
194
241
|
token,
|
|
195
242
|
...plan,
|
|
243
|
+
commitState: true,
|
|
244
|
+
commitFileConcurrency: config.commitFileConcurrency,
|
|
196
245
|
log,
|
|
197
246
|
onProgress: (p) => setSyncProgress(r.id, { ...p, mode: plan.mode }),
|
|
198
247
|
shouldCancel: () => cancelRequested.has(r.id),
|
package/dist/sync/sync-repo.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { performance } from 'node:perf_hooks';
|
|
1
2
|
import { eq } from 'drizzle-orm';
|
|
2
3
|
import { db, schema } from '../db/client.js';
|
|
3
4
|
import { getGraphqlClientFor } from '../github/client.js';
|
|
@@ -15,15 +16,28 @@ function clamp01(n) {
|
|
|
15
16
|
}
|
|
16
17
|
export async function syncRepo(opts) {
|
|
17
18
|
const { owner, name, accountId, mode, since, onProgress } = opts;
|
|
19
|
+
const commitState = opts.commitState ?? true;
|
|
18
20
|
const log = opts.log ?? consoleLogger;
|
|
19
21
|
const client = getGraphqlClientFor(opts.token);
|
|
20
22
|
const resolver = createUserResolver();
|
|
21
|
-
let cursor = null;
|
|
23
|
+
let cursor = opts.startCursor ?? null;
|
|
24
|
+
// The `after` value used to fetch the page currently being processed. When we
|
|
25
|
+
// stop at the `since` cutoff mid-page, this (not the page's endCursor) is what a
|
|
26
|
+
// follow-on phase resumes from, so the cutoff page's older PRs aren't skipped.
|
|
27
|
+
let pageStartCursor = cursor;
|
|
22
28
|
let repoId = null;
|
|
23
29
|
let prCount = 0;
|
|
24
30
|
let pages = 0;
|
|
25
31
|
let totalCost = 0;
|
|
26
32
|
let lastRemaining = 0;
|
|
33
|
+
// Per-stage wall-clock accumulators, so the final log attributes the 2-3 min:
|
|
34
|
+
// page fetch (network/GraphQL) vs commit-file REST fan-out vs DB persist. This
|
|
35
|
+
// is the baseline that tells us which stage to optimise next.
|
|
36
|
+
let graphqlMs = 0;
|
|
37
|
+
let commitFilesMs = 0;
|
|
38
|
+
let persistMs = 0;
|
|
39
|
+
const timingSummary = () => `graphql ${(graphqlMs / 1000).toFixed(1)}s / commit-files ` +
|
|
40
|
+
`${(commitFilesMs / 1000).toFixed(1)}s / persist ${(persistMs / 1000).toFixed(1)}s`;
|
|
27
41
|
// Time-walked progress: PRs arrive newest-first and we stop at `since`, so the
|
|
28
42
|
// span [since .. newest] is the work and the current PR's updatedAt marks how
|
|
29
43
|
// far through it we are.
|
|
@@ -46,11 +60,14 @@ export async function syncRepo(opts) {
|
|
|
46
60
|
cancelled = true;
|
|
47
61
|
break;
|
|
48
62
|
}
|
|
63
|
+
pageStartCursor = cursor;
|
|
64
|
+
const tPage = performance.now();
|
|
49
65
|
const resp = await client(REPO_ACTIVITY_QUERY, {
|
|
50
66
|
owner,
|
|
51
67
|
name,
|
|
52
68
|
cursor,
|
|
53
69
|
});
|
|
70
|
+
graphqlMs += performance.now() - tPage;
|
|
54
71
|
pages += 1;
|
|
55
72
|
totalCost += resp.rateLimit.cost;
|
|
56
73
|
lastRemaining = resp.rateLimit.remaining;
|
|
@@ -63,6 +80,11 @@ export async function syncRepo(opts) {
|
|
|
63
80
|
repoId = await upsertRepo(owner, name, resp.repository.id, resp.repository.defaultBranchRef?.name ?? null, accountId);
|
|
64
81
|
}
|
|
65
82
|
const { nodes, pageInfo } = resp.repository.pullRequests;
|
|
83
|
+
// First select the in-window PRs on this page and gather every commit SHA
|
|
84
|
+
// whose changed files we need (commits that could plausibly have addressed
|
|
85
|
+
// an open thread, i.e. landed after its last comment).
|
|
86
|
+
const pagePrs = [];
|
|
87
|
+
const pageShas = [];
|
|
66
88
|
for (const pr of nodes) {
|
|
67
89
|
if (opts.shouldCancel?.()) {
|
|
68
90
|
cancelled = true;
|
|
@@ -74,20 +96,36 @@ export async function syncRepo(opts) {
|
|
|
74
96
|
break;
|
|
75
97
|
}
|
|
76
98
|
newestMs ??= updatedMs;
|
|
77
|
-
// Only fetch changed-files for commits that could plausibly have
|
|
78
|
-
// addressed an open thread (after its last comment).
|
|
79
99
|
const unresolved = pr.reviewThreads.nodes.filter((t) => !t.isResolved && t.comments.nodes.length > 0);
|
|
80
|
-
let shas = [];
|
|
81
100
|
if (unresolved.length > 0) {
|
|
82
101
|
const threshold = Math.min(...unresolved.map((t) => Date.parse(t.comments.nodes.at(-1).createdAt)));
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
102
|
+
for (const c of pr.commits.nodes) {
|
|
103
|
+
if (Date.parse(c.commit.committedDate) > threshold)
|
|
104
|
+
pageShas.push(c.commit.oid);
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
pagePrs.push({ pr, updatedMs });
|
|
108
|
+
}
|
|
109
|
+
// Fetch the whole page's commit files in one saturated pool (replacing the
|
|
110
|
+
// old per-PR serial waves), then persist each in-window PR. persistPr only
|
|
111
|
+
// reads the SHAs its own commits need, so a page-wide superset map is fine.
|
|
112
|
+
// Skip entirely if a cancel arrived mid-gather — don't do network/DB work
|
|
113
|
+
// we're about to throw away.
|
|
114
|
+
if (!cancelled && pagePrs.length > 0) {
|
|
115
|
+
const tFiles = performance.now();
|
|
116
|
+
const commitFilesBySha = await ensureCommitFiles(owner, name, pageShas, opts.token, opts.commitFileConcurrency);
|
|
117
|
+
commitFilesMs += performance.now() - tFiles;
|
|
118
|
+
for (const { pr, updatedMs } of pagePrs) {
|
|
119
|
+
if (opts.shouldCancel?.()) {
|
|
120
|
+
cancelled = true;
|
|
121
|
+
break;
|
|
122
|
+
}
|
|
123
|
+
const tPersist = performance.now();
|
|
124
|
+
await persistPr(pr, repoId, resolver, commitFilesBySha, accountId);
|
|
125
|
+
persistMs += performance.now() - tPersist;
|
|
126
|
+
prCount += 1;
|
|
127
|
+
reportProgress(updatedMs);
|
|
86
128
|
}
|
|
87
|
-
const commitFilesBySha = await ensureCommitFiles(owner, name, shas, opts.token);
|
|
88
|
-
await persistPr(pr, repoId, resolver, commitFilesBySha, accountId);
|
|
89
|
-
prCount += 1;
|
|
90
|
-
reportProgress(updatedMs);
|
|
91
129
|
}
|
|
92
130
|
cursor = pageInfo.endCursor;
|
|
93
131
|
if (cancelled || stop || !pageInfo.hasNextPage)
|
|
@@ -98,13 +136,15 @@ export async function syncRepo(opts) {
|
|
|
98
136
|
// safely delete it. Already-persisted PRs are harmless (idempotent) and get
|
|
99
137
|
// cleaned up with the repo, or resumed on the next sync for an existing repo.
|
|
100
138
|
if (cancelled) {
|
|
101
|
-
log.info(`sync ${owner}/${name} cancelled after ${prCount} PRs / ${pages} page(s)`);
|
|
139
|
+
log.info(`sync ${owner}/${name} cancelled after ${prCount} PRs / ${pages} page(s) — timing: ${timingSummary()}`);
|
|
102
140
|
return {
|
|
103
141
|
repoId: repoId ?? -1,
|
|
104
142
|
prCount,
|
|
105
143
|
pages,
|
|
106
144
|
rateLimitRemaining: lastRemaining,
|
|
107
145
|
rateLimitCost: totalCost,
|
|
146
|
+
cancelled: true,
|
|
147
|
+
endCursor: pageStartCursor,
|
|
108
148
|
};
|
|
109
149
|
}
|
|
110
150
|
// Reached the cutoff / last page — the walk is complete.
|
|
@@ -112,25 +152,37 @@ export async function syncRepo(opts) {
|
|
|
112
152
|
if (repoId === null) {
|
|
113
153
|
throw new Error(`Repository ${owner}/${name} returned no data`);
|
|
114
154
|
}
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
155
|
+
// commitState=false (a two-phase foreground pass) deliberately does NOT stamp
|
|
156
|
+
// the repo as synced — the authoritative timestamp is written by the deeper
|
|
157
|
+
// pass that follows, so planSync keeps treating the repo as not-yet-fully-
|
|
158
|
+
// synced (and the cancel endpoint as an initial backfill) until then.
|
|
159
|
+
if (commitState) {
|
|
160
|
+
const now = new Date();
|
|
161
|
+
const statePatch = mode === 'full'
|
|
162
|
+
? { lastFullSyncAt: now, lastIncrementalSyncAt: now }
|
|
163
|
+
: { lastIncrementalSyncAt: now };
|
|
164
|
+
await db
|
|
165
|
+
.insert(syncState)
|
|
166
|
+
.values({ repoId, ...statePatch, lastSyncStatus: 'ok', lastSyncError: null })
|
|
167
|
+
.onConflictDoUpdate({
|
|
168
|
+
target: syncState.repoId,
|
|
169
|
+
set: { ...statePatch, lastSyncStatus: 'ok', lastSyncError: null },
|
|
170
|
+
})
|
|
171
|
+
.execute();
|
|
172
|
+
}
|
|
173
|
+
const phase = commitState ? mode : `${mode} foreground`;
|
|
174
|
+
log.info(`sync ${owner}/${name} [${phase}] done: ${prCount} PRs over ${pages} page(s), ` +
|
|
175
|
+
`cost ${totalCost}, ${lastRemaining} remaining — timing: ${timingSummary()}`);
|
|
176
|
+
// Resume point for a follow-on phase: re-fetch the cutoff page (so its older
|
|
177
|
+
// PRs aren't skipped) if we stopped there, else the final cursor.
|
|
128
178
|
return {
|
|
129
179
|
repoId,
|
|
130
180
|
prCount,
|
|
131
181
|
pages,
|
|
132
182
|
rateLimitRemaining: lastRemaining,
|
|
133
183
|
rateLimitCost: totalCost,
|
|
184
|
+
cancelled: false,
|
|
185
|
+
endCursor: stop ? pageStartCursor : cursor,
|
|
134
186
|
};
|
|
135
187
|
}
|
|
136
188
|
catch (err) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pierre-review",
|
|
3
|
-
"version": "0.1.4",
|
|
3
|
+
"version": "0.1.6",
|
|
4
4
|
"description": "Dashboard for tracking your team's GitHub PR activity across repos — local (SQLite + gh) or self-hosted multi-tenant cloud (Postgres + GitHub App).",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"author": "Alex Wakeman",
|