pierre-review 0.1.5 → 0.1.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/config.js CHANGED
@@ -58,6 +58,15 @@ export const config = {
58
58
  host: process.env.HOST ?? (isCloud ? '0.0.0.0' : '127.0.0.1'),
59
59
  dbPath: isCloud || isAbsolute(rawDbUrl) ? rawDbUrl : resolve(backendRoot, rawDbUrl),
60
60
  backfillDays: intFromEnv('BACKFILL_DAYS', 90),
61
+ // First sync runs in two phases: a fast "foreground" window (matching the
62
+ // default timeline range) so the board is usable in seconds, then the rest of
63
+ // backfillDays is fetched in the background. Two-phase only kicks in when
64
+ // backfillDays exceeds this.
65
+ foregroundSyncDays: intFromEnv('FOREGROUND_SYNC_DAYS', 14),
66
+ // How many commit-file REST fetches to keep in flight at once (one pool per
67
+ // page). These draw from the REST quota (disjoint from the GraphQL points
68
+ // pool), so a modest pool safely cuts the dominant sync stage.
69
+ commitFileConcurrency: intFromEnv('COMMIT_FILE_CONCURRENCY', 10),
61
70
  syncCron: process.env.SYNC_CRON ?? '*/5 * * * *',
62
71
  syncOverlapMinutes: intFromEnv('SYNC_OVERLAP_MINUTES', 20),
63
72
  stallThresholdDays: intFromEnv('STALL_THRESHOLD_DAYS', 3),
@@ -7,7 +7,7 @@ const { commitFiles } = schema;
7
7
  * `commit_files` cache and filling misses via REST. SHAs are immutable, so the
8
8
  * cache never expires — re-syncs are free.
9
9
  */
10
- export async function ensureCommitFiles(owner, name, shas, token) {
10
+ export async function ensureCommitFiles(owner, name, shas, token, concurrency = 10) {
11
11
  const result = new Map();
12
12
  const unique = [...new Set(shas)];
13
13
  if (unique.length === 0)
@@ -20,12 +20,13 @@ export async function ensureCommitFiles(owner, name, shas, token) {
20
20
  .execute();
21
21
  for (const row of cached)
22
22
  result.set(row.sha, row.paths);
23
- // Fetch cache misses with bounded concurrency. These REST calls dominate
24
- // sync latency on a PR that just got several commits addressing threads;
25
- // running them serially blocks the whole page loop. SHAs are immutable and
26
- // the cache is idempotent, so parallelism is safe.
23
+ // Fetch cache misses through one continuously-saturated worker pool. These
24
+ // REST calls dominate sync latency; the caller now hands us a whole page's
25
+ // worth of misses at once (rather than per-PR waves), so a fixed set of
26
+ // workers each pull the next SHA the instant they finish — keeping `concurrency`
27
+ // requests in flight the whole time. SHAs are immutable and the cache upsert is
28
+ // idempotent, so order doesn't matter.
27
29
  const missing = unique.filter((sha) => !result.has(sha));
28
- const CONCURRENCY = 5;
29
30
  const fetchOne = async (sha) => {
30
31
  try {
31
32
  const commit = await ghRestGetFor(token, `/repos/${owner}/${name}/commits/${sha}`);
@@ -43,9 +44,14 @@ export async function ensureCommitFiles(owner, name, shas, token) {
43
44
  result.set(sha, []);
44
45
  }
45
46
  };
46
- for (let i = 0; i < missing.length; i += CONCURRENCY) {
47
- await Promise.all(missing.slice(i, i + CONCURRENCY).map(fetchOne));
48
- }
47
+ let next = 0;
48
+ const worker = async () => {
49
+ while (next < missing.length) {
50
+ const sha = missing[next++];
51
+ await fetchOne(sha);
52
+ }
53
+ };
54
+ await Promise.all(Array.from({ length: Math.min(Math.max(1, concurrency), missing.length) }, worker));
49
55
  return result;
50
56
  }
51
57
  //# sourceMappingURL=commit-files.js.map
@@ -142,16 +142,63 @@ export async function runSyncForRepo(repoId, log, opts = {}) {
142
142
  log.error(`sync ${repo.owner}/${repo.name}: no access token for account ${repo.accountId}: ${err instanceof Error ? err.message : err}`);
143
143
  return false;
144
144
  }
145
- const task = syncRepo({
145
+ const common = {
146
146
  owner: repo.owner,
147
147
  name: repo.name,
148
148
  accountId: repo.accountId,
149
149
  token,
150
- ...plan,
151
150
  log,
152
- onProgress: (p) => setSyncProgress(repoId, { ...p, mode: plan.mode }),
151
+ commitFileConcurrency: config.commitFileConcurrency,
153
152
  shouldCancel: () => cancelRequested.has(repoId),
154
- })
153
+ };
154
+ // Two-phase only for a first full backfill (never-synced, not a forced "deep"
155
+ // re-sync) when the backfill window is wider than the foreground window. A deep
156
+ // re-sync stays single-pass — its board is already populated, so there's no
157
+ // blank-board wait to shorten.
158
+ const twoPhase = !opts.forceFull && plan.mode === 'full' && config.backfillDays > config.foregroundSyncDays;
159
+ const runWalk = async () => {
160
+ if (!twoPhase) {
161
+ await syncRepo({
162
+ ...common,
163
+ mode: plan.mode,
164
+ since: plan.since,
165
+ commitState: true,
166
+ onProgress: (p) => setSyncProgress(repoId, { ...p, mode: plan.mode }),
167
+ });
168
+ return;
169
+ }
170
+ // Phase 1 — the fast foreground window (the default timeline range). Committed
171
+ // per-PR so the recent board is usable in seconds, but does NOT stamp
172
+ // syncState, so the repo stays an "initial backfill" until phase 2 finishes.
173
+ const foregroundSince = new Date(Date.now() - config.foregroundSyncDays * DAY_MS);
174
+ const p1 = await syncRepo({
175
+ ...common,
176
+ mode: 'full',
177
+ since: foregroundSince,
178
+ commitState: false,
179
+ onProgress: (p) => setSyncProgress(repoId, { ...p, mode: 'full', foregroundComplete: false }),
180
+ });
181
+ if (p1.cancelled)
182
+ return;
183
+ // Foreground done — flip the flag so the UI drops the user into the recent
184
+ // view, then continue the SAME cursor walk back to the full backfill window.
185
+ setSyncProgress(repoId, {
186
+ percent: 1,
187
+ prsProcessed: p1.prCount,
188
+ pages: p1.pages,
189
+ mode: 'full',
190
+ foregroundComplete: true,
191
+ });
192
+ await syncRepo({
193
+ ...common,
194
+ mode: 'full',
195
+ since: plan.since, // now − backfillDays
196
+ startCursor: p1.endCursor,
197
+ commitState: true,
198
+ onProgress: (p) => setSyncProgress(repoId, { ...p, mode: 'full', foregroundComplete: true }),
199
+ });
200
+ };
201
+ const task = runWalk()
155
202
  .catch((err) => {
156
203
  log.error(`background sync ${repo.owner}/${repo.name} failed: ${err instanceof Error ? err.message : err}`);
157
204
  })
@@ -193,6 +240,8 @@ export async function syncAllRepos(log) {
193
240
  accountId: repo.accountId,
194
241
  token,
195
242
  ...plan,
243
+ commitState: true,
244
+ commitFileConcurrency: config.commitFileConcurrency,
196
245
  log,
197
246
  onProgress: (p) => setSyncProgress(r.id, { ...p, mode: plan.mode }),
198
247
  shouldCancel: () => cancelRequested.has(r.id),
@@ -1,3 +1,4 @@
1
+ import { performance } from 'node:perf_hooks';
1
2
  import { eq } from 'drizzle-orm';
2
3
  import { db, schema } from '../db/client.js';
3
4
  import { getGraphqlClientFor } from '../github/client.js';
@@ -15,15 +16,28 @@ function clamp01(n) {
15
16
  }
16
17
  export async function syncRepo(opts) {
17
18
  const { owner, name, accountId, mode, since, onProgress } = opts;
19
+ const commitState = opts.commitState ?? true;
18
20
  const log = opts.log ?? consoleLogger;
19
21
  const client = getGraphqlClientFor(opts.token);
20
22
  const resolver = createUserResolver();
21
- let cursor = null;
23
+ let cursor = opts.startCursor ?? null;
24
+ // The `after` value used to fetch the page currently being processed. When we
25
+ // stop at the `since` cutoff mid-page, this (not the page's endCursor) is what a
26
+ // follow-on phase resumes from, so the cutoff page's older PRs aren't skipped.
27
+ let pageStartCursor = cursor;
22
28
  let repoId = null;
23
29
  let prCount = 0;
24
30
  let pages = 0;
25
31
  let totalCost = 0;
26
32
  let lastRemaining = 0;
33
+ // Per-stage wall-clock accumulators, so the final log attributes the 2-3 min:
34
+ // page fetch (network/GraphQL) vs commit-file REST fan-out vs DB persist. This
35
+ // is the baseline that tells us which stage to optimise next.
36
+ let graphqlMs = 0;
37
+ let commitFilesMs = 0;
38
+ let persistMs = 0;
39
+ const timingSummary = () => `graphql ${(graphqlMs / 1000).toFixed(1)}s / commit-files ` +
40
+ `${(commitFilesMs / 1000).toFixed(1)}s / persist ${(persistMs / 1000).toFixed(1)}s`;
27
41
  // Time-walked progress: PRs arrive newest-first and we stop at `since`, so the
28
42
  // span [since .. newest] is the work and the current PR's updatedAt marks how
29
43
  // far through it we are.
@@ -46,11 +60,14 @@ export async function syncRepo(opts) {
46
60
  cancelled = true;
47
61
  break;
48
62
  }
63
+ pageStartCursor = cursor;
64
+ const tPage = performance.now();
49
65
  const resp = await client(REPO_ACTIVITY_QUERY, {
50
66
  owner,
51
67
  name,
52
68
  cursor,
53
69
  });
70
+ graphqlMs += performance.now() - tPage;
54
71
  pages += 1;
55
72
  totalCost += resp.rateLimit.cost;
56
73
  lastRemaining = resp.rateLimit.remaining;
@@ -63,6 +80,11 @@ export async function syncRepo(opts) {
63
80
  repoId = await upsertRepo(owner, name, resp.repository.id, resp.repository.defaultBranchRef?.name ?? null, accountId);
64
81
  }
65
82
  const { nodes, pageInfo } = resp.repository.pullRequests;
83
+ // First select the in-window PRs on this page and gather every commit SHA
84
+ // whose changed files we need (commits that could plausibly have addressed
85
+ // an open thread, i.e. landed after its last comment).
86
+ const pagePrs = [];
87
+ const pageShas = [];
66
88
  for (const pr of nodes) {
67
89
  if (opts.shouldCancel?.()) {
68
90
  cancelled = true;
@@ -74,20 +96,36 @@ export async function syncRepo(opts) {
74
96
  break;
75
97
  }
76
98
  newestMs ??= updatedMs;
77
- // Only fetch changed-files for commits that could plausibly have
78
- // addressed an open thread (after its last comment).
79
99
  const unresolved = pr.reviewThreads.nodes.filter((t) => !t.isResolved && t.comments.nodes.length > 0);
80
- let shas = [];
81
100
  if (unresolved.length > 0) {
82
101
  const threshold = Math.min(...unresolved.map((t) => Date.parse(t.comments.nodes.at(-1).createdAt)));
83
- shas = pr.commits.nodes
84
- .filter((c) => Date.parse(c.commit.committedDate) > threshold)
85
- .map((c) => c.commit.oid);
102
+ for (const c of pr.commits.nodes) {
103
+ if (Date.parse(c.commit.committedDate) > threshold)
104
+ pageShas.push(c.commit.oid);
105
+ }
106
+ }
107
+ pagePrs.push({ pr, updatedMs });
108
+ }
109
+ // Fetch the whole page's commit files in one saturated pool (replacing the
110
+ // old per-PR serial waves), then persist each in-window PR. persistPr only
111
+ // reads the SHAs its own commits need, so a page-wide superset map is fine.
112
+ // Skip entirely if a cancel arrived mid-gather — don't do network/DB work
113
+ // we're about to throw away.
114
+ if (!cancelled && pagePrs.length > 0) {
115
+ const tFiles = performance.now();
116
+ const commitFilesBySha = await ensureCommitFiles(owner, name, pageShas, opts.token, opts.commitFileConcurrency);
117
+ commitFilesMs += performance.now() - tFiles;
118
+ for (const { pr, updatedMs } of pagePrs) {
119
+ if (opts.shouldCancel?.()) {
120
+ cancelled = true;
121
+ break;
122
+ }
123
+ const tPersist = performance.now();
124
+ await persistPr(pr, repoId, resolver, commitFilesBySha, accountId);
125
+ persistMs += performance.now() - tPersist;
126
+ prCount += 1;
127
+ reportProgress(updatedMs);
86
128
  }
87
- const commitFilesBySha = await ensureCommitFiles(owner, name, shas, opts.token);
88
- await persistPr(pr, repoId, resolver, commitFilesBySha, accountId);
89
- prCount += 1;
90
- reportProgress(updatedMs);
91
129
  }
92
130
  cursor = pageInfo.endCursor;
93
131
  if (cancelled || stop || !pageInfo.hasNextPage)
@@ -98,13 +136,15 @@ export async function syncRepo(opts) {
98
136
  // safely delete it. Already-persisted PRs are harmless (idempotent) and get
99
137
  // cleaned up with the repo, or resumed on the next sync for an existing repo.
100
138
  if (cancelled) {
101
- log.info(`sync ${owner}/${name} cancelled after ${prCount} PRs / ${pages} page(s)`);
139
+ log.info(`sync ${owner}/${name} cancelled after ${prCount} PRs / ${pages} page(s) — timing: ${timingSummary()}`);
102
140
  return {
103
141
  repoId: repoId ?? -1,
104
142
  prCount,
105
143
  pages,
106
144
  rateLimitRemaining: lastRemaining,
107
145
  rateLimitCost: totalCost,
146
+ cancelled: true,
147
+ endCursor: pageStartCursor,
108
148
  };
109
149
  }
110
150
  // Reached the cutoff / last page — the walk is complete.
@@ -112,25 +152,37 @@ export async function syncRepo(opts) {
112
152
  if (repoId === null) {
113
153
  throw new Error(`Repository ${owner}/${name} returned no data`);
114
154
  }
115
- const now = new Date();
116
- const statePatch = mode === 'full'
117
- ? { lastFullSyncAt: now, lastIncrementalSyncAt: now }
118
- : { lastIncrementalSyncAt: now };
119
- await db
120
- .insert(syncState)
121
- .values({ repoId, ...statePatch, lastSyncStatus: 'ok', lastSyncError: null })
122
- .onConflictDoUpdate({
123
- target: syncState.repoId,
124
- set: { ...statePatch, lastSyncStatus: 'ok', lastSyncError: null },
125
- })
126
- .execute();
127
- log.info(`sync ${owner}/${name} [${mode}] done: ${prCount} PRs over ${pages} page(s), cost ${totalCost}, ${lastRemaining} remaining`);
155
+ // commitState=false (a two-phase foreground pass) deliberately does NOT stamp
156
+ // the repo as synced — the authoritative timestamp is written by the deeper
157
+ // pass that follows, so planSync keeps treating the repo as not-yet-fully-
158
+ // synced (and the cancel endpoint as an initial backfill) until then.
159
+ if (commitState) {
160
+ const now = new Date();
161
+ const statePatch = mode === 'full'
162
+ ? { lastFullSyncAt: now, lastIncrementalSyncAt: now }
163
+ : { lastIncrementalSyncAt: now };
164
+ await db
165
+ .insert(syncState)
166
+ .values({ repoId, ...statePatch, lastSyncStatus: 'ok', lastSyncError: null })
167
+ .onConflictDoUpdate({
168
+ target: syncState.repoId,
169
+ set: { ...statePatch, lastSyncStatus: 'ok', lastSyncError: null },
170
+ })
171
+ .execute();
172
+ }
173
+ const phase = commitState ? mode : `${mode} foreground`;
174
+ log.info(`sync ${owner}/${name} [${phase}] done: ${prCount} PRs over ${pages} page(s), ` +
175
+ `cost ${totalCost}, ${lastRemaining} remaining — timing: ${timingSummary()}`);
176
+ // Resume point for a follow-on phase: re-fetch the cutoff page (so its older
177
+ // PRs aren't skipped) if we stopped there, else the final cursor.
128
178
  return {
129
179
  repoId,
130
180
  prCount,
131
181
  pages,
132
182
  rateLimitRemaining: lastRemaining,
133
183
  rateLimitCost: totalCost,
184
+ cancelled: false,
185
+ endCursor: stop ? pageStartCursor : cursor,
134
186
  };
135
187
  }
136
188
  catch (err) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pierre-review",
3
- "version": "0.1.5",
3
+ "version": "0.1.6",
4
4
  "description": "Dashboard for tracking your team's GitHub PR activity across repos — local (SQLite + gh) or self-hosted multi-tenant cloud (Postgres + GitHub App).",
5
5
  "type": "module",
6
6
  "author": "Alex Wakeman",