@ghcrawl/api-core 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/service.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import http from 'node:http';
2
2
  import crypto from 'node:crypto';
3
3
  import { IterableMapper } from '@shutterstock/p-map-iterable';
4
- import { actionResponseSchema, clusterDetailResponseSchema, clusterResultSchema, clusterSummariesResponseSchema, clustersResponseSchema, embedResultSchema, healthResponseSchema, neighborsResponseSchema, refreshResponseSchema, repositoriesResponseSchema, searchResponseSchema, syncResultSchema, threadsResponseSchema, } from '@ghcrawl/api-contract';
4
+ import { actionResponseSchema, authorThreadsResponseSchema, closeResponseSchema, clusterDetailResponseSchema, clusterResultSchema, clusterSummariesResponseSchema, clustersResponseSchema, embedResultSchema, healthResponseSchema, neighborsResponseSchema, refreshResponseSchema, repositoriesResponseSchema, searchResponseSchema, syncResultSchema, threadsResponseSchema, } from '@ghcrawl/api-contract';
5
5
  import { buildClusters } from './cluster/build.js';
6
6
  import { ensureRuntimeDirs, isLikelyGitHubToken, isLikelyOpenAiApiKey, loadConfig, requireGithubToken, requireOpenAiKey, } from './config.js';
7
7
  import { migrate } from './db/migrate.js';
@@ -27,6 +27,9 @@ function parseIso(value) {
27
27
  const parsed = Date.parse(value);
28
28
  return Number.isNaN(parsed) ? null : parsed;
29
29
  }
30
/**
 * Report whether a thread row should be treated as closed.
 *
 * A row counts as closed when its `state` is anything other than 'open', or when it
 * carries a local close marker in `closed_at_local`.
 *
 * @param {object} row - Thread row (or partial row) exposing `state` and, optionally, `closed_at_local`.
 * @returns {boolean} True when the row is closed upstream or locally.
 */
function isEffectivelyClosed(row) {
    // `!= null` intentionally matches both null and undefined: a row selected without the
    // closed_at_local column must not be reported as locally closed.
    return row.state !== 'open' || row.closed_at_local != null;
}
30
33
  function isMissingGitHubResourceError(error) {
31
34
  const status = typeof error?.status === 'number' ? Number(error.status) : null;
32
35
  if (status === 404 || status === 410) {
@@ -150,6 +153,10 @@ function threadToDto(row, clusterId) {
150
153
  number: row.number,
151
154
  kind: row.kind,
152
155
  state: row.state,
156
+ isClosed: isEffectivelyClosed(row),
157
+ closedAtGh: row.closed_at_gh ?? null,
158
+ closedAtLocal: row.closed_at_local ?? null,
159
+ closeReasonLocal: row.close_reason_local ?? null,
153
160
  title: row.title,
154
161
  body: row.body,
155
162
  authorLogin: row.author_login,
@@ -260,8 +267,11 @@ export class GHCrawlService {
260
267
  .all(repository.id, repository.id);
261
268
  for (const row of clusterRows)
262
269
  clusterIds.set(row.thread_id, row.cluster_id);
263
- let sql = "select * from threads where repo_id = ? and state = 'open'";
270
+ let sql = 'select * from threads where repo_id = ?';
264
271
  const args = [repository.id];
272
+ if (!params.includeClosed) {
273
+ sql += " and state = 'open' and closed_at_local is null";
274
+ }
265
275
  if (params.kind) {
266
276
  sql += ' and kind = ?';
267
277
  args.push(params.kind);
@@ -291,6 +301,145 @@ export class GHCrawlService {
291
301
  threads: orderedRows.map((row) => threadToDto(row, clusterIds.get(row.id) ?? null)),
292
302
  });
293
303
  }
304
+ listAuthorThreads(params) {
305
+ const repository = this.requireRepository(params.owner, params.repo);
306
+ const normalizedLogin = params.login.trim();
307
+ if (!normalizedLogin) {
308
+ return authorThreadsResponseSchema.parse({
309
+ repository,
310
+ authorLogin: '',
311
+ threads: [],
312
+ });
313
+ }
314
+ const clusterIds = new Map();
315
+ const clusterRows = this.db
316
+ .prepare(`select cm.thread_id, cm.cluster_id
317
+ from cluster_members cm
318
+ join clusters c on c.id = cm.cluster_id
319
+ where c.repo_id = ? and c.cluster_run_id = (
320
+ select id from cluster_runs where repo_id = ? and status = 'completed' order by id desc limit 1
321
+ )`)
322
+ .all(repository.id, repository.id);
323
+ for (const row of clusterRows)
324
+ clusterIds.set(row.thread_id, row.cluster_id);
325
+ const rows = this.db
326
+ .prepare(`select *
327
+ from threads
328
+ where repo_id = ? and lower(author_login) = lower(?)
329
+ ${params.includeClosed ? '' : "and state = 'open' and closed_at_local is null"}
330
+ order by updated_at_gh desc, number desc`)
331
+ .all(repository.id, normalizedLogin);
332
+ const latestRun = this.getLatestClusterRun(repository.id);
333
+ const strongestByThread = new Map();
334
+ if (latestRun && rows.length > 1) {
335
+ const edges = this.db
336
+ .prepare(`select
337
+ se.left_thread_id,
338
+ se.right_thread_id,
339
+ se.score,
340
+ t1.number as left_number,
341
+ t1.kind as left_kind,
342
+ t1.title as left_title,
343
+ t2.number as right_number,
344
+ t2.kind as right_kind,
345
+ t2.title as right_title
346
+ from similarity_edges se
347
+ join threads t1 on t1.id = se.left_thread_id
348
+ join threads t2 on t2.id = se.right_thread_id
349
+ where se.repo_id = ?
350
+ and se.cluster_run_id = ?
351
+ and lower(t1.author_login) = lower(?)
352
+ and lower(t2.author_login) = lower(?)
353
+ ${params.includeClosed ? '' : "and t1.state = 'open' and t1.closed_at_local is null and t2.state = 'open' and t2.closed_at_local is null"}`)
354
+ .all(repository.id, latestRun.id, normalizedLogin, normalizedLogin);
355
+ const updateStrongest = (sourceThreadId, match) => {
356
+ const previous = strongestByThread.get(sourceThreadId);
357
+ if (!previous || match.score > previous.score) {
358
+ strongestByThread.set(sourceThreadId, match);
359
+ }
360
+ };
361
+ for (const edge of edges) {
362
+ updateStrongest(edge.left_thread_id, {
363
+ threadId: edge.right_thread_id,
364
+ number: edge.right_number,
365
+ kind: edge.right_kind,
366
+ title: edge.right_title,
367
+ score: edge.score,
368
+ });
369
+ updateStrongest(edge.right_thread_id, {
370
+ threadId: edge.left_thread_id,
371
+ number: edge.left_number,
372
+ kind: edge.left_kind,
373
+ title: edge.left_title,
374
+ score: edge.score,
375
+ });
376
+ }
377
+ }
378
+ return authorThreadsResponseSchema.parse({
379
+ repository,
380
+ authorLogin: normalizedLogin,
381
+ threads: rows.map((row) => ({
382
+ thread: threadToDto(row, clusterIds.get(row.id) ?? null),
383
+ strongestSameAuthorMatch: strongestByThread.get(row.id) ?? null,
384
+ })),
385
+ });
386
+ }
387
+ closeThreadLocally(params) {
388
+ const repository = this.requireRepository(params.owner, params.repo);
389
+ const row = this.db
390
+ .prepare('select * from threads where repo_id = ? and number = ? limit 1')
391
+ .get(repository.id, params.threadNumber);
392
+ if (!row) {
393
+ throw new Error(`Thread #${params.threadNumber} was not found for ${repository.fullName}.`);
394
+ }
395
+ const closedAt = nowIso();
396
+ this.db
397
+ .prepare(`update threads
398
+ set closed_at_local = ?,
399
+ close_reason_local = 'manual',
400
+ updated_at = ?
401
+ where id = ?`)
402
+ .run(closedAt, closedAt, row.id);
403
+ this.parsedEmbeddingCache.delete(repository.id);
404
+ const clusterIds = this.getLatestRunClusterIdsForThread(repository.id, row.id);
405
+ const clusterClosed = this.reconcileClusterCloseState(repository.id, clusterIds) > 0;
406
+ const updated = this.db.prepare('select * from threads where id = ? limit 1').get(row.id);
407
+ return closeResponseSchema.parse({
408
+ ok: true,
409
+ repository,
410
+ thread: threadToDto(updated),
411
+ clusterId: clusterIds[0] ?? null,
412
+ clusterClosed,
413
+ message: `Marked ${updated.kind} #${updated.number} closed locally.`,
414
+ });
415
+ }
416
+ closeClusterLocally(params) {
417
+ const repository = this.requireRepository(params.owner, params.repo);
418
+ const latestRun = this.getLatestClusterRun(repository.id);
419
+ if (!latestRun) {
420
+ throw new Error(`No completed cluster run found for ${repository.fullName}.`);
421
+ }
422
+ const row = this.db
423
+ .prepare('select id from clusters where repo_id = ? and cluster_run_id = ? and id = ? limit 1')
424
+ .get(repository.id, latestRun.id, params.clusterId);
425
+ if (!row) {
426
+ throw new Error(`Cluster ${params.clusterId} was not found for ${repository.fullName}.`);
427
+ }
428
+ const closedAt = nowIso();
429
+ this.db
430
+ .prepare(`update clusters
431
+ set closed_at_local = ?,
432
+ close_reason_local = 'manual'
433
+ where id = ?`)
434
+ .run(closedAt, row.id);
435
+ return closeResponseSchema.parse({
436
+ ok: true,
437
+ repository,
438
+ clusterId: row.id,
439
+ clusterClosed: true,
440
+ message: `Marked cluster ${row.id} closed locally.`,
441
+ });
442
+ }
294
443
  async syncRepository(params) {
295
444
  const crawlStartedAt = params.startedAt ?? nowIso();
296
445
  const includeComments = params.includeComments ?? false;
@@ -385,6 +534,10 @@ export class GHCrawlService {
385
534
  })
386
535
  : 0;
387
536
  const threadsClosed = threadsClosedFromClosedSweep + threadsClosedFromDirectReconcile;
537
+ this.parsedEmbeddingCache.delete(repoId);
538
+ if (threadsClosed > 0) {
539
+ this.reconcileClusterCloseState(repoId);
540
+ }
388
541
  const finishedAt = nowIso();
389
542
  const reconciledOpenCloseAt = shouldSweepClosedOverlap || shouldReconcileMissingOpenThreads ? finishedAt : null;
390
543
  const nextSyncCursor = {
@@ -596,7 +749,7 @@ export class GHCrawlService {
596
749
  from documents_fts
597
750
  join documents d on d.id = documents_fts.rowid
598
751
  join threads t on t.id = d.thread_id
599
- where t.repo_id = ? and t.state = 'open' and documents_fts match ?
752
+ where t.repo_id = ? and t.state = 'open' and t.closed_at_local is null and documents_fts match ?
600
753
  order by rank
601
754
  limit ?`)
602
755
  .all(repository.id, params.query, limit * 2);
@@ -618,7 +771,7 @@ export class GHCrawlService {
618
771
  const threadRows = candidateIds.size
619
772
  ? this.db
620
773
  .prepare(`select * from threads
621
- where repo_id = ? and state = 'open' and id in (${[...candidateIds].map(() => '?').join(',')})
774
+ where repo_id = ? and state = 'open' and closed_at_local is null and id in (${[...candidateIds].map(() => '?').join(',')})
622
775
  order by updated_at_gh desc, number desc`)
623
776
  .all(repository.id, ...candidateIds)
624
777
  : [];
@@ -730,7 +883,8 @@ export class GHCrawlService {
730
883
  }
731
884
  const rows = this.db
732
885
  .prepare(`select c.id, c.repo_id, c.representative_thread_id, c.member_count,
733
- cm.thread_id, cm.score_to_representative, t.number, t.kind, t.title
886
+ c.closed_at_local, c.close_reason_local,
887
+ cm.thread_id, cm.score_to_representative, t.number, t.kind, t.title, t.state, t.closed_at_local as thread_closed_at_local
734
888
  from clusters c
735
889
  left join cluster_members cm on cm.cluster_id = c.id
736
890
  left join threads t on t.id = cm.thread_id
@@ -742,6 +896,9 @@ export class GHCrawlService {
742
896
  const cluster = clusters.get(row.id) ?? {
743
897
  id: row.id,
744
898
  repoId: row.repo_id,
899
+ isClosed: row.close_reason_local !== null,
900
+ closedAtLocal: row.closed_at_local,
901
+ closeReasonLocal: row.close_reason_local,
745
902
  representativeThreadId: row.representative_thread_id,
746
903
  memberCount: row.member_count,
747
904
  members: [],
@@ -751,15 +908,20 @@ export class GHCrawlService {
751
908
  threadId: row.thread_id,
752
909
  number: row.number,
753
910
  kind: row.kind,
911
+ isClosed: row.state !== null && isEffectivelyClosed({ state: row.state, closed_at_local: row.thread_closed_at_local }),
754
912
  title: row.title,
755
913
  scoreToRepresentative: row.score_to_representative,
756
914
  });
757
915
  }
758
916
  clusters.set(row.id, cluster);
759
917
  }
918
+ const clusterValues = Array.from(clusters.values()).map((cluster) => ({
919
+ ...cluster,
920
+ isClosed: cluster.isClosed || (cluster.memberCount > 0 && cluster.members.every((member) => member.isClosed)),
921
+ }));
760
922
  return clustersResponseSchema.parse({
761
923
  repository,
762
- clusters: Array.from(clusters.values()),
924
+ clusters: clusterValues.filter((cluster) => (params.includeClosed ? true : !cluster.isClosed)),
763
925
  });
764
926
  }
765
927
  async refreshRepository(params) {
@@ -814,6 +976,7 @@ export class GHCrawlService {
814
976
  minSize: params.minSize,
815
977
  sort: params.sort,
816
978
  search: params.search,
979
+ includeClosedClusters: params.includeClosed === true,
817
980
  });
818
981
  const clusters = params.limit ? snapshot.clusters.slice(0, params.limit) : snapshot.clusters;
819
982
  return clusterSummariesResponseSchema.parse({
@@ -822,6 +985,9 @@ export class GHCrawlService {
822
985
  clusters: clusters.map((cluster) => ({
823
986
  clusterId: cluster.clusterId,
824
987
  displayTitle: cluster.displayTitle,
988
+ isClosed: cluster.isClosed,
989
+ closedAtLocal: cluster.closedAtLocal,
990
+ closeReasonLocal: cluster.closeReasonLocal,
825
991
  totalCount: cluster.totalCount,
826
992
  issueCount: cluster.issueCount,
827
993
  pullRequestCount: cluster.pullRequestCount,
@@ -837,6 +1003,7 @@ export class GHCrawlService {
837
1003
  owner: params.owner,
838
1004
  repo: params.repo,
839
1005
  minSize: 0,
1006
+ includeClosedClusters: params.includeClosed === true,
840
1007
  });
841
1008
  const cluster = snapshot.clusters.find((item) => item.clusterId === params.clusterId);
842
1009
  if (!cluster) {
@@ -846,6 +1013,7 @@ export class GHCrawlService {
846
1013
  owner: params.owner,
847
1014
  repo: params.repo,
848
1015
  clusterId: params.clusterId,
1016
+ clusterRunId: snapshot.clusterRunId ?? undefined,
849
1017
  });
850
1018
  const members = detail.members.slice(0, params.memberLimit ?? detail.members.length).map((member) => {
851
1019
  const threadDetail = this.getTuiThreadDetail({
@@ -869,6 +1037,9 @@ export class GHCrawlService {
869
1037
  cluster: {
870
1038
  clusterId: cluster.clusterId,
871
1039
  displayTitle: cluster.displayTitle,
1040
+ isClosed: cluster.isClosed,
1041
+ closedAtLocal: cluster.closedAtLocal,
1042
+ closeReasonLocal: cluster.closeReasonLocal,
872
1043
  totalCount: cluster.totalCount,
873
1044
  issueCount: cluster.issueCount,
874
1045
  pullRequestCount: cluster.pullRequestCount,
@@ -885,9 +1056,11 @@ export class GHCrawlService {
885
1056
  const stats = this.getTuiRepoStats(repository.id);
886
1057
  const latestRun = this.getLatestClusterRun(repository.id);
887
1058
  if (!latestRun) {
888
- return { repository, stats, clusters: [] };
1059
+ return { repository, stats, clusterRunId: null, clusters: [] };
889
1060
  }
1061
+ const includeClosedClusters = params.includeClosedClusters ?? true;
890
1062
  const clusters = this.listRawTuiClusters(repository.id, latestRun.id)
1063
+ .filter((cluster) => (includeClosedClusters ? true : !cluster.isClosed))
891
1064
  .filter((cluster) => cluster.totalCount >= (params.minSize ?? 10))
892
1065
  .filter((cluster) => {
893
1066
  const search = params.search?.trim().toLowerCase();
@@ -899,21 +1072,23 @@ export class GHCrawlService {
899
1072
  return {
900
1073
  repository,
901
1074
  stats,
1075
+ clusterRunId: latestRun.id,
902
1076
  clusters,
903
1077
  };
904
1078
  }
905
1079
  getTuiClusterDetail(params) {
906
1080
  const repository = this.requireRepository(params.owner, params.repo);
907
- const latestRun = this.getLatestClusterRun(repository.id);
908
- if (!latestRun) {
1081
+ const clusterRunId = params.clusterRunId ??
1082
+ (this.getLatestClusterRun(repository.id)?.id ?? null);
1083
+ if (!clusterRunId) {
909
1084
  throw new Error(`No completed cluster run found for ${repository.fullName}. Run cluster first.`);
910
1085
  }
911
- const summary = this.listRawTuiClusters(repository.id, latestRun.id).find((cluster) => cluster.clusterId === params.clusterId);
1086
+ const summary = this.getRawTuiClusterSummary(repository.id, clusterRunId, params.clusterId);
912
1087
  if (!summary) {
913
1088
  throw new Error(`Cluster ${params.clusterId} was not found for ${repository.fullName}.`);
914
1089
  }
915
1090
  const rows = this.db
916
- .prepare(`select t.id, t.number, t.kind, t.title, t.updated_at_gh, t.html_url, t.labels_json, cm.score_to_representative
1091
+ .prepare(`select t.id, t.number, t.kind, t.state, t.closed_at_local, t.title, t.updated_at_gh, t.html_url, t.labels_json, cm.score_to_representative
917
1092
  from cluster_members cm
918
1093
  join threads t on t.id = cm.thread_id
919
1094
  where cm.cluster_id = ?
@@ -925,6 +1100,9 @@ export class GHCrawlService {
925
1100
  return {
926
1101
  clusterId: summary.clusterId,
927
1102
  displayTitle: summary.displayTitle,
1103
+ isClosed: summary.isClosed,
1104
+ closedAtLocal: summary.closedAtLocal,
1105
+ closeReasonLocal: summary.closeReasonLocal,
928
1106
  totalCount: summary.totalCount,
929
1107
  issueCount: summary.issueCount,
930
1108
  pullRequestCount: summary.pullRequestCount,
@@ -936,6 +1114,7 @@ export class GHCrawlService {
936
1114
  id: row.id,
937
1115
  number: row.number,
938
1116
  kind: row.kind,
1117
+ isClosed: isEffectivelyClosed(row),
939
1118
  title: row.title,
940
1119
  updatedAtGh: row.updated_at_gh,
941
1120
  htmlUrl: row.html_url,
@@ -948,11 +1127,11 @@ export class GHCrawlService {
948
1127
  const repository = this.requireRepository(params.owner, params.repo);
949
1128
  const row = params.threadId
950
1129
  ? (this.db
951
- .prepare('select * from threads where repo_id = ? and id = ? and state = \'open\' limit 1')
1130
+ .prepare('select * from threads where repo_id = ? and id = ? limit 1')
952
1131
  .get(repository.id, params.threadId) ?? null)
953
1132
  : params.threadNumber
954
1133
  ? (this.db
955
- .prepare('select * from threads where repo_id = ? and number = ? and state = \'open\' limit 1')
1134
+ .prepare('select * from threads where repo_id = ? and number = ? limit 1')
956
1135
  .get(repository.id, params.threadNumber) ?? null)
957
1136
  : null;
958
1137
  if (!row) {
@@ -1112,7 +1291,7 @@ export class GHCrawlService {
1112
1291
  const counts = this.db
1113
1292
  .prepare(`select kind, count(*) as count
1114
1293
  from threads
1115
- where repo_id = ? and state = 'open'
1294
+ where repo_id = ? and state = 'open' and closed_at_local is null
1116
1295
  group by kind`)
1117
1296
  .all(repoId);
1118
1297
  const latestRun = this.getLatestClusterRun(repoId);
@@ -1140,11 +1319,73 @@ export class GHCrawlService {
1140
1319
  .prepare("select id, finished_at from cluster_runs where repo_id = ? and status = 'completed' order by id desc limit 1")
1141
1320
  .get(repoId) ?? null);
1142
1321
  }
1322
+ getLatestRunClusterIdsForThread(repoId, threadId) {
1323
+ const latestRun = this.getLatestClusterRun(repoId);
1324
+ if (!latestRun) {
1325
+ return [];
1326
+ }
1327
+ return this.db
1328
+ .prepare(`select cm.cluster_id
1329
+ from cluster_members cm
1330
+ join clusters c on c.id = cm.cluster_id
1331
+ where c.repo_id = ? and c.cluster_run_id = ? and cm.thread_id = ?
1332
+ order by cm.cluster_id asc`)
1333
+ .all(repoId, latestRun.id, threadId).map((row) => row.cluster_id);
1334
+ }
1335
+ reconcileClusterCloseState(repoId, clusterIds) {
1336
+ const latestRun = this.getLatestClusterRun(repoId);
1337
+ if (!latestRun) {
1338
+ return 0;
1339
+ }
1340
+ const resolvedClusterIds = clusterIds && clusterIds.length > 0
1341
+ ? Array.from(new Set(clusterIds))
1342
+ : this.db
1343
+ .prepare('select id from clusters where repo_id = ? and cluster_run_id = ? order by id asc')
1344
+ .all(repoId, latestRun.id).map((row) => row.id);
1345
+ if (resolvedClusterIds.length === 0) {
1346
+ return 0;
1347
+ }
1348
+ const summarize = this.db.prepare(`select
1349
+ c.id,
1350
+ c.close_reason_local,
1351
+ count(*) as member_count,
1352
+ sum(case when t.state != 'open' or t.closed_at_local is not null then 1 else 0 end) as closed_member_count
1353
+ from clusters c
1354
+ join cluster_members cm on cm.cluster_id = c.id
1355
+ join threads t on t.id = cm.thread_id
1356
+ where c.id = ?
1357
+ group by c.id, c.close_reason_local`);
1358
+ const markClosed = this.db.prepare(`update clusters
1359
+ set closed_at_local = coalesce(closed_at_local, ?),
1360
+ close_reason_local = 'all_members_closed'
1361
+ where id = ?`);
1362
+ const clearClosed = this.db.prepare(`update clusters
1363
+ set closed_at_local = null,
1364
+ close_reason_local = null
1365
+ where id = ? and close_reason_local = 'all_members_closed'`);
1366
+ let changed = 0;
1367
+ for (const clusterId of resolvedClusterIds) {
1368
+ const row = summarize.get(clusterId);
1369
+ if (!row || row.close_reason_local === 'manual') {
1370
+ continue;
1371
+ }
1372
+ if (row.member_count > 0 && row.closed_member_count >= row.member_count) {
1373
+ const result = markClosed.run(nowIso(), clusterId);
1374
+ changed += result.changes;
1375
+ continue;
1376
+ }
1377
+ const cleared = clearClosed.run(clusterId);
1378
+ changed += cleared.changes;
1379
+ }
1380
+ return changed;
1381
+ }
1143
1382
  listRawTuiClusters(repoId, clusterRunId) {
1144
1383
  const rows = this.db
1145
1384
  .prepare(`select
1146
1385
  c.id as cluster_id,
1147
1386
  c.member_count,
1387
+ c.closed_at_local,
1388
+ c.close_reason_local,
1148
1389
  c.representative_thread_id,
1149
1390
  rt.number as representative_number,
1150
1391
  rt.kind as representative_kind,
@@ -1152,6 +1393,7 @@ export class GHCrawlService {
1152
1393
  max(coalesce(t.updated_at_gh, t.updated_at)) as latest_updated_at,
1153
1394
  sum(case when t.kind = 'issue' then 1 else 0 end) as issue_count,
1154
1395
  sum(case when t.kind = 'pull_request' then 1 else 0 end) as pull_request_count,
1396
+ sum(case when t.state != 'open' or t.closed_at_local is not null then 1 else 0 end) as closed_member_count,
1155
1397
  group_concat(lower(coalesce(t.title, '')), ' ') as search_text
1156
1398
  from clusters c
1157
1399
  left join threads rt on rt.id = c.representative_thread_id
@@ -1161,6 +1403,8 @@ export class GHCrawlService {
1161
1403
  group by
1162
1404
  c.id,
1163
1405
  c.member_count,
1406
+ c.closed_at_local,
1407
+ c.close_reason_local,
1164
1408
  c.representative_thread_id,
1165
1409
  rt.number,
1166
1410
  rt.kind,
@@ -1169,6 +1413,9 @@ export class GHCrawlService {
1169
1413
  return rows.map((row) => ({
1170
1414
  clusterId: row.cluster_id,
1171
1415
  displayTitle: row.representative_title ?? `Cluster ${row.cluster_id}`,
1416
+ isClosed: row.close_reason_local !== null || row.closed_member_count >= row.member_count,
1417
+ closedAtLocal: row.closed_at_local,
1418
+ closeReasonLocal: row.close_reason_local,
1172
1419
  totalCount: row.member_count,
1173
1420
  issueCount: row.issue_count,
1174
1421
  pullRequestCount: row.pull_request_count,
@@ -1179,6 +1426,56 @@ export class GHCrawlService {
1179
1426
  searchText: `${(row.representative_title ?? '').toLowerCase()} ${row.search_text ?? ''}`.trim(),
1180
1427
  }));
1181
1428
  }
1429
+ getRawTuiClusterSummary(repoId, clusterRunId, clusterId) {
1430
+ const row = this.db
1431
+ .prepare(`select
1432
+ c.id as cluster_id,
1433
+ c.member_count,
1434
+ c.closed_at_local,
1435
+ c.close_reason_local,
1436
+ c.representative_thread_id,
1437
+ rt.number as representative_number,
1438
+ rt.kind as representative_kind,
1439
+ rt.title as representative_title,
1440
+ max(coalesce(t.updated_at_gh, t.updated_at)) as latest_updated_at,
1441
+ sum(case when t.kind = 'issue' then 1 else 0 end) as issue_count,
1442
+ sum(case when t.kind = 'pull_request' then 1 else 0 end) as pull_request_count,
1443
+ sum(case when t.state != 'open' or t.closed_at_local is not null then 1 else 0 end) as closed_member_count,
1444
+ group_concat(lower(coalesce(t.title, '')), ' ') as search_text
1445
+ from clusters c
1446
+ left join threads rt on rt.id = c.representative_thread_id
1447
+ join cluster_members cm on cm.cluster_id = c.id
1448
+ join threads t on t.id = cm.thread_id
1449
+ where c.repo_id = ? and c.cluster_run_id = ? and c.id = ?
1450
+ group by
1451
+ c.id,
1452
+ c.member_count,
1453
+ c.closed_at_local,
1454
+ c.close_reason_local,
1455
+ c.representative_thread_id,
1456
+ rt.number,
1457
+ rt.kind,
1458
+ rt.title`)
1459
+ .get(repoId, clusterRunId, clusterId);
1460
+ if (!row) {
1461
+ return null;
1462
+ }
1463
+ return {
1464
+ clusterId: row.cluster_id,
1465
+ displayTitle: row.representative_title ?? `Cluster ${row.cluster_id}`,
1466
+ isClosed: row.close_reason_local !== null || row.closed_member_count >= row.member_count,
1467
+ closedAtLocal: row.closed_at_local,
1468
+ closeReasonLocal: row.close_reason_local,
1469
+ totalCount: row.member_count,
1470
+ issueCount: row.issue_count,
1471
+ pullRequestCount: row.pull_request_count,
1472
+ latestUpdatedAt: row.latest_updated_at,
1473
+ representativeThreadId: row.representative_thread_id,
1474
+ representativeNumber: row.representative_number,
1475
+ representativeKind: row.representative_kind,
1476
+ searchText: `${(row.representative_title ?? '').toLowerCase()} ${row.search_text ?? ''}`.trim(),
1477
+ };
1478
+ }
1182
1479
  compareTuiClusterSummary(left, right, sort) {
1183
1480
  const leftTime = left.latestUpdatedAt ? Date.parse(left.latestUpdatedAt) : 0;
1184
1481
  const rightTime = right.latestUpdatedAt ? Date.parse(right.latestUpdatedAt) : 0;
@@ -1306,6 +1603,7 @@ export class GHCrawlService {
1306
1603
  from threads
1307
1604
  where repo_id = ?
1308
1605
  and state = 'open'
1606
+ and closed_at_local is null
1309
1607
  and (last_pulled_at is null or last_pulled_at < ?)
1310
1608
  order by number asc`)
1311
1609
  .all(params.repoId, params.crawlStartedAt);
@@ -1350,6 +1648,7 @@ export class GHCrawlService {
1350
1648
  from threads
1351
1649
  where repo_id = ?
1352
1650
  and state = 'open'
1651
+ and closed_at_local is null
1353
1652
  and (last_pulled_at is null or last_pulled_at < ?)
1354
1653
  order by number asc`)
1355
1654
  .all(params.repoId, params.crawlStartedAt);
@@ -1637,11 +1936,12 @@ export class GHCrawlService {
1637
1936
  }
1638
1937
  loadStoredEmbeddings(repoId) {
1639
1938
  return this.db
1640
- .prepare(`select t.id, t.repo_id, t.number, t.kind, t.state, t.title, t.body, t.author_login, t.html_url, t.labels_json,
1939
+ .prepare(`select t.id, t.repo_id, t.number, t.kind, t.state, t.closed_at_gh, t.closed_at_local, t.close_reason_local,
1940
+ t.title, t.body, t.author_login, t.html_url, t.labels_json,
1641
1941
  t.updated_at_gh, t.first_pulled_at, t.last_pulled_at, e.source_kind, e.embedding_json
1642
1942
  from threads t
1643
1943
  join document_embeddings e on e.thread_id = t.id
1644
- where t.repo_id = ? and t.state = 'open' and e.model = ?
1944
+ where t.repo_id = ? and t.state = 'open' and t.closed_at_local is null and e.model = ?
1645
1945
  order by t.number asc, e.source_kind asc`)
1646
1946
  .all(repoId, this.config.embedModel);
1647
1947
  }
@@ -1688,7 +1988,9 @@ export class GHCrawlService {
1688
1988
  and se.cluster_run_id = ?
1689
1989
  and (se.left_thread_id = ? or se.right_thread_id = ?)
1690
1990
  and t1.state = 'open'
1991
+ and t1.closed_at_local is null
1691
1992
  and t2.state = 'open'
1993
+ and t2.closed_at_local is null
1692
1994
  order by se.score desc
1693
1995
  limit ?`)
1694
1996
  .all(threadId, threadId, threadId, threadId, repoId, latestRun.id, threadId, threadId, limit);
@@ -1703,7 +2005,7 @@ export class GHCrawlService {
1703
2005
  getEmbeddingWorkset(repoId, threadNumber) {
1704
2006
  let sql = `select t.id, t.number, t.title, t.body
1705
2007
  from threads t
1706
- where t.repo_id = ? and t.state = 'open'`;
2008
+ where t.repo_id = ? and t.state = 'open' and t.closed_at_local is null`;
1707
2009
  const args = [repoId];
1708
2010
  if (threadNumber) {
1709
2011
  sql += ' and t.number = ?';
@@ -1736,7 +2038,7 @@ export class GHCrawlService {
1736
2038
  let sql = `select s.thread_id, s.summary_kind, s.summary_text
1737
2039
  from document_summaries s
1738
2040
  join threads t on t.id = s.thread_id
1739
- where t.repo_id = ? and t.state = 'open' and s.model = ?`;
2041
+ where t.repo_id = ? and t.state = 'open' and t.closed_at_local is null and s.model = ?`;
1740
2042
  const args = [repoId, this.config.summaryModel];
1741
2043
  if (threadNumber) {
1742
2044
  sql += ' and t.number = ?';