@ghcrawl/api-core 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/server.d.ts.map +1 -1
- package/dist/api/server.js +30 -3
- package/dist/api/server.js.map +1 -1
- package/dist/config.d.ts +3 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +7 -2
- package/dist/config.js.map +1 -1
- package/dist/db/migrate.d.ts.map +1 -1
- package/dist/db/migrate.js +13 -0
- package/dist/db/migrate.js.map +1 -1
- package/dist/service.d.ts +31 -1
- package/dist/service.d.ts.map +1 -1
- package/dist/service.js +261 -14
- package/dist/service.js.map +1 -1
- package/package.json +2 -2
package/dist/service.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import http from 'node:http';
|
|
2
2
|
import crypto from 'node:crypto';
|
|
3
3
|
import { IterableMapper } from '@shutterstock/p-map-iterable';
|
|
4
|
-
import { actionResponseSchema, clusterDetailResponseSchema, clusterResultSchema, clusterSummariesResponseSchema, clustersResponseSchema, embedResultSchema, healthResponseSchema, neighborsResponseSchema, refreshResponseSchema, repositoriesResponseSchema, searchResponseSchema, syncResultSchema, threadsResponseSchema, } from '@ghcrawl/api-contract';
|
|
4
|
+
import { actionResponseSchema, authorThreadsResponseSchema, closeResponseSchema, clusterDetailResponseSchema, clusterResultSchema, clusterSummariesResponseSchema, clustersResponseSchema, embedResultSchema, healthResponseSchema, neighborsResponseSchema, refreshResponseSchema, repositoriesResponseSchema, searchResponseSchema, syncResultSchema, threadsResponseSchema, } from '@ghcrawl/api-contract';
|
|
5
5
|
import { buildClusters } from './cluster/build.js';
|
|
6
6
|
import { ensureRuntimeDirs, isLikelyGitHubToken, isLikelyOpenAiApiKey, loadConfig, requireGithubToken, requireOpenAiKey, } from './config.js';
|
|
7
7
|
import { migrate } from './db/migrate.js';
|
|
@@ -27,6 +27,9 @@ function parseIso(value) {
|
|
|
27
27
|
const parsed = Date.parse(value);
|
|
28
28
|
return Number.isNaN(parsed) ? null : parsed;
|
|
29
29
|
}
|
|
30
|
+
/**
 * Report whether a thread row should be treated as closed.
 *
 * A row counts as closed when its `state` is anything other than `'open'`,
 * or when it carries a local close timestamp in `closed_at_local`.
 *
 * @param {{ state: string, closed_at_local?: string | null }} row - Thread row
 *   or a projection exposing `state` and `closed_at_local`.
 * @returns {boolean} `true` when the row is not open or has been closed locally.
 */
function isEffectivelyClosed(row) {
    // `!= null` intentionally matches both null and undefined, so a row
    // projection that omits `closed_at_local` is not misreported as closed.
    return row.state !== 'open' || row.closed_at_local != null;
}
|
|
30
33
|
function isMissingGitHubResourceError(error) {
|
|
31
34
|
const status = typeof error?.status === 'number' ? Number(error.status) : null;
|
|
32
35
|
if (status === 404 || status === 410) {
|
|
@@ -150,6 +153,10 @@ function threadToDto(row, clusterId) {
|
|
|
150
153
|
number: row.number,
|
|
151
154
|
kind: row.kind,
|
|
152
155
|
state: row.state,
|
|
156
|
+
isClosed: isEffectivelyClosed(row),
|
|
157
|
+
closedAtGh: row.closed_at_gh ?? null,
|
|
158
|
+
closedAtLocal: row.closed_at_local ?? null,
|
|
159
|
+
closeReasonLocal: row.close_reason_local ?? null,
|
|
153
160
|
title: row.title,
|
|
154
161
|
body: row.body,
|
|
155
162
|
authorLogin: row.author_login,
|
|
@@ -260,8 +267,11 @@ export class GHCrawlService {
|
|
|
260
267
|
.all(repository.id, repository.id);
|
|
261
268
|
for (const row of clusterRows)
|
|
262
269
|
clusterIds.set(row.thread_id, row.cluster_id);
|
|
263
|
-
let sql =
|
|
270
|
+
let sql = 'select * from threads where repo_id = ?';
|
|
264
271
|
const args = [repository.id];
|
|
272
|
+
if (!params.includeClosed) {
|
|
273
|
+
sql += " and state = 'open' and closed_at_local is null";
|
|
274
|
+
}
|
|
265
275
|
if (params.kind) {
|
|
266
276
|
sql += ' and kind = ?';
|
|
267
277
|
args.push(params.kind);
|
|
@@ -291,6 +301,145 @@ export class GHCrawlService {
|
|
|
291
301
|
threads: orderedRows.map((row) => threadToDto(row, clusterIds.get(row.id) ?? null)),
|
|
292
302
|
});
|
|
293
303
|
}
|
|
304
|
+
listAuthorThreads(params) {
|
|
305
|
+
const repository = this.requireRepository(params.owner, params.repo);
|
|
306
|
+
const normalizedLogin = params.login.trim();
|
|
307
|
+
if (!normalizedLogin) {
|
|
308
|
+
return authorThreadsResponseSchema.parse({
|
|
309
|
+
repository,
|
|
310
|
+
authorLogin: '',
|
|
311
|
+
threads: [],
|
|
312
|
+
});
|
|
313
|
+
}
|
|
314
|
+
const clusterIds = new Map();
|
|
315
|
+
const clusterRows = this.db
|
|
316
|
+
.prepare(`select cm.thread_id, cm.cluster_id
|
|
317
|
+
from cluster_members cm
|
|
318
|
+
join clusters c on c.id = cm.cluster_id
|
|
319
|
+
where c.repo_id = ? and c.cluster_run_id = (
|
|
320
|
+
select id from cluster_runs where repo_id = ? and status = 'completed' order by id desc limit 1
|
|
321
|
+
)`)
|
|
322
|
+
.all(repository.id, repository.id);
|
|
323
|
+
for (const row of clusterRows)
|
|
324
|
+
clusterIds.set(row.thread_id, row.cluster_id);
|
|
325
|
+
const rows = this.db
|
|
326
|
+
.prepare(`select *
|
|
327
|
+
from threads
|
|
328
|
+
where repo_id = ? and lower(author_login) = lower(?)
|
|
329
|
+
${params.includeClosed ? '' : "and state = 'open' and closed_at_local is null"}
|
|
330
|
+
order by updated_at_gh desc, number desc`)
|
|
331
|
+
.all(repository.id, normalizedLogin);
|
|
332
|
+
const latestRun = this.getLatestClusterRun(repository.id);
|
|
333
|
+
const strongestByThread = new Map();
|
|
334
|
+
if (latestRun && rows.length > 1) {
|
|
335
|
+
const edges = this.db
|
|
336
|
+
.prepare(`select
|
|
337
|
+
se.left_thread_id,
|
|
338
|
+
se.right_thread_id,
|
|
339
|
+
se.score,
|
|
340
|
+
t1.number as left_number,
|
|
341
|
+
t1.kind as left_kind,
|
|
342
|
+
t1.title as left_title,
|
|
343
|
+
t2.number as right_number,
|
|
344
|
+
t2.kind as right_kind,
|
|
345
|
+
t2.title as right_title
|
|
346
|
+
from similarity_edges se
|
|
347
|
+
join threads t1 on t1.id = se.left_thread_id
|
|
348
|
+
join threads t2 on t2.id = se.right_thread_id
|
|
349
|
+
where se.repo_id = ?
|
|
350
|
+
and se.cluster_run_id = ?
|
|
351
|
+
and lower(t1.author_login) = lower(?)
|
|
352
|
+
and lower(t2.author_login) = lower(?)
|
|
353
|
+
${params.includeClosed ? '' : "and t1.state = 'open' and t1.closed_at_local is null and t2.state = 'open' and t2.closed_at_local is null"}`)
|
|
354
|
+
.all(repository.id, latestRun.id, normalizedLogin, normalizedLogin);
|
|
355
|
+
const updateStrongest = (sourceThreadId, match) => {
|
|
356
|
+
const previous = strongestByThread.get(sourceThreadId);
|
|
357
|
+
if (!previous || match.score > previous.score) {
|
|
358
|
+
strongestByThread.set(sourceThreadId, match);
|
|
359
|
+
}
|
|
360
|
+
};
|
|
361
|
+
for (const edge of edges) {
|
|
362
|
+
updateStrongest(edge.left_thread_id, {
|
|
363
|
+
threadId: edge.right_thread_id,
|
|
364
|
+
number: edge.right_number,
|
|
365
|
+
kind: edge.right_kind,
|
|
366
|
+
title: edge.right_title,
|
|
367
|
+
score: edge.score,
|
|
368
|
+
});
|
|
369
|
+
updateStrongest(edge.right_thread_id, {
|
|
370
|
+
threadId: edge.left_thread_id,
|
|
371
|
+
number: edge.left_number,
|
|
372
|
+
kind: edge.left_kind,
|
|
373
|
+
title: edge.left_title,
|
|
374
|
+
score: edge.score,
|
|
375
|
+
});
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
return authorThreadsResponseSchema.parse({
|
|
379
|
+
repository,
|
|
380
|
+
authorLogin: normalizedLogin,
|
|
381
|
+
threads: rows.map((row) => ({
|
|
382
|
+
thread: threadToDto(row, clusterIds.get(row.id) ?? null),
|
|
383
|
+
strongestSameAuthorMatch: strongestByThread.get(row.id) ?? null,
|
|
384
|
+
})),
|
|
385
|
+
});
|
|
386
|
+
}
|
|
387
|
+
closeThreadLocally(params) {
|
|
388
|
+
const repository = this.requireRepository(params.owner, params.repo);
|
|
389
|
+
const row = this.db
|
|
390
|
+
.prepare('select * from threads where repo_id = ? and number = ? limit 1')
|
|
391
|
+
.get(repository.id, params.threadNumber);
|
|
392
|
+
if (!row) {
|
|
393
|
+
throw new Error(`Thread #${params.threadNumber} was not found for ${repository.fullName}.`);
|
|
394
|
+
}
|
|
395
|
+
const closedAt = nowIso();
|
|
396
|
+
this.db
|
|
397
|
+
.prepare(`update threads
|
|
398
|
+
set closed_at_local = ?,
|
|
399
|
+
close_reason_local = 'manual',
|
|
400
|
+
updated_at = ?
|
|
401
|
+
where id = ?`)
|
|
402
|
+
.run(closedAt, closedAt, row.id);
|
|
403
|
+
this.parsedEmbeddingCache.delete(repository.id);
|
|
404
|
+
const clusterIds = this.getLatestRunClusterIdsForThread(repository.id, row.id);
|
|
405
|
+
const clusterClosed = this.reconcileClusterCloseState(repository.id, clusterIds) > 0;
|
|
406
|
+
const updated = this.db.prepare('select * from threads where id = ? limit 1').get(row.id);
|
|
407
|
+
return closeResponseSchema.parse({
|
|
408
|
+
ok: true,
|
|
409
|
+
repository,
|
|
410
|
+
thread: threadToDto(updated),
|
|
411
|
+
clusterId: clusterIds[0] ?? null,
|
|
412
|
+
clusterClosed,
|
|
413
|
+
message: `Marked ${updated.kind} #${updated.number} closed locally.`,
|
|
414
|
+
});
|
|
415
|
+
}
|
|
416
|
+
closeClusterLocally(params) {
|
|
417
|
+
const repository = this.requireRepository(params.owner, params.repo);
|
|
418
|
+
const latestRun = this.getLatestClusterRun(repository.id);
|
|
419
|
+
if (!latestRun) {
|
|
420
|
+
throw new Error(`No completed cluster run found for ${repository.fullName}.`);
|
|
421
|
+
}
|
|
422
|
+
const row = this.db
|
|
423
|
+
.prepare('select id from clusters where repo_id = ? and cluster_run_id = ? and id = ? limit 1')
|
|
424
|
+
.get(repository.id, latestRun.id, params.clusterId);
|
|
425
|
+
if (!row) {
|
|
426
|
+
throw new Error(`Cluster ${params.clusterId} was not found for ${repository.fullName}.`);
|
|
427
|
+
}
|
|
428
|
+
const closedAt = nowIso();
|
|
429
|
+
this.db
|
|
430
|
+
.prepare(`update clusters
|
|
431
|
+
set closed_at_local = ?,
|
|
432
|
+
close_reason_local = 'manual'
|
|
433
|
+
where id = ?`)
|
|
434
|
+
.run(closedAt, row.id);
|
|
435
|
+
return closeResponseSchema.parse({
|
|
436
|
+
ok: true,
|
|
437
|
+
repository,
|
|
438
|
+
clusterId: row.id,
|
|
439
|
+
clusterClosed: true,
|
|
440
|
+
message: `Marked cluster ${row.id} closed locally.`,
|
|
441
|
+
});
|
|
442
|
+
}
|
|
294
443
|
async syncRepository(params) {
|
|
295
444
|
const crawlStartedAt = params.startedAt ?? nowIso();
|
|
296
445
|
const includeComments = params.includeComments ?? false;
|
|
@@ -385,6 +534,10 @@ export class GHCrawlService {
|
|
|
385
534
|
})
|
|
386
535
|
: 0;
|
|
387
536
|
const threadsClosed = threadsClosedFromClosedSweep + threadsClosedFromDirectReconcile;
|
|
537
|
+
this.parsedEmbeddingCache.delete(repoId);
|
|
538
|
+
if (threadsClosed > 0) {
|
|
539
|
+
this.reconcileClusterCloseState(repoId);
|
|
540
|
+
}
|
|
388
541
|
const finishedAt = nowIso();
|
|
389
542
|
const reconciledOpenCloseAt = shouldSweepClosedOverlap || shouldReconcileMissingOpenThreads ? finishedAt : null;
|
|
390
543
|
const nextSyncCursor = {
|
|
@@ -596,7 +749,7 @@ export class GHCrawlService {
|
|
|
596
749
|
from documents_fts
|
|
597
750
|
join documents d on d.id = documents_fts.rowid
|
|
598
751
|
join threads t on t.id = d.thread_id
|
|
599
|
-
|
|
752
|
+
where t.repo_id = ? and t.state = 'open' and t.closed_at_local is null and documents_fts match ?
|
|
600
753
|
order by rank
|
|
601
754
|
limit ?`)
|
|
602
755
|
.all(repository.id, params.query, limit * 2);
|
|
@@ -618,7 +771,7 @@ export class GHCrawlService {
|
|
|
618
771
|
const threadRows = candidateIds.size
|
|
619
772
|
? this.db
|
|
620
773
|
.prepare(`select * from threads
|
|
621
|
-
where repo_id = ? and state = 'open' and id in (${[...candidateIds].map(() => '?').join(',')})
|
|
774
|
+
where repo_id = ? and state = 'open' and closed_at_local is null and id in (${[...candidateIds].map(() => '?').join(',')})
|
|
622
775
|
order by updated_at_gh desc, number desc`)
|
|
623
776
|
.all(repository.id, ...candidateIds)
|
|
624
777
|
: [];
|
|
@@ -730,7 +883,8 @@ export class GHCrawlService {
|
|
|
730
883
|
}
|
|
731
884
|
const rows = this.db
|
|
732
885
|
.prepare(`select c.id, c.repo_id, c.representative_thread_id, c.member_count,
|
|
733
|
-
|
|
886
|
+
c.closed_at_local, c.close_reason_local,
|
|
887
|
+
cm.thread_id, cm.score_to_representative, t.number, t.kind, t.title, t.state, t.closed_at_local as thread_closed_at_local
|
|
734
888
|
from clusters c
|
|
735
889
|
left join cluster_members cm on cm.cluster_id = c.id
|
|
736
890
|
left join threads t on t.id = cm.thread_id
|
|
@@ -742,6 +896,9 @@ export class GHCrawlService {
|
|
|
742
896
|
const cluster = clusters.get(row.id) ?? {
|
|
743
897
|
id: row.id,
|
|
744
898
|
repoId: row.repo_id,
|
|
899
|
+
isClosed: row.close_reason_local !== null,
|
|
900
|
+
closedAtLocal: row.closed_at_local,
|
|
901
|
+
closeReasonLocal: row.close_reason_local,
|
|
745
902
|
representativeThreadId: row.representative_thread_id,
|
|
746
903
|
memberCount: row.member_count,
|
|
747
904
|
members: [],
|
|
@@ -751,15 +908,20 @@ export class GHCrawlService {
|
|
|
751
908
|
threadId: row.thread_id,
|
|
752
909
|
number: row.number,
|
|
753
910
|
kind: row.kind,
|
|
911
|
+
isClosed: row.state !== null && isEffectivelyClosed({ state: row.state, closed_at_local: row.thread_closed_at_local }),
|
|
754
912
|
title: row.title,
|
|
755
913
|
scoreToRepresentative: row.score_to_representative,
|
|
756
914
|
});
|
|
757
915
|
}
|
|
758
916
|
clusters.set(row.id, cluster);
|
|
759
917
|
}
|
|
918
|
+
const clusterValues = Array.from(clusters.values()).map((cluster) => ({
|
|
919
|
+
...cluster,
|
|
920
|
+
isClosed: cluster.isClosed || (cluster.memberCount > 0 && cluster.members.every((member) => member.isClosed)),
|
|
921
|
+
}));
|
|
760
922
|
return clustersResponseSchema.parse({
|
|
761
923
|
repository,
|
|
762
|
-
clusters:
|
|
924
|
+
clusters: clusterValues.filter((cluster) => (params.includeClosed ? true : !cluster.isClosed)),
|
|
763
925
|
});
|
|
764
926
|
}
|
|
765
927
|
async refreshRepository(params) {
|
|
@@ -814,6 +976,7 @@ export class GHCrawlService {
|
|
|
814
976
|
minSize: params.minSize,
|
|
815
977
|
sort: params.sort,
|
|
816
978
|
search: params.search,
|
|
979
|
+
includeClosedClusters: params.includeClosed === true,
|
|
817
980
|
});
|
|
818
981
|
const clusters = params.limit ? snapshot.clusters.slice(0, params.limit) : snapshot.clusters;
|
|
819
982
|
return clusterSummariesResponseSchema.parse({
|
|
@@ -822,6 +985,9 @@ export class GHCrawlService {
|
|
|
822
985
|
clusters: clusters.map((cluster) => ({
|
|
823
986
|
clusterId: cluster.clusterId,
|
|
824
987
|
displayTitle: cluster.displayTitle,
|
|
988
|
+
isClosed: cluster.isClosed,
|
|
989
|
+
closedAtLocal: cluster.closedAtLocal,
|
|
990
|
+
closeReasonLocal: cluster.closeReasonLocal,
|
|
825
991
|
totalCount: cluster.totalCount,
|
|
826
992
|
issueCount: cluster.issueCount,
|
|
827
993
|
pullRequestCount: cluster.pullRequestCount,
|
|
@@ -837,6 +1003,7 @@ export class GHCrawlService {
|
|
|
837
1003
|
owner: params.owner,
|
|
838
1004
|
repo: params.repo,
|
|
839
1005
|
minSize: 0,
|
|
1006
|
+
includeClosedClusters: params.includeClosed === true,
|
|
840
1007
|
});
|
|
841
1008
|
const cluster = snapshot.clusters.find((item) => item.clusterId === params.clusterId);
|
|
842
1009
|
if (!cluster) {
|
|
@@ -869,6 +1036,9 @@ export class GHCrawlService {
|
|
|
869
1036
|
cluster: {
|
|
870
1037
|
clusterId: cluster.clusterId,
|
|
871
1038
|
displayTitle: cluster.displayTitle,
|
|
1039
|
+
isClosed: cluster.isClosed,
|
|
1040
|
+
closedAtLocal: cluster.closedAtLocal,
|
|
1041
|
+
closeReasonLocal: cluster.closeReasonLocal,
|
|
872
1042
|
totalCount: cluster.totalCount,
|
|
873
1043
|
issueCount: cluster.issueCount,
|
|
874
1044
|
pullRequestCount: cluster.pullRequestCount,
|
|
@@ -887,7 +1057,9 @@ export class GHCrawlService {
|
|
|
887
1057
|
if (!latestRun) {
|
|
888
1058
|
return { repository, stats, clusters: [] };
|
|
889
1059
|
}
|
|
1060
|
+
const includeClosedClusters = params.includeClosedClusters ?? true;
|
|
890
1061
|
const clusters = this.listRawTuiClusters(repository.id, latestRun.id)
|
|
1062
|
+
.filter((cluster) => (includeClosedClusters ? true : !cluster.isClosed))
|
|
891
1063
|
.filter((cluster) => cluster.totalCount >= (params.minSize ?? 10))
|
|
892
1064
|
.filter((cluster) => {
|
|
893
1065
|
const search = params.search?.trim().toLowerCase();
|
|
@@ -913,7 +1085,7 @@ export class GHCrawlService {
|
|
|
913
1085
|
throw new Error(`Cluster ${params.clusterId} was not found for ${repository.fullName}.`);
|
|
914
1086
|
}
|
|
915
1087
|
const rows = this.db
|
|
916
|
-
.prepare(`select t.id, t.number, t.kind, t.title, t.updated_at_gh, t.html_url, t.labels_json, cm.score_to_representative
|
|
1088
|
+
.prepare(`select t.id, t.number, t.kind, t.state, t.closed_at_local, t.title, t.updated_at_gh, t.html_url, t.labels_json, cm.score_to_representative
|
|
917
1089
|
from cluster_members cm
|
|
918
1090
|
join threads t on t.id = cm.thread_id
|
|
919
1091
|
where cm.cluster_id = ?
|
|
@@ -925,6 +1097,9 @@ export class GHCrawlService {
|
|
|
925
1097
|
return {
|
|
926
1098
|
clusterId: summary.clusterId,
|
|
927
1099
|
displayTitle: summary.displayTitle,
|
|
1100
|
+
isClosed: summary.isClosed,
|
|
1101
|
+
closedAtLocal: summary.closedAtLocal,
|
|
1102
|
+
closeReasonLocal: summary.closeReasonLocal,
|
|
928
1103
|
totalCount: summary.totalCount,
|
|
929
1104
|
issueCount: summary.issueCount,
|
|
930
1105
|
pullRequestCount: summary.pullRequestCount,
|
|
@@ -936,6 +1111,7 @@ export class GHCrawlService {
|
|
|
936
1111
|
id: row.id,
|
|
937
1112
|
number: row.number,
|
|
938
1113
|
kind: row.kind,
|
|
1114
|
+
isClosed: isEffectivelyClosed(row),
|
|
939
1115
|
title: row.title,
|
|
940
1116
|
updatedAtGh: row.updated_at_gh,
|
|
941
1117
|
htmlUrl: row.html_url,
|
|
@@ -948,11 +1124,11 @@ export class GHCrawlService {
|
|
|
948
1124
|
const repository = this.requireRepository(params.owner, params.repo);
|
|
949
1125
|
const row = params.threadId
|
|
950
1126
|
? (this.db
|
|
951
|
-
.prepare('select * from threads where repo_id = ? and id = ?
|
|
1127
|
+
.prepare('select * from threads where repo_id = ? and id = ? limit 1')
|
|
952
1128
|
.get(repository.id, params.threadId) ?? null)
|
|
953
1129
|
: params.threadNumber
|
|
954
1130
|
? (this.db
|
|
955
|
-
.prepare('select * from threads where repo_id = ? and number = ?
|
|
1131
|
+
.prepare('select * from threads where repo_id = ? and number = ? limit 1')
|
|
956
1132
|
.get(repository.id, params.threadNumber) ?? null)
|
|
957
1133
|
: null;
|
|
958
1134
|
if (!row) {
|
|
@@ -1112,7 +1288,7 @@ export class GHCrawlService {
|
|
|
1112
1288
|
const counts = this.db
|
|
1113
1289
|
.prepare(`select kind, count(*) as count
|
|
1114
1290
|
from threads
|
|
1115
|
-
where repo_id = ? and state = 'open'
|
|
1291
|
+
where repo_id = ? and state = 'open' and closed_at_local is null
|
|
1116
1292
|
group by kind`)
|
|
1117
1293
|
.all(repoId);
|
|
1118
1294
|
const latestRun = this.getLatestClusterRun(repoId);
|
|
@@ -1140,11 +1316,73 @@ export class GHCrawlService {
|
|
|
1140
1316
|
.prepare("select id, finished_at from cluster_runs where repo_id = ? and status = 'completed' order by id desc limit 1")
|
|
1141
1317
|
.get(repoId) ?? null);
|
|
1142
1318
|
}
|
|
1319
|
+
getLatestRunClusterIdsForThread(repoId, threadId) {
|
|
1320
|
+
const latestRun = this.getLatestClusterRun(repoId);
|
|
1321
|
+
if (!latestRun) {
|
|
1322
|
+
return [];
|
|
1323
|
+
}
|
|
1324
|
+
return this.db
|
|
1325
|
+
.prepare(`select cm.cluster_id
|
|
1326
|
+
from cluster_members cm
|
|
1327
|
+
join clusters c on c.id = cm.cluster_id
|
|
1328
|
+
where c.repo_id = ? and c.cluster_run_id = ? and cm.thread_id = ?
|
|
1329
|
+
order by cm.cluster_id asc`)
|
|
1330
|
+
.all(repoId, latestRun.id, threadId).map((row) => row.cluster_id);
|
|
1331
|
+
}
|
|
1332
|
+
reconcileClusterCloseState(repoId, clusterIds) {
|
|
1333
|
+
const latestRun = this.getLatestClusterRun(repoId);
|
|
1334
|
+
if (!latestRun) {
|
|
1335
|
+
return 0;
|
|
1336
|
+
}
|
|
1337
|
+
const resolvedClusterIds = clusterIds && clusterIds.length > 0
|
|
1338
|
+
? Array.from(new Set(clusterIds))
|
|
1339
|
+
: this.db
|
|
1340
|
+
.prepare('select id from clusters where repo_id = ? and cluster_run_id = ? order by id asc')
|
|
1341
|
+
.all(repoId, latestRun.id).map((row) => row.id);
|
|
1342
|
+
if (resolvedClusterIds.length === 0) {
|
|
1343
|
+
return 0;
|
|
1344
|
+
}
|
|
1345
|
+
const summarize = this.db.prepare(`select
|
|
1346
|
+
c.id,
|
|
1347
|
+
c.close_reason_local,
|
|
1348
|
+
count(*) as member_count,
|
|
1349
|
+
sum(case when t.state != 'open' or t.closed_at_local is not null then 1 else 0 end) as closed_member_count
|
|
1350
|
+
from clusters c
|
|
1351
|
+
join cluster_members cm on cm.cluster_id = c.id
|
|
1352
|
+
join threads t on t.id = cm.thread_id
|
|
1353
|
+
where c.id = ?
|
|
1354
|
+
group by c.id, c.close_reason_local`);
|
|
1355
|
+
const markClosed = this.db.prepare(`update clusters
|
|
1356
|
+
set closed_at_local = coalesce(closed_at_local, ?),
|
|
1357
|
+
close_reason_local = 'all_members_closed'
|
|
1358
|
+
where id = ?`);
|
|
1359
|
+
const clearClosed = this.db.prepare(`update clusters
|
|
1360
|
+
set closed_at_local = null,
|
|
1361
|
+
close_reason_local = null
|
|
1362
|
+
where id = ? and close_reason_local = 'all_members_closed'`);
|
|
1363
|
+
let changed = 0;
|
|
1364
|
+
for (const clusterId of resolvedClusterIds) {
|
|
1365
|
+
const row = summarize.get(clusterId);
|
|
1366
|
+
if (!row || row.close_reason_local === 'manual') {
|
|
1367
|
+
continue;
|
|
1368
|
+
}
|
|
1369
|
+
if (row.member_count > 0 && row.closed_member_count >= row.member_count) {
|
|
1370
|
+
const result = markClosed.run(nowIso(), clusterId);
|
|
1371
|
+
changed += result.changes;
|
|
1372
|
+
continue;
|
|
1373
|
+
}
|
|
1374
|
+
const cleared = clearClosed.run(clusterId);
|
|
1375
|
+
changed += cleared.changes;
|
|
1376
|
+
}
|
|
1377
|
+
return changed;
|
|
1378
|
+
}
|
|
1143
1379
|
listRawTuiClusters(repoId, clusterRunId) {
|
|
1144
1380
|
const rows = this.db
|
|
1145
1381
|
.prepare(`select
|
|
1146
1382
|
c.id as cluster_id,
|
|
1147
1383
|
c.member_count,
|
|
1384
|
+
c.closed_at_local,
|
|
1385
|
+
c.close_reason_local,
|
|
1148
1386
|
c.representative_thread_id,
|
|
1149
1387
|
rt.number as representative_number,
|
|
1150
1388
|
rt.kind as representative_kind,
|
|
@@ -1152,6 +1390,7 @@ export class GHCrawlService {
|
|
|
1152
1390
|
max(coalesce(t.updated_at_gh, t.updated_at)) as latest_updated_at,
|
|
1153
1391
|
sum(case when t.kind = 'issue' then 1 else 0 end) as issue_count,
|
|
1154
1392
|
sum(case when t.kind = 'pull_request' then 1 else 0 end) as pull_request_count,
|
|
1393
|
+
sum(case when t.state != 'open' or t.closed_at_local is not null then 1 else 0 end) as closed_member_count,
|
|
1155
1394
|
group_concat(lower(coalesce(t.title, '')), ' ') as search_text
|
|
1156
1395
|
from clusters c
|
|
1157
1396
|
left join threads rt on rt.id = c.representative_thread_id
|
|
@@ -1169,6 +1408,9 @@ export class GHCrawlService {
|
|
|
1169
1408
|
return rows.map((row) => ({
|
|
1170
1409
|
clusterId: row.cluster_id,
|
|
1171
1410
|
displayTitle: row.representative_title ?? `Cluster ${row.cluster_id}`,
|
|
1411
|
+
isClosed: row.close_reason_local !== null || row.closed_member_count >= row.member_count,
|
|
1412
|
+
closedAtLocal: row.closed_at_local,
|
|
1413
|
+
closeReasonLocal: row.close_reason_local,
|
|
1172
1414
|
totalCount: row.member_count,
|
|
1173
1415
|
issueCount: row.issue_count,
|
|
1174
1416
|
pullRequestCount: row.pull_request_count,
|
|
@@ -1306,6 +1548,7 @@ export class GHCrawlService {
|
|
|
1306
1548
|
from threads
|
|
1307
1549
|
where repo_id = ?
|
|
1308
1550
|
and state = 'open'
|
|
1551
|
+
and closed_at_local is null
|
|
1309
1552
|
and (last_pulled_at is null or last_pulled_at < ?)
|
|
1310
1553
|
order by number asc`)
|
|
1311
1554
|
.all(params.repoId, params.crawlStartedAt);
|
|
@@ -1350,6 +1593,7 @@ export class GHCrawlService {
|
|
|
1350
1593
|
from threads
|
|
1351
1594
|
where repo_id = ?
|
|
1352
1595
|
and state = 'open'
|
|
1596
|
+
and closed_at_local is null
|
|
1353
1597
|
and (last_pulled_at is null or last_pulled_at < ?)
|
|
1354
1598
|
order by number asc`)
|
|
1355
1599
|
.all(params.repoId, params.crawlStartedAt);
|
|
@@ -1637,11 +1881,12 @@ export class GHCrawlService {
|
|
|
1637
1881
|
}
|
|
1638
1882
|
loadStoredEmbeddings(repoId) {
|
|
1639
1883
|
return this.db
|
|
1640
|
-
.prepare(`select t.id, t.repo_id, t.number, t.kind, t.state, t.
|
|
1884
|
+
.prepare(`select t.id, t.repo_id, t.number, t.kind, t.state, t.closed_at_gh, t.closed_at_local, t.close_reason_local,
|
|
1885
|
+
t.title, t.body, t.author_login, t.html_url, t.labels_json,
|
|
1641
1886
|
t.updated_at_gh, t.first_pulled_at, t.last_pulled_at, e.source_kind, e.embedding_json
|
|
1642
1887
|
from threads t
|
|
1643
1888
|
join document_embeddings e on e.thread_id = t.id
|
|
1644
|
-
where t.repo_id = ? and t.state = 'open' and e.model = ?
|
|
1889
|
+
where t.repo_id = ? and t.state = 'open' and t.closed_at_local is null and e.model = ?
|
|
1645
1890
|
order by t.number asc, e.source_kind asc`)
|
|
1646
1891
|
.all(repoId, this.config.embedModel);
|
|
1647
1892
|
}
|
|
@@ -1688,7 +1933,9 @@ export class GHCrawlService {
|
|
|
1688
1933
|
and se.cluster_run_id = ?
|
|
1689
1934
|
and (se.left_thread_id = ? or se.right_thread_id = ?)
|
|
1690
1935
|
and t1.state = 'open'
|
|
1936
|
+
and t1.closed_at_local is null
|
|
1691
1937
|
and t2.state = 'open'
|
|
1938
|
+
and t2.closed_at_local is null
|
|
1692
1939
|
order by se.score desc
|
|
1693
1940
|
limit ?`)
|
|
1694
1941
|
.all(threadId, threadId, threadId, threadId, repoId, latestRun.id, threadId, threadId, limit);
|
|
@@ -1703,7 +1950,7 @@ export class GHCrawlService {
|
|
|
1703
1950
|
getEmbeddingWorkset(repoId, threadNumber) {
|
|
1704
1951
|
let sql = `select t.id, t.number, t.title, t.body
|
|
1705
1952
|
from threads t
|
|
1706
|
-
where t.repo_id = ? and t.state = 'open'`;
|
|
1953
|
+
where t.repo_id = ? and t.state = 'open' and t.closed_at_local is null`;
|
|
1707
1954
|
const args = [repoId];
|
|
1708
1955
|
if (threadNumber) {
|
|
1709
1956
|
sql += ' and t.number = ?';
|
|
@@ -1736,7 +1983,7 @@ export class GHCrawlService {
|
|
|
1736
1983
|
let sql = `select s.thread_id, s.summary_kind, s.summary_text
|
|
1737
1984
|
from document_summaries s
|
|
1738
1985
|
join threads t on t.id = s.thread_id
|
|
1739
|
-
where t.repo_id = ? and t.state = 'open' and s.model = ?`;
|
|
1986
|
+
where t.repo_id = ? and t.state = 'open' and t.closed_at_local is null and s.model = ?`;
|
|
1740
1987
|
const args = [repoId, this.config.summaryModel];
|
|
1741
1988
|
if (threadNumber) {
|
|
1742
1989
|
sql += ' and t.number = ?';
|