@ghcrawl/api-core 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/server.d.ts.map +1 -1
- package/dist/api/server.js +30 -3
- package/dist/api/server.js.map +1 -1
- package/dist/config.d.ts +3 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +7 -2
- package/dist/config.js.map +1 -1
- package/dist/db/migrate.d.ts.map +1 -1
- package/dist/db/migrate.js +18 -0
- package/dist/db/migrate.js.map +1 -1
- package/dist/service.d.ts +34 -1
- package/dist/service.d.ts.map +1 -1
- package/dist/service.js +320 -18
- package/dist/service.js.map +1 -1
- package/package.json +4 -4
package/dist/service.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import http from 'node:http';
|
|
2
2
|
import crypto from 'node:crypto';
|
|
3
3
|
import { IterableMapper } from '@shutterstock/p-map-iterable';
|
|
4
|
-
import { actionResponseSchema, clusterDetailResponseSchema, clusterResultSchema, clusterSummariesResponseSchema, clustersResponseSchema, embedResultSchema, healthResponseSchema, neighborsResponseSchema, refreshResponseSchema, repositoriesResponseSchema, searchResponseSchema, syncResultSchema, threadsResponseSchema, } from '@ghcrawl/api-contract';
|
|
4
|
+
import { actionResponseSchema, authorThreadsResponseSchema, closeResponseSchema, clusterDetailResponseSchema, clusterResultSchema, clusterSummariesResponseSchema, clustersResponseSchema, embedResultSchema, healthResponseSchema, neighborsResponseSchema, refreshResponseSchema, repositoriesResponseSchema, searchResponseSchema, syncResultSchema, threadsResponseSchema, } from '@ghcrawl/api-contract';
|
|
5
5
|
import { buildClusters } from './cluster/build.js';
|
|
6
6
|
import { ensureRuntimeDirs, isLikelyGitHubToken, isLikelyOpenAiApiKey, loadConfig, requireGithubToken, requireOpenAiKey, } from './config.js';
|
|
7
7
|
import { migrate } from './db/migrate.js';
|
|
@@ -27,6 +27,9 @@ function parseIso(value) {
|
|
|
27
27
|
const parsed = Date.parse(value);
|
|
28
28
|
return Number.isNaN(parsed) ? null : parsed;
|
|
29
29
|
}
|
|
30
|
+
/**
 * Reports whether a thread row should be treated as closed.
 *
 * A row counts as closed when its `state` is anything other than 'open', or
 * when it carries a local close timestamp.
 *
 * @param {{ state: unknown, closed_at_local?: unknown }} row - Thread row (or a
 *   partial projection of one).
 * @returns {boolean} True when the row is closed remotely or locally.
 */
function isEffectivelyClosed(row) {
    // `!= null` deliberately matches both null and undefined: a projection that
    // did not select closed_at_local must not be reported as locally closed.
    return row.state !== 'open' || row.closed_at_local != null;
}
|
|
30
33
|
function isMissingGitHubResourceError(error) {
|
|
31
34
|
const status = typeof error?.status === 'number' ? Number(error.status) : null;
|
|
32
35
|
if (status === 404 || status === 410) {
|
|
@@ -150,6 +153,10 @@ function threadToDto(row, clusterId) {
|
|
|
150
153
|
number: row.number,
|
|
151
154
|
kind: row.kind,
|
|
152
155
|
state: row.state,
|
|
156
|
+
isClosed: isEffectivelyClosed(row),
|
|
157
|
+
closedAtGh: row.closed_at_gh ?? null,
|
|
158
|
+
closedAtLocal: row.closed_at_local ?? null,
|
|
159
|
+
closeReasonLocal: row.close_reason_local ?? null,
|
|
153
160
|
title: row.title,
|
|
154
161
|
body: row.body,
|
|
155
162
|
authorLogin: row.author_login,
|
|
@@ -260,8 +267,11 @@ export class GHCrawlService {
|
|
|
260
267
|
.all(repository.id, repository.id);
|
|
261
268
|
for (const row of clusterRows)
|
|
262
269
|
clusterIds.set(row.thread_id, row.cluster_id);
|
|
263
|
-
let sql =
|
|
270
|
+
let sql = 'select * from threads where repo_id = ?';
|
|
264
271
|
const args = [repository.id];
|
|
272
|
+
if (!params.includeClosed) {
|
|
273
|
+
sql += " and state = 'open' and closed_at_local is null";
|
|
274
|
+
}
|
|
265
275
|
if (params.kind) {
|
|
266
276
|
sql += ' and kind = ?';
|
|
267
277
|
args.push(params.kind);
|
|
@@ -291,6 +301,145 @@ export class GHCrawlService {
|
|
|
291
301
|
threads: orderedRows.map((row) => threadToDto(row, clusterIds.get(row.id) ?? null)),
|
|
292
302
|
});
|
|
293
303
|
}
|
|
304
|
+
listAuthorThreads(params) {
|
|
305
|
+
const repository = this.requireRepository(params.owner, params.repo);
|
|
306
|
+
const normalizedLogin = params.login.trim();
|
|
307
|
+
if (!normalizedLogin) {
|
|
308
|
+
return authorThreadsResponseSchema.parse({
|
|
309
|
+
repository,
|
|
310
|
+
authorLogin: '',
|
|
311
|
+
threads: [],
|
|
312
|
+
});
|
|
313
|
+
}
|
|
314
|
+
const clusterIds = new Map();
|
|
315
|
+
const clusterRows = this.db
|
|
316
|
+
.prepare(`select cm.thread_id, cm.cluster_id
|
|
317
|
+
from cluster_members cm
|
|
318
|
+
join clusters c on c.id = cm.cluster_id
|
|
319
|
+
where c.repo_id = ? and c.cluster_run_id = (
|
|
320
|
+
select id from cluster_runs where repo_id = ? and status = 'completed' order by id desc limit 1
|
|
321
|
+
)`)
|
|
322
|
+
.all(repository.id, repository.id);
|
|
323
|
+
for (const row of clusterRows)
|
|
324
|
+
clusterIds.set(row.thread_id, row.cluster_id);
|
|
325
|
+
const rows = this.db
|
|
326
|
+
.prepare(`select *
|
|
327
|
+
from threads
|
|
328
|
+
where repo_id = ? and lower(author_login) = lower(?)
|
|
329
|
+
${params.includeClosed ? '' : "and state = 'open' and closed_at_local is null"}
|
|
330
|
+
order by updated_at_gh desc, number desc`)
|
|
331
|
+
.all(repository.id, normalizedLogin);
|
|
332
|
+
const latestRun = this.getLatestClusterRun(repository.id);
|
|
333
|
+
const strongestByThread = new Map();
|
|
334
|
+
if (latestRun && rows.length > 1) {
|
|
335
|
+
const edges = this.db
|
|
336
|
+
.prepare(`select
|
|
337
|
+
se.left_thread_id,
|
|
338
|
+
se.right_thread_id,
|
|
339
|
+
se.score,
|
|
340
|
+
t1.number as left_number,
|
|
341
|
+
t1.kind as left_kind,
|
|
342
|
+
t1.title as left_title,
|
|
343
|
+
t2.number as right_number,
|
|
344
|
+
t2.kind as right_kind,
|
|
345
|
+
t2.title as right_title
|
|
346
|
+
from similarity_edges se
|
|
347
|
+
join threads t1 on t1.id = se.left_thread_id
|
|
348
|
+
join threads t2 on t2.id = se.right_thread_id
|
|
349
|
+
where se.repo_id = ?
|
|
350
|
+
and se.cluster_run_id = ?
|
|
351
|
+
and lower(t1.author_login) = lower(?)
|
|
352
|
+
and lower(t2.author_login) = lower(?)
|
|
353
|
+
${params.includeClosed ? '' : "and t1.state = 'open' and t1.closed_at_local is null and t2.state = 'open' and t2.closed_at_local is null"}`)
|
|
354
|
+
.all(repository.id, latestRun.id, normalizedLogin, normalizedLogin);
|
|
355
|
+
const updateStrongest = (sourceThreadId, match) => {
|
|
356
|
+
const previous = strongestByThread.get(sourceThreadId);
|
|
357
|
+
if (!previous || match.score > previous.score) {
|
|
358
|
+
strongestByThread.set(sourceThreadId, match);
|
|
359
|
+
}
|
|
360
|
+
};
|
|
361
|
+
for (const edge of edges) {
|
|
362
|
+
updateStrongest(edge.left_thread_id, {
|
|
363
|
+
threadId: edge.right_thread_id,
|
|
364
|
+
number: edge.right_number,
|
|
365
|
+
kind: edge.right_kind,
|
|
366
|
+
title: edge.right_title,
|
|
367
|
+
score: edge.score,
|
|
368
|
+
});
|
|
369
|
+
updateStrongest(edge.right_thread_id, {
|
|
370
|
+
threadId: edge.left_thread_id,
|
|
371
|
+
number: edge.left_number,
|
|
372
|
+
kind: edge.left_kind,
|
|
373
|
+
title: edge.left_title,
|
|
374
|
+
score: edge.score,
|
|
375
|
+
});
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
return authorThreadsResponseSchema.parse({
|
|
379
|
+
repository,
|
|
380
|
+
authorLogin: normalizedLogin,
|
|
381
|
+
threads: rows.map((row) => ({
|
|
382
|
+
thread: threadToDto(row, clusterIds.get(row.id) ?? null),
|
|
383
|
+
strongestSameAuthorMatch: strongestByThread.get(row.id) ?? null,
|
|
384
|
+
})),
|
|
385
|
+
});
|
|
386
|
+
}
|
|
387
|
+
closeThreadLocally(params) {
|
|
388
|
+
const repository = this.requireRepository(params.owner, params.repo);
|
|
389
|
+
const row = this.db
|
|
390
|
+
.prepare('select * from threads where repo_id = ? and number = ? limit 1')
|
|
391
|
+
.get(repository.id, params.threadNumber);
|
|
392
|
+
if (!row) {
|
|
393
|
+
throw new Error(`Thread #${params.threadNumber} was not found for ${repository.fullName}.`);
|
|
394
|
+
}
|
|
395
|
+
const closedAt = nowIso();
|
|
396
|
+
this.db
|
|
397
|
+
.prepare(`update threads
|
|
398
|
+
set closed_at_local = ?,
|
|
399
|
+
close_reason_local = 'manual',
|
|
400
|
+
updated_at = ?
|
|
401
|
+
where id = ?`)
|
|
402
|
+
.run(closedAt, closedAt, row.id);
|
|
403
|
+
this.parsedEmbeddingCache.delete(repository.id);
|
|
404
|
+
const clusterIds = this.getLatestRunClusterIdsForThread(repository.id, row.id);
|
|
405
|
+
const clusterClosed = this.reconcileClusterCloseState(repository.id, clusterIds) > 0;
|
|
406
|
+
const updated = this.db.prepare('select * from threads where id = ? limit 1').get(row.id);
|
|
407
|
+
return closeResponseSchema.parse({
|
|
408
|
+
ok: true,
|
|
409
|
+
repository,
|
|
410
|
+
thread: threadToDto(updated),
|
|
411
|
+
clusterId: clusterIds[0] ?? null,
|
|
412
|
+
clusterClosed,
|
|
413
|
+
message: `Marked ${updated.kind} #${updated.number} closed locally.`,
|
|
414
|
+
});
|
|
415
|
+
}
|
|
416
|
+
closeClusterLocally(params) {
|
|
417
|
+
const repository = this.requireRepository(params.owner, params.repo);
|
|
418
|
+
const latestRun = this.getLatestClusterRun(repository.id);
|
|
419
|
+
if (!latestRun) {
|
|
420
|
+
throw new Error(`No completed cluster run found for ${repository.fullName}.`);
|
|
421
|
+
}
|
|
422
|
+
const row = this.db
|
|
423
|
+
.prepare('select id from clusters where repo_id = ? and cluster_run_id = ? and id = ? limit 1')
|
|
424
|
+
.get(repository.id, latestRun.id, params.clusterId);
|
|
425
|
+
if (!row) {
|
|
426
|
+
throw new Error(`Cluster ${params.clusterId} was not found for ${repository.fullName}.`);
|
|
427
|
+
}
|
|
428
|
+
const closedAt = nowIso();
|
|
429
|
+
this.db
|
|
430
|
+
.prepare(`update clusters
|
|
431
|
+
set closed_at_local = ?,
|
|
432
|
+
close_reason_local = 'manual'
|
|
433
|
+
where id = ?`)
|
|
434
|
+
.run(closedAt, row.id);
|
|
435
|
+
return closeResponseSchema.parse({
|
|
436
|
+
ok: true,
|
|
437
|
+
repository,
|
|
438
|
+
clusterId: row.id,
|
|
439
|
+
clusterClosed: true,
|
|
440
|
+
message: `Marked cluster ${row.id} closed locally.`,
|
|
441
|
+
});
|
|
442
|
+
}
|
|
294
443
|
async syncRepository(params) {
|
|
295
444
|
const crawlStartedAt = params.startedAt ?? nowIso();
|
|
296
445
|
const includeComments = params.includeComments ?? false;
|
|
@@ -385,6 +534,10 @@ export class GHCrawlService {
|
|
|
385
534
|
})
|
|
386
535
|
: 0;
|
|
387
536
|
const threadsClosed = threadsClosedFromClosedSweep + threadsClosedFromDirectReconcile;
|
|
537
|
+
this.parsedEmbeddingCache.delete(repoId);
|
|
538
|
+
if (threadsClosed > 0) {
|
|
539
|
+
this.reconcileClusterCloseState(repoId);
|
|
540
|
+
}
|
|
388
541
|
const finishedAt = nowIso();
|
|
389
542
|
const reconciledOpenCloseAt = shouldSweepClosedOverlap || shouldReconcileMissingOpenThreads ? finishedAt : null;
|
|
390
543
|
const nextSyncCursor = {
|
|
@@ -596,7 +749,7 @@ export class GHCrawlService {
|
|
|
596
749
|
from documents_fts
|
|
597
750
|
join documents d on d.id = documents_fts.rowid
|
|
598
751
|
join threads t on t.id = d.thread_id
|
|
599
|
-
|
|
752
|
+
where t.repo_id = ? and t.state = 'open' and t.closed_at_local is null and documents_fts match ?
|
|
600
753
|
order by rank
|
|
601
754
|
limit ?`)
|
|
602
755
|
.all(repository.id, params.query, limit * 2);
|
|
@@ -618,7 +771,7 @@ export class GHCrawlService {
|
|
|
618
771
|
const threadRows = candidateIds.size
|
|
619
772
|
? this.db
|
|
620
773
|
.prepare(`select * from threads
|
|
621
|
-
where repo_id = ? and state = 'open' and id in (${[...candidateIds].map(() => '?').join(',')})
|
|
774
|
+
where repo_id = ? and state = 'open' and closed_at_local is null and id in (${[...candidateIds].map(() => '?').join(',')})
|
|
622
775
|
order by updated_at_gh desc, number desc`)
|
|
623
776
|
.all(repository.id, ...candidateIds)
|
|
624
777
|
: [];
|
|
@@ -730,7 +883,8 @@ export class GHCrawlService {
|
|
|
730
883
|
}
|
|
731
884
|
const rows = this.db
|
|
732
885
|
.prepare(`select c.id, c.repo_id, c.representative_thread_id, c.member_count,
|
|
733
|
-
|
|
886
|
+
c.closed_at_local, c.close_reason_local,
|
|
887
|
+
cm.thread_id, cm.score_to_representative, t.number, t.kind, t.title, t.state, t.closed_at_local as thread_closed_at_local
|
|
734
888
|
from clusters c
|
|
735
889
|
left join cluster_members cm on cm.cluster_id = c.id
|
|
736
890
|
left join threads t on t.id = cm.thread_id
|
|
@@ -742,6 +896,9 @@ export class GHCrawlService {
|
|
|
742
896
|
const cluster = clusters.get(row.id) ?? {
|
|
743
897
|
id: row.id,
|
|
744
898
|
repoId: row.repo_id,
|
|
899
|
+
isClosed: row.close_reason_local !== null,
|
|
900
|
+
closedAtLocal: row.closed_at_local,
|
|
901
|
+
closeReasonLocal: row.close_reason_local,
|
|
745
902
|
representativeThreadId: row.representative_thread_id,
|
|
746
903
|
memberCount: row.member_count,
|
|
747
904
|
members: [],
|
|
@@ -751,15 +908,20 @@ export class GHCrawlService {
|
|
|
751
908
|
threadId: row.thread_id,
|
|
752
909
|
number: row.number,
|
|
753
910
|
kind: row.kind,
|
|
911
|
+
isClosed: row.state !== null && isEffectivelyClosed({ state: row.state, closed_at_local: row.thread_closed_at_local }),
|
|
754
912
|
title: row.title,
|
|
755
913
|
scoreToRepresentative: row.score_to_representative,
|
|
756
914
|
});
|
|
757
915
|
}
|
|
758
916
|
clusters.set(row.id, cluster);
|
|
759
917
|
}
|
|
918
|
+
const clusterValues = Array.from(clusters.values()).map((cluster) => ({
|
|
919
|
+
...cluster,
|
|
920
|
+
isClosed: cluster.isClosed || (cluster.memberCount > 0 && cluster.members.every((member) => member.isClosed)),
|
|
921
|
+
}));
|
|
760
922
|
return clustersResponseSchema.parse({
|
|
761
923
|
repository,
|
|
762
|
-
clusters:
|
|
924
|
+
clusters: clusterValues.filter((cluster) => (params.includeClosed ? true : !cluster.isClosed)),
|
|
763
925
|
});
|
|
764
926
|
}
|
|
765
927
|
async refreshRepository(params) {
|
|
@@ -814,6 +976,7 @@ export class GHCrawlService {
|
|
|
814
976
|
minSize: params.minSize,
|
|
815
977
|
sort: params.sort,
|
|
816
978
|
search: params.search,
|
|
979
|
+
includeClosedClusters: params.includeClosed === true,
|
|
817
980
|
});
|
|
818
981
|
const clusters = params.limit ? snapshot.clusters.slice(0, params.limit) : snapshot.clusters;
|
|
819
982
|
return clusterSummariesResponseSchema.parse({
|
|
@@ -822,6 +985,9 @@ export class GHCrawlService {
|
|
|
822
985
|
clusters: clusters.map((cluster) => ({
|
|
823
986
|
clusterId: cluster.clusterId,
|
|
824
987
|
displayTitle: cluster.displayTitle,
|
|
988
|
+
isClosed: cluster.isClosed,
|
|
989
|
+
closedAtLocal: cluster.closedAtLocal,
|
|
990
|
+
closeReasonLocal: cluster.closeReasonLocal,
|
|
825
991
|
totalCount: cluster.totalCount,
|
|
826
992
|
issueCount: cluster.issueCount,
|
|
827
993
|
pullRequestCount: cluster.pullRequestCount,
|
|
@@ -837,6 +1003,7 @@ export class GHCrawlService {
|
|
|
837
1003
|
owner: params.owner,
|
|
838
1004
|
repo: params.repo,
|
|
839
1005
|
minSize: 0,
|
|
1006
|
+
includeClosedClusters: params.includeClosed === true,
|
|
840
1007
|
});
|
|
841
1008
|
const cluster = snapshot.clusters.find((item) => item.clusterId === params.clusterId);
|
|
842
1009
|
if (!cluster) {
|
|
@@ -846,6 +1013,7 @@ export class GHCrawlService {
|
|
|
846
1013
|
owner: params.owner,
|
|
847
1014
|
repo: params.repo,
|
|
848
1015
|
clusterId: params.clusterId,
|
|
1016
|
+
clusterRunId: snapshot.clusterRunId ?? undefined,
|
|
849
1017
|
});
|
|
850
1018
|
const members = detail.members.slice(0, params.memberLimit ?? detail.members.length).map((member) => {
|
|
851
1019
|
const threadDetail = this.getTuiThreadDetail({
|
|
@@ -869,6 +1037,9 @@ export class GHCrawlService {
|
|
|
869
1037
|
cluster: {
|
|
870
1038
|
clusterId: cluster.clusterId,
|
|
871
1039
|
displayTitle: cluster.displayTitle,
|
|
1040
|
+
isClosed: cluster.isClosed,
|
|
1041
|
+
closedAtLocal: cluster.closedAtLocal,
|
|
1042
|
+
closeReasonLocal: cluster.closeReasonLocal,
|
|
872
1043
|
totalCount: cluster.totalCount,
|
|
873
1044
|
issueCount: cluster.issueCount,
|
|
874
1045
|
pullRequestCount: cluster.pullRequestCount,
|
|
@@ -885,9 +1056,11 @@ export class GHCrawlService {
|
|
|
885
1056
|
const stats = this.getTuiRepoStats(repository.id);
|
|
886
1057
|
const latestRun = this.getLatestClusterRun(repository.id);
|
|
887
1058
|
if (!latestRun) {
|
|
888
|
-
return { repository, stats, clusters: [] };
|
|
1059
|
+
return { repository, stats, clusterRunId: null, clusters: [] };
|
|
889
1060
|
}
|
|
1061
|
+
const includeClosedClusters = params.includeClosedClusters ?? true;
|
|
890
1062
|
const clusters = this.listRawTuiClusters(repository.id, latestRun.id)
|
|
1063
|
+
.filter((cluster) => (includeClosedClusters ? true : !cluster.isClosed))
|
|
891
1064
|
.filter((cluster) => cluster.totalCount >= (params.minSize ?? 10))
|
|
892
1065
|
.filter((cluster) => {
|
|
893
1066
|
const search = params.search?.trim().toLowerCase();
|
|
@@ -899,21 +1072,23 @@ export class GHCrawlService {
|
|
|
899
1072
|
return {
|
|
900
1073
|
repository,
|
|
901
1074
|
stats,
|
|
1075
|
+
clusterRunId: latestRun.id,
|
|
902
1076
|
clusters,
|
|
903
1077
|
};
|
|
904
1078
|
}
|
|
905
1079
|
getTuiClusterDetail(params) {
|
|
906
1080
|
const repository = this.requireRepository(params.owner, params.repo);
|
|
907
|
-
const
|
|
908
|
-
|
|
1081
|
+
const clusterRunId = params.clusterRunId ??
|
|
1082
|
+
(this.getLatestClusterRun(repository.id)?.id ?? null);
|
|
1083
|
+
if (!clusterRunId) {
|
|
909
1084
|
throw new Error(`No completed cluster run found for ${repository.fullName}. Run cluster first.`);
|
|
910
1085
|
}
|
|
911
|
-
const summary = this.
|
|
1086
|
+
const summary = this.getRawTuiClusterSummary(repository.id, clusterRunId, params.clusterId);
|
|
912
1087
|
if (!summary) {
|
|
913
1088
|
throw new Error(`Cluster ${params.clusterId} was not found for ${repository.fullName}.`);
|
|
914
1089
|
}
|
|
915
1090
|
const rows = this.db
|
|
916
|
-
.prepare(`select t.id, t.number, t.kind, t.title, t.updated_at_gh, t.html_url, t.labels_json, cm.score_to_representative
|
|
1091
|
+
.prepare(`select t.id, t.number, t.kind, t.state, t.closed_at_local, t.title, t.updated_at_gh, t.html_url, t.labels_json, cm.score_to_representative
|
|
917
1092
|
from cluster_members cm
|
|
918
1093
|
join threads t on t.id = cm.thread_id
|
|
919
1094
|
where cm.cluster_id = ?
|
|
@@ -925,6 +1100,9 @@ export class GHCrawlService {
|
|
|
925
1100
|
return {
|
|
926
1101
|
clusterId: summary.clusterId,
|
|
927
1102
|
displayTitle: summary.displayTitle,
|
|
1103
|
+
isClosed: summary.isClosed,
|
|
1104
|
+
closedAtLocal: summary.closedAtLocal,
|
|
1105
|
+
closeReasonLocal: summary.closeReasonLocal,
|
|
928
1106
|
totalCount: summary.totalCount,
|
|
929
1107
|
issueCount: summary.issueCount,
|
|
930
1108
|
pullRequestCount: summary.pullRequestCount,
|
|
@@ -936,6 +1114,7 @@ export class GHCrawlService {
|
|
|
936
1114
|
id: row.id,
|
|
937
1115
|
number: row.number,
|
|
938
1116
|
kind: row.kind,
|
|
1117
|
+
isClosed: isEffectivelyClosed(row),
|
|
939
1118
|
title: row.title,
|
|
940
1119
|
updatedAtGh: row.updated_at_gh,
|
|
941
1120
|
htmlUrl: row.html_url,
|
|
@@ -948,11 +1127,11 @@ export class GHCrawlService {
|
|
|
948
1127
|
const repository = this.requireRepository(params.owner, params.repo);
|
|
949
1128
|
const row = params.threadId
|
|
950
1129
|
? (this.db
|
|
951
|
-
.prepare('select * from threads where repo_id = ? and id = ?
|
|
1130
|
+
.prepare('select * from threads where repo_id = ? and id = ? limit 1')
|
|
952
1131
|
.get(repository.id, params.threadId) ?? null)
|
|
953
1132
|
: params.threadNumber
|
|
954
1133
|
? (this.db
|
|
955
|
-
.prepare('select * from threads where repo_id = ? and number = ?
|
|
1134
|
+
.prepare('select * from threads where repo_id = ? and number = ? limit 1')
|
|
956
1135
|
.get(repository.id, params.threadNumber) ?? null)
|
|
957
1136
|
: null;
|
|
958
1137
|
if (!row) {
|
|
@@ -1112,7 +1291,7 @@ export class GHCrawlService {
|
|
|
1112
1291
|
const counts = this.db
|
|
1113
1292
|
.prepare(`select kind, count(*) as count
|
|
1114
1293
|
from threads
|
|
1115
|
-
where repo_id = ? and state = 'open'
|
|
1294
|
+
where repo_id = ? and state = 'open' and closed_at_local is null
|
|
1116
1295
|
group by kind`)
|
|
1117
1296
|
.all(repoId);
|
|
1118
1297
|
const latestRun = this.getLatestClusterRun(repoId);
|
|
@@ -1140,11 +1319,73 @@ export class GHCrawlService {
|
|
|
1140
1319
|
.prepare("select id, finished_at from cluster_runs where repo_id = ? and status = 'completed' order by id desc limit 1")
|
|
1141
1320
|
.get(repoId) ?? null);
|
|
1142
1321
|
}
|
|
1322
|
+
getLatestRunClusterIdsForThread(repoId, threadId) {
|
|
1323
|
+
const latestRun = this.getLatestClusterRun(repoId);
|
|
1324
|
+
if (!latestRun) {
|
|
1325
|
+
return [];
|
|
1326
|
+
}
|
|
1327
|
+
return this.db
|
|
1328
|
+
.prepare(`select cm.cluster_id
|
|
1329
|
+
from cluster_members cm
|
|
1330
|
+
join clusters c on c.id = cm.cluster_id
|
|
1331
|
+
where c.repo_id = ? and c.cluster_run_id = ? and cm.thread_id = ?
|
|
1332
|
+
order by cm.cluster_id asc`)
|
|
1333
|
+
.all(repoId, latestRun.id, threadId).map((row) => row.cluster_id);
|
|
1334
|
+
}
|
|
1335
|
+
reconcileClusterCloseState(repoId, clusterIds) {
|
|
1336
|
+
const latestRun = this.getLatestClusterRun(repoId);
|
|
1337
|
+
if (!latestRun) {
|
|
1338
|
+
return 0;
|
|
1339
|
+
}
|
|
1340
|
+
const resolvedClusterIds = clusterIds && clusterIds.length > 0
|
|
1341
|
+
? Array.from(new Set(clusterIds))
|
|
1342
|
+
: this.db
|
|
1343
|
+
.prepare('select id from clusters where repo_id = ? and cluster_run_id = ? order by id asc')
|
|
1344
|
+
.all(repoId, latestRun.id).map((row) => row.id);
|
|
1345
|
+
if (resolvedClusterIds.length === 0) {
|
|
1346
|
+
return 0;
|
|
1347
|
+
}
|
|
1348
|
+
const summarize = this.db.prepare(`select
|
|
1349
|
+
c.id,
|
|
1350
|
+
c.close_reason_local,
|
|
1351
|
+
count(*) as member_count,
|
|
1352
|
+
sum(case when t.state != 'open' or t.closed_at_local is not null then 1 else 0 end) as closed_member_count
|
|
1353
|
+
from clusters c
|
|
1354
|
+
join cluster_members cm on cm.cluster_id = c.id
|
|
1355
|
+
join threads t on t.id = cm.thread_id
|
|
1356
|
+
where c.id = ?
|
|
1357
|
+
group by c.id, c.close_reason_local`);
|
|
1358
|
+
const markClosed = this.db.prepare(`update clusters
|
|
1359
|
+
set closed_at_local = coalesce(closed_at_local, ?),
|
|
1360
|
+
close_reason_local = 'all_members_closed'
|
|
1361
|
+
where id = ?`);
|
|
1362
|
+
const clearClosed = this.db.prepare(`update clusters
|
|
1363
|
+
set closed_at_local = null,
|
|
1364
|
+
close_reason_local = null
|
|
1365
|
+
where id = ? and close_reason_local = 'all_members_closed'`);
|
|
1366
|
+
let changed = 0;
|
|
1367
|
+
for (const clusterId of resolvedClusterIds) {
|
|
1368
|
+
const row = summarize.get(clusterId);
|
|
1369
|
+
if (!row || row.close_reason_local === 'manual') {
|
|
1370
|
+
continue;
|
|
1371
|
+
}
|
|
1372
|
+
if (row.member_count > 0 && row.closed_member_count >= row.member_count) {
|
|
1373
|
+
const result = markClosed.run(nowIso(), clusterId);
|
|
1374
|
+
changed += result.changes;
|
|
1375
|
+
continue;
|
|
1376
|
+
}
|
|
1377
|
+
const cleared = clearClosed.run(clusterId);
|
|
1378
|
+
changed += cleared.changes;
|
|
1379
|
+
}
|
|
1380
|
+
return changed;
|
|
1381
|
+
}
|
|
1143
1382
|
listRawTuiClusters(repoId, clusterRunId) {
|
|
1144
1383
|
const rows = this.db
|
|
1145
1384
|
.prepare(`select
|
|
1146
1385
|
c.id as cluster_id,
|
|
1147
1386
|
c.member_count,
|
|
1387
|
+
c.closed_at_local,
|
|
1388
|
+
c.close_reason_local,
|
|
1148
1389
|
c.representative_thread_id,
|
|
1149
1390
|
rt.number as representative_number,
|
|
1150
1391
|
rt.kind as representative_kind,
|
|
@@ -1152,6 +1393,7 @@ export class GHCrawlService {
|
|
|
1152
1393
|
max(coalesce(t.updated_at_gh, t.updated_at)) as latest_updated_at,
|
|
1153
1394
|
sum(case when t.kind = 'issue' then 1 else 0 end) as issue_count,
|
|
1154
1395
|
sum(case when t.kind = 'pull_request' then 1 else 0 end) as pull_request_count,
|
|
1396
|
+
sum(case when t.state != 'open' or t.closed_at_local is not null then 1 else 0 end) as closed_member_count,
|
|
1155
1397
|
group_concat(lower(coalesce(t.title, '')), ' ') as search_text
|
|
1156
1398
|
from clusters c
|
|
1157
1399
|
left join threads rt on rt.id = c.representative_thread_id
|
|
@@ -1161,6 +1403,8 @@ export class GHCrawlService {
|
|
|
1161
1403
|
group by
|
|
1162
1404
|
c.id,
|
|
1163
1405
|
c.member_count,
|
|
1406
|
+
c.closed_at_local,
|
|
1407
|
+
c.close_reason_local,
|
|
1164
1408
|
c.representative_thread_id,
|
|
1165
1409
|
rt.number,
|
|
1166
1410
|
rt.kind,
|
|
@@ -1169,6 +1413,9 @@ export class GHCrawlService {
|
|
|
1169
1413
|
return rows.map((row) => ({
|
|
1170
1414
|
clusterId: row.cluster_id,
|
|
1171
1415
|
displayTitle: row.representative_title ?? `Cluster ${row.cluster_id}`,
|
|
1416
|
+
isClosed: row.close_reason_local !== null || row.closed_member_count >= row.member_count,
|
|
1417
|
+
closedAtLocal: row.closed_at_local,
|
|
1418
|
+
closeReasonLocal: row.close_reason_local,
|
|
1172
1419
|
totalCount: row.member_count,
|
|
1173
1420
|
issueCount: row.issue_count,
|
|
1174
1421
|
pullRequestCount: row.pull_request_count,
|
|
@@ -1179,6 +1426,56 @@ export class GHCrawlService {
|
|
|
1179
1426
|
searchText: `${(row.representative_title ?? '').toLowerCase()} ${row.search_text ?? ''}`.trim(),
|
|
1180
1427
|
}));
|
|
1181
1428
|
}
|
|
1429
|
+
getRawTuiClusterSummary(repoId, clusterRunId, clusterId) {
|
|
1430
|
+
const row = this.db
|
|
1431
|
+
.prepare(`select
|
|
1432
|
+
c.id as cluster_id,
|
|
1433
|
+
c.member_count,
|
|
1434
|
+
c.closed_at_local,
|
|
1435
|
+
c.close_reason_local,
|
|
1436
|
+
c.representative_thread_id,
|
|
1437
|
+
rt.number as representative_number,
|
|
1438
|
+
rt.kind as representative_kind,
|
|
1439
|
+
rt.title as representative_title,
|
|
1440
|
+
max(coalesce(t.updated_at_gh, t.updated_at)) as latest_updated_at,
|
|
1441
|
+
sum(case when t.kind = 'issue' then 1 else 0 end) as issue_count,
|
|
1442
|
+
sum(case when t.kind = 'pull_request' then 1 else 0 end) as pull_request_count,
|
|
1443
|
+
sum(case when t.state != 'open' or t.closed_at_local is not null then 1 else 0 end) as closed_member_count,
|
|
1444
|
+
group_concat(lower(coalesce(t.title, '')), ' ') as search_text
|
|
1445
|
+
from clusters c
|
|
1446
|
+
left join threads rt on rt.id = c.representative_thread_id
|
|
1447
|
+
join cluster_members cm on cm.cluster_id = c.id
|
|
1448
|
+
join threads t on t.id = cm.thread_id
|
|
1449
|
+
where c.repo_id = ? and c.cluster_run_id = ? and c.id = ?
|
|
1450
|
+
group by
|
|
1451
|
+
c.id,
|
|
1452
|
+
c.member_count,
|
|
1453
|
+
c.closed_at_local,
|
|
1454
|
+
c.close_reason_local,
|
|
1455
|
+
c.representative_thread_id,
|
|
1456
|
+
rt.number,
|
|
1457
|
+
rt.kind,
|
|
1458
|
+
rt.title`)
|
|
1459
|
+
.get(repoId, clusterRunId, clusterId);
|
|
1460
|
+
if (!row) {
|
|
1461
|
+
return null;
|
|
1462
|
+
}
|
|
1463
|
+
return {
|
|
1464
|
+
clusterId: row.cluster_id,
|
|
1465
|
+
displayTitle: row.representative_title ?? `Cluster ${row.cluster_id}`,
|
|
1466
|
+
isClosed: row.close_reason_local !== null || row.closed_member_count >= row.member_count,
|
|
1467
|
+
closedAtLocal: row.closed_at_local,
|
|
1468
|
+
closeReasonLocal: row.close_reason_local,
|
|
1469
|
+
totalCount: row.member_count,
|
|
1470
|
+
issueCount: row.issue_count,
|
|
1471
|
+
pullRequestCount: row.pull_request_count,
|
|
1472
|
+
latestUpdatedAt: row.latest_updated_at,
|
|
1473
|
+
representativeThreadId: row.representative_thread_id,
|
|
1474
|
+
representativeNumber: row.representative_number,
|
|
1475
|
+
representativeKind: row.representative_kind,
|
|
1476
|
+
searchText: `${(row.representative_title ?? '').toLowerCase()} ${row.search_text ?? ''}`.trim(),
|
|
1477
|
+
};
|
|
1478
|
+
}
|
|
1182
1479
|
compareTuiClusterSummary(left, right, sort) {
|
|
1183
1480
|
const leftTime = left.latestUpdatedAt ? Date.parse(left.latestUpdatedAt) : 0;
|
|
1184
1481
|
const rightTime = right.latestUpdatedAt ? Date.parse(right.latestUpdatedAt) : 0;
|
|
@@ -1306,6 +1603,7 @@ export class GHCrawlService {
|
|
|
1306
1603
|
from threads
|
|
1307
1604
|
where repo_id = ?
|
|
1308
1605
|
and state = 'open'
|
|
1606
|
+
and closed_at_local is null
|
|
1309
1607
|
and (last_pulled_at is null or last_pulled_at < ?)
|
|
1310
1608
|
order by number asc`)
|
|
1311
1609
|
.all(params.repoId, params.crawlStartedAt);
|
|
@@ -1350,6 +1648,7 @@ export class GHCrawlService {
|
|
|
1350
1648
|
from threads
|
|
1351
1649
|
where repo_id = ?
|
|
1352
1650
|
and state = 'open'
|
|
1651
|
+
and closed_at_local is null
|
|
1353
1652
|
and (last_pulled_at is null or last_pulled_at < ?)
|
|
1354
1653
|
order by number asc`)
|
|
1355
1654
|
.all(params.repoId, params.crawlStartedAt);
|
|
@@ -1637,11 +1936,12 @@ export class GHCrawlService {
|
|
|
1637
1936
|
}
|
|
1638
1937
|
loadStoredEmbeddings(repoId) {
|
|
1639
1938
|
return this.db
|
|
1640
|
-
.prepare(`select t.id, t.repo_id, t.number, t.kind, t.state, t.
|
|
1939
|
+
.prepare(`select t.id, t.repo_id, t.number, t.kind, t.state, t.closed_at_gh, t.closed_at_local, t.close_reason_local,
|
|
1940
|
+
t.title, t.body, t.author_login, t.html_url, t.labels_json,
|
|
1641
1941
|
t.updated_at_gh, t.first_pulled_at, t.last_pulled_at, e.source_kind, e.embedding_json
|
|
1642
1942
|
from threads t
|
|
1643
1943
|
join document_embeddings e on e.thread_id = t.id
|
|
1644
|
-
where t.repo_id = ? and t.state = 'open' and e.model = ?
|
|
1944
|
+
where t.repo_id = ? and t.state = 'open' and t.closed_at_local is null and e.model = ?
|
|
1645
1945
|
order by t.number asc, e.source_kind asc`)
|
|
1646
1946
|
.all(repoId, this.config.embedModel);
|
|
1647
1947
|
}
|
|
@@ -1688,7 +1988,9 @@ export class GHCrawlService {
|
|
|
1688
1988
|
and se.cluster_run_id = ?
|
|
1689
1989
|
and (se.left_thread_id = ? or se.right_thread_id = ?)
|
|
1690
1990
|
and t1.state = 'open'
|
|
1991
|
+
and t1.closed_at_local is null
|
|
1691
1992
|
and t2.state = 'open'
|
|
1993
|
+
and t2.closed_at_local is null
|
|
1692
1994
|
order by se.score desc
|
|
1693
1995
|
limit ?`)
|
|
1694
1996
|
.all(threadId, threadId, threadId, threadId, repoId, latestRun.id, threadId, threadId, limit);
|
|
@@ -1703,7 +2005,7 @@ export class GHCrawlService {
|
|
|
1703
2005
|
getEmbeddingWorkset(repoId, threadNumber) {
|
|
1704
2006
|
let sql = `select t.id, t.number, t.title, t.body
|
|
1705
2007
|
from threads t
|
|
1706
|
-
where t.repo_id = ? and t.state = 'open'`;
|
|
2008
|
+
where t.repo_id = ? and t.state = 'open' and t.closed_at_local is null`;
|
|
1707
2009
|
const args = [repoId];
|
|
1708
2010
|
if (threadNumber) {
|
|
1709
2011
|
sql += ' and t.number = ?';
|
|
@@ -1736,7 +2038,7 @@ export class GHCrawlService {
|
|
|
1736
2038
|
let sql = `select s.thread_id, s.summary_kind, s.summary_text
|
|
1737
2039
|
from document_summaries s
|
|
1738
2040
|
join threads t on t.id = s.thread_id
|
|
1739
|
-
where t.repo_id = ? and t.state = 'open' and s.model = ?`;
|
|
2041
|
+
where t.repo_id = ? and t.state = 'open' and t.closed_at_local is null and s.model = ?`;
|
|
1740
2042
|
const args = [repoId, this.config.summaryModel];
|
|
1741
2043
|
if (threadNumber) {
|
|
1742
2044
|
sql += ' and t.number = ?';
|