@ghcrawl/api-core 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cluster/edge-worker.d.ts +2 -0
- package/dist/cluster/edge-worker.d.ts.map +1 -0
- package/dist/cluster/edge-worker.js +48 -0
- package/dist/cluster/edge-worker.js.map +1 -0
- package/dist/cluster/exact-edges.d.ts +20 -0
- package/dist/cluster/exact-edges.d.ts.map +1 -0
- package/dist/cluster/exact-edges.js +80 -0
- package/dist/cluster/exact-edges.js.map +1 -0
- package/dist/cluster/perf.integration.d.ts +2 -0
- package/dist/cluster/perf.integration.d.ts.map +1 -0
- package/dist/cluster/perf.integration.js +287 -0
- package/dist/cluster/perf.integration.js.map +1 -0
- package/dist/db/migrate.d.ts.map +1 -1
- package/dist/db/migrate.js +5 -0
- package/dist/db/migrate.js.map +1 -1
- package/dist/search/exact.d.ts +13 -0
- package/dist/search/exact.d.ts.map +1 -1
- package/dist/search/exact.js +58 -6
- package/dist/search/exact.js.map +1 -1
- package/dist/service.d.ts +8 -1
- package/dist/service.d.ts.map +1 -1
- package/dist/service.js +205 -58
- package/dist/service.js.map +1 -1
- package/package.json +5 -4
package/dist/service.d.ts
CHANGED
|
@@ -63,6 +63,7 @@ export type TuiThreadDetail = {
|
|
|
63
63
|
export type TuiSnapshot = {
|
|
64
64
|
repository: RepositoryDto;
|
|
65
65
|
stats: TuiRepoStats;
|
|
66
|
+
clusterRunId: number | null;
|
|
66
67
|
clusters: TuiClusterSummary[];
|
|
67
68
|
};
|
|
68
69
|
export type DoctorResult = {
|
|
@@ -168,7 +169,7 @@ export declare class GHCrawlService {
|
|
|
168
169
|
minScore?: number;
|
|
169
170
|
k?: number;
|
|
170
171
|
onProgress?: (message: string) => void;
|
|
171
|
-
}): ClusterResultDto
|
|
172
|
+
}): Promise<ClusterResultDto>;
|
|
172
173
|
searchRepository(params: {
|
|
173
174
|
owner: string;
|
|
174
175
|
repo: string;
|
|
@@ -225,6 +226,7 @@ export declare class GHCrawlService {
|
|
|
225
226
|
owner: string;
|
|
226
227
|
repo: string;
|
|
227
228
|
clusterId: number;
|
|
229
|
+
clusterRunId?: number;
|
|
228
230
|
}): TuiClusterDetail;
|
|
229
231
|
getTuiThreadDetail(params: {
|
|
230
232
|
owner: string;
|
|
@@ -241,6 +243,7 @@ export declare class GHCrawlService {
|
|
|
241
243
|
private getLatestRunClusterIdsForThread;
|
|
242
244
|
private reconcileClusterCloseState;
|
|
243
245
|
private listRawTuiClusters;
|
|
246
|
+
private getRawTuiClusterSummary;
|
|
244
247
|
private compareTuiClusterSummary;
|
|
245
248
|
private fetchThreadComments;
|
|
246
249
|
private requireAi;
|
|
@@ -263,11 +266,15 @@ export declare class GHCrawlService {
|
|
|
263
266
|
private chunkEmbeddingTasks;
|
|
264
267
|
private loadStoredEmbeddings;
|
|
265
268
|
private loadParsedStoredEmbeddings;
|
|
269
|
+
private loadClusterableThreadMeta;
|
|
266
270
|
private listStoredClusterNeighbors;
|
|
267
271
|
private getEmbeddingWorkset;
|
|
268
272
|
private loadCombinedSummaryTextMap;
|
|
269
273
|
private edgeKey;
|
|
270
274
|
private aggregateRepositoryEdges;
|
|
275
|
+
private mergeSourceKindEdges;
|
|
276
|
+
private countEmbeddingsForSourceKind;
|
|
277
|
+
private resolveEdgeWorkerUrl;
|
|
271
278
|
private persistClusterRun;
|
|
272
279
|
private pruneOldClusterRuns;
|
|
273
280
|
private upsertSummary;
|
package/dist/service.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"service.d.ts","sourceRoot":"","sources":["../src/service.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"service.d.ts","sourceRoot":"","sources":["../src/service.ts"],"names":[],"mappings":"AAQA,OAAO,EAgBL,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,qBAAqB,EAC1B,KAAK,aAAa,EAClB,KAAK,qBAAqB,EAE1B,KAAK,gBAAgB,EACrB,KAAK,wBAAwB,EAC7B,KAAK,gBAAgB,EACrB,KAAK,cAAc,EACnB,KAAK,cAAc,EACnB,KAAK,iBAAiB,EACtB,KAAK,eAAe,EACpB,KAAK,oBAAoB,EACzB,KAAK,aAAa,EAClB,KAAK,YAAY,EACjB,KAAK,UAAU,EACf,KAAK,cAAc,EACnB,KAAK,aAAa,EAClB,KAAK,SAAS,EACd,KAAK,eAAe,EACrB,MAAM,uBAAuB,CAAC;AAI/B,OAAO,EAOL,KAAK,iBAAiB,EACtB,KAAK,cAAc,EACpB,MAAM,aAAa,CAAC;AAErB,OAAO,EAAU,KAAK,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAE7D,OAAO,EAAoB,KAAK,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACzE,OAAO,EAAkB,KAAK,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAgGvE,MAAM,MAAM,kBAAkB,GAAG,QAAQ,GAAG,MAAM,CAAC;AAEnD,MAAM,MAAM,YAAY,GAAG;IACzB,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,0BAA0B,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1C,kBAAkB,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,qBAAqB,EAAE,MAAM,CAAC;IAC9B,qBAAqB,EAAE,MAAM,CAAC;IAC9B,kBAAkB,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,0BAA0B,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3C,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,OAAO,CAAC;IAClB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,CAAC;IACzB,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,sBAAsB,EAAE,MAAM,GAAG,IAAI,CAAC;IACtC,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,kBAAkB,EAAE,OAAO,GAAG,cAAc,GAAG,IAAI,CAAC;IACpD,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG;IAC7B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,OAAO,GAAG,cAAc,CAAC;IAC/B,QAAQ,EAAE,OAAO,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,OAAO,CAAC;IAClB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,CAAC;IACzB,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,sBAAsB,EAAE,MAAM,GAAG,IAAI,CAAC;IACtC,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,kBAAkB,EAAE,OAAO,GAAG,cAAc,GAAG,IAAI,CAAC;IACpD,OAAO,EAAE,gBAAgB,EAAE,CAAC;CAC7B,CAAC;AAEF,MAAM,MAAM,eAAe,GAAG;IAC5B,MAAM,EAAE,SAAS,CAAC;IAClB,SAAS,EAAE,OAAO,CAAC,MAAM,CAAC,iBAAiB,GAAG,kBAAkB,GAAG,2BAA2B,GAAG,gBAAgB,EAAE,MAAM,CAAC,CAAC,CAAC;IAC5H,SAAS,EAAE,YAAY,CAAC,WAAW,CAAC,CAAC;CACtC,CAAC;AAEF,MAAM,MAAM,WAAW,GAAG;IACxB,UAAU,EAAE,aAAa,CAAC;IAC1B,KAAK,EAAE,YAAY,CAAC;IACpB,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,QAAQ,EAAE,iBAAiB,EAAE,CAAC;CAC/B,CAAC;AAEF,MAAM,MAAM,YAAY,GAAG;IACzB,MAAM,EAAE,cAAc,CAAC;IACvB,MAAM,EAAE;QACN,UAAU,EAAE,OAAO,CAAC;QACpB,MAAM,EAAE,iBAAiB,CAAC;QAC1B,QAAQ,EAAE,OAAO,CAAC;QAClB,MAAM,EAAE,OAAO,CAAC;QAChB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;KACtB,CAAC;IACF,MAAM,EAAE;QACN,UAAU,EAAE,OAAO,CAAC;QACpB,MAAM,EAAE,iBAAiB,CAAC;QAC1B,QAAQ,EAAE,OAAO,CAAC;QAClB,MAAM,EAAE,OAAO,CAAC;QAChB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;KACtB,CAAC;CACH,CAAC;AAEF,KAAK,WAAW,GAAG;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;IACvC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,CAAC;AAEF,KAAK,oBAAoB,GAAG,cAAc,CAAC;AAC3C,KAAK,uBAAuB,GAAG,iBAAiB,CAAC;AAyKjD,qBAAa,cAAc;IACzB,QAAQ,CAAC,MAAM,EAAE,cAAc,CAAC;IAChC,QAAQ,CAAC,EAAE,EAAE,cAAc,CAAC;IAC5B,QAAQ,CAAC,MAAM,CAAC,EAAE,YAAY,CAAC;IAC/B,QAAQ,CAAC,EAAE,CAAC,EAAE,UAAU,CAAC;IACzB,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAiD;gBAE1E,OAAO,GAAE;QACnB,MAAM,CAAC,EAAE,cAAc,CAAC;QACxB,EAAE,CAAC,EAAE,cAAc,CAAC;QACpB,MAAM,CAAC,EAAE,YAAY,CAAC;QACtB,EAAE,CAAC,EAAE,UAAU,CAAC;KACZ;IASN,KAAK,IAAI,IAAI;IAKb,IAAI,IAAI,cAAc;IAehB,MAAM,IAAI,OAAO,CAAC,YAAY,CAAC;IAmDrC,gBAAgB,IAAI,oBAAoB;IAKxC,WAAW,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,OAAO,GAAG,cAAc,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;QAAC,aAAa,CAAC,EAAE,OAAO,CAAA;KAAE,GAAG,eAAe;IAmDnJ,iBAAiB,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,aAAa,CAAC,EAAE,OAAO,CAAA;KAAE,GAAG,qBAAqB;IA4GzH,kBAAkB,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,GAAG,aAAa;IAmChG,mBAAmB,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,GAAG,aAAa;IAiCxF,cAAc,CAClB,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,aAAa,CAAC;IA2InB,mBAAmB,CAAC,MAAM,EAAE;QAChC,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,eAAe,CAAC,EAAE,OAAO,CAAC;QAC1B,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KACxC,GAAG,OAAO,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,CAAC;IAsFlH,aAAa,CAAC,MAAM,EAAE;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KACxC,GAAG;QAAE,cAAc,EAAE,MAAM,CAAC;QAAC,gBAAgB,EAAE,MAAM,CAAA;KAAE;IAoClD,eAAe,CAAC,MAAM,EAAE;QAC5B,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KACxC,GAAG,OAAO,CAAC,cAAc,CAAC;IAoDrB,iBAAiB,CAAC,MAAM,EAAE;QAC9B,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,CAAC,CAAC,EAAE,MAAM,CAAC;QACX,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KACxC,GAAG,OAAO,CAAC,gBAAgB,CAAC;IA0CvB,gBAAgB,CAAC,MAAM,EAAE;QAC7B,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,CAAC,EAAE,UAAU,CAAC;QAClB,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAmHjC,aAAa,CAAC,MAAM,EAAE;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,YAAY,EAAE,MAAM,CAAC;QACrB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB,GAAG,uBAAuB;IAiD3B,YAAY,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,aAAa,CAAC,EAAE,OAAO,CAAA;KAAE,GAAG,gBAAgB;IAyE1F,iBAAiB,CAAC,MAAM,EAAE;QAC9B,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,CAAC,EAAE,OAAO,CAAC;QACf,KAAK,CAAC,EAAE,OAAO,CAAC;QAChB,OAAO,CAAC,EAAE,OAAO,CAAC;QAClB,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KACxC,GAAG,OAAO,CAAC,eAAe,CAAC;IAkD5B,oBAAoB,CAAC,MAAM,EAAE;QAC3B,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,IAAI,CAAC,EAAE,kBAAkB,CAAC;QAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,aAAa,CAAC,EAAE,OAAO,CAAC;KACzB,GAAG,wBAAwB;IA8B5B,oBAAoB,CAAC,MAAM,EAAE;QAC3B,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,aAAa,CAAC,EAAE,OAAO,CAAC;KACzB,GAAG,qBAAqB;IAwDzB,cAAc,CAAC,MAAM,EAAE;QACrB,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,IAAI,CAAC,EAAE,kBAAkB,CAAC;QAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,qBAAqB,CAAC,EAAE,OAAO,CAAC;KACjC,GAAG,WAAW;IA2Bf,mBAAmB,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,gBAAgB;IAiExH,kBAAkB,CAAC,MAAM,EAAE;QACzB,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,gBAAgB,CAAC,EAAE,OAAO,CAAC;KAC5B,GAAG,eAAe;IA0Eb,WAAW,CAAC,OAAO,EAAE,aAAa,GAAG,OAAO,CAAC,cAAc,CAAC;IAgClE,OAAO,CAAC,kBAAkB;IAmE1B,OAAO,CAAC,oBAAoB;IA4B5B,OAAO,CAAC,eAAe;IA8BvB,OAAO,CAAC,mBAAmB;IAQ3B,OAAO,CAAC,+BAA+B;IAkBvC,OAAO,CAAC,0BAA0B;IAoElC,OAAO,CAAC,kBAAkB;IAiE1B,OAAO,CAAC,uBAAuB;IAuE/B,OAAO,CAAC,wBAAwB;YASlB,mBAAmB;IA4DjC,OAAO,CAAC,SAAS;IAOjB,OAAO,CAAC,aAAa;IAOrB,OAAO,CAAC,iBAAiB;IASzB,OAAO,CAAC,gBAAgB;IAgBxB,OAAO,CAAC,YAAY;YAoEN,uBAAuB;YAmFvB,2BAA2B;IAuGzC,OAAO,CAAC,eAAe;IA0BvB,OAAO,CAAC,eAAe;IAoCvB,OAAO,CAAC,kBAAkB;IAmD1B,OAAO,CAAC,mBAAmB;IAkD3B,OAAO,CAAC,oBAAoB;IAoB5B,OAAO,CAAC,uBAAuB;IAI/B,OAAO,CAAC,uBAAuB;YAKjB,sBAAsB;YAgCtB,2BAA2B;IAiCzC,OAAO,CAAC,mBAAmB;IAmB3B,OAAO,CAAC,mBAAmB;IAwB3B,OAAO,CAAC,oBAAoB;IAc5B,OAAO,CAAC,0BAA0B;IAoBlC,OAAO,CAAC,yBAAyB;IA4BjC,OAAO,CAAC,0BAA0B;IAwDlC,OAAO,CAAC,mBAAmB;IA+C3B,OAAO,CAAC,0BAA0B;IAyClC,OAAO,CAAC,OAAO;YAMD,wBAAwB;IA4GtC,OAAO,CAAC,oBAAoB;IAsB5B,OAAO,CAAC,4BAA4B;IAepC,OAAO,CAAC,oBAAoB;IAQ5B,OAAO,CAAC,iBAAiB;IAsDzB,OAAO,CAAC,mBAAmB;IAI3B,OAAO,CAAC,aAAa;IAarB,OAAO,CAAC,eAAe;IA2BvB,OAAO,CAAC,QAAQ;IAOhB,OAAO,CAAC,SAAS;CAkBlB;AAED,wBAAgB,eAAe,CAAC,GAAG,EAAE,GAAG,GAAG;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAOzE"}
|
package/dist/service.js
CHANGED
|
@@ -1,19 +1,25 @@
|
|
|
1
1
|
import http from 'node:http';
|
|
2
2
|
import crypto from 'node:crypto';
|
|
3
|
+
import { existsSync } from 'node:fs';
|
|
4
|
+
import os from 'node:os';
|
|
5
|
+
import { fileURLToPath } from 'node:url';
|
|
6
|
+
import { Worker } from 'node:worker_threads';
|
|
3
7
|
import { IterableMapper } from '@shutterstock/p-map-iterable';
|
|
4
8
|
import { actionResponseSchema, authorThreadsResponseSchema, closeResponseSchema, clusterDetailResponseSchema, clusterResultSchema, clusterSummariesResponseSchema, clustersResponseSchema, embedResultSchema, healthResponseSchema, neighborsResponseSchema, refreshResponseSchema, repositoriesResponseSchema, searchResponseSchema, syncResultSchema, threadsResponseSchema, } from '@ghcrawl/api-contract';
|
|
5
9
|
import { buildClusters } from './cluster/build.js';
|
|
10
|
+
import { buildSourceKindEdges } from './cluster/exact-edges.js';
|
|
6
11
|
import { ensureRuntimeDirs, isLikelyGitHubToken, isLikelyOpenAiApiKey, loadConfig, requireGithubToken, requireOpenAiKey, } from './config.js';
|
|
7
12
|
import { migrate } from './db/migrate.js';
|
|
8
13
|
import { openDb } from './db/sqlite.js';
|
|
9
14
|
import { buildCanonicalDocument, isBotLikeAuthor } from './documents/normalize.js';
|
|
10
15
|
import { makeGitHubClient } from './github/client.js';
|
|
11
16
|
import { OpenAiProvider } from './openai/provider.js';
|
|
12
|
-
import { cosineSimilarity, rankNearestNeighbors } from './search/exact.js';
|
|
17
|
+
import { cosineSimilarity, normalizeEmbedding, rankNearestNeighbors } from './search/exact.js';
|
|
13
18
|
const SYNC_BATCH_SIZE = 100;
|
|
14
19
|
const SYNC_BATCH_DELAY_MS = 5000;
|
|
15
20
|
const STALE_CLOSED_SWEEP_LIMIT = 1000;
|
|
16
21
|
const CLUSTER_PROGRESS_INTERVAL_MS = 5000;
|
|
22
|
+
const CLUSTER_PARALLEL_MIN_EMBEDDINGS = 5000;
|
|
17
23
|
const EMBED_ESTIMATED_CHARS_PER_TOKEN = 3;
|
|
18
24
|
const EMBED_MAX_ITEM_TOKENS = 7000;
|
|
19
25
|
const EMBED_MAX_BATCH_TOKENS = 250000;
|
|
@@ -697,24 +703,15 @@ export class GHCrawlService {
|
|
|
697
703
|
throw error;
|
|
698
704
|
}
|
|
699
705
|
}
|
|
700
|
-
clusterRepository(params) {
|
|
706
|
+
async clusterRepository(params) {
|
|
701
707
|
const repository = this.requireRepository(params.owner, params.repo);
|
|
702
708
|
const runId = this.startRun('cluster_runs', repository.id, repository.fullName);
|
|
703
709
|
const minScore = params.minScore ?? 0.82;
|
|
704
710
|
const k = params.k ?? 6;
|
|
705
711
|
try {
|
|
706
|
-
const
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
threadMeta.set(row.id, { number: row.number, title: row.title });
|
|
710
|
-
}
|
|
711
|
-
const items = Array.from(threadMeta.entries()).map(([id, meta]) => ({
|
|
712
|
-
id,
|
|
713
|
-
number: meta.number,
|
|
714
|
-
title: meta.title,
|
|
715
|
-
}));
|
|
716
|
-
params.onProgress?.(`[cluster] loaded ${items.length} embedded thread(s) across ${new Set(rows.map((row) => row.source_kind)).size} source kind(s) for ${repository.fullName} k=${k} minScore=${minScore}`);
|
|
717
|
-
const aggregatedEdges = this.aggregateRepositoryEdges(rows, {
|
|
712
|
+
const { items, sourceKinds } = this.loadClusterableThreadMeta(repository.id);
|
|
713
|
+
params.onProgress?.(`[cluster] loaded ${items.length} embedded thread(s) across ${sourceKinds.length} source kind(s) for ${repository.fullName} k=${k} minScore=${minScore}`);
|
|
714
|
+
const aggregatedEdges = await this.aggregateRepositoryEdges(repository.id, sourceKinds, {
|
|
718
715
|
limit: k,
|
|
719
716
|
minScore,
|
|
720
717
|
onProgress: params.onProgress,
|
|
@@ -954,7 +951,7 @@ export class GHCrawlService {
|
|
|
954
951
|
});
|
|
955
952
|
}
|
|
956
953
|
if (selected.cluster) {
|
|
957
|
-
cluster = this.clusterRepository({
|
|
954
|
+
cluster = await this.clusterRepository({
|
|
958
955
|
owner: params.owner,
|
|
959
956
|
repo: params.repo,
|
|
960
957
|
onProgress: params.onProgress,
|
|
@@ -1013,6 +1010,7 @@ export class GHCrawlService {
|
|
|
1013
1010
|
owner: params.owner,
|
|
1014
1011
|
repo: params.repo,
|
|
1015
1012
|
clusterId: params.clusterId,
|
|
1013
|
+
clusterRunId: snapshot.clusterRunId ?? undefined,
|
|
1016
1014
|
});
|
|
1017
1015
|
const members = detail.members.slice(0, params.memberLimit ?? detail.members.length).map((member) => {
|
|
1018
1016
|
const threadDetail = this.getTuiThreadDetail({
|
|
@@ -1055,7 +1053,7 @@ export class GHCrawlService {
|
|
|
1055
1053
|
const stats = this.getTuiRepoStats(repository.id);
|
|
1056
1054
|
const latestRun = this.getLatestClusterRun(repository.id);
|
|
1057
1055
|
if (!latestRun) {
|
|
1058
|
-
return { repository, stats, clusters: [] };
|
|
1056
|
+
return { repository, stats, clusterRunId: null, clusters: [] };
|
|
1059
1057
|
}
|
|
1060
1058
|
const includeClosedClusters = params.includeClosedClusters ?? true;
|
|
1061
1059
|
const clusters = this.listRawTuiClusters(repository.id, latestRun.id)
|
|
@@ -1071,16 +1069,18 @@ export class GHCrawlService {
|
|
|
1071
1069
|
return {
|
|
1072
1070
|
repository,
|
|
1073
1071
|
stats,
|
|
1072
|
+
clusterRunId: latestRun.id,
|
|
1074
1073
|
clusters,
|
|
1075
1074
|
};
|
|
1076
1075
|
}
|
|
1077
1076
|
getTuiClusterDetail(params) {
|
|
1078
1077
|
const repository = this.requireRepository(params.owner, params.repo);
|
|
1079
|
-
const
|
|
1080
|
-
|
|
1078
|
+
const clusterRunId = params.clusterRunId ??
|
|
1079
|
+
(this.getLatestClusterRun(repository.id)?.id ?? null);
|
|
1080
|
+
if (!clusterRunId) {
|
|
1081
1081
|
throw new Error(`No completed cluster run found for ${repository.fullName}. Run cluster first.`);
|
|
1082
1082
|
}
|
|
1083
|
-
const summary = this.
|
|
1083
|
+
const summary = this.getRawTuiClusterSummary(repository.id, clusterRunId, params.clusterId);
|
|
1084
1084
|
if (!summary) {
|
|
1085
1085
|
throw new Error(`Cluster ${params.clusterId} was not found for ${repository.fullName}.`);
|
|
1086
1086
|
}
|
|
@@ -1204,7 +1204,7 @@ export class GHCrawlService {
|
|
|
1204
1204
|
});
|
|
1205
1205
|
}
|
|
1206
1206
|
case 'cluster': {
|
|
1207
|
-
const result = this.clusterRepository(request);
|
|
1207
|
+
const result = await this.clusterRepository(request);
|
|
1208
1208
|
return actionResponseSchema.parse({
|
|
1209
1209
|
ok: true,
|
|
1210
1210
|
action: request.action,
|
|
@@ -1400,6 +1400,8 @@ export class GHCrawlService {
|
|
|
1400
1400
|
group by
|
|
1401
1401
|
c.id,
|
|
1402
1402
|
c.member_count,
|
|
1403
|
+
c.closed_at_local,
|
|
1404
|
+
c.close_reason_local,
|
|
1403
1405
|
c.representative_thread_id,
|
|
1404
1406
|
rt.number,
|
|
1405
1407
|
rt.kind,
|
|
@@ -1421,6 +1423,56 @@ export class GHCrawlService {
|
|
|
1421
1423
|
searchText: `${(row.representative_title ?? '').toLowerCase()} ${row.search_text ?? ''}`.trim(),
|
|
1422
1424
|
}));
|
|
1423
1425
|
}
|
|
1426
|
+
getRawTuiClusterSummary(repoId, clusterRunId, clusterId) {
|
|
1427
|
+
const row = this.db
|
|
1428
|
+
.prepare(`select
|
|
1429
|
+
c.id as cluster_id,
|
|
1430
|
+
c.member_count,
|
|
1431
|
+
c.closed_at_local,
|
|
1432
|
+
c.close_reason_local,
|
|
1433
|
+
c.representative_thread_id,
|
|
1434
|
+
rt.number as representative_number,
|
|
1435
|
+
rt.kind as representative_kind,
|
|
1436
|
+
rt.title as representative_title,
|
|
1437
|
+
max(coalesce(t.updated_at_gh, t.updated_at)) as latest_updated_at,
|
|
1438
|
+
sum(case when t.kind = 'issue' then 1 else 0 end) as issue_count,
|
|
1439
|
+
sum(case when t.kind = 'pull_request' then 1 else 0 end) as pull_request_count,
|
|
1440
|
+
sum(case when t.state != 'open' or t.closed_at_local is not null then 1 else 0 end) as closed_member_count,
|
|
1441
|
+
group_concat(lower(coalesce(t.title, '')), ' ') as search_text
|
|
1442
|
+
from clusters c
|
|
1443
|
+
left join threads rt on rt.id = c.representative_thread_id
|
|
1444
|
+
join cluster_members cm on cm.cluster_id = c.id
|
|
1445
|
+
join threads t on t.id = cm.thread_id
|
|
1446
|
+
where c.repo_id = ? and c.cluster_run_id = ? and c.id = ?
|
|
1447
|
+
group by
|
|
1448
|
+
c.id,
|
|
1449
|
+
c.member_count,
|
|
1450
|
+
c.closed_at_local,
|
|
1451
|
+
c.close_reason_local,
|
|
1452
|
+
c.representative_thread_id,
|
|
1453
|
+
rt.number,
|
|
1454
|
+
rt.kind,
|
|
1455
|
+
rt.title`)
|
|
1456
|
+
.get(repoId, clusterRunId, clusterId);
|
|
1457
|
+
if (!row) {
|
|
1458
|
+
return null;
|
|
1459
|
+
}
|
|
1460
|
+
return {
|
|
1461
|
+
clusterId: row.cluster_id,
|
|
1462
|
+
displayTitle: row.representative_title ?? `Cluster ${row.cluster_id}`,
|
|
1463
|
+
isClosed: row.close_reason_local !== null || row.closed_member_count >= row.member_count,
|
|
1464
|
+
closedAtLocal: row.closed_at_local,
|
|
1465
|
+
closeReasonLocal: row.close_reason_local,
|
|
1466
|
+
totalCount: row.member_count,
|
|
1467
|
+
issueCount: row.issue_count,
|
|
1468
|
+
pullRequestCount: row.pull_request_count,
|
|
1469
|
+
latestUpdatedAt: row.latest_updated_at,
|
|
1470
|
+
representativeThreadId: row.representative_thread_id,
|
|
1471
|
+
representativeNumber: row.representative_number,
|
|
1472
|
+
representativeKind: row.representative_kind,
|
|
1473
|
+
searchText: `${(row.representative_title ?? '').toLowerCase()} ${row.search_text ?? ''}`.trim(),
|
|
1474
|
+
};
|
|
1475
|
+
}
|
|
1424
1476
|
compareTuiClusterSummary(left, right, sort) {
|
|
1425
1477
|
const leftTime = left.latestUpdatedAt ? Date.parse(left.latestUpdatedAt) : 0;
|
|
1426
1478
|
const rightTime = right.latestUpdatedAt ? Date.parse(right.latestUpdatedAt) : 0;
|
|
@@ -1895,13 +1947,39 @@ export class GHCrawlService {
|
|
|
1895
1947
|
if (cached) {
|
|
1896
1948
|
return cached;
|
|
1897
1949
|
}
|
|
1898
|
-
const parsed = this.loadStoredEmbeddings(repoId).map((row) =>
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
|
|
1950
|
+
const parsed = this.loadStoredEmbeddings(repoId).map((row) => {
|
|
1951
|
+
const embedding = JSON.parse(row.embedding_json);
|
|
1952
|
+
const normalized = normalizeEmbedding(embedding);
|
|
1953
|
+
return {
|
|
1954
|
+
...row,
|
|
1955
|
+
embedding,
|
|
1956
|
+
normalizedEmbedding: normalized.normalized,
|
|
1957
|
+
embeddingNorm: normalized.norm,
|
|
1958
|
+
};
|
|
1959
|
+
});
|
|
1902
1960
|
this.parsedEmbeddingCache.set(repoId, parsed);
|
|
1903
1961
|
return parsed;
|
|
1904
1962
|
}
|
|
1963
|
+
loadClusterableThreadMeta(repoId) {
|
|
1964
|
+
const rows = this.db
|
|
1965
|
+
.prepare(`select t.id, t.number, t.title, e.source_kind
|
|
1966
|
+
from threads t
|
|
1967
|
+
join document_embeddings e on e.thread_id = t.id
|
|
1968
|
+
where t.repo_id = ?
|
|
1969
|
+
and t.state = 'open'
|
|
1970
|
+
and t.closed_at_local is null`)
|
|
1971
|
+
.all(repoId);
|
|
1972
|
+
const itemsById = new Map();
|
|
1973
|
+
const sourceKinds = new Set();
|
|
1974
|
+
for (const row of rows) {
|
|
1975
|
+
itemsById.set(row.id, { id: row.id, number: row.number, title: row.title });
|
|
1976
|
+
sourceKinds.add(row.source_kind);
|
|
1977
|
+
}
|
|
1978
|
+
return {
|
|
1979
|
+
items: Array.from(itemsById.values()),
|
|
1980
|
+
sourceKinds: Array.from(sourceKinds.values()),
|
|
1981
|
+
};
|
|
1982
|
+
}
|
|
1905
1983
|
listStoredClusterNeighbors(repoId, threadId, limit) {
|
|
1906
1984
|
const latestRun = this.getLatestClusterRun(repoId);
|
|
1907
1985
|
if (!latestRun) {
|
|
@@ -2017,50 +2095,119 @@ export class GHCrawlService {
|
|
|
2017
2095
|
const right = Math.max(leftThreadId, rightThreadId);
|
|
2018
2096
|
return `${left}:${right}`;
|
|
2019
2097
|
}
|
|
2020
|
-
aggregateRepositoryEdges(
|
|
2021
|
-
const bySource = new Map();
|
|
2022
|
-
for (const row of rows) {
|
|
2023
|
-
const list = bySource.get(row.source_kind) ?? [];
|
|
2024
|
-
list.push({ id: row.id, embedding: row.embedding });
|
|
2025
|
-
bySource.set(row.source_kind, list);
|
|
2026
|
-
}
|
|
2098
|
+
async aggregateRepositoryEdges(repoId, sourceKinds, params) {
|
|
2027
2099
|
const aggregated = new Map();
|
|
2028
|
-
const totalItems =
|
|
2029
|
-
|
|
2030
|
-
|
|
2031
|
-
|
|
2032
|
-
|
|
2033
|
-
|
|
2034
|
-
|
|
2100
|
+
const totalItems = sourceKinds.reduce((sum, sourceKind) => sum + this.countEmbeddingsForSourceKind(repoId, sourceKind), 0);
|
|
2101
|
+
if (sourceKinds.length === 0 || totalItems === 0) {
|
|
2102
|
+
return aggregated;
|
|
2103
|
+
}
|
|
2104
|
+
const shouldParallelize = sourceKinds.length > 1 && totalItems >= CLUSTER_PARALLEL_MIN_EMBEDDINGS && os.availableParallelism() > 1;
|
|
2105
|
+
if (!shouldParallelize) {
|
|
2106
|
+
const rows = this.loadParsedStoredEmbeddings(repoId);
|
|
2107
|
+
const bySource = new Map();
|
|
2108
|
+
for (const row of rows) {
|
|
2109
|
+
const list = bySource.get(row.source_kind) ?? [];
|
|
2110
|
+
list.push({ id: row.id, normalizedEmbedding: row.normalizedEmbedding });
|
|
2111
|
+
bySource.set(row.source_kind, list);
|
|
2112
|
+
}
|
|
2113
|
+
let processedItems = 0;
|
|
2114
|
+
for (const sourceKind of sourceKinds) {
|
|
2115
|
+
const items = bySource.get(sourceKind) ?? [];
|
|
2116
|
+
const edges = buildSourceKindEdges(items, {
|
|
2035
2117
|
limit: params.limit,
|
|
2036
2118
|
minScore: params.minScore,
|
|
2037
|
-
|
|
2119
|
+
progressIntervalMs: CLUSTER_PROGRESS_INTERVAL_MS,
|
|
2120
|
+
onProgress: (progress) => {
|
|
2121
|
+
if (!params.onProgress)
|
|
2122
|
+
return;
|
|
2123
|
+
params.onProgress(`[cluster] identifying similarity edges ${processedItems + progress.processedItems}/${totalItems} source embeddings processed current_edges~=${aggregated.size + progress.currentEdgeEstimate}`);
|
|
2124
|
+
},
|
|
2038
2125
|
});
|
|
2039
|
-
|
|
2040
|
-
|
|
2041
|
-
|
|
2042
|
-
|
|
2043
|
-
|
|
2044
|
-
|
|
2045
|
-
|
|
2046
|
-
|
|
2047
|
-
|
|
2048
|
-
|
|
2049
|
-
|
|
2050
|
-
|
|
2051
|
-
|
|
2126
|
+
processedItems += items.length;
|
|
2127
|
+
this.mergeSourceKindEdges(aggregated, edges, sourceKind);
|
|
2128
|
+
}
|
|
2129
|
+
return aggregated;
|
|
2130
|
+
}
|
|
2131
|
+
const workerUrl = this.resolveEdgeWorkerUrl();
|
|
2132
|
+
const progressBySource = new Map();
|
|
2133
|
+
const edgeSets = await Promise.all(sourceKinds.map((sourceKind) => new Promise((resolve, reject) => {
|
|
2134
|
+
const worker = new Worker(workerUrl, {
|
|
2135
|
+
workerData: {
|
|
2136
|
+
dbPath: this.config.dbPath,
|
|
2137
|
+
repoId,
|
|
2138
|
+
sourceKind,
|
|
2139
|
+
limit: params.limit,
|
|
2140
|
+
minScore: params.minScore,
|
|
2141
|
+
},
|
|
2142
|
+
});
|
|
2143
|
+
worker.on('message', (message) => {
|
|
2144
|
+
if (!message || typeof message !== 'object') {
|
|
2145
|
+
return;
|
|
2146
|
+
}
|
|
2147
|
+
const typed = message;
|
|
2148
|
+
if (typed.type === 'progress') {
|
|
2149
|
+
progressBySource.set(typed.sourceKind, {
|
|
2150
|
+
processedItems: typed.processedItems,
|
|
2151
|
+
totalItems: typed.totalItems,
|
|
2152
|
+
currentEdgeEstimate: typed.currentEdgeEstimate,
|
|
2052
2153
|
});
|
|
2154
|
+
if (params.onProgress) {
|
|
2155
|
+
const processedItems = Array.from(progressBySource.values()).reduce((sum, value) => sum + value.processedItems, 0);
|
|
2156
|
+
const currentEdgeEstimate = Array.from(progressBySource.values()).reduce((sum, value) => sum + value.currentEdgeEstimate, 0);
|
|
2157
|
+
params.onProgress(`[cluster] identifying similarity edges ${processedItems}/${totalItems} source embeddings processed current_edges~=${aggregated.size + currentEdgeEstimate}`);
|
|
2158
|
+
}
|
|
2159
|
+
return;
|
|
2053
2160
|
}
|
|
2054
|
-
|
|
2055
|
-
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
|
|
2161
|
+
resolve(typed.edges);
|
|
2162
|
+
});
|
|
2163
|
+
worker.on('error', reject);
|
|
2164
|
+
worker.on('exit', (code) => {
|
|
2165
|
+
if (code !== 0) {
|
|
2166
|
+
reject(new Error(`edge worker for ${sourceKind} exited with code ${code}`));
|
|
2059
2167
|
}
|
|
2060
|
-
}
|
|
2168
|
+
});
|
|
2169
|
+
})));
|
|
2170
|
+
for (const [index, edges] of edgeSets.entries()) {
|
|
2171
|
+
this.mergeSourceKindEdges(aggregated, edges, sourceKinds[index]);
|
|
2061
2172
|
}
|
|
2062
2173
|
return aggregated;
|
|
2063
2174
|
}
|
|
2175
|
+
mergeSourceKindEdges(aggregated, edges, sourceKind) {
|
|
2176
|
+
for (const edge of edges) {
|
|
2177
|
+
const key = this.edgeKey(edge.leftThreadId, edge.rightThreadId);
|
|
2178
|
+
const existing = aggregated.get(key);
|
|
2179
|
+
if (existing) {
|
|
2180
|
+
existing.score = Math.max(existing.score, edge.score);
|
|
2181
|
+
existing.sourceKinds.add(sourceKind);
|
|
2182
|
+
continue;
|
|
2183
|
+
}
|
|
2184
|
+
aggregated.set(key, {
|
|
2185
|
+
leftThreadId: edge.leftThreadId,
|
|
2186
|
+
rightThreadId: edge.rightThreadId,
|
|
2187
|
+
score: edge.score,
|
|
2188
|
+
sourceKinds: new Set([sourceKind]),
|
|
2189
|
+
});
|
|
2190
|
+
}
|
|
2191
|
+
}
|
|
2192
|
+
countEmbeddingsForSourceKind(repoId, sourceKind) {
|
|
2193
|
+
const row = this.db
|
|
2194
|
+
.prepare(`select count(*) as count
|
|
2195
|
+
from document_embeddings e
|
|
2196
|
+
join threads t on t.id = e.thread_id
|
|
2197
|
+
where t.repo_id = ?
|
|
2198
|
+
and t.state = 'open'
|
|
2199
|
+
and t.closed_at_local is null
|
|
2200
|
+
and e.source_kind = ?`)
|
|
2201
|
+
.get(repoId, sourceKind);
|
|
2202
|
+
return row.count;
|
|
2203
|
+
}
|
|
2204
|
+
resolveEdgeWorkerUrl() {
|
|
2205
|
+
const jsUrl = new URL('./cluster/edge-worker.js', import.meta.url);
|
|
2206
|
+
if (existsSync(fileURLToPath(jsUrl))) {
|
|
2207
|
+
return jsUrl;
|
|
2208
|
+
}
|
|
2209
|
+
return new URL('./cluster/edge-worker.ts', import.meta.url);
|
|
2210
|
+
}
|
|
2064
2211
|
persistClusterRun(repoId, runId, aggregatedEdges, clusters) {
|
|
2065
2212
|
const insertEdge = this.db.prepare(`insert into similarity_edges (repo_id, cluster_run_id, left_thread_id, right_thread_id, method, score, explanation_json, created_at)
|
|
2066
2213
|
values (?, ?, ?, ?, ?, ?, ?, ?)`);
|