@ghcrawl/api-core 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/service.d.ts CHANGED
@@ -63,6 +63,7 @@ export type TuiThreadDetail = {
63
63
  export type TuiSnapshot = {
64
64
  repository: RepositoryDto;
65
65
  stats: TuiRepoStats;
66
+ clusterRunId: number | null;
66
67
  clusters: TuiClusterSummary[];
67
68
  };
68
69
  export type DoctorResult = {
@@ -168,7 +169,7 @@ export declare class GHCrawlService {
168
169
  minScore?: number;
169
170
  k?: number;
170
171
  onProgress?: (message: string) => void;
171
- }): ClusterResultDto;
172
+ }): Promise<ClusterResultDto>;
172
173
  searchRepository(params: {
173
174
  owner: string;
174
175
  repo: string;
@@ -225,6 +226,7 @@ export declare class GHCrawlService {
225
226
  owner: string;
226
227
  repo: string;
227
228
  clusterId: number;
229
+ clusterRunId?: number;
228
230
  }): TuiClusterDetail;
229
231
  getTuiThreadDetail(params: {
230
232
  owner: string;
@@ -241,6 +243,7 @@ export declare class GHCrawlService {
241
243
  private getLatestRunClusterIdsForThread;
242
244
  private reconcileClusterCloseState;
243
245
  private listRawTuiClusters;
246
+ private getRawTuiClusterSummary;
244
247
  private compareTuiClusterSummary;
245
248
  private fetchThreadComments;
246
249
  private requireAi;
@@ -263,11 +266,15 @@ export declare class GHCrawlService {
263
266
  private chunkEmbeddingTasks;
264
267
  private loadStoredEmbeddings;
265
268
  private loadParsedStoredEmbeddings;
269
+ private loadClusterableThreadMeta;
266
270
  private listStoredClusterNeighbors;
267
271
  private getEmbeddingWorkset;
268
272
  private loadCombinedSummaryTextMap;
269
273
  private edgeKey;
270
274
  private aggregateRepositoryEdges;
275
+ private mergeSourceKindEdges;
276
+ private countEmbeddingsForSourceKind;
277
+ private resolveEdgeWorkerUrl;
271
278
  private persistClusterRun;
272
279
  private pruneOldClusterRuns;
273
280
  private upsertSummary;
@@ -1 +1 @@
1
- {"version":3,"file":"service.d.ts","sourceRoot":"","sources":["../src/service.ts"],"names":[],"mappings":"AAIA,OAAO,EAgBL,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,qBAAqB,EAC1B,KAAK,aAAa,EAClB,KAAK,qBAAqB,EAE1B,KAAK,gBAAgB,EACrB,KAAK,wBAAwB,EAC7B,KAAK,gBAAgB,EACrB,KAAK,cAAc,EACnB,KAAK,cAAc,EACnB,KAAK,iBAAiB,EACtB,KAAK,eAAe,EACpB,KAAK,oBAAoB,EACzB,KAAK,aAAa,EAClB,KAAK,YAAY,EACjB,KAAK,UAAU,EACf,KAAK,cAAc,EACnB,KAAK,aAAa,EAClB,KAAK,SAAS,EACd,KAAK,eAAe,EACrB,MAAM,uBAAuB,CAAC;AAG/B,OAAO,EAOL,KAAK,iBAAiB,EACtB,KAAK,cAAc,EACpB,MAAM,aAAa,CAAC;AAErB,OAAO,EAAU,KAAK,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAE7D,OAAO,EAAoB,KAAK,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACzE,OAAO,EAAkB,KAAK,UAAU,EAAE,MAAM,sBAAsB,CAAC;AA8FvE,MAAM,MAAM,kBAAkB,GAAG,QAAQ,GAAG,MAAM,CAAC;AAEnD,MAAM,MAAM,YAAY,GAAG;IACzB,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,0BAA0B,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1C,kBAAkB,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,qBAAqB,EAAE,MAAM,CAAC;IAC9B,qBAAqB,EAAE,MAAM,CAAC;IAC9B,kBAAkB,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,0BAA0B,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3C,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,OAAO,CAAC;IAClB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,CAAC;IACzB,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,sBAAsB,EAAE,MAAM,GAAG,IAAI,CAAC;IACtC,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,kBAAkB,EAAE,OAAO,GAAG,cAAc,GAAG,IAAI,CAAC;IACpD,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG;IAC7B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,OAAO,GAAG,cAAc,CAAC;IAC/B,QAAQ,EAAE,OAAO,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,OAAO,CAAC;IAClB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,CAAC;IACzB,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,sBAAsB,EAAE,MAAM,GAAG,IAAI,CAAC;IACtC,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,kBAAkB,EAAE,OAAO,GAAG,cAAc,GAAG,IAAI,CAAC;IACpD,OAAO,EAAE,gBAAgB,EAAE,CAAC;CAC7B,CAAC;AAEF,MAAM,MAAM,eAAe,GAAG;IAC5B,MAAM,EAAE,SAAS,CAAC;IAClB,SAAS,EAAE,OAAO,CAAC,MAAM,CAAC,iBAAiB,GAAG,kBAAkB,GAAG,2BAA2B,GAAG,gBAAgB,EAAE,MAAM,CAAC,CAAC,CAAC;IAC5H,SAAS,EAAE,YAAY,CAAC,WAAW,CAAC,CAAC;CACtC,CAAC;AAEF,MAAM,MAAM,WAAW,GAAG;IACxB,UAAU,EAAE,aAAa,CAAC;IAC1B,KAAK,EAAE,YAAY,CAAC;IACpB,QAAQ,EAAE,iBAAiB,EAAE,CAAC;CAC/B,CAAC;AAEF,MAAM,MAAM,YAAY,GAAG;IACzB,MAAM,EAAE,cAAc,CAAC;IACvB,MAAM,EAAE;QACN,UAAU,EAAE,OAAO,CAAC;QACpB,MAAM,EAAE,iBAAiB,CAAC;QAC1B,QAAQ,EAAE,OAAO,CAAC;QAClB,MAAM,EAAE,OAAO,CAAC;QAChB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;KACtB,CAAC;IACF,MAAM,EAAE;QACN,UAAU,EAAE,OAAO,CAAC;QACpB,MAAM,EAAE,iBAAiB,CAAC;QAC1B,QAAQ,EAAE,OAAO,CAAC;QAClB,MAAM,EAAE,OAAO,CAAC;QAChB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;KACtB,CAAC;CACH,CAAC;AAEF,KAAK,WAAW,GAAG;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;IACvC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,CAAC;AAEF,KAAK,oBAAoB,GAAG,cAAc,CAAC;AAC3C,KAAK,uBAAuB,GAAG,iBAAiB,CAAC;AAwKjD,qBAAa,cAAc;IACzB,QAAQ,CAAC,MAAM,EAAE,cAAc,CAAC;IAChC,QAAQ,CAAC,EAAE,EAAE,cAAc,CAAC;IAC5B,QAAQ,CAAC,MAAM,CAAC,EAAE,YAAY,CAAC;IAC/B,QAAQ,CAAC,EAAE,CAAC,EAAE,UAAU,CAAC;IACzB,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAiD;gBAE1E,OAAO,GAAE;QACnB,MAAM,CAAC,EAAE,cAAc,CAAC;QACxB,EAAE,CAAC,EAAE,cAAc,CAAC;QACpB,MAAM,CAAC,EAAE,YAAY,CAAC;QACtB,EAAE,CAAC,EAAE,UAAU,CAAC;KACZ;IASN,KAAK,IAAI,IAAI;IAKb,IAAI,IAAI,cAAc;IAehB,MAAM,IAAI,OAAO,CAAC,YAAY,CAAC;IAmDrC,gBAAgB,IAAI,oBAAoB;IAKxC,WAAW,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,OAAO,GAAG,cAAc,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;QAAC,aAAa,CAAC,EAAE,OAAO,CAAA;KAAE,GAAG,eAAe;IAmDnJ,iBAAiB,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,aAAa,CAAC,EAAE,OAAO,CAAA;KAAE,GAAG,qBAAqB;IA4GzH,kBAAkB,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,GAAG,aAAa;IAmChG,mBAAmB,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,GAAG,aAAa;IAiCxF,cAAc,CAClB,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,aAAa,CAAC;IA2InB,mBAAmB,CAAC,MAAM,EAAE;QAChC,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,eAAe,CAAC,EAAE,OAAO,CAAC;QAC1B,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KACxC,GAAG,OAAO,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,CAAC;IAsFlH,aAAa,CAAC,MAAM,EAAE;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KACxC,GAAG;QAAE,cAAc,EAAE,MAAM,CAAC;QAAC,gBAAgB,EAAE,MAAM,CAAA;KAAE;IAoClD,eAAe,CAAC,MAAM,EAAE;QAC5B,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KACxC,GAAG,OAAO,CAAC,cAAc,CAAC;IAoD3B,iBAAiB,CAAC,MAAM,EAAE;QACxB,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,CAAC,CAAC,EAAE,MAAM,CAAC;QACX,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KACxC,GAAG,gBAAgB;IAmDd,gBAAgB,CAAC,MAAM,EAAE;QAC7B,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,CAAC,EAAE,UAAU,CAAC;QAClB,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAmHjC,aAAa,CAAC,MAAM,EAAE;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,YAAY,EAAE,MAAM,CAAC;QACrB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB,GAAG,uBAAuB;IAiD3B,YAAY,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,aAAa,CAAC,EAAE,OAAO,CAAA;KAAE,GAAG,gBAAgB;IAyE1F,iBAAiB,CAAC,MAAM,EAAE;QAC9B,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,CAAC,EAAE,OAAO,CAAC;QACf,KAAK,CAAC,EAAE,OAAO,CAAC;QAChB,OAAO,CAAC,EAAE,OAAO,CAAC;QAClB,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KACxC,GAAG,OAAO,CAAC,eAAe,CAAC;IAkD5B,oBAAoB,CAAC,MAAM,EAAE;QAC3B,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,IAAI,CAAC,EAAE,kBAAkB,CAAC;QAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,aAAa,CAAC,EAAE,OAAO,CAAC;KACzB,GAAG,wBAAwB;IA8B5B,oBAAoB,CAAC,MAAM,EAAE;QAC3B,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,aAAa,CAAC,EAAE,OAAO,CAAC;KACzB,GAAG,qBAAqB;IAuDzB,cAAc,CAAC,MAAM,EAAE;QACrB,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,IAAI,CAAC,EAAE,kBAAkB,CAAC;QAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,qBAAqB,CAAC,EAAE,OAAO,CAAC;KACjC,GAAG,WAAW;IA0Bf,mBAAmB,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,GAAG,gBAAgB;IA+DjG,kBAAkB,CAAC,MAAM,EAAE;QACzB,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,gBAAgB,CAAC,EAAE,OAAO,CAAC;KAC5B,GAAG,eAAe;IA0Eb,WAAW,CAAC,OAAO,EAAE,aAAa,GAAG,OAAO,CAAC,cAAc,CAAC;IAgClE,OAAO,CAAC,kBAAkB;IAmE1B,OAAO,CAAC,oBAAoB;IA4B5B,OAAO,CAAC,eAAe;IA8BvB,OAAO,CAAC,mBAAmB;IAQ3B,OAAO,CAAC,+BAA+B;IAkBvC,OAAO,CAAC,0BAA0B;IAoElC,OAAO,CAAC,kBAAkB;IA+D1B,OAAO,CAAC,wBAAwB;YASlB,mBAAmB;IA4DjC,OAAO,CAAC,SAAS;IAOjB,OAAO,CAAC,aAAa;IAOrB,OAAO,CAAC,iBAAiB;IASzB,OAAO,CAAC,gBAAgB;IAgBxB,OAAO,CAAC,YAAY;YAoEN,uBAAuB;YAmFvB,2BAA2B;IAuGzC,OAAO,CAAC,eAAe;IA0BvB,OAAO,CAAC,eAAe;IAoCvB,OAAO,CAAC,kBAAkB;IAmD1B,OAAO,CAAC,mBAAmB;IAkD3B,OAAO,CAAC,oBAAoB;IAoB5B,OAAO,CAAC,uBAAuB;IAI/B,OAAO,CAAC,uBAAuB;YAKjB,sBAAsB;YAgCtB,2BAA2B;IAiCzC,OAAO,CAAC,mBAAmB;IAmB3B,OAAO,CAAC,mBAAmB;IAwB3B,OAAO,CAAC,oBAAoB;IAc5B,OAAO,CAAC,0BAA0B;IAclC,OAAO,CAAC,0BAA0B;IAwDlC,OAAO,CAAC,mBAAmB;IA+C3B,OAAO,CAAC,0BAA0B;IAyClC,OAAO,CAAC,OAAO;IAMf,OAAO,CAAC,wBAAwB;IAoDhC,OAAO,CAAC,iBAAiB;IAsDzB,OAAO,CAAC,mBAAmB;IAI3B,OAAO,CAAC,aAAa;IAarB,OAAO,CAAC,eAAe;IA2BvB,OAAO,CAAC,QAAQ;IAOhB,OAAO,CAAC,SAAS;CAkBlB;AAED,wBAAgB,eAAe,CAAC,GAAG,EAAE,GAAG,GAAG;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAOzE"}
1
+ {"version":3,"file":"service.d.ts","sourceRoot":"","sources":["../src/service.ts"],"names":[],"mappings":"AAQA,OAAO,EAgBL,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,qBAAqB,EAC1B,KAAK,aAAa,EAClB,KAAK,qBAAqB,EAE1B,KAAK,gBAAgB,EACrB,KAAK,wBAAwB,EAC7B,KAAK,gBAAgB,EACrB,KAAK,cAAc,EACnB,KAAK,cAAc,EACnB,KAAK,iBAAiB,EACtB,KAAK,eAAe,EACpB,KAAK,oBAAoB,EACzB,KAAK,aAAa,EAClB,KAAK,YAAY,EACjB,KAAK,UAAU,EACf,KAAK,cAAc,EACnB,KAAK,aAAa,EAClB,KAAK,SAAS,EACd,KAAK,eAAe,EACrB,MAAM,uBAAuB,CAAC;AAI/B,OAAO,EAOL,KAAK,iBAAiB,EACtB,KAAK,cAAc,EACpB,MAAM,aAAa,CAAC;AAErB,OAAO,EAAU,KAAK,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAE7D,OAAO,EAAoB,KAAK,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACzE,OAAO,EAAkB,KAAK,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAgGvE,MAAM,MAAM,kBAAkB,GAAG,QAAQ,GAAG,MAAM,CAAC;AAEnD,MAAM,MAAM,YAAY,GAAG;IACzB,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,0BAA0B,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1C,kBAAkB,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,qBAAqB,EAAE,MAAM,CAAC;IAC9B,qBAAqB,EAAE,MAAM,CAAC;IAC9B,kBAAkB,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,0BAA0B,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3C,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,OAAO,CAAC;IAClB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,CAAC;IACzB,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,sBAAsB,EAAE,MAAM,GAAG,IAAI,CAAC;IACtC,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,kBAAkB,EAAE,OAAO,GAAG,cAAc,GAAG,IAAI,CAAC;IACpD,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG;IAC7B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,OAAO,GAAG,cAAc,CAAC;IAC/B,QAAQ,EAAE,OAAO,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,OAAO,CAAC;IAClB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,CAAC;IACzB,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,sBAAsB,EAAE,MAAM,GAAG,IAAI,CAAC;IACtC,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,kBAAkB,EAAE,OAAO,GAAG,cAAc,GAAG,IAAI,CAAC;IACpD,OAAO,EAAE,gBAAgB,EAAE,CAAC;CAC7B,CAAC;AAEF,MAAM,MAAM,eAAe,GAAG;IAC5B,MAAM,EAAE,SAAS,CAAC;IAClB,SAAS,EAAE,OAAO,CAAC,MAAM,CAAC,iBAAiB,GAAG,kBAAkB,GAAG,2BAA2B,GAAG,gBAAgB,EAAE,MAAM,CAAC,CAAC,CAAC;IAC5H,SAAS,EAAE,YAAY,CAAC,WAAW,CAAC,CAAC;CACtC,CAAC;AAEF,MAAM,MAAM,WAAW,GAAG;IACxB,UAAU,EAAE,aAAa,CAAC;IAC1B,KAAK,EAAE,YAAY,CAAC;IACpB,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,QAAQ,EAAE,iBAAiB,EAAE,CAAC;CAC/B,CAAC;AAEF,MAAM,MAAM,YAAY,GAAG;IACzB,MAAM,EAAE,cAAc,CAAC;IACvB,MAAM,EAAE;QACN,UAAU,EAAE,OAAO,CAAC;QACpB,MAAM,EAAE,iBAAiB,CAAC;QAC1B,QAAQ,EAAE,OAAO,CAAC;QAClB,MAAM,EAAE,OAAO,CAAC;QAChB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;KACtB,CAAC;IACF,MAAM,EAAE;QACN,UAAU,EAAE,OAAO,CAAC;QACpB,MAAM,EAAE,iBAAiB,CAAC;QAC1B,QAAQ,EAAE,OAAO,CAAC;QAClB,MAAM,EAAE,OAAO,CAAC;QAChB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;KACtB,CAAC;CACH,CAAC;AAEF,KAAK,WAAW,GAAG;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;IACvC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,CAAC;AAEF,KAAK,oBAAoB,GAAG,cAAc,CAAC;AAC3C,KAAK,uBAAuB,GAAG,iBAAiB,CAAC;AAyKjD,qBAAa,cAAc;IACzB,QAAQ,CAAC,MAAM,EAAE,cAAc,CAAC;IAChC,QAAQ,CAAC,EAAE,EAAE,cAAc,CAAC;IAC5B,QAAQ,CAAC,MAAM,CAAC,EAAE,YAAY,CAAC;IAC/B,QAAQ,CAAC,EAAE,CAAC,EAAE,UAAU,CAAC;IACzB,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAiD;gBAE1E,OAAO,GAAE;QACnB,MAAM,CAAC,EAAE,cAAc,CAAC;QACxB,EAAE,CAAC,EAAE,cAAc,CAAC;QACpB,MAAM,CAAC,EAAE,YAAY,CAAC;QACtB,EAAE,CAAC,EAAE,UAAU,CAAC;KACZ;IASN,KAAK,IAAI,IAAI;IAKb,IAAI,IAAI,cAAc;IAehB,MAAM,IAAI,OAAO,CAAC,YAAY,CAAC;IAmDrC,gBAAgB,IAAI,oBAAoB;IAKxC,WAAW,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,OAAO,GAAG,cAAc,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;QAAC,aAAa,CAAC,EAAE,OAAO,CAAA;KAAE,GAAG,eAAe;IAmDnJ,iBAAiB,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,aAAa,CAAC,EAAE,OAAO,CAAA;KAAE,GAAG,qBAAqB;IA4GzH,kBAAkB,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,GAAG,aAAa;IAmChG,mBAAmB,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,GAAG,aAAa;IAiCxF,cAAc,CAClB,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,aAAa,CAAC;IA2InB,mBAAmB,CAAC,MAAM,EAAE;QAChC,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,eAAe,CAAC,EAAE,OAAO,CAAC;QAC1B,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KACxC,GAAG,OAAO,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,CAAC;IAsFlH,aAAa,CAAC,MAAM,EAAE;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KACxC,GAAG;QAAE,cAAc,EAAE,MAAM,CAAC;QAAC,gBAAgB,EAAE,MAAM,CAAA;KAAE;IAoClD,eAAe,CAAC,MAAM,EAAE;QAC5B,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KACxC,GAAG,OAAO,CAAC,cAAc,CAAC;IAoDrB,iBAAiB,CAAC,MAAM,EAAE;QAC9B,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,CAAC,CAAC,EAAE,MAAM,CAAC;QACX,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KACxC,GAAG,OAAO,CAAC,gBAAgB,CAAC;IA0CvB,gBAAgB,CAAC,MAAM,EAAE;QAC7B,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,CAAC,EAAE,UAAU,CAAC;QAClB,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAmHjC,aAAa,CAAC,MAAM,EAAE;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,YAAY,EAAE,MAAM,CAAC;QACrB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB,GAAG,uBAAuB;IAiD3B,YAAY,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,aAAa,CAAC,EAAE,OAAO,CAAA;KAAE,GAAG,gBAAgB;IAyE1F,iBAAiB,CAAC,MAAM,EAAE;QAC9B,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,CAAC,EAAE,OAAO,CAAC;QACf,KAAK,CAAC,EAAE,OAAO,CAAC;QAChB,OAAO,CAAC,EAAE,OAAO,CAAC;QAClB,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KACxC,GAAG,OAAO,CAAC,eAAe,CAAC;IAkD5B,oBAAoB,CAAC,MAAM,EAAE;QAC3B,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,IAAI,CAAC,EAAE,kBAAkB,CAAC;QAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,aAAa,CAAC,EAAE,OAAO,CAAC;KACzB,GAAG,wBAAwB;IA8B5B,oBAAoB,CAAC,MAAM,EAAE;QAC3B,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,aAAa,CAAC,EAAE,OAAO,CAAC;KACzB,GAAG,qBAAqB;IAwDzB,cAAc,CAAC,MAAM,EAAE;QACrB,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,IAAI,CAAC,EAAE,kBAAkB,CAAC;QAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,qBAAqB,CAAC,EAAE,OAAO,CAAC;KACjC,GAAG,WAAW;IA2Bf,mBAAmB,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,gBAAgB;IAiExH,kBAAkB,CAAC,MAAM,EAAE;QACzB,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,gBAAgB,CAAC,EAAE,OAAO,CAAC;KAC5B,GAAG,eAAe;IA0Eb,WAAW,CAAC,OAAO,EAAE,aAAa,GAAG,OAAO,CAAC,cAAc,CAAC;IAgClE,OAAO,CAAC,kBAAkB;IAmE1B,OAAO,CAAC,oBAAoB;IA4B5B,OAAO,CAAC,eAAe;IA8BvB,OAAO,CAAC,mBAAmB;IAQ3B,OAAO,CAAC,+BAA+B;IAkBvC,OAAO,CAAC,0BAA0B;IAoElC,OAAO,CAAC,kBAAkB;IAiE1B,OAAO,CAAC,uBAAuB;IAuE/B,OAAO,CAAC,wBAAwB;YASlB,mBAAmB;IA4DjC,OAAO,CAAC,SAAS;IAOjB,OAAO,CAAC,aAAa;IAOrB,OAAO,CAAC,iBAAiB;IASzB,OAAO,CAAC,gBAAgB;IAgBxB,OAAO,CAAC,YAAY;YAoEN,uBAAuB;YAmFvB,2BAA2B;IAuGzC,OAAO,CAAC,eAAe;IA0BvB,OAAO,CAAC,eAAe;IAoCvB,OAAO,CAAC,kBAAkB;IAmD1B,OAAO,CAAC,mBAAmB;IAkD3B,OAAO,CAAC,oBAAoB;IAoB5B,OAAO,CAAC,uBAAuB;IAI/B,OAAO,CAAC,uBAAuB;YAKjB,sBAAsB;YAgCtB,2BAA2B;IAiCzC,OAAO,CAAC,mBAAmB;IAmB3B,OAAO,CAAC,mBAAmB;IAwB3B,OAAO,CAAC,oBAAoB;IAc5B,OAAO,CAAC,0BAA0B;IAoBlC,OAAO,CAAC,yBAAyB;IA4BjC,OAAO,CAAC,0BAA0B;IAwDlC,OAAO,CAAC,mBAAmB;IA+C3B,OAAO,CAAC,0BAA0B;IAyClC,OAAO,CAAC,OAAO;YAMD,wBAAwB;IA4GtC,OAAO,CAAC,oBAAoB;IAsB5B,OAAO,CAAC,4BAA4B;IAepC,OAAO,CAAC,oBAAoB;IAQ5B,OAAO,CAAC,iBAAiB;IAsDzB,OAAO,CAAC,mBAAmB;IAI3B,OAAO,CAAC,aAAa;IAarB,OAAO,CAAC,eAAe;IA2BvB,OAAO,CAAC,QAAQ;IAOhB,OAAO,CAAC,SAAS;CAkBlB;AAED,wBAAgB,eAAe,CAAC,GAAG,EAAE,GAAG,GAAG;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAOzE"}
package/dist/service.js CHANGED
@@ -1,19 +1,25 @@
1
1
  import http from 'node:http';
2
2
  import crypto from 'node:crypto';
3
+ import { existsSync } from 'node:fs';
4
+ import os from 'node:os';
5
+ import { fileURLToPath } from 'node:url';
6
+ import { Worker } from 'node:worker_threads';
3
7
  import { IterableMapper } from '@shutterstock/p-map-iterable';
4
8
  import { actionResponseSchema, authorThreadsResponseSchema, closeResponseSchema, clusterDetailResponseSchema, clusterResultSchema, clusterSummariesResponseSchema, clustersResponseSchema, embedResultSchema, healthResponseSchema, neighborsResponseSchema, refreshResponseSchema, repositoriesResponseSchema, searchResponseSchema, syncResultSchema, threadsResponseSchema, } from '@ghcrawl/api-contract';
5
9
  import { buildClusters } from './cluster/build.js';
10
+ import { buildSourceKindEdges } from './cluster/exact-edges.js';
6
11
  import { ensureRuntimeDirs, isLikelyGitHubToken, isLikelyOpenAiApiKey, loadConfig, requireGithubToken, requireOpenAiKey, } from './config.js';
7
12
  import { migrate } from './db/migrate.js';
8
13
  import { openDb } from './db/sqlite.js';
9
14
  import { buildCanonicalDocument, isBotLikeAuthor } from './documents/normalize.js';
10
15
  import { makeGitHubClient } from './github/client.js';
11
16
  import { OpenAiProvider } from './openai/provider.js';
12
- import { cosineSimilarity, rankNearestNeighbors } from './search/exact.js';
17
+ import { cosineSimilarity, normalizeEmbedding, rankNearestNeighbors } from './search/exact.js';
13
18
  const SYNC_BATCH_SIZE = 100;
14
19
  const SYNC_BATCH_DELAY_MS = 5000;
15
20
  const STALE_CLOSED_SWEEP_LIMIT = 1000;
16
21
  const CLUSTER_PROGRESS_INTERVAL_MS = 5000;
22
+ const CLUSTER_PARALLEL_MIN_EMBEDDINGS = 5000;
17
23
  const EMBED_ESTIMATED_CHARS_PER_TOKEN = 3;
18
24
  const EMBED_MAX_ITEM_TOKENS = 7000;
19
25
  const EMBED_MAX_BATCH_TOKENS = 250000;
@@ -697,24 +703,15 @@ export class GHCrawlService {
697
703
  throw error;
698
704
  }
699
705
  }
700
- clusterRepository(params) {
706
+ async clusterRepository(params) {
701
707
  const repository = this.requireRepository(params.owner, params.repo);
702
708
  const runId = this.startRun('cluster_runs', repository.id, repository.fullName);
703
709
  const minScore = params.minScore ?? 0.82;
704
710
  const k = params.k ?? 6;
705
711
  try {
706
- const rows = this.loadParsedStoredEmbeddings(repository.id);
707
- const threadMeta = new Map();
708
- for (const row of rows) {
709
- threadMeta.set(row.id, { number: row.number, title: row.title });
710
- }
711
- const items = Array.from(threadMeta.entries()).map(([id, meta]) => ({
712
- id,
713
- number: meta.number,
714
- title: meta.title,
715
- }));
716
- params.onProgress?.(`[cluster] loaded ${items.length} embedded thread(s) across ${new Set(rows.map((row) => row.source_kind)).size} source kind(s) for ${repository.fullName} k=${k} minScore=${minScore}`);
717
- const aggregatedEdges = this.aggregateRepositoryEdges(rows, {
712
+ const { items, sourceKinds } = this.loadClusterableThreadMeta(repository.id);
713
+ params.onProgress?.(`[cluster] loaded ${items.length} embedded thread(s) across ${sourceKinds.length} source kind(s) for ${repository.fullName} k=${k} minScore=${minScore}`);
714
+ const aggregatedEdges = await this.aggregateRepositoryEdges(repository.id, sourceKinds, {
718
715
  limit: k,
719
716
  minScore,
720
717
  onProgress: params.onProgress,
@@ -954,7 +951,7 @@ export class GHCrawlService {
954
951
  });
955
952
  }
956
953
  if (selected.cluster) {
957
- cluster = this.clusterRepository({
954
+ cluster = await this.clusterRepository({
958
955
  owner: params.owner,
959
956
  repo: params.repo,
960
957
  onProgress: params.onProgress,
@@ -1013,6 +1010,7 @@ export class GHCrawlService {
1013
1010
  owner: params.owner,
1014
1011
  repo: params.repo,
1015
1012
  clusterId: params.clusterId,
1013
+ clusterRunId: snapshot.clusterRunId ?? undefined,
1016
1014
  });
1017
1015
  const members = detail.members.slice(0, params.memberLimit ?? detail.members.length).map((member) => {
1018
1016
  const threadDetail = this.getTuiThreadDetail({
@@ -1055,7 +1053,7 @@ export class GHCrawlService {
1055
1053
  const stats = this.getTuiRepoStats(repository.id);
1056
1054
  const latestRun = this.getLatestClusterRun(repository.id);
1057
1055
  if (!latestRun) {
1058
- return { repository, stats, clusters: [] };
1056
+ return { repository, stats, clusterRunId: null, clusters: [] };
1059
1057
  }
1060
1058
  const includeClosedClusters = params.includeClosedClusters ?? true;
1061
1059
  const clusters = this.listRawTuiClusters(repository.id, latestRun.id)
@@ -1071,16 +1069,18 @@ export class GHCrawlService {
1071
1069
  return {
1072
1070
  repository,
1073
1071
  stats,
1072
+ clusterRunId: latestRun.id,
1074
1073
  clusters,
1075
1074
  };
1076
1075
  }
1077
1076
  getTuiClusterDetail(params) {
1078
1077
  const repository = this.requireRepository(params.owner, params.repo);
1079
- const latestRun = this.getLatestClusterRun(repository.id);
1080
- if (!latestRun) {
1078
+ const clusterRunId = params.clusterRunId ??
1079
+ (this.getLatestClusterRun(repository.id)?.id ?? null);
1080
+ if (!clusterRunId) {
1081
1081
  throw new Error(`No completed cluster run found for ${repository.fullName}. Run cluster first.`);
1082
1082
  }
1083
- const summary = this.listRawTuiClusters(repository.id, latestRun.id).find((cluster) => cluster.clusterId === params.clusterId);
1083
+ const summary = this.getRawTuiClusterSummary(repository.id, clusterRunId, params.clusterId);
1084
1084
  if (!summary) {
1085
1085
  throw new Error(`Cluster ${params.clusterId} was not found for ${repository.fullName}.`);
1086
1086
  }
@@ -1204,7 +1204,7 @@ export class GHCrawlService {
1204
1204
  });
1205
1205
  }
1206
1206
  case 'cluster': {
1207
- const result = this.clusterRepository(request);
1207
+ const result = await this.clusterRepository(request);
1208
1208
  return actionResponseSchema.parse({
1209
1209
  ok: true,
1210
1210
  action: request.action,
@@ -1400,6 +1400,8 @@ export class GHCrawlService {
1400
1400
  group by
1401
1401
  c.id,
1402
1402
  c.member_count,
1403
+ c.closed_at_local,
1404
+ c.close_reason_local,
1403
1405
  c.representative_thread_id,
1404
1406
  rt.number,
1405
1407
  rt.kind,
@@ -1421,6 +1423,56 @@ export class GHCrawlService {
1421
1423
  searchText: `${(row.representative_title ?? '').toLowerCase()} ${row.search_text ?? ''}`.trim(),
1422
1424
  }));
1423
1425
  }
1426
+ getRawTuiClusterSummary(repoId, clusterRunId, clusterId) {
1427
+ const row = this.db
1428
+ .prepare(`select
1429
+ c.id as cluster_id,
1430
+ c.member_count,
1431
+ c.closed_at_local,
1432
+ c.close_reason_local,
1433
+ c.representative_thread_id,
1434
+ rt.number as representative_number,
1435
+ rt.kind as representative_kind,
1436
+ rt.title as representative_title,
1437
+ max(coalesce(t.updated_at_gh, t.updated_at)) as latest_updated_at,
1438
+ sum(case when t.kind = 'issue' then 1 else 0 end) as issue_count,
1439
+ sum(case when t.kind = 'pull_request' then 1 else 0 end) as pull_request_count,
1440
+ sum(case when t.state != 'open' or t.closed_at_local is not null then 1 else 0 end) as closed_member_count,
1441
+ group_concat(lower(coalesce(t.title, '')), ' ') as search_text
1442
+ from clusters c
1443
+ left join threads rt on rt.id = c.representative_thread_id
1444
+ join cluster_members cm on cm.cluster_id = c.id
1445
+ join threads t on t.id = cm.thread_id
1446
+ where c.repo_id = ? and c.cluster_run_id = ? and c.id = ?
1447
+ group by
1448
+ c.id,
1449
+ c.member_count,
1450
+ c.closed_at_local,
1451
+ c.close_reason_local,
1452
+ c.representative_thread_id,
1453
+ rt.number,
1454
+ rt.kind,
1455
+ rt.title`)
1456
+ .get(repoId, clusterRunId, clusterId);
1457
+ if (!row) {
1458
+ return null;
1459
+ }
1460
+ return {
1461
+ clusterId: row.cluster_id,
1462
+ displayTitle: row.representative_title ?? `Cluster ${row.cluster_id}`,
1463
+ isClosed: row.close_reason_local !== null || row.closed_member_count >= row.member_count,
1464
+ closedAtLocal: row.closed_at_local,
1465
+ closeReasonLocal: row.close_reason_local,
1466
+ totalCount: row.member_count,
1467
+ issueCount: row.issue_count,
1468
+ pullRequestCount: row.pull_request_count,
1469
+ latestUpdatedAt: row.latest_updated_at,
1470
+ representativeThreadId: row.representative_thread_id,
1471
+ representativeNumber: row.representative_number,
1472
+ representativeKind: row.representative_kind,
1473
+ searchText: `${(row.representative_title ?? '').toLowerCase()} ${row.search_text ?? ''}`.trim(),
1474
+ };
1475
+ }
1424
1476
  compareTuiClusterSummary(left, right, sort) {
1425
1477
  const leftTime = left.latestUpdatedAt ? Date.parse(left.latestUpdatedAt) : 0;
1426
1478
  const rightTime = right.latestUpdatedAt ? Date.parse(right.latestUpdatedAt) : 0;
@@ -1895,13 +1947,39 @@ export class GHCrawlService {
1895
1947
  if (cached) {
1896
1948
  return cached;
1897
1949
  }
1898
- const parsed = this.loadStoredEmbeddings(repoId).map((row) => ({
1899
- ...row,
1900
- embedding: JSON.parse(row.embedding_json),
1901
- }));
1950
+ const parsed = this.loadStoredEmbeddings(repoId).map((row) => {
1951
+ const embedding = JSON.parse(row.embedding_json);
1952
+ const normalized = normalizeEmbedding(embedding);
1953
+ return {
1954
+ ...row,
1955
+ embedding,
1956
+ normalizedEmbedding: normalized.normalized,
1957
+ embeddingNorm: normalized.norm,
1958
+ };
1959
+ });
1902
1960
  this.parsedEmbeddingCache.set(repoId, parsed);
1903
1961
  return parsed;
1904
1962
  }
1963
+ loadClusterableThreadMeta(repoId) {
1964
+ const rows = this.db
1965
+ .prepare(`select t.id, t.number, t.title, e.source_kind
1966
+ from threads t
1967
+ join document_embeddings e on e.thread_id = t.id
1968
+ where t.repo_id = ?
1969
+ and t.state = 'open'
1970
+ and t.closed_at_local is null`)
1971
+ .all(repoId);
1972
+ const itemsById = new Map();
1973
+ const sourceKinds = new Set();
1974
+ for (const row of rows) {
1975
+ itemsById.set(row.id, { id: row.id, number: row.number, title: row.title });
1976
+ sourceKinds.add(row.source_kind);
1977
+ }
1978
+ return {
1979
+ items: Array.from(itemsById.values()),
1980
+ sourceKinds: Array.from(sourceKinds.values()),
1981
+ };
1982
+ }
1905
1983
  listStoredClusterNeighbors(repoId, threadId, limit) {
1906
1984
  const latestRun = this.getLatestClusterRun(repoId);
1907
1985
  if (!latestRun) {
@@ -2017,50 +2095,119 @@ export class GHCrawlService {
2017
2095
  const right = Math.max(leftThreadId, rightThreadId);
2018
2096
  return `${left}:${right}`;
2019
2097
  }
2020
- aggregateRepositoryEdges(rows, params) {
2021
- const bySource = new Map();
2022
- for (const row of rows) {
2023
- const list = bySource.get(row.source_kind) ?? [];
2024
- list.push({ id: row.id, embedding: row.embedding });
2025
- bySource.set(row.source_kind, list);
2026
- }
2098
+ async aggregateRepositoryEdges(repoId, sourceKinds, params) {
2027
2099
  const aggregated = new Map();
2028
- const totalItems = Array.from(bySource.values()).reduce((sum, items) => sum + items.length, 0);
2029
- let processedItems = 0;
2030
- let lastProgressAt = Date.now();
2031
- for (const [sourceKind, items] of bySource.entries()) {
2032
- for (const item of items) {
2033
- const neighbors = rankNearestNeighbors(items, {
2034
- targetEmbedding: item.embedding,
2100
+ const totalItems = sourceKinds.reduce((sum, sourceKind) => sum + this.countEmbeddingsForSourceKind(repoId, sourceKind), 0);
2101
+ if (sourceKinds.length === 0 || totalItems === 0) {
2102
+ return aggregated;
2103
+ }
2104
+ const shouldParallelize = sourceKinds.length > 1 && totalItems >= CLUSTER_PARALLEL_MIN_EMBEDDINGS && os.availableParallelism() > 1;
2105
+ if (!shouldParallelize) {
2106
+ const rows = this.loadParsedStoredEmbeddings(repoId);
2107
+ const bySource = new Map();
2108
+ for (const row of rows) {
2109
+ const list = bySource.get(row.source_kind) ?? [];
2110
+ list.push({ id: row.id, normalizedEmbedding: row.normalizedEmbedding });
2111
+ bySource.set(row.source_kind, list);
2112
+ }
2113
+ let processedItems = 0;
2114
+ for (const sourceKind of sourceKinds) {
2115
+ const items = bySource.get(sourceKind) ?? [];
2116
+ const edges = buildSourceKindEdges(items, {
2035
2117
  limit: params.limit,
2036
2118
  minScore: params.minScore,
2037
- skipId: item.id,
2119
+ progressIntervalMs: CLUSTER_PROGRESS_INTERVAL_MS,
2120
+ onProgress: (progress) => {
2121
+ if (!params.onProgress)
2122
+ return;
2123
+ params.onProgress(`[cluster] identifying similarity edges ${processedItems + progress.processedItems}/${totalItems} source embeddings processed current_edges~=${aggregated.size + progress.currentEdgeEstimate}`);
2124
+ },
2038
2125
  });
2039
- for (const neighbor of neighbors) {
2040
- const key = this.edgeKey(item.id, neighbor.item.id);
2041
- const existing = aggregated.get(key);
2042
- if (existing) {
2043
- existing.score = Math.max(existing.score, neighbor.score);
2044
- existing.sourceKinds.add(sourceKind);
2045
- continue;
2046
- }
2047
- aggregated.set(key, {
2048
- leftThreadId: Math.min(item.id, neighbor.item.id),
2049
- rightThreadId: Math.max(item.id, neighbor.item.id),
2050
- score: neighbor.score,
2051
- sourceKinds: new Set([sourceKind]),
2126
+ processedItems += items.length;
2127
+ this.mergeSourceKindEdges(aggregated, edges, sourceKind);
2128
+ }
2129
+ return aggregated;
2130
+ }
2131
+ const workerUrl = this.resolveEdgeWorkerUrl();
2132
+ const progressBySource = new Map();
2133
+ const edgeSets = await Promise.all(sourceKinds.map((sourceKind) => new Promise((resolve, reject) => {
2134
+ const worker = new Worker(workerUrl, {
2135
+ workerData: {
2136
+ dbPath: this.config.dbPath,
2137
+ repoId,
2138
+ sourceKind,
2139
+ limit: params.limit,
2140
+ minScore: params.minScore,
2141
+ },
2142
+ });
2143
+ worker.on('message', (message) => {
2144
+ if (!message || typeof message !== 'object') {
2145
+ return;
2146
+ }
2147
+ const typed = message;
2148
+ if (typed.type === 'progress') {
2149
+ progressBySource.set(typed.sourceKind, {
2150
+ processedItems: typed.processedItems,
2151
+ totalItems: typed.totalItems,
2152
+ currentEdgeEstimate: typed.currentEdgeEstimate,
2052
2153
  });
2154
+ if (params.onProgress) {
2155
+ const processedItems = Array.from(progressBySource.values()).reduce((sum, value) => sum + value.processedItems, 0);
2156
+ const currentEdgeEstimate = Array.from(progressBySource.values()).reduce((sum, value) => sum + value.currentEdgeEstimate, 0);
2157
+ params.onProgress(`[cluster] identifying similarity edges ${processedItems}/${totalItems} source embeddings processed current_edges~=${aggregated.size + currentEdgeEstimate}`);
2158
+ }
2159
+ return;
2053
2160
  }
2054
- processedItems += 1;
2055
- const now = Date.now();
2056
- if (params.onProgress && now - lastProgressAt >= CLUSTER_PROGRESS_INTERVAL_MS) {
2057
- params.onProgress(`[cluster] identifying similarity edges ${processedItems}/${totalItems} source embeddings processed current_edges=${aggregated.size}`);
2058
- lastProgressAt = now;
2161
+ resolve(typed.edges);
2162
+ });
2163
+ worker.on('error', reject);
2164
+ worker.on('exit', (code) => {
2165
+ if (code !== 0) {
2166
+ reject(new Error(`edge worker for ${sourceKind} exited with code ${code}`));
2059
2167
  }
2060
- }
2168
+ });
2169
+ })));
2170
+ for (const [index, edges] of edgeSets.entries()) {
2171
+ this.mergeSourceKindEdges(aggregated, edges, sourceKinds[index]);
2061
2172
  }
2062
2173
  return aggregated;
2063
2174
  }
2175
+ mergeSourceKindEdges(aggregated, edges, sourceKind) {
2176
+ for (const edge of edges) {
2177
+ const key = this.edgeKey(edge.leftThreadId, edge.rightThreadId);
2178
+ const existing = aggregated.get(key);
2179
+ if (existing) {
2180
+ existing.score = Math.max(existing.score, edge.score);
2181
+ existing.sourceKinds.add(sourceKind);
2182
+ continue;
2183
+ }
2184
+ aggregated.set(key, {
2185
+ leftThreadId: edge.leftThreadId,
2186
+ rightThreadId: edge.rightThreadId,
2187
+ score: edge.score,
2188
+ sourceKinds: new Set([sourceKind]),
2189
+ });
2190
+ }
2191
+ }
2192
+ countEmbeddingsForSourceKind(repoId, sourceKind) {
2193
+ const row = this.db
2194
+ .prepare(`select count(*) as count
2195
+ from document_embeddings e
2196
+ join threads t on t.id = e.thread_id
2197
+ where t.repo_id = ?
2198
+ and t.state = 'open'
2199
+ and t.closed_at_local is null
2200
+ and e.source_kind = ?`)
2201
+ .get(repoId, sourceKind);
2202
+ return row.count;
2203
+ }
2204
+ resolveEdgeWorkerUrl() {
2205
+ const jsUrl = new URL('./cluster/edge-worker.js', import.meta.url);
2206
+ if (existsSync(fileURLToPath(jsUrl))) {
2207
+ return jsUrl;
2208
+ }
2209
+ return new URL('./cluster/edge-worker.ts', import.meta.url);
2210
+ }
2064
2211
  persistClusterRun(repoId, runId, aggregatedEdges, clusters) {
2065
2212
  const insertEdge = this.db.prepare(`insert into similarity_edges (repo_id, cluster_run_id, left_thread_id, right_thread_id, method, score, explanation_json, created_at)
2066
2213
  values (?, ?, ?, ?, ?, ?, ?, ?)`);