@neuralsea/workspace-indexer 0.1.0 → 0.3.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,3 +1,118 @@
1
+ type SymbolRange = {
2
+ startLine: number;
3
+ startCharacter: number;
4
+ endLine: number;
5
+ endCharacter: number;
6
+ };
7
+ type SymbolKind = "file" | "module" | "namespace" | "package" | "class" | "interface" | "enum" | "type" | "function" | "method" | "property" | "field" | "variable" | "constant" | "constructor" | "parameter" | "unknown";
8
+ type SymbolEdgeKind = "contains" | "definition" | "reference" | "implementation" | "typeDefinition";
9
+ type GraphSymbol = {
10
+ /**
11
+ * Stable symbol identifier.
12
+ * If you don't have one, you may return an empty string and the consumer can derive one.
13
+ */
14
+ id: string;
15
+ repoRoot: string;
16
+ path: string;
17
+ language: string;
18
+ name: string;
19
+ kind: SymbolKind;
20
+ range: SymbolRange;
21
+ containerName?: string;
22
+ detail?: string;
23
+ };
24
+ type GraphEdge = {
25
+ repoRoot: string;
26
+ fromId: string;
27
+ toId: string;
28
+ kind: SymbolEdgeKind;
29
+ /** Optional repo-relative path hints for persistence/querying. */
30
+ fromPath?: string;
31
+ toPath?: string;
32
+ };
33
+ type SymbolGraphIndexInput = {
34
+ repoRoot: string;
35
+ path: string;
36
+ language: string;
37
+ text: string;
38
+ /** Optional caller-provided content hash for caching. */
39
+ contentHash?: string;
40
+ };
41
+ type SymbolGraphIndexOutput = {
42
+ symbols: GraphSymbol[];
43
+ edges: GraphEdge[];
44
+ };
45
+ /**
46
+ * Optional provider for "full fidelity" symbol graphs.
47
+ *
48
+ * In a VS Code extension, an implementation can call VS Code providers (often LSP-backed)
49
+ * and return a normalized set of symbols + edges.
50
+ */
51
+ interface SymbolGraphProvider {
52
+ id: string;
53
+ supports(language: string): boolean;
54
+ indexDocument(input: SymbolGraphIndexInput): Promise<SymbolGraphIndexOutput>;
55
+ /**
56
+ * Optional staged/on-demand expansion: compute cross-file edges for a document.
57
+ * Implementations should be idempotent and safe to call repeatedly.
58
+ */
59
+ expandDocumentEdges?(input: SymbolGraphIndexInput, opts?: {
60
+ signal?: AbortSignal;
61
+ }): Promise<GraphEdge[]>;
62
+ }
63
+
64
+ type AnnMetric = "cosine" | "ip" | "l2";
65
+ type AnnPoint = {
66
+ id: string;
67
+ vector: Float32Array;
68
+ };
69
+ type AnnSearchHit = {
70
+ id: string;
71
+ score: number;
72
+ };
73
+ type AnnIndexInit = {
74
+ repoId: string;
75
+ repoRoot: string;
76
+ commit: string;
77
+ branch: string;
78
+ cacheDir: string;
79
+ dimension: number;
80
+ metric: AnnMetric;
81
+ /**
82
+ * Identifier for the embedding model/provider and settings that produced vectors.
83
+ * Changing this should force rebuild/retrain.
84
+ */
85
+ embedderId: string;
86
+ };
87
+ /**
88
+ * Interface for an optional quantized ANN layer (e.g. FAISS IVF+PQ/OPQ).
89
+ *
90
+ * This is intentionally separate from VectorIndex:
91
+ * - VectorIndex can remain exact/float32 (or remote DB),
92
+ * - ANN can be an additional candidate generator for scale.
93
+ */
94
+ interface AnnIndex {
95
+ readonly id: string;
96
+ readonly kind: string;
97
+ init(init: AnnIndexInit): Promise<void>;
98
+ upsert(points: AnnPoint[]): Promise<void>;
99
+ remove(ids: string[]): Promise<void>;
100
+ search(query: Float32Array, k: number): Promise<AnnSearchHit[]>;
101
+ flush(): Promise<void>;
102
+ close(): Promise<void>;
103
+ }
104
+ type AnnConfig = {
105
+ provider?: "noop";
106
+ } | {
107
+ provider: "faiss-pq";
108
+ faiss?: {
109
+ nlist?: number;
110
+ m?: number;
111
+ nbits?: number;
112
+ opq?: boolean;
113
+ };
114
+ };
115
+
1
116
  type RepoId = string;
2
117
  type RetrievalProfileName = "search" | "refactor" | "review" | "architecture" | "rca" | "custom";
3
118
  interface RetrievalWeights {
@@ -100,10 +215,168 @@ interface VectorConfig {
100
215
  faiss?: FaissVectorConfig;
101
216
  custom?: CustomVectorConfig;
102
217
  }
218
+ type IndexerProgressEvent = {
219
+ type: "workspace/open";
220
+ workspaceRoot: string;
221
+ repoRoots: string[];
222
+ } | {
223
+ type: "workspace/index/start";
224
+ workspaceRoot: string;
225
+ repoCount: number;
226
+ } | {
227
+ type: "workspace/index/done";
228
+ workspaceRoot: string;
229
+ repoCount: number;
230
+ ms: number;
231
+ } | {
232
+ type: "workspace/watch/start";
233
+ workspaceRoot: string;
234
+ repoCount: number;
235
+ } | {
236
+ type: "workspace/watch/done";
237
+ workspaceRoot: string;
238
+ repoCount: number;
239
+ ms: number;
240
+ } | {
241
+ type: "workspace/retrieve/start";
242
+ workspaceRoot: string;
243
+ profile: string;
244
+ query: string;
245
+ } | {
246
+ type: "workspace/retrieve/done";
247
+ workspaceRoot: string;
248
+ profile: string;
249
+ ms: number;
250
+ hits: number;
251
+ candidates: {
252
+ vector: number;
253
+ lexical: number;
254
+ merged: number;
255
+ };
256
+ } | {
257
+ type: "repo/open";
258
+ repoRoot: string;
259
+ repoId: string;
260
+ commit: string;
261
+ branch: string;
262
+ } | {
263
+ type: "repo/index/start";
264
+ repoRoot: string;
265
+ fileCount: number;
266
+ } | {
267
+ type: "repo/index/file/start";
268
+ repoRoot: string;
269
+ path: string;
270
+ } | {
271
+ type: "repo/index/file/skip";
272
+ repoRoot: string;
273
+ path: string;
274
+ reason: string;
275
+ } | {
276
+ type: "repo/index/file/done";
277
+ repoRoot: string;
278
+ path: string;
279
+ chunkCount: number;
280
+ ms: number;
281
+ } | {
282
+ type: "repo/index/embed/batch";
283
+ repoRoot: string;
284
+ batchSize: number;
285
+ batchIndex: number;
286
+ batchesTotal: number;
287
+ } | {
288
+ type: "repo/index/vector/upsert";
289
+ repoRoot: string;
290
+ points: number;
291
+ } | {
292
+ type: "repo/index/vector/remove";
293
+ repoRoot: string;
294
+ ids: number;
295
+ } | {
296
+ type: "repo/vector/rebuild/start";
297
+ repoRoot: string;
298
+ kind: string;
299
+ points: number;
300
+ } | {
301
+ type: "repo/vector/rebuild/done";
302
+ repoRoot: string;
303
+ kind: string;
304
+ ms: number;
305
+ } | {
306
+ type: "repo/vector/flush";
307
+ repoRoot: string;
308
+ kind: string;
309
+ } | {
310
+ type: "repo/watch/start";
311
+ repoRoot: string;
312
+ } | {
313
+ type: "repo/watch/event";
314
+ repoRoot: string;
315
+ event: "add" | "change" | "unlink" | "head";
316
+ path: string;
317
+ } | {
318
+ type: "repo/index/done";
319
+ repoRoot: string;
320
+ fileCount: number;
321
+ ms: number;
322
+ } | {
323
+ type: "error";
324
+ scope: "workspace" | "repo";
325
+ message: string;
326
+ repoRoot?: string;
327
+ stack?: string;
328
+ };
329
+ type IndexerProgressHandler = (event: IndexerProgressEvent) => void;
330
+ interface IndexerProgressSink {
331
+ emit(event: IndexerProgressEvent): void;
332
+ }
333
+ type IndexerProgress = IndexerProgressHandler | IndexerProgressSink;
103
334
  interface IndexerConfig extends ProfilesConfig {
104
335
  cacheDir?: string;
105
336
  /** Vector index backend (ANN) configuration. */
106
337
  vector?: VectorConfig;
338
+ /** Optional quantized ANN candidate generator (interface-driven; noop by default). */
339
+ ann?: AnnConfig;
340
+ /** Workspace-only settings (used by WorkspaceIndexer). */
341
+ workspace?: {
342
+ /**
343
+ * Repo discovery knobs.
344
+ * - include/exclude are workspace-root-relative globs (POSIX-style)
345
+ * - maxDepth limits directory traversal depth
346
+ */
347
+ discovery?: {
348
+ include?: string[];
349
+ exclude?: string[];
350
+ maxDepth?: number;
351
+ /** If true (default), do not descend into discovered repo roots. */
352
+ stopAtRepoRoot?: boolean;
353
+ /** If true (default), add submodule repos discovered via `.gitmodules`. */
354
+ includeSubmodules?: boolean;
355
+ };
356
+ /**
357
+ * Per-repo config overrides, matched by repo-root-relative glob.
358
+ * Later matches win.
359
+ */
360
+ repoOverrides?: Array<{
361
+ match: string;
362
+ config: IndexerConfig;
363
+ }>;
364
+ /** Optional persistent graph store for cross-file/cross-language relationships. */
365
+ graph?: {
366
+ provider: "neo4j";
367
+ neo4j: {
368
+ uri: string;
369
+ user: string;
370
+ password: string;
371
+ database?: string;
372
+ /**
373
+ * Optional label prefix to isolate multiple workspaces in one Neo4j instance.
374
+ * Default: "Petri".
375
+ */
376
+ labelPrefix?: string;
377
+ };
378
+ };
379
+ };
107
380
  /** Extensions to index (lowercase, include the dot). */
108
381
  includeExtensions?: string[];
109
382
  /** Safety limit to avoid indexing huge binaries by accident. */
@@ -123,6 +396,10 @@ interface IndexerConfig extends ProfilesConfig {
123
396
  watch?: {
124
397
  debounceMs?: number;
125
398
  };
399
+ /** Optional progress sink (callback or observable). */
400
+ progress?: IndexerProgress;
401
+ /** Optional symbol graph provider (e.g. VS Code/LSP-backed) for high-fidelity cross-file symbol data. */
402
+ symbolGraphProvider?: SymbolGraphProvider;
126
403
  /** Extra ignore files (in addition to git’s excludes). */
127
404
  ignoreFiles?: string[];
128
405
  /**
@@ -345,6 +622,478 @@ interface VectorIndex {
345
622
  */
346
623
  declare function createVectorIndex(vector?: VectorConfig): Promise<VectorIndex>;
347
624
 
625
+ interface ChunkingConfig {
626
+ maxChars: number;
627
+ maxLines: number;
628
+ overlapLines: number;
629
+ }
630
+ interface Chunk {
631
+ startLine: number;
632
+ endLine: number;
633
+ text: string;
634
+ contentHash: string;
635
+ tokens: number;
636
+ }
637
+ declare function languageFromPath(posixPath: string): string;
638
+ declare function chunkSource(posixPath: string, sourceText: string, cfg: ChunkingConfig): {
639
+ language: string;
640
+ chunks: Chunk[];
641
+ };
642
+
643
+ type Unsubscribe = () => void;
644
+ declare class IndexerProgressObservable implements IndexerProgressSink {
645
+ private handlers;
646
+ subscribe(handler: (e: IndexerProgressEvent) => void): Unsubscribe;
647
+ on<TType extends IndexerProgressEvent["type"]>(type: TType, handler: (e: Extract<IndexerProgressEvent, {
648
+ type: TType;
649
+ }>) => void): Unsubscribe;
650
+ emit(event: IndexerProgressEvent): void;
651
+ }
652
+ declare function asProgressSink(progress?: IndexerProgress): IndexerProgressSink | null;
653
+
654
+ type RepoDiscoveryOptions = {
655
+ include?: string[];
656
+ exclude?: string[];
657
+ maxDepth?: number;
658
+ stopAtRepoRoot?: boolean;
659
+ includeSubmodules?: boolean;
660
+ };
661
+ declare function discoverGitRepos(workspaceRoot: string, opts?: RepoDiscoveryOptions): string[];
662
+ declare function pickRepoOverride(base: any, repoRoot: string, workspaceRoot: string, overrides?: Array<{
663
+ match: string;
664
+ config: any;
665
+ }>): any;
666
+ declare function mergeIndexerConfig(target: any, patch: any): void;
667
+
668
+ type WorkspaceChunkRow = {
669
+ id: string;
670
+ repo_id: string;
671
+ repo_root: string;
672
+ path: string;
673
+ language: string;
674
+ kind: string;
675
+ start_line: number;
676
+ end_line: number;
677
+ content_hash: string;
678
+ tokens: number;
679
+ file_mtime: number;
680
+ text: string;
681
+ embedding: Buffer;
682
+ };
683
+
684
+ type WorkspaceStoreOptions = {
685
+ /** "auto" (default) tries FTS5; "off" disables it. */
686
+ fts?: "auto" | "off";
687
+ };
688
+
689
+ /**
690
+ * Workspace-level unified store (SQLite) for multi-repo indexing.
691
+ *
692
+ * Design goals:
693
+ * - Facade API for callers (stable)
694
+ * - Repository pattern for separation of concerns
695
+ * - Unit-of-work for transactional updates
696
+ * - Strategy for optional features (FTS)
697
+ * - Schema versioning + migrations
698
+ */
699
+ declare class WorkspaceStore {
700
+ private readonly dbPath;
701
+ private readonly db;
702
+ private readonly uow;
703
+ private readonly meta;
704
+ private readonly repoHeads;
705
+ private readonly files;
706
+ private readonly edges;
707
+ private readonly repoLinks;
708
+ private readonly symbols;
709
+ private readonly chunks;
710
+ private readonly opts;
711
+ constructor(dbPath: string, opts?: WorkspaceStoreOptions);
712
+ setMeta(k: string, v: string): void;
713
+ getMeta(k: string): string | null;
714
+ /**
715
+ * Set repo head. If commit changes, clears prior rows for that repo_id to keep the workspace index "current".
716
+ */
717
+ setRepoHead(repoId: string, repoRoot: string, commit: string, branch: string): void;
718
+ deleteFile(repoId: string, posixPath: string): void;
719
+ upsertFile(repoId: string, posixPath: string, hash: string, mtime: number, language: string, size: number): void;
720
+ replaceChunksForFile(repoId: string, repoRoot: string, posixPath: string, rows: Array<{
721
+ id: string;
722
+ language: string;
723
+ kind: "chunk" | "synopsis";
724
+ startLine: number;
725
+ endLine: number;
726
+ contentHash: string;
727
+ text: string;
728
+ ftsText: string;
729
+ tokens: number;
730
+ fileMtime: number;
731
+ embedding: Float32Array;
732
+ }>): void;
733
+ setEdges(repoId: string, fromPath: string, kind: string, values: string[]): void;
734
+ replaceSymbolsForFile(repoId: string, repoRoot: string, posixPath: string, rows: Array<{
735
+ id: string;
736
+ language: string;
737
+ name: string;
738
+ kind: string;
739
+ startLine: number;
740
+ startCharacter: number;
741
+ endLine: number;
742
+ endCharacter: number;
743
+ containerName?: string;
744
+ detail?: string;
745
+ }>, edges: Array<{
746
+ fromId: string;
747
+ toId: string;
748
+ kind: string;
749
+ fromPath: string;
750
+ toPath: string;
751
+ }>): void;
752
+ replaceSymbolEdgesFromFile(repoId: string, fromPath: string, edges: Array<{
753
+ fromId: string;
754
+ toId: string;
755
+ kind: string;
756
+ toPath: string;
757
+ }>): void;
758
+ replaceRepoLinks(fromRepoId: string, links: Array<{
759
+ toRepoId: string;
760
+ kind: string;
761
+ detail?: string;
762
+ }>): void;
763
+ listRepoLinks(fromRepoId: string): Array<{
764
+ toRepoId: string;
765
+ kind: string;
766
+ detail: string;
767
+ }>;
768
+ listChunksForFile(repoId: string, posixPath: string, kind?: "chunk" | "synopsis"): Array<{
769
+ id: string;
770
+ start_line: number;
771
+ end_line: number;
772
+ kind: string;
773
+ }>;
774
+ listEdges(repoId: string, fromPath: string, kind: string): string[];
775
+ getChunkById(id: string): WorkspaceChunkRow | null;
776
+ searchFts(ftq: string, limit: number, repoIds?: string[]): Array<{
777
+ id: string;
778
+ bm25: number;
779
+ }>;
780
+ /**
781
+ * If text is omitted from storage, fall back to slicing from disk.
782
+ * The chunk boundaries are approximate; the stored row includes start/end line.
783
+ */
784
+ getChunkTextFallback(row: WorkspaceChunkRow): string;
785
+ close(): void;
786
+ }
787
+
788
+ declare function stableSymbolId(input: {
789
+ repoRoot: string;
790
+ path: string;
791
+ language: string;
792
+ kind: SymbolKind;
793
+ name: string;
794
+ range: SymbolRange;
795
+ }): string;
796
+
797
+ type LspPosition = {
798
+ line: number;
799
+ character: number;
800
+ };
801
+ type LspRange = {
802
+ start: LspPosition;
803
+ end: LspPosition;
804
+ };
805
+ type LspDocument = {
806
+ uri: string;
807
+ fsPath: string;
808
+ languageId: string;
809
+ version?: number;
810
+ };
811
+ type LspDocumentSymbol = {
812
+ name: string;
813
+ kind: SymbolKind;
814
+ range: LspRange;
815
+ selectionRange?: LspRange;
816
+ detail?: string;
817
+ containerName?: string;
818
+ children?: LspDocumentSymbol[];
819
+ };
820
+ type LspTargetLocation = {
821
+ uri: string;
822
+ fsPath: string;
823
+ range: LspRange;
824
+ };
825
+ interface LspFacade {
826
+ openTextDocument(absPath: string): Promise<LspDocument>;
827
+ documentSymbols(doc: LspDocument): Promise<LspDocumentSymbol[]>;
828
+ resolveEdges(doc: LspDocument, pos: LspPosition, kind: Exclude<SymbolEdgeKind, "contains">): Promise<LspTargetLocation[]>;
829
+ }
830
+
831
+ type SymbolGraphIndexingMode = "symbols-only" | "staged" | "full";
832
+ type SymbolGraphStrategyOptions = {
833
+ mode?: SymbolGraphIndexingMode;
834
+ maxEdgesPerDocument?: number;
835
+ };
836
+ type Cancellation = {
837
+ signal?: AbortSignal;
838
+ };
839
+ declare class SymbolGraphIndexer {
840
+ private readonly lsp;
841
+ private readonly opts;
842
+ private cache;
843
+ constructor(lsp: LspFacade, opts?: SymbolGraphStrategyOptions);
844
+ indexDocument(input: SymbolGraphIndexInput, cancel?: Cancellation): Promise<SymbolGraphIndexOutput>;
845
+ /**
846
+ * Compute cross-file edges for a document (definition/reference/implementation/typeDefinition).
847
+ * Intended for staged indexing and on-demand expansion during retrieval.
848
+ */
849
+ expandDocumentEdges(input: SymbolGraphIndexInput, cancel?: Cancellation): Promise<GraphEdge[]>;
850
+ private bestEffortTargetSymbolId;
851
+ }
852
+
853
+ /**
854
+ * Create a SymbolGraphProvider that runs in a VS Code extension host.
855
+ *
856
+ * Strategy pattern:
857
+ * - `mode: "symbols-only"`: only document symbols + contains edges
858
+ * - `mode: "staged"` (default): symbols+contains during indexing; compute cross-file edges via `expandDocumentEdges` on demand
859
+ * - `mode: "full"`: compute edges during indexing (can be expensive)
860
+ */
861
+ declare function createVSCodeSymbolGraphProvider(opts?: {
862
+ languages?: string[];
863
+ mode?: SymbolGraphIndexingMode;
864
+ maxEdgesPerDocument?: number;
865
+ }): Promise<SymbolGraphProvider | null>;
866
+
867
+ type GraphFileUpdate = {
868
+ repoId: string;
869
+ repoRoot: string;
870
+ commit: string;
871
+ branch: string;
872
+ path: string;
873
+ language: string;
874
+ imports: string[];
875
+ exports: string[];
876
+ symbols: Array<Pick<GraphSymbol, "id" | "name" | "kind" | "range" | "containerName" | "detail" | "language">>;
877
+ symbolEdges: Array<Pick<GraphEdge, "fromId" | "toId" | "kind">>;
878
+ };
879
+ interface GraphStore {
880
+ id: string;
881
+ setRepoHead(args: {
882
+ repoId: string;
883
+ repoRoot: string;
884
+ commit: string;
885
+ branch: string;
886
+ }): Promise<void>;
887
+ deleteFile(args: {
888
+ repoId: string;
889
+ path: string;
890
+ }): Promise<void>;
891
+ replaceFileGraph(update: GraphFileUpdate): Promise<void>;
892
+ /**
893
+ * Optional staged/on-demand update: replace outgoing symbol edges originating from a file's symbols.
894
+ * Implementations should preserve incoming edges.
895
+ */
896
+ replaceOutgoingSymbolEdgesFromFile?(args: {
897
+ repoId: string;
898
+ fromPath: string;
899
+ edges: Array<{
900
+ fromId: string;
901
+ toId: string;
902
+ kind: string;
903
+ toPath?: string;
904
+ }>;
905
+ }): Promise<void>;
906
+ /**
907
+ * Optional workspace-level linking (cross-repo relationships).
908
+ */
909
+ replaceRepoLinks?(args: {
910
+ links: Array<{
911
+ fromRepoId: string;
912
+ toRepoId: string;
913
+ kind: string;
914
+ detail?: string;
915
+ }>;
916
+ }): Promise<void>;
917
+ /**
918
+ * Optional graph-assisted retrieval expansion.
919
+ */
920
+ neighborFiles?(args: {
921
+ seeds: Array<{
922
+ repoId: string;
923
+ path: string;
924
+ }>;
925
+ maxPerSeed?: number;
926
+ limit?: number;
927
+ kinds?: string[];
928
+ }): Promise<Array<{
929
+ repoId: string;
930
+ path: string;
931
+ weight: number;
932
+ }>>;
933
+ shortestFilePath?(args: {
934
+ from: {
935
+ repoId: string;
936
+ path: string;
937
+ };
938
+ to: {
939
+ repoId: string;
940
+ path: string;
941
+ };
942
+ maxRels?: number;
943
+ }): Promise<Array<{
944
+ repoId: string;
945
+ path: string;
946
+ }> | null>;
947
+ extractFileSubgraph?(args: {
948
+ seeds: Array<{
949
+ repoId: string;
950
+ path: string;
951
+ }>;
952
+ maxRels?: number;
953
+ limitEdges?: number;
954
+ }): Promise<{
955
+ nodes: Array<{
956
+ repoId: string;
957
+ path: string;
958
+ }>;
959
+ edges: Array<{
960
+ from: {
961
+ repoId: string;
962
+ path: string;
963
+ };
964
+ to: {
965
+ repoId: string;
966
+ path: string;
967
+ };
968
+ kind: string;
969
+ }>;
970
+ }>;
971
+ close(): Promise<void>;
972
+ }
973
+
974
+ type Neo4jLikeDriver = {
975
+ session: (opts?: any) => {
976
+ run: (query: string, params?: any) => Promise<any>;
977
+ close: () => Promise<void>;
978
+ };
979
+ close: () => Promise<void>;
980
+ };
981
+ type Neo4jConfig = {
982
+ uri: string;
983
+ user: string;
984
+ password: string;
985
+ database?: string;
986
+ labelPrefix?: string;
987
+ };
988
+ declare class Neo4jGraphStore implements GraphStore {
989
+ private driver;
990
+ private cfg;
991
+ id: string;
992
+ private labelPrefix;
993
+ private schemaVersionLatest;
994
+ constructor(driver: Neo4jLikeDriver, cfg: Neo4jConfig);
995
+ private labels;
996
+ init(): Promise<void>;
997
+ private getSchemaVersion;
998
+ private setSchemaVersion;
999
+ private runMigrations;
1000
+ setRepoHead(args: {
1001
+ repoId: string;
1002
+ repoRoot: string;
1003
+ commit: string;
1004
+ branch: string;
1005
+ }): Promise<void>;
1006
+ deleteFile(args: {
1007
+ repoId: string;
1008
+ path: string;
1009
+ }): Promise<void>;
1010
+ replaceOutgoingSymbolEdgesFromFile(args: {
1011
+ repoId: string;
1012
+ fromPath: string;
1013
+ edges: Array<{
1014
+ fromId: string;
1015
+ toId: string;
1016
+ kind: string;
1017
+ toPath?: string;
1018
+ }>;
1019
+ }): Promise<void>;
1020
+ replaceFileGraph(update: GraphFileUpdate): Promise<void>;
1021
+ replaceRepoLinks(args: {
1022
+ links: Array<{
1023
+ fromRepoId: string;
1024
+ toRepoId: string;
1025
+ kind: string;
1026
+ detail?: string;
1027
+ }>;
1028
+ }): Promise<void>;
1029
+ neighborFiles(args: {
1030
+ seeds: Array<{
1031
+ repoId: string;
1032
+ path: string;
1033
+ }>;
1034
+ maxPerSeed?: number;
1035
+ limit?: number;
1036
+ kinds?: string[];
1037
+ }): Promise<Array<{
1038
+ repoId: string;
1039
+ path: string;
1040
+ weight: number;
1041
+ }>>;
1042
+ shortestFilePath(args: {
1043
+ from: {
1044
+ repoId: string;
1045
+ path: string;
1046
+ };
1047
+ to: {
1048
+ repoId: string;
1049
+ path: string;
1050
+ };
1051
+ maxRels?: number;
1052
+ }): Promise<Array<{
1053
+ repoId: string;
1054
+ path: string;
1055
+ }> | null>;
1056
+ extractFileSubgraph(args: {
1057
+ seeds: Array<{
1058
+ repoId: string;
1059
+ path: string;
1060
+ }>;
1061
+ maxRels?: number;
1062
+ limitEdges?: number;
1063
+ }): Promise<{
1064
+ nodes: Array<{
1065
+ repoId: string;
1066
+ path: string;
1067
+ }>;
1068
+ edges: Array<{
1069
+ from: {
1070
+ repoId: string;
1071
+ path: string;
1072
+ };
1073
+ to: {
1074
+ repoId: string;
1075
+ path: string;
1076
+ };
1077
+ kind: string;
1078
+ }>;
1079
+ }>;
1080
+ close(): Promise<void>;
1081
+ }
1082
+ declare function createNeo4jGraphStore(cfg: Neo4jConfig): Promise<Neo4jGraphStore>;
1083
+
1084
+ declare class NoopAnnIndex implements AnnIndex {
1085
+ readonly id = "noop";
1086
+ readonly kind = "noop";
1087
+ init(_init: AnnIndexInit): Promise<void>;
1088
+ upsert(_points: AnnPoint[]): Promise<void>;
1089
+ remove(_ids: string[]): Promise<void>;
1090
+ search(_query: Float32Array, _k: number): Promise<AnnSearchHit[]>;
1091
+ flush(): Promise<void>;
1092
+ close(): Promise<void>;
1093
+ }
1094
+
1095
+ declare function createAnnIndex(config?: AnnConfig): AnnIndex;
1096
+
348
1097
  interface StoredChunkRow {
349
1098
  id: string;
350
1099
  path: string;
@@ -422,34 +1171,35 @@ declare class RepoIndexer {
422
1171
  private readonly embedder;
423
1172
  readonly repoRoot: string;
424
1173
  readonly repoId: string;
425
- private config;
1174
+ private readonly rawConfig;
1175
+ private readonly config;
1176
+ private readonly progress;
1177
+ private readonly embeddingCache;
1178
+ private readonly symbolGraphProvider;
426
1179
  private store;
427
- private vec;
428
- private vecFlushTimer;
429
- private vecFlushInFlight;
430
- private embeddingCache;
1180
+ private workspaceStore;
1181
+ private graphStore;
1182
+ private vector;
1183
+ private ann;
1184
+ private retriever;
1185
+ private fileIndexer;
1186
+ private watcher;
431
1187
  private currentCommit;
432
1188
  private currentBranch;
433
- private watcher;
434
1189
  private fileIgnore;
435
1190
  private serial;
436
- private chunkCache;
437
- constructor(repoRoot: string, embedder: EmbeddingsProvider, config?: IndexerConfig);
1191
+ constructor(repoRoot: string, embedder: EmbeddingsProvider, config?: IndexerConfig, workspaceStore?: WorkspaceStore, graphStore?: GraphStore);
1192
+ private emitProgress;
438
1193
  getCommit(): string | null;
439
1194
  getBranch(): string | null;
440
1195
  getStore(): RepoStore | null;
441
1196
  private dbPathForCommit;
442
- private shouldIndexPath;
443
- private applyRedactions;
444
1197
  private vectorMetric;
445
1198
  private vectorFlushDebounceMs;
446
- private scheduleVectorFlush;
447
- private flushVectorNow;
448
1199
  private ensureVectorIndex;
449
1200
  openForCurrentHead(): Promise<void>;
450
1201
  indexAll(): Promise<void>;
451
1202
  indexFile(posixRelPath: string): Promise<void>;
452
- private deleteFileInner;
453
1203
  deleteFile(posixRelPath: string): Promise<void>;
454
1204
  vectorCandidates(queryEmbedding: Float32Array, k: number, includePaths?: string[]): Promise<Array<{
455
1205
  id: string;
@@ -459,17 +1209,11 @@ declare class RepoIndexer {
459
1209
  id: string;
460
1210
  score: number;
461
1211
  }>>;
462
- private getChunkRowCached;
463
- private readChunkTextFallback;
464
1212
  getChunkRecord(id: string): ChunkRecord | null;
465
1213
  getChunkMeta(id: string): Omit<ChunkRecord, "text"> | null;
466
1214
  getChunkText(id: string): string;
467
1215
  getChunkPreview(id: string): string;
468
- /**
469
- * Expand context around a hit:
470
- * - adjacency (previous/next chunks in file)
471
- * - follow relative imports to include imported file synopses/headers
472
- */
1216
+ getRepresentativeChunkIdForFile(posixPath: string, preferSynopsis?: boolean): Promise<string | null>;
473
1217
  expandContext(chunkId: string, opts: {
474
1218
  adjacentChunks: number;
475
1219
  followImports: number;
@@ -478,6 +1222,14 @@ declare class RepoIndexer {
478
1222
  id: string;
479
1223
  reason: string;
480
1224
  }>>;
1225
+ /**
1226
+ * Staged symbol-graph expansion: compute expensive cross-file edges only for the specified files.
1227
+ * Safe to call repeatedly; provider caching should avoid redundant work.
1228
+ */
1229
+ warmSymbolGraphEdges(posixPaths: string[], opts?: {
1230
+ signal?: AbortSignal;
1231
+ maxFiles?: number;
1232
+ }): Promise<void>;
481
1233
  watch(): Promise<void>;
482
1234
  closeAsync(): Promise<void>;
483
1235
  close(): void;
@@ -488,7 +1240,11 @@ declare class WorkspaceIndexer {
488
1240
  private readonly embedder;
489
1241
  private repos;
490
1242
  private config;
1243
+ private progress;
1244
+ private workspaceStore;
1245
+ private graphStore;
491
1246
  constructor(workspaceRoot: string, embedder: EmbeddingsProvider, config?: IndexerConfig);
1247
+ private emitProgress;
492
1248
  open(): Promise<void>;
493
1249
  indexAll(): Promise<void>;
494
1250
  watch(): Promise<void>;
@@ -500,6 +1256,71 @@ declare class WorkspaceIndexer {
500
1256
  close(): void;
501
1257
  }
502
1258
 
1259
+ type RepoInfo = {
1260
+ repoId: string;
1261
+ repoRoot: string;
1262
+ absRoot: string;
1263
+ };
1264
+ type RepoLink = {
1265
+ fromRepoId: string;
1266
+ toRepoId: string;
1267
+ kind: string;
1268
+ detail?: string;
1269
+ };
1270
+ type WorkspaceLinkContext = {
1271
+ workspaceRoot: string;
1272
+ repos: RepoInfo[];
1273
+ npmNameToRepoId: Map<string, string>;
1274
+ goModuleToRepoId: Map<string, string>;
1275
+ };
1276
+ interface WorkspaceLinkStrategy {
1277
+ id: string;
1278
+ collect(ctx: WorkspaceLinkContext): RepoLink[];
1279
+ }
1280
+ declare class NestedRepoLinkStrategy implements WorkspaceLinkStrategy {
1281
+ id: string;
1282
+ collect(ctx: WorkspaceLinkContext): RepoLink[];
1283
+ }
1284
+ declare class NpmDependencyLinkStrategy implements WorkspaceLinkStrategy {
1285
+ id: string;
1286
+ collect(ctx: WorkspaceLinkContext): RepoLink[];
1287
+ }
1288
+ declare class GoModuleLinkStrategy implements WorkspaceLinkStrategy {
1289
+ id: string;
1290
+ collect(ctx: WorkspaceLinkContext): RepoLink[];
1291
+ }
1292
+ /**
1293
+ * Link repos based on VS Code extension contributions.
1294
+ *
1295
+ * This is intentionally generic: it supports the Petri languages extension (CTL/LTL/ISL/Colour Algebra)
1296
+ * and makes it straightforward to add additional language extensions later (just by contributing languages).
1297
+ */
1298
+ declare class VsCodeContributesLanguageLinkStrategy implements WorkspaceLinkStrategy {
1299
+ private readonly opts;
1300
+ id: string;
1301
+ constructor(opts?: {
1302
+ targetLanguageIds?: string[];
1303
+ });
1304
+ collect(ctx: WorkspaceLinkContext): RepoLink[];
1305
+ }
1306
+ declare class WorkspaceLinker {
1307
+ private readonly strategies;
1308
+ constructor(strategies?: WorkspaceLinkStrategy[]);
1309
+ static defaultStrategies(): WorkspaceLinkStrategy[];
1310
+ buildContext(workspaceRoot: string, repoRoots: string[]): WorkspaceLinkContext;
1311
+ buildLinks(ctx: WorkspaceLinkContext): RepoLink[];
1312
+ }
1313
+ declare function linkWorkspaceRepos(args: {
1314
+ workspaceRoot: string;
1315
+ repoRoots: string[];
1316
+ workspaceStore?: WorkspaceStore | null;
1317
+ graphStore?: GraphStore | null;
1318
+ strategies?: WorkspaceLinkStrategy[];
1319
+ }): Promise<{
1320
+ repos: RepoInfo[];
1321
+ links: RepoLink[];
1322
+ }>;
1323
+
503
1324
  declare function loadConfigFile(filePath: string): IndexerConfig;
504
1325
 
505
- export { type ChunkRecord, type ContextBundle, type CustomVectorConfig, DEFAULT_PROFILES, type EmbeddingsProvider, type FaissVectorConfig, HashEmbeddingsProvider, type HnswlibVectorConfig, type IndexerConfig, OllamaEmbeddingsProvider, OpenAIEmbeddingsProvider, type ProfilesConfig, type QdrantVectorConfig, type RelatedContextOptions, type RepoId, RepoIndexer, type RetrievalProfile, type RetrievalProfileName, type RetrievalScope, type RetrievalWeights, type RetrieveOptions, type SearchHit, type SearchOptions, type VectorConfig, type VectorIndex, type VectorIndexInit, type VectorMetric, type VectorPoint, type VectorProviderKind, type VectorSearchHit, WorkspaceIndexer, createVectorIndex, deepMergeProfile, loadConfigFile };
1326
+ export { type AnnConfig, type AnnIndex, type AnnIndexInit, type AnnMetric, type AnnPoint, type AnnSearchHit, type Cancellation, type Chunk, type ChunkRecord, type ChunkingConfig, type ContextBundle, type CustomVectorConfig, DEFAULT_PROFILES, type EmbeddingsProvider, type FaissVectorConfig, GoModuleLinkStrategy, type GraphEdge, type GraphFileUpdate, type GraphStore, type GraphSymbol, HashEmbeddingsProvider, type HnswlibVectorConfig, type IndexerConfig, type IndexerProgress, type IndexerProgressEvent, type IndexerProgressHandler, IndexerProgressObservable, type IndexerProgressSink, type LspDocument, type LspDocumentSymbol, type LspFacade, type LspPosition, type LspRange, type LspTargetLocation, Neo4jGraphStore, NestedRepoLinkStrategy, NoopAnnIndex, NpmDependencyLinkStrategy, OllamaEmbeddingsProvider, OpenAIEmbeddingsProvider, type ProfilesConfig, type QdrantVectorConfig, type RelatedContextOptions, type RepoDiscoveryOptions, type RepoId, RepoIndexer, type RepoInfo, type RepoLink, type RetrievalProfile, type RetrievalProfileName, type RetrievalScope, type RetrievalWeights, type RetrieveOptions, type SearchHit, type SearchOptions, type SymbolEdgeKind, type SymbolGraphIndexInput, type SymbolGraphIndexOutput, SymbolGraphIndexer, type SymbolGraphIndexingMode, type SymbolGraphProvider, type SymbolGraphStrategyOptions, type SymbolKind, type SymbolRange, type VectorConfig, type VectorIndex, type VectorIndexInit, type VectorMetric, type VectorPoint, type VectorProviderKind, type VectorSearchHit, VsCodeContributesLanguageLinkStrategy, type WorkspaceChunkRow, WorkspaceIndexer, type WorkspaceLinkContext, type WorkspaceLinkStrategy, WorkspaceLinker, WorkspaceStore, asProgressSink, chunkSource, createAnnIndex, createNeo4jGraphStore, createVSCodeSymbolGraphProvider, createVectorIndex, deepMergeProfile, discoverGitRepos, languageFromPath, linkWorkspaceRepos, loadConfigFile, mergeIndexerConfig, pickRepoOverride, stableSymbolId };