@neuralsea/workspace-indexer 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # @petri-ai/workspace-indexer
1
+ # @neuralsea/workspace-indexer
2
2
 
3
3
  A **local-first**, **multi-repo** workspace indexer for AI agents (e.g. your custom agent “Damocles”).
4
4
 
@@ -31,17 +31,21 @@ This package is designed so Damocles can use the same index in different problem
31
31
  npm i @neuralsea/workspace-indexer
32
32
  ```
33
33
 
34
- Node 18+ recommended.
34
+ Node 18+ required.
35
35
 
36
36
  ---
37
37
 
38
38
  ## Quick start (library)
39
39
 
40
40
  ```ts
41
- import { WorkspaceIndexer, OllamaEmbeddingsProvider } from "@neuralsea/workspace-indexer";
41
+ import { WorkspaceIndexer, OllamaEmbeddingsProvider, IndexerProgressObservable } from "@neuralsea/workspace-indexer";
42
42
 
43
43
  const embedder = new OllamaEmbeddingsProvider({ model: "nomic-embed-text" });
44
- const ix = new WorkspaceIndexer("/path/to/workspace", embedder);
44
+
45
+ const progress = new IndexerProgressObservable();
46
+ progress.subscribe(e => console.log(e.type, e));
47
+
48
+ const ix = new WorkspaceIndexer("/path/to/workspace", embedder, { progress });
45
49
 
46
50
  await ix.indexAll();
47
51
 
@@ -726,6 +726,41 @@ async function createVectorIndex(vector) {
726
726
  return new BruteForceVectorIndex();
727
727
  }
728
728
 
729
+ // src/progress.ts
730
+ function toHandler(progress) {
731
+ if (!progress) return null;
732
+ if (typeof progress === "function") return progress;
733
+ if (typeof progress.emit === "function") return (e) => progress.emit(e);
734
+ return null;
735
+ }
736
+ var IndexerProgressObservable = class {
737
+ handlers = /* @__PURE__ */ new Set();
738
+ subscribe(handler) {
739
+ this.handlers.add(handler);
740
+ return () => this.handlers.delete(handler);
741
+ }
742
+ on(type, handler) {
743
+ const wrapped = (e) => {
744
+ if (e.type !== type) return;
745
+ handler(e);
746
+ };
747
+ return this.subscribe(wrapped);
748
+ }
749
+ emit(event) {
750
+ for (const h of this.handlers) {
751
+ try {
752
+ h(event);
753
+ } catch {
754
+ }
755
+ }
756
+ }
757
+ };
758
+ function asProgressSink(progress) {
759
+ const handler = toHandler(progress);
760
+ if (!handler) return null;
761
+ return { emit: handler };
762
+ }
763
+
729
764
  // src/indexer/repoIndexer.ts
730
765
  import fs6 from "fs";
731
766
  import path8 from "path";
@@ -1359,6 +1394,7 @@ var RepoIndexer = class {
1359
1394
  this.embedder = embedder;
1360
1395
  this.repoRoot = path8.resolve(repoRoot);
1361
1396
  this.repoId = repoIdFromRoot(this.repoRoot);
1397
+ this.progress = asProgressSink(config.progress);
1362
1398
  const cacheDir = config.cacheDir ?? defaultCacheDir();
1363
1399
  this.config = {
1364
1400
  cacheDir,
@@ -1423,6 +1459,7 @@ var RepoIndexer = class {
1423
1459
  repoRoot;
1424
1460
  repoId;
1425
1461
  config;
1462
+ progress = asProgressSink();
1426
1463
  store = null;
1427
1464
  vec = null;
1428
1465
  vecFlushTimer = null;
@@ -1434,6 +1471,12 @@ var RepoIndexer = class {
1434
1471
  fileIgnore = null;
1435
1472
  serial = pLimit(1);
1436
1473
  chunkCache = /* @__PURE__ */ new Map();
1474
+ emitProgress(event) {
1475
+ try {
1476
+ this.progress?.emit(event);
1477
+ } catch {
1478
+ }
1479
+ }
1437
1480
  getCommit() {
1438
1481
  return this.currentCommit;
1439
1482
  }
@@ -1557,6 +1600,7 @@ var RepoIndexer = class {
1557
1600
  this.store.setMeta("commit", commit);
1558
1601
  this.store.setMeta("branch", branch);
1559
1602
  this.store.setMeta("embedderId", this.embedder.id);
1603
+ this.emitProgress({ type: "repo/open", repoRoot: this.repoRoot, repoId: this.repoId, commit, branch });
1560
1604
  const dim = this.embedder.dimension ?? this.store.getAnyEmbeddingDimension();
1561
1605
  if (!dim) return;
1562
1606
  const vec = await this.ensureVectorIndex(dim);
@@ -1573,9 +1617,11 @@ var RepoIndexer = class {
1573
1617
  }
1574
1618
  }
1575
1619
  async indexAll() {
1620
+ const startedAt = Date.now();
1576
1621
  await this.openForCurrentHead();
1577
1622
  if (!this.store) throw new Error("RepoStore not initialised");
1578
1623
  const files = (await listWorkingFiles(this.repoRoot)).filter((p) => this.shouldIndexPath(p));
1624
+ this.emitProgress({ type: "repo/index/start", repoRoot: this.repoRoot, fileCount: files.length });
1579
1625
  const currentSet = new Set(files);
1580
1626
  for (const known of this.store.listAllFiles()) {
1581
1627
  if (!currentSet.has(known)) {
@@ -1585,29 +1631,53 @@ var RepoIndexer = class {
1585
1631
  const limit = pLimit(this.config.embed.concurrency);
1586
1632
  await Promise.all(files.map((f) => limit(() => this.indexFile(f))));
1587
1633
  if (this.vec) await this.flushVectorNow();
1634
+ this.emitProgress({
1635
+ type: "repo/index/done",
1636
+ repoRoot: this.repoRoot,
1637
+ fileCount: files.length,
1638
+ ms: Date.now() - startedAt
1639
+ });
1588
1640
  }
1589
1641
  async indexFile(posixRelPath) {
1590
1642
  return this.serial(async () => {
1643
+ const startedAt = Date.now();
1591
1644
  await this.openForCurrentHead();
1592
1645
  if (!this.store) throw new Error("RepoStore not initialised");
1593
- if (!this.shouldIndexPath(posixRelPath)) return;
1646
+ if (!this.shouldIndexPath(posixRelPath)) {
1647
+ this.emitProgress({ type: "repo/index/file/skip", repoRoot: this.repoRoot, path: posixRelPath, reason: "filtered" });
1648
+ return;
1649
+ }
1650
+ this.emitProgress({ type: "repo/index/file/start", repoRoot: this.repoRoot, path: posixRelPath });
1594
1651
  const abs = path8.join(this.repoRoot, fromPosixPath(posixRelPath));
1595
1652
  let stat;
1596
1653
  try {
1597
1654
  stat = fs6.statSync(abs);
1598
1655
  } catch {
1656
+ this.emitProgress({ type: "repo/index/file/skip", repoRoot: this.repoRoot, path: posixRelPath, reason: "missing" });
1599
1657
  this.deleteFile(posixRelPath);
1600
1658
  return;
1601
1659
  }
1602
- if (!stat.isFile()) return;
1603
- if (stat.size > this.config.maxFileBytes) return;
1660
+ if (!stat.isFile()) {
1661
+ this.emitProgress({ type: "repo/index/file/skip", repoRoot: this.repoRoot, path: posixRelPath, reason: "not_a_file" });
1662
+ return;
1663
+ }
1664
+ if (stat.size > this.config.maxFileBytes) {
1665
+ this.emitProgress({ type: "repo/index/file/skip", repoRoot: this.repoRoot, path: posixRelPath, reason: "too_large" });
1666
+ return;
1667
+ }
1604
1668
  const buf = fs6.readFileSync(abs);
1605
- if (looksBinary(buf)) return;
1669
+ if (looksBinary(buf)) {
1670
+ this.emitProgress({ type: "repo/index/file/skip", repoRoot: this.repoRoot, path: posixRelPath, reason: "binary" });
1671
+ return;
1672
+ }
1606
1673
  const raw = buf.toString("utf8");
1607
1674
  const redacted = this.applyRedactions(raw);
1608
1675
  const fileHash = sha256Hex(redacted);
1609
1676
  const prev = this.store.getFileHash(posixRelPath);
1610
- if (prev === fileHash) return;
1677
+ if (prev === fileHash) {
1678
+ this.emitProgress({ type: "repo/index/file/skip", repoRoot: this.repoRoot, path: posixRelPath, reason: "unchanged" });
1679
+ return;
1680
+ }
1611
1681
  const { language, chunks } = chunkSource(posixRelPath, redacted, this.config.chunk);
1612
1682
  if (language === "typescript" || language === "javascript") {
1613
1683
  const rel = extractTsRelations(posixRelPath, redacted);
@@ -1666,6 +1736,13 @@ ${ch.text}`
1666
1736
  for (let start = 0; start < embedTexts.length; start += batchSize) {
1667
1737
  const end = Math.min(embedTexts.length, start + batchSize);
1668
1738
  const batch = embedTexts.slice(start, end);
1739
+ this.emitProgress({
1740
+ type: "repo/index/embed/batch",
1741
+ repoRoot: this.repoRoot,
1742
+ batchSize: batch.length,
1743
+ batchIndex: Math.floor(start / batchSize),
1744
+ batchesTotal: Math.ceil(embedTexts.length / batchSize)
1745
+ });
1669
1746
  const vecs = await this.embedder.embed(batch);
1670
1747
  for (let j = 0; j < vecs.length; j++) {
1671
1748
  const plan = embedPlan[start + j];
@@ -1705,11 +1782,22 @@ ${ch.text}`
1705
1782
  if (dim) {
1706
1783
  await this.ensureVectorIndex(dim);
1707
1784
  if (this.vec) {
1708
- await this.vec.remove(oldChunkIds);
1785
+ if (oldChunkIds.length) {
1786
+ this.emitProgress({ type: "repo/index/vector/remove", repoRoot: this.repoRoot, ids: oldChunkIds.length });
1787
+ await this.vec.remove(oldChunkIds);
1788
+ }
1789
+ this.emitProgress({ type: "repo/index/vector/upsert", repoRoot: this.repoRoot, points: points.length });
1709
1790
  await this.vec.upsert(points);
1710
1791
  this.scheduleVectorFlush();
1711
1792
  }
1712
1793
  }
1794
+ this.emitProgress({
1795
+ type: "repo/index/file/done",
1796
+ repoRoot: this.repoRoot,
1797
+ path: posixRelPath,
1798
+ chunkCount: combined.length,
1799
+ ms: Date.now() - startedAt
1800
+ });
1713
1801
  });
1714
1802
  }
1715
1803
  async deleteFileInner(posixRelPath) {
@@ -2064,16 +2152,33 @@ var WorkspaceIndexer = class {
2064
2152
  this.embedder = embedder;
2065
2153
  this.config = { ...config };
2066
2154
  if (!this.config.cacheDir) this.config.cacheDir = defaultCacheDir();
2155
+ this.progress = asProgressSink(this.config.progress);
2067
2156
  }
2068
2157
  repos = [];
2069
2158
  config;
2159
+ progress = asProgressSink();
2160
+ emitProgress(event) {
2161
+ try {
2162
+ this.progress?.emit(event);
2163
+ } catch {
2164
+ }
2165
+ }
2070
2166
  async open() {
2071
2167
  const repoRoots = findGitRepos(this.workspaceRoot);
2168
+ this.emitProgress({ type: "workspace/open", workspaceRoot: this.workspaceRoot, repoRoots });
2072
2169
  this.repos = repoRoots.map((r) => new RepoIndexer(r, this.embedder, this.config));
2073
2170
  }
2074
2171
  async indexAll() {
2075
2172
  if (this.repos.length === 0) await this.open();
2173
+ const startedAt = Date.now();
2174
+ this.emitProgress({ type: "workspace/index/start", workspaceRoot: this.workspaceRoot, repoCount: this.repos.length });
2076
2175
  await Promise.all(this.repos.map((r) => r.indexAll()));
2176
+ this.emitProgress({
2177
+ type: "workspace/index/done",
2178
+ workspaceRoot: this.workspaceRoot,
2179
+ repoCount: this.repos.length,
2180
+ ms: Date.now() - startedAt
2181
+ });
2077
2182
  }
2078
2183
  async watch() {
2079
2184
  if (this.repos.length === 0) await this.open();
@@ -2363,6 +2468,8 @@ function loadConfigFile(filePath) {
2363
2468
 
2364
2469
  export {
2365
2470
  createVectorIndex,
2471
+ IndexerProgressObservable,
2472
+ asProgressSink,
2366
2473
  RepoIndexer,
2367
2474
  DEFAULT_PROFILES,
2368
2475
  deepMergeProfile,
package/dist/cli.js CHANGED
@@ -5,7 +5,7 @@ import {
5
5
  OpenAIEmbeddingsProvider,
6
6
  WorkspaceIndexer,
7
7
  loadConfigFile
8
- } from "./chunk-QPQCSCBN.js";
8
+ } from "./chunk-Z3BRYSQM.js";
9
9
 
10
10
  // src/cli.ts
11
11
  import yargs from "yargs";
package/dist/index.d.ts CHANGED
@@ -100,6 +100,75 @@ interface VectorConfig {
100
100
  faiss?: FaissVectorConfig;
101
101
  custom?: CustomVectorConfig;
102
102
  }
103
+ type IndexerProgressEvent = {
104
+ type: "workspace/open";
105
+ workspaceRoot: string;
106
+ repoRoots: string[];
107
+ } | {
108
+ type: "workspace/index/start";
109
+ workspaceRoot: string;
110
+ repoCount: number;
111
+ } | {
112
+ type: "workspace/index/done";
113
+ workspaceRoot: string;
114
+ repoCount: number;
115
+ ms: number;
116
+ } | {
117
+ type: "repo/open";
118
+ repoRoot: string;
119
+ repoId: string;
120
+ commit: string;
121
+ branch: string;
122
+ } | {
123
+ type: "repo/index/start";
124
+ repoRoot: string;
125
+ fileCount: number;
126
+ } | {
127
+ type: "repo/index/file/start";
128
+ repoRoot: string;
129
+ path: string;
130
+ } | {
131
+ type: "repo/index/file/skip";
132
+ repoRoot: string;
133
+ path: string;
134
+ reason: string;
135
+ } | {
136
+ type: "repo/index/file/done";
137
+ repoRoot: string;
138
+ path: string;
139
+ chunkCount: number;
140
+ ms: number;
141
+ } | {
142
+ type: "repo/index/embed/batch";
143
+ repoRoot: string;
144
+ batchSize: number;
145
+ batchIndex: number;
146
+ batchesTotal: number;
147
+ } | {
148
+ type: "repo/index/vector/upsert";
149
+ repoRoot: string;
150
+ points: number;
151
+ } | {
152
+ type: "repo/index/vector/remove";
153
+ repoRoot: string;
154
+ ids: number;
155
+ } | {
156
+ type: "repo/index/done";
157
+ repoRoot: string;
158
+ fileCount: number;
159
+ ms: number;
160
+ } | {
161
+ type: "error";
162
+ scope: "workspace" | "repo";
163
+ message: string;
164
+ repoRoot?: string;
165
+ stack?: string;
166
+ };
167
+ type IndexerProgressHandler = (event: IndexerProgressEvent) => void;
168
+ interface IndexerProgressSink {
169
+ emit(event: IndexerProgressEvent): void;
170
+ }
171
+ type IndexerProgress = IndexerProgressHandler | IndexerProgressSink;
103
172
  interface IndexerConfig extends ProfilesConfig {
104
173
  cacheDir?: string;
105
174
  /** Vector index backend (ANN) configuration. */
@@ -123,6 +192,8 @@ interface IndexerConfig extends ProfilesConfig {
123
192
  watch?: {
124
193
  debounceMs?: number;
125
194
  };
195
+ /** Optional progress sink (callback or observable). */
196
+ progress?: IndexerProgress;
126
197
  /** Extra ignore files (in addition to git’s excludes). */
127
198
  ignoreFiles?: string[];
128
199
  /**
@@ -345,6 +416,17 @@ interface VectorIndex {
345
416
  */
346
417
  declare function createVectorIndex(vector?: VectorConfig): Promise<VectorIndex>;
347
418
 
419
+ type Unsubscribe = () => void;
420
+ declare class IndexerProgressObservable implements IndexerProgressSink {
421
+ private handlers;
422
+ subscribe(handler: (e: IndexerProgressEvent) => void): Unsubscribe;
423
+ on<TType extends IndexerProgressEvent["type"]>(type: TType, handler: (e: Extract<IndexerProgressEvent, {
424
+ type: TType;
425
+ }>) => void): Unsubscribe;
426
+ emit(event: IndexerProgressEvent): void;
427
+ }
428
+ declare function asProgressSink(progress?: IndexerProgress): IndexerProgressSink | null;
429
+
348
430
  interface StoredChunkRow {
349
431
  id: string;
350
432
  path: string;
@@ -423,6 +505,7 @@ declare class RepoIndexer {
423
505
  readonly repoRoot: string;
424
506
  readonly repoId: string;
425
507
  private config;
508
+ private progress;
426
509
  private store;
427
510
  private vec;
428
511
  private vecFlushTimer;
@@ -435,6 +518,7 @@ declare class RepoIndexer {
435
518
  private serial;
436
519
  private chunkCache;
437
520
  constructor(repoRoot: string, embedder: EmbeddingsProvider, config?: IndexerConfig);
521
+ private emitProgress;
438
522
  getCommit(): string | null;
439
523
  getBranch(): string | null;
440
524
  getStore(): RepoStore | null;
@@ -488,7 +572,9 @@ declare class WorkspaceIndexer {
488
572
  private readonly embedder;
489
573
  private repos;
490
574
  private config;
575
+ private progress;
491
576
  constructor(workspaceRoot: string, embedder: EmbeddingsProvider, config?: IndexerConfig);
577
+ private emitProgress;
492
578
  open(): Promise<void>;
493
579
  indexAll(): Promise<void>;
494
580
  watch(): Promise<void>;
@@ -502,4 +588,4 @@ declare class WorkspaceIndexer {
502
588
 
503
589
  declare function loadConfigFile(filePath: string): IndexerConfig;
504
590
 
505
- export { type ChunkRecord, type ContextBundle, type CustomVectorConfig, DEFAULT_PROFILES, type EmbeddingsProvider, type FaissVectorConfig, HashEmbeddingsProvider, type HnswlibVectorConfig, type IndexerConfig, OllamaEmbeddingsProvider, OpenAIEmbeddingsProvider, type ProfilesConfig, type QdrantVectorConfig, type RelatedContextOptions, type RepoId, RepoIndexer, type RetrievalProfile, type RetrievalProfileName, type RetrievalScope, type RetrievalWeights, type RetrieveOptions, type SearchHit, type SearchOptions, type VectorConfig, type VectorIndex, type VectorIndexInit, type VectorMetric, type VectorPoint, type VectorProviderKind, type VectorSearchHit, WorkspaceIndexer, createVectorIndex, deepMergeProfile, loadConfigFile };
591
+ export { type ChunkRecord, type ContextBundle, type CustomVectorConfig, DEFAULT_PROFILES, type EmbeddingsProvider, type FaissVectorConfig, HashEmbeddingsProvider, type HnswlibVectorConfig, type IndexerConfig, type IndexerProgress, type IndexerProgressEvent, type IndexerProgressHandler, IndexerProgressObservable, type IndexerProgressSink, OllamaEmbeddingsProvider, OpenAIEmbeddingsProvider, type ProfilesConfig, type QdrantVectorConfig, type RelatedContextOptions, type RepoId, RepoIndexer, type RetrievalProfile, type RetrievalProfileName, type RetrievalScope, type RetrievalWeights, type RetrieveOptions, type SearchHit, type SearchOptions, type VectorConfig, type VectorIndex, type VectorIndexInit, type VectorMetric, type VectorPoint, type VectorProviderKind, type VectorSearchHit, WorkspaceIndexer, asProgressSink, createVectorIndex, deepMergeProfile, loadConfigFile };
package/dist/index.js CHANGED
@@ -1,21 +1,25 @@
1
1
  import {
2
2
  DEFAULT_PROFILES,
3
3
  HashEmbeddingsProvider,
4
+ IndexerProgressObservable,
4
5
  OllamaEmbeddingsProvider,
5
6
  OpenAIEmbeddingsProvider,
6
7
  RepoIndexer,
7
8
  WorkspaceIndexer,
9
+ asProgressSink,
8
10
  createVectorIndex,
9
11
  deepMergeProfile,
10
12
  loadConfigFile
11
- } from "./chunk-QPQCSCBN.js";
13
+ } from "./chunk-Z3BRYSQM.js";
12
14
  export {
13
15
  DEFAULT_PROFILES,
14
16
  HashEmbeddingsProvider,
17
+ IndexerProgressObservable,
15
18
  OllamaEmbeddingsProvider,
16
19
  OpenAIEmbeddingsProvider,
17
20
  RepoIndexer,
18
21
  WorkspaceIndexer,
22
+ asProgressSink,
19
23
  createVectorIndex,
20
24
  deepMergeProfile,
21
25
  loadConfigFile
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@neuralsea/workspace-indexer",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "Local-first multi-repo workspace indexer (semantic embeddings + git-aware incremental updates + hybrid retrieval profiles) for AI agents.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",