@neuralsea/workspace-indexer 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -0
- package/dist/browser/index.js +442 -0
- package/dist/{chunk-TQTWTPPG.js → chunk-7B5W6SSN.js} +101 -43
- package/dist/cli.cjs +100 -39
- package/dist/cli.js +1 -1
- package/dist/index.cjs +145 -84
- package/dist/index.d.cts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +1 -1
- package/package.json +14 -3
package/README.md
CHANGED
|
@@ -33,6 +33,18 @@ npm i @neuralsea/workspace-indexer
|
|
|
33
33
|
|
|
34
34
|
Node 18+ required.
|
|
35
35
|
|
|
36
|
+
Docs: `docs/README.md`
|
|
37
|
+
|
|
38
|
+
### Browser / VS Code webview
|
|
39
|
+
|
|
40
|
+
This package publishes a browser-safe entrypoint for use in browsers and VS Code webviews:
|
|
41
|
+
|
|
42
|
+
```ts
|
|
43
|
+
import { chunkSource, OpenAIEmbeddingsProvider } from "@neuralsea/workspace-indexer/browser";
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
The full indexer (`WorkspaceIndexer`, file watching, git scanning, sqlite-on-disk, etc.) is Node-only and should run in the VS Code extension host (send data to the webview via `postMessage`).
|
|
47
|
+
|
|
36
48
|
---
|
|
37
49
|
|
|
38
50
|
## Quick start (library)
|
|
@@ -0,0 +1,442 @@
|
|
|
1
|
+
// src/embeddings/openai.ts
/**
 * Embeddings provider backed by the OpenAI `/v1/embeddings` HTTP API.
 * `dimension` is unknown until the first successful `embed` call, at which
 * point it is inferred from the returned vectors.
 */
var OpenAIEmbeddingsProvider = class {
  id;
  dimension = null;
  constructor(opts) {
    this.opts = opts;
    this.id = `openai:${opts.model}`;
  }
  /**
   * Embed a batch of texts in a single API request.
   * Returns one Float32Array per input text; throws on any non-2xx response.
   */
  async embed(texts) {
    const baseUrl = this.opts.baseUrl ?? "https://api.openai.com";
    const payload = { model: this.opts.model, input: texts };
    const res = await fetch(`${baseUrl}/v1/embeddings`, {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${this.opts.apiKey}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify(payload)
    });
    if (!res.ok) {
      // Best-effort body read: the error text is purely diagnostic.
      const errText = await res.text().catch(() => "");
      throw new Error(`OpenAI embeddings failed: ${res.status} ${res.statusText} ${errText}`);
    }
    const json = await res.json();
    const vectors = json.data.map((d) => Float32Array.from(d.embedding));
    if (vectors.length > 0) this.dimension = vectors[0].length;
    return vectors;
  }
};
|
|
32
|
+
|
|
33
|
+
// src/embeddings/ollama.ts
|
|
34
|
+
import pLimit from "p-limit";
|
|
35
|
+
/**
 * Embeddings provider backed by a local Ollama server.
 * Prefers the batch `/api/embed` endpoint and falls back to per-text
 * `/api/embeddings` requests with bounded concurrency.
 */
var OllamaEmbeddingsProvider = class {
  id;
  dimension = null;
  baseUrl;
  model;
  concurrency;
  constructor(opts) {
    this.model = opts.model;
    this.baseUrl = opts.baseUrl ?? "http://localhost:11434";
    this.concurrency = opts.concurrency ?? 4;
    this.id = `ollama:${this.model}`;
  }
  /**
   * Attempt the newer batch endpoint. Returns null (instead of throwing)
   * when the request fails, the response is not OK, or the payload has no
   * `embeddings` field — signalling the caller to fall back.
   */
  async tryBatchEndpoint(texts) {
    const res = await fetch(`${this.baseUrl}/api/embed`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ model: this.model, input: texts })
    }).catch(() => null);
    if (!res?.ok) return null;
    const json = await res.json();
    if (!json.embeddings) return null;
    const vectors = json.embeddings.map((v) => Float32Array.from(v));
    if (vectors.length > 0) this.dimension = vectors[0].length;
    return vectors;
  }
  /** Embed a single text via the legacy endpoint; throws on HTTP errors. */
  async embedOne(text) {
    const res = await fetch(`${this.baseUrl}/api/embeddings`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ model: this.model, prompt: text })
    });
    if (!res.ok) {
      const errText = await res.text().catch(() => "");
      throw new Error(`Ollama embeddings failed: ${res.status} ${res.statusText} ${errText}`);
    }
    const json = await res.json();
    const vector = Float32Array.from(json.embedding);
    this.dimension = vector.length;
    return vector;
  }
  /** Embed a batch: batch endpoint first, else concurrency-limited fan-out. */
  async embed(texts) {
    const batched = await this.tryBatchEndpoint(texts);
    if (batched) return batched;
    const limit = pLimit(this.concurrency);
    const jobs = texts.map((text) => limit(() => this.embedOne(text)));
    return Promise.all(jobs);
  }
};
|
|
84
|
+
|
|
85
|
+
// src/embeddings/hash.ts
|
|
86
|
+
import { sha256 } from "@noble/hashes/sha256";
|
|
87
|
+
/**
 * Deterministic, offline "embeddings": a signed feature-hashing bag-of-tokens
 * vector, L2-normalised. Useful for tests and air-gapped setups where no real
 * embedding model is available.
 */
var HashEmbeddingsProvider = class {
  id;
  dimension;
  constructor(dimension = 384) {
    this.dimension = dimension;
    this.id = `hash:${dimension}`;
  }
  /** Embed each text independently; purely CPU-bound and deterministic. */
  async embed(texts) {
    return texts.map((t) => this.embedOne(t));
  }
  embedOne(text) {
    const v = new Float32Array(this.dimension);
    // Fix: hoist the encoder out of the token loop — the original allocated
    // a fresh TextEncoder per token, which is pure overhead.
    const encoder = new TextEncoder();
    // Identifier-ish tokens only, capped at 6000 to bound hashing cost.
    const tokens = text.split(/[^A-Za-z0-9_]+/).filter(Boolean).slice(0, 6e3);
    for (const tok of tokens) {
      const h = sha256(encoder.encode(tok));
      // First 4 digest bytes -> unsigned 32-bit bucket index.
      const u32 = ((h[0] ?? 0) | (h[1] ?? 0) << 8 | (h[2] ?? 0) << 16 | (h[3] ?? 0) << 24) >>> 0;
      const idx = u32 % this.dimension;
      // Byte 4's low bit picks the sign (reduces hash-collision bias).
      const sign = (h[4] ?? 0) & 1 ? 1 : -1;
      v[idx] += sign;
    }
    // L2-normalise; `|| 1` avoids division by zero for empty/all-cancelled input.
    let sumSq = 0;
    for (let i = 0; i < v.length; i++) sumSq += v[i] * v[i];
    const norm = Math.sqrt(sumSq) || 1;
    for (let i = 0; i < v.length; i++) v[i] /= norm;
    return v;
  }
};
|
|
114
|
+
|
|
115
|
+
// src/profiles.ts
// Built-in retrieval profiles, one per intended query workload.
//  - k: number of final results returned to the caller
//  - weights: relative contribution of vector, lexical (BM25) and recency
//    scores when ranking merged candidates (each set sums to 1 here —
//    NOTE(review): whether the ranker requires that is not visible from this file)
//  - expand: post-retrieval context expansion (neighbouring chunks, import
//    following, optional per-file synopsis)
//  - candidates: per-retriever candidate pool sizes before merging/ranking
var DEFAULT_PROFILES = {
  // Fast, precise lookup: no expansion, small candidate pools.
  search: {
    name: "search",
    k: 10,
    weights: { vector: 0.65, lexical: 0.35, recency: 0 },
    expand: { adjacentChunks: 0, followImports: 0, includeFileSynopsis: false },
    candidates: { vectorK: 25, lexicalK: 25, maxMergedCandidates: 60 }
  },
  // Refactoring: some recency bias, follow imports two hops for call sites.
  refactor: {
    name: "refactor",
    k: 15,
    weights: { vector: 0.55, lexical: 0.35, recency: 0.1 },
    expand: { adjacentChunks: 1, followImports: 2, includeFileSynopsis: true },
    candidates: { vectorK: 60, lexicalK: 40, maxMergedCandidates: 140 }
  },
  // Code review: strongest recency weighting (recently touched code matters).
  review: {
    name: "review",
    k: 20,
    weights: { vector: 0.45, lexical: 0.35, recency: 0.2 },
    expand: { adjacentChunks: 1, followImports: 1, includeFileSynopsis: true },
    candidates: { vectorK: 80, lexicalK: 60, maxMergedCandidates: 180 }
  },
  // Architecture questions: semantic-heavy, deep import following, big pools.
  architecture: {
    name: "architecture",
    k: 20,
    weights: { vector: 0.7, lexical: 0.2, recency: 0.1 },
    expand: { adjacentChunks: 0, followImports: 3, includeFileSynopsis: true },
    candidates: { vectorK: 120, lexicalK: 40, maxMergedCandidates: 220 }
  },
  // Root-cause analysis: widest net — most results, most context expansion.
  rca: {
    name: "rca",
    k: 25,
    weights: { vector: 0.5, lexical: 0.25, recency: 0.25 },
    expand: { adjacentChunks: 2, followImports: 1, includeFileSynopsis: true },
    candidates: { vectorK: 140, lexicalK: 80, maxMergedCandidates: 260 }
  },
  // Baseline for user overrides; identical to "search" (see deepMergeProfile).
  custom: {
    name: "custom",
    k: 10,
    weights: { vector: 0.65, lexical: 0.35, recency: 0 },
    expand: { adjacentChunks: 0, followImports: 0, includeFileSynopsis: false },
    candidates: { vectorK: 25, lexicalK: 25, maxMergedCandidates: 60 }
  }
};
|
|
160
|
+
/**
 * Overlay a partial profile patch onto a base profile.
 * Top-level scalar fields are shallow-merged; the three nested option
 * objects (weights/expand/candidates) are merged one level deep so a patch
 * can override a single sub-field without clobbering its siblings.
 * Returns the base object unchanged (same reference) when patch is falsy.
 */
function deepMergeProfile(base, patch) {
  if (!patch) return base;
  const weights = { ...base.weights, ...patch.weights ?? {} };
  const expand = { ...base.expand, ...patch.expand ?? {} };
  const candidates = { ...base.candidates, ...patch.candidates ?? {} };
  return { ...base, ...patch, weights, expand, candidates };
}
|
|
171
|
+
|
|
172
|
+
// src/util.ts
|
|
173
|
+
import { sha256 as sha2562 } from "@noble/hashes/sha256";
|
|
174
|
+
import { bytesToHex } from "@noble/hashes/utils";
|
|
175
|
+
/** Coerce a string to UTF-8 bytes; pass byte arrays through untouched. */
function toBytes(data) {
  if (typeof data === "string") return new TextEncoder().encode(data);
  return data;
}
/** Hex-encoded SHA-256 of a string or byte array. */
function sha256Hex(data) {
  const digest = sha2562(toBytes(data));
  return bytesToHex(digest);
}
|
|
181
|
+
/** Convert Windows-style backslash separators to POSIX forward slashes. */
function toPosixPath(p) {
  return p.replaceAll("\\", "/");
}
|
|
184
|
+
/**
 * Convert a POSIX path back to the host's native separators.
 * Only rewrites on Windows; elsewhere (including non-Node runtimes where
 * `process` is undefined) the path is returned as-is.
 */
function fromPosixPath(p) {
  const onWindows = typeof process !== "undefined" && process.platform === "win32";
  return onWindows ? p.replaceAll("/", "\\") : p;
}
|
|
188
|
+
/**
 * Cheap token-count estimate: ~4 characters per token, never less than 1.
 * A heuristic, not a tokenizer — used only for budgeting chunk sizes.
 */
function estimateTokens(text) {
  const approx = Math.ceil(text.length / 4);
  return approx > 1 ? approx : 1;
}
|
|
191
|
+
/**
 * Return a new, L2-normalised copy of the vector.
 * The `|| 1` guard keeps an all-zero vector all-zero instead of NaN.
 */
function normalise(vec) {
  let total = 0;
  for (let i = 0; i < vec.length; i++) total += vec[i] * vec[i];
  const length = Math.sqrt(total) || 1;
  return Float32Array.from(vec, (x) => x / length);
}
|
|
199
|
+
/**
 * Dot product over the overlapping prefix of two vectors.
 * Truncates to the shorter length rather than throwing on a mismatch.
 */
function dot(a, b) {
  const len = Math.min(a.length, b.length);
  let acc = 0;
  for (let idx = 0; idx < len; idx++) acc += a[idx] * b[idx];
  return acc;
}
|
|
205
|
+
/** Clamp x into [lo, hi] (assumes lo <= hi); NaN passes through as NaN. */
function clamp(x, lo, hi) {
  if (x < lo) return lo;
  if (x > hi) return hi;
  return x;
}
|
|
208
|
+
/**
 * Collapse all whitespace runs to single spaces and truncate to maxLen,
 * replacing the final character with an ellipsis when truncation happens.
 */
function makePreview(text, maxLen = 240) {
  const flat = text.replace(/\s+/g, " ").trim();
  if (flat.length <= maxLen) return flat;
  return flat.slice(0, maxLen - 1) + "\u2026";
}
|
|
212
|
+
/** Current wall-clock time in milliseconds (single seam for testability). */
function nowMs() {
  return Date.now();
}
/**
 * Exponential-decay freshness score in [0, 1]: 1 for a file touched right
 * now, 0.5 after one half-life (14 days by default), approaching 0 with age.
 * Future mtimes are treated as age zero.
 */
function recencyScore(fileMtimeMs, halfLifeDays = 14) {
  const ageMs = Math.max(0, nowMs() - fileMtimeMs);
  const halfLifeMs = Math.max(1, halfLifeDays * 24 * 60 * 60 * 1e3);
  const score = 0.5 ** (ageMs / halfLifeMs);
  return Math.min(1, Math.max(0, score));
}
|
|
221
|
+
|
|
222
|
+
// src/retrieval/fts.ts
/**
 * Build an SQLite FTS5 MATCH expression from free text: up to 24
 * identifier-like tokens (>= 2 chars), each as a prefix query, OR-joined.
 * Returns "" when the input yields no usable tokens.
 */
function ftsQueryFromText(input) {
  const matched = input.match(/[A-Za-z0-9_]{2,}/g) ?? [];
  const tokens = matched.slice(0, 24);
  if (!tokens.length) return "";
  // The quote-escape is defensive; the token regex cannot actually match `"`.
  return tokens.map((tok) => `${tok.replace(/"/g, '""')}*`).join(" OR ");
}
|
|
232
|
+
/**
 * Map an SQLite bm25() rank (lower = better, can be negative/NaN) onto
 * (0, 1], where 1 is the best possible rank. Non-finite input scores 0.
 */
function bm25ToScore01(bm25) {
  if (!Number.isFinite(bm25)) return 0;
  const raw = 1 / (1 + Math.max(0, bm25));
  return Math.min(1, Math.max(0, raw));
}
|
|
237
|
+
/**
 * Map cosine similarity from [-1, 1] onto [0, 1] linearly.
 * Non-finite input scores 0; out-of-range input is clamped.
 */
function vectorCosineToScore01(cosine) {
  if (!Number.isFinite(cosine)) return 0;
  const shifted = (cosine + 1) / 2;
  return Math.min(1, Math.max(0, shifted));
}
|
|
241
|
+
|
|
242
|
+
// src/retrieval/tokens.ts
/** Order-preserving dedupe that also drops falsy entries. */
function uniq(xs) {
  const seen = new Set();
  const result = [];
  for (const item of xs) {
    if (!item || seen.has(item)) continue;
    seen.add(item);
    result.push(item);
  }
  return result;
}
/**
 * Extract up to maxTokens unique identifier-like tokens (>= 2 chars,
 * starting with a letter or underscore) and join them with single spaces —
 * the lexical document fed to the FTS index.
 */
function extractLexicalTokens(text, maxTokens = 2500) {
  const raw = text.match(/[A-Za-z_][A-Za-z0-9_]{1,}/g) ?? [];
  return uniq(raw).slice(0, maxTokens).join(" ");
}
|
|
259
|
+
|
|
260
|
+
// src/progress.ts
/**
 * Normalise a caller-supplied progress argument into a plain event handler.
 * Accepts a bare function or any object exposing `emit(event)`; anything
 * else (including null/undefined) yields null.
 */
function toHandler(progress) {
  if (typeof progress === "function") return progress;
  if (typeof progress?.emit === "function") return (e) => progress.emit(e);
  return null;
}
/** Minimal synchronous event bus for indexer progress events. */
var IndexerProgressObservable = class {
  handlers = /* @__PURE__ */ new Set();
  /** Register a handler for every event; returns an unsubscribe function. */
  subscribe(handler) {
    this.handlers.add(handler);
    return () => this.handlers.delete(handler);
  }
  /** Register a handler for one event type only; returns unsubscribe. */
  on(type, handler) {
    return this.subscribe((e) => {
      if (e.type === type) handler(e);
    });
  }
  /** Broadcast an event; a throwing handler never breaks the other handlers. */
  emit(event) {
    for (const h of this.handlers) {
      try {
        h(event);
      } catch {
        // deliberately swallowed: observers must not break the indexer
      }
    }
  }
};
/** Wrap a progress argument as an `{ emit }` sink, or null if unusable. */
function asProgressSink(progress) {
  const handler = toHandler(progress);
  return handler ? { emit: handler } : null;
}
|
|
294
|
+
|
|
295
|
+
// src/vector/bruteforce.ts
// Exact in-memory nearest-neighbour index: every vector is L2-normalised on
// insert and each search scans all stored vectors (O(n) per query, dot
// product == cosine on normalised vectors). Intended for small/medium sets
// where exactness beats ANN complexity.
var BruteForceVectorIndex = class {
  kind = "bruteforce";
  metric = "cosine";
  dimension = 0;
  // Parallel arrays: ids[i] pairs with vecs[i]; pos maps id -> array index.
  ids = [];
  vecs = [];
  pos = /* @__PURE__ */ new Map();
  // Record the configured metric/dimension. NOTE(review): metric is stored
  // but scoring below is always normalised-dot (cosine) — confirm callers
  // never pass a different metric expecting different behaviour.
  async init(init) {
    this.metric = init.metric;
    this.dimension = init.dimension;
  }
  async upsert(points) {
    for (const p of points) this.upsertOne(p.id, p.vector);
  }
  // Insert or overwrite one vector; normalised here so search can use a
  // plain dot product.
  upsertOne(id, vec) {
    const v = normalise(vec);
    const existing = this.pos.get(id);
    if (existing !== void 0) {
      this.vecs[existing] = v;
      return;
    }
    this.pos.set(id, this.ids.length);
    this.ids.push(id);
    this.vecs.push(v);
  }
  async remove(ids) {
    for (const id of ids) this.removeOne(id);
  }
  // Swap-remove: move the last entry into the vacated slot so both arrays
  // stay dense without an O(n) shift; missing ids are a no-op.
  removeOne(id) {
    const i = this.pos.get(id);
    if (i === void 0) return;
    const last = this.ids.length - 1;
    if (i !== last) {
      const lastId = this.ids[last];
      this.ids[i] = lastId;
      this.vecs[i] = this.vecs[last];
      this.pos.set(lastId, i);
    }
    this.ids.pop();
    this.vecs.pop();
    this.pos.delete(id);
  }
  // Drop everything and re-insert from scratch.
  async rebuild(points) {
    this.ids = [];
    this.vecs = [];
    this.pos = /* @__PURE__ */ new Map();
    for (const p of points) this.upsertOne(p.id, p.vector);
  }
  // Full linear scan keeping a sorted top-k buffer; the re-sort after each
  // insertion costs O(k log k) but k is expected to be small. Results are
  // returned best-first.
  async search(query, k) {
    const q = normalise(query);
    const top = [];
    for (let i = 0; i < this.ids.length; i++) {
      const s = dot(q, this.vecs[i]);
      if (top.length < k) {
        top.push({ id: this.ids[i], score: s });
        top.sort((a, b) => b.score - a.score);
      } else if (s > top[top.length - 1].score) {
        // Replace the current worst entry and restore order.
        top[top.length - 1] = { id: this.ids[i], score: s };
        top.sort((a, b) => b.score - a.score);
      }
    }
    return top;
  }
  async count() {
    return this.ids.length;
  }
  // No-ops: nothing is persisted, so there is nothing to flush or close.
  async flush() {
  }
  async close() {
  }
};
|
|
367
|
+
|
|
368
|
+
// src/browser/chunker.ts
/**
 * Infer a language tag from a file path's (lowercased) extension.
 * Unknown or missing extensions map to "text". A Map is used for the lookup
 * so hostile extensions like "constructor" cannot hit Object.prototype.
 */
function languageFromPath(posixPath) {
  const ext = posixPath.toLowerCase().split(".").pop() ?? "";
  const byExt = new Map([
    ["ts", "typescript"], ["tsx", "typescript"],
    ["js", "javascript"], ["jsx", "javascript"], ["mjs", "javascript"], ["cjs", "javascript"],
    ["py", "python"],
    ["go", "go"],
    ["ctl", "ctl"], ["ltl", "ltl"], ["isl", "isl"], ["ispec", "isl"],
    ["ca", "colour-algebra"], ["cpnexpr", "colour-algebra"],
    ["rs", "rust"],
    ["java", "java"],
    ["kt", "kotlin"], ["kts", "kotlin"],
    ["cs", "csharp"],
    ["md", "markdown"],
    ["json", "config"], ["yaml", "config"], ["yml", "config"], ["toml", "config"]
  ]);
  return byExt.get(ext) ?? "text";
}
|
|
386
|
+
/**
 * Split source text into overlapping line windows of at most cfg.maxLines
 * lines, stepping by (maxLines - overlapLines), minimum step 1. Whitespace-
 * only windows are skipped. startLine/endLine are 1-based and inclusive.
 */
function chunkByLines(sourceText, cfg) {
  const lines = sourceText.split(/\r?\n/);
  const step = Math.max(1, cfg.maxLines - cfg.overlapLines);
  const chunks = [];
  let start = 0;
  while (start < lines.length) {
    const end = Math.min(lines.length, start + cfg.maxLines);
    const text = lines.slice(start, end).join("\n");
    if (text.trim()) {
      chunks.push({
        startLine: start + 1,
        endLine: end,
        text,
        contentHash: sha256Hex(text),
        tokens: estimateTokens(text)
      });
    }
    start += step;
  }
  return chunks;
}
|
|
404
|
+
/**
 * Re-chunk a single oversized chunk (text longer than cfg.maxChars) with the
 * same line-window strategy, rebasing the sub-chunks' line numbers onto the
 * parent chunk's position in the file. Small chunks pass through unchanged.
 */
function splitIfTooLarge(ch, cfg) {
  if (ch.text.length <= cfg.maxChars) return [ch];
  const offset = ch.startLine - 1;
  return chunkByLines(ch.text, cfg).map((sub) => ({
    ...sub,
    startLine: sub.startLine + offset,
    endLine: sub.endLine + offset
  }));
}
|
|
412
|
+
/**
 * Chunk a source file for indexing: detect its language from the path, cut
 * it into line windows, then split any window that exceeds cfg.maxChars.
 */
function chunkSource(posixPath, sourceText, cfg) {
  const chunks = [];
  for (const ch of chunkByLines(sourceText, cfg)) {
    chunks.push(...splitIfTooLarge(ch, cfg));
  }
  return { language: languageFromPath(posixPath), chunks };
}
|
|
417
|
+
export {
|
|
418
|
+
BruteForceVectorIndex,
|
|
419
|
+
DEFAULT_PROFILES,
|
|
420
|
+
HashEmbeddingsProvider,
|
|
421
|
+
IndexerProgressObservable,
|
|
422
|
+
OllamaEmbeddingsProvider,
|
|
423
|
+
OpenAIEmbeddingsProvider,
|
|
424
|
+
asProgressSink,
|
|
425
|
+
bm25ToScore01,
|
|
426
|
+
chunkSource,
|
|
427
|
+
clamp,
|
|
428
|
+
deepMergeProfile,
|
|
429
|
+
dot,
|
|
430
|
+
estimateTokens,
|
|
431
|
+
extractLexicalTokens,
|
|
432
|
+
fromPosixPath,
|
|
433
|
+
ftsQueryFromText,
|
|
434
|
+
languageFromPath,
|
|
435
|
+
makePreview,
|
|
436
|
+
normalise,
|
|
437
|
+
nowMs,
|
|
438
|
+
recencyScore,
|
|
439
|
+
sha256Hex,
|
|
440
|
+
toPosixPath,
|
|
441
|
+
vectorCosineToScore01
|
|
442
|
+
};
|