@proxysoul/soulforge 2.11.0 → 2.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23304,7 +23304,7 @@ var init_tree_sitter2 = __esm(() => {
23304
23304
  language
23305
23305
  };
23306
23306
  }
23307
- static MIN_HASH_LINES = 5;
23307
+ static MIN_HASH_LINES = 12;
23308
23308
  static HASHABLE_KEYWORDS = ["function", "method", "class", "impl", "struct", "trait", "module", "constructor"];
23309
23309
  static isHashableType(nodeType) {
23310
23310
  return TreeSitterBackend.HASHABLE_KEYWORDS.some((kw) => nodeType.includes(kw));
@@ -24194,6 +24194,14 @@ import { stat as statAsync } from "fs/promises";
24194
24194
  import { dirname as dirname2, extname as extname2, join as join6, relative, resolve as resolve4 } from "path";
24195
24195
 
24196
24196
  class RepoMap {
24197
+ static testFileMatch(alias = "f") {
24198
+ const a2 = alias;
24199
+ return `(${a2}.path LIKE 'tests/%' OR ${a2}.path LIKE 'test/%' OR ${a2}.path LIKE 'spec/%' OR ${a2}.path LIKE 'src/test/%' OR ${a2}.path LIKE '%.test.%' OR ${a2}.path LIKE '%.spec.%' OR ${a2}.path LIKE '%_test.%' OR ${a2}.path LIKE '%_spec.%' OR ${a2}.path LIKE '%/test_%' OR ${a2}.path LIKE '%/__tests__/%')`;
24200
+ }
24201
+ static notTestFile(alias = "f") {
24202
+ const a2 = alias;
24203
+ return `${a2}.path NOT LIKE 'tests/%' AND ${a2}.path NOT LIKE 'test/%' AND ${a2}.path NOT LIKE 'spec/%' AND ${a2}.path NOT LIKE 'src/test/%' AND ${a2}.path NOT LIKE '%.test.%' AND ${a2}.path NOT LIKE '%.spec.%' AND ${a2}.path NOT LIKE '%_test.%' AND ${a2}.path NOT LIKE '%_spec.%' AND ${a2}.path NOT LIKE '%/test_%' AND ${a2}.path NOT LIKE '%/__tests__/%'`;
24204
+ }
24197
24205
  db;
24198
24206
  cwd;
24199
24207
  scanPromise = null;
@@ -25370,11 +25378,8 @@ class RepoMap {
25370
25378
  }
25371
25379
  linkTestFiles() {
25372
25380
  const testFiles = this.db.query(`SELECT f.id, f.path FROM files f
25373
- WHERE (f.path LIKE '%.test.%' OR f.path LIKE '%.spec.%'
25374
- OR f.path LIKE '%_test.%' OR f.path LIKE '%_spec.%'
25375
- OR f.path LIKE 'tests/%' OR f.path LIKE 'test/%'
25376
- OR f.path LIKE '%/__tests__/%')
25377
- AND NOT EXISTS (SELECT 1 FROM edges WHERE source_file_id = f.id)`).all();
25381
+ WHERE ${RepoMap.testFileMatch()}
25382
+ AND NOT EXISTS (SELECT 1 FROM edges WHERE source_file_id = f.id)`).all();
25378
25383
  if (testFiles.length === 0)
25379
25384
  return;
25380
25385
  const insert = this.db.prepare("INSERT OR IGNORE INTO edges (source_file_id, target_file_id, weight, confidence) VALUES (?, ?, 1.0, 1)");
@@ -26835,7 +26840,7 @@ class RepoMap {
26835
26840
  WHERE s2.name = s.name AND s2.is_exported = 1
26836
26841
  ) = 1)
26837
26842
  )
26838
- AND (rf.path LIKE 'tests/%' OR rf.path LIKE '%.test.%' OR rf.path LIKE '%.spec.%' OR rf.path LIKE '%/__tests__/%')
26843
+ AND ${RepoMap.testFileMatch("rf")}
26839
26844
  )
26840
26845
  AND NOT EXISTS (
26841
26846
  SELECT 1 FROM refs r
@@ -26848,7 +26853,7 @@ class RepoMap {
26848
26853
  WHERE s2.name = s.name AND s2.is_exported = 1
26849
26854
  ) = 1)
26850
26855
  )
26851
- AND rf.path NOT LIKE 'tests/%' AND rf.path NOT LIKE '%.test.%' AND rf.path NOT LIKE '%.spec.%' AND rf.path NOT LIKE '%/__tests__/%'
26856
+ AND ${RepoMap.notTestFile("rf")}
26852
26857
  )
26853
26858
  ORDER BY f.path`).all().map((r4) => ({
26854
26859
  name: r4.name,
@@ -26984,13 +26989,14 @@ class RepoMap {
26984
26989
  });
26985
26990
  tx();
26986
26991
  }
26987
- getNearDuplicates(threshold = 0.7, limit = 20) {
26992
+ getNearDuplicates(threshold = 0.8, limit = 20) {
26988
26993
  if (!this.ready)
26989
26994
  return [];
26990
26995
  const rows = this.db.query(`SELECT ts.name, COALESCE(s.kind, '') as kind, s.signature as sig, f.path, ts.line, ts.end_line, ts.minhash
26991
26996
  FROM token_signatures ts
26992
26997
  JOIN files f ON f.id = ts.file_id
26993
26998
  LEFT JOIN symbols s ON s.file_id = ts.file_id AND s.name = ts.name AND s.line = ts.line
26999
+ WHERE ${RepoMap.notTestFile()}
26994
27000
  ORDER BY f.pagerank DESC
26995
27001
  LIMIT 500`).all();
26996
27002
  const pairs = [];
@@ -27070,8 +27076,9 @@ class RepoMap {
27070
27076
  FROM token_fragments tf
27071
27077
  JOIN files f ON f.id = tf.file_id
27072
27078
  WHERE tf.hash = ?
27079
+ AND ${RepoMap.notTestFile()}
27073
27080
  ORDER BY f.path, tf.line
27074
- LIMIT 20`).all(cluster.hash);
27081
+ LIMIT 20`).all(cluster.hash);
27075
27082
  const uniqueFiles = new Set(locs.map((l3) => `${l3.path}:${l3.name}`));
27076
27083
  if (uniqueFiles.size < 2)
27077
27084
  continue;
@@ -27089,13 +27096,15 @@ class RepoMap {
27089
27096
  getDuplicateStructures(limit = 20) {
27090
27097
  if (!this.ready)
27091
27098
  return [];
27092
- const clusters = this.db.query(`SELECT shape_hash, kind, node_count, COUNT(*) as cnt
27093
- FROM shape_hashes
27094
- WHERE node_count >= ?
27095
- GROUP BY shape_hash
27099
+ const clusters = this.db.query(`SELECT sh.shape_hash, sh.kind, sh.node_count, COUNT(*) as cnt
27100
+ FROM shape_hashes sh
27101
+ JOIN files f ON f.id = sh.file_id
27102
+ WHERE sh.node_count >= ?
27103
+ AND ${RepoMap.notTestFile()}
27104
+ GROUP BY sh.shape_hash
27096
27105
  HAVING cnt > 1
27097
- ORDER BY node_count * cnt DESC
27098
- LIMIT ?`).all(10, limit);
27106
+ ORDER BY sh.node_count * cnt DESC
27107
+ LIMIT ?`).all(20, limit);
27099
27108
  const results = [];
27100
27109
  for (const cluster of clusters) {
27101
27110
  const members = this.db.query(`SELECT sh.name, f.path, sh.line, sh.end_line
@@ -27123,22 +27132,84 @@ class RepoMap {
27123
27132
  const fileRow = this.db.query("SELECT id FROM files WHERE path = ?").get(relPath);
27124
27133
  if (!fileRow)
27125
27134
  return [];
27126
- const hashes = this.db.query("SELECT name, line, shape_hash FROM shape_hashes WHERE file_id = ?").all(fileRow.id);
27135
+ const rawSigs = this.db.query("SELECT name, line, end_line, minhash FROM token_signatures WHERE file_id = ?").all(fileRow.id);
27136
+ const seen = new Set;
27137
+ const sigs = rawSigs.filter((s2) => {
27138
+ const key2 = `${s2.name}:${String(s2.line)}`;
27139
+ if (seen.has(key2))
27140
+ return false;
27141
+ seen.add(key2);
27142
+ return true;
27143
+ });
27144
+ if (sigs.length === 0)
27145
+ return [];
27146
+ const candidates = this.db.query(`SELECT ts.name, f.path, ts.line, ts.end_line, ts.minhash, s.signature as sig, ts.file_id
27147
+ FROM token_signatures ts
27148
+ JOIN files f ON f.id = ts.file_id
27149
+ LEFT JOIN symbols s ON s.file_id = ts.file_id AND s.name = ts.name AND s.line = ts.line
27150
+ WHERE ts.file_id != ?
27151
+ AND ${RepoMap.notTestFile()}`).all(fileRow.id);
27152
+ const toSig = (buf) => {
27153
+ if (buf.byteOffset % 4 === 0) {
27154
+ return new Uint32Array(buf.buffer, buf.byteOffset, 128);
27155
+ }
27156
+ const copy = new Uint32Array(128);
27157
+ new Uint8Array(copy.buffer).set(new Uint8Array(buf.buffer, buf.byteOffset, 512));
27158
+ return copy;
27159
+ };
27127
27160
  const results = [];
27128
- for (const h2 of hashes) {
27129
- const clones = this.db.query(`SELECT sh.name, f.path, sh.line
27130
- FROM shape_hashes sh
27131
- JOIN files f ON f.id = sh.file_id
27132
- WHERE sh.shape_hash = ? AND sh.file_id != ?
27133
- ORDER BY f.pagerank DESC`).all(h2.shape_hash, fileRow.id);
27161
+ for (const s2 of sigs) {
27162
+ const sigA = toSig(s2.minhash);
27163
+ const clones = [];
27164
+ let bestSim = 0;
27165
+ for (const c of candidates) {
27166
+ if (c.path === relPath) {
27167
+ if (s2.line <= c.line && s2.end_line >= c.end_line)
27168
+ continue;
27169
+ if (c.line <= s2.line && c.end_line >= s2.end_line)
27170
+ continue;
27171
+ }
27172
+ const sigB = toSig(c.minhash);
27173
+ let matches2 = 0;
27174
+ for (let i4 = 0;i4 < 128; i4++) {
27175
+ if (sigA[i4] === sigB[i4])
27176
+ matches2++;
27177
+ }
27178
+ const sim = matches2 / 128;
27179
+ if (sim < 0.8 || sim >= 1)
27180
+ continue;
27181
+ if (sim < 0.95 && c.sig) {
27182
+ const aSig = this.db.query("SELECT signature FROM symbols WHERE file_id = ? AND name = ? AND line = ? LIMIT 1").get(fileRow.id, s2.name, s2.line);
27183
+ if (aSig?.signature) {
27184
+ const tokA = new Set(aSig.signature.toLowerCase().split(/[\s,(){}:;|&=<>]+/).filter(Boolean));
27185
+ const tokB = new Set(c.sig.toLowerCase().split(/[\s,(){}:;|&=<>]+/).filter(Boolean));
27186
+ let shared = 0;
27187
+ for (const t of tokA)
27188
+ if (tokB.has(t))
27189
+ shared++;
27190
+ const sigSim = shared / Math.max(tokA.size, tokB.size);
27191
+ if (sigSim < 0.3)
27192
+ continue;
27193
+ }
27194
+ }
27195
+ clones.push({
27196
+ name: c.name,
27197
+ path: c.path,
27198
+ line: c.line
27199
+ });
27200
+ if (sim > bestSim)
27201
+ bestSim = sim;
27202
+ }
27134
27203
  if (clones.length > 0) {
27135
27204
  results.push({
27136
- name: h2.name,
27137
- line: h2.line,
27138
- clones
27205
+ name: s2.name,
27206
+ line: s2.line,
27207
+ similarity: bestSim,
27208
+ clones: clones.slice(0, 5)
27139
27209
  });
27140
27210
  }
27141
27211
  }
27212
+ results.sort((a2, b3) => b3.similarity - a2.similarity);
27142
27213
  return results;
27143
27214
  }
27144
27215
  getTopFiles(limit = 20) {
@@ -29480,6 +29551,39 @@ var init_process_tracker = __esm(() => {
29480
29551
  trackedBun = new Set;
29481
29552
  });
29482
29553
 
29554
+ // src/core/intelligence/backends/lsp/pid-tracker.ts
29555
+ import { existsSync as existsSync8, mkdirSync as mkdirSync3, readFileSync as readFileSync5, renameSync, writeFileSync as writeFileSync4 } from "fs";
29556
+ import { homedir as homedir6 } from "os";
29557
+ import { join as join10 } from "path";
29558
+ function ensureDir() {
29559
+ const dir = join10(homedir6(), ".soulforge");
29560
+ if (!existsSync8(dir))
29561
+ mkdirSync3(dir, {
29562
+ recursive: true
29563
+ });
29564
+ }
29565
+ function flush() {
29566
+ try {
29567
+ ensureDir();
29568
+ const tmp = `${PID_FILE}.${String(process.pid)}.tmp`;
29569
+ writeFileSync4(tmp, JSON.stringify([...activePids]), "utf-8");
29570
+ renameSync(tmp, PID_FILE);
29571
+ } catch {}
29572
+ }
29573
+ function trackLspPid(pid) {
29574
+ activePids.add(pid);
29575
+ flush();
29576
+ }
29577
+ function untrackLspPid(pid) {
29578
+ activePids.delete(pid);
29579
+ flush();
29580
+ }
29581
+ var PID_FILE, activePids;
29582
+ var init_pid_tracker = __esm(() => {
29583
+ PID_FILE = join10(homedir6(), ".soulforge", "lsp-pids.json");
29584
+ activePids = new Set;
29585
+ });
29586
+
29483
29587
  // src/core/intelligence/backends/lsp/standalone-client.ts
29484
29588
  import { spawn } from "child_process";
29485
29589
  import { readFile as readFile3 } from "fs/promises";
@@ -29507,11 +29611,16 @@ class StandaloneLspClient {
29507
29611
  async start() {
29508
29612
  this.process = spawn(this.config.command, this.config.args, {
29509
29613
  stdio: ["pipe", "pipe", "pipe"],
29510
- cwd: this.cwd
29614
+ cwd: this.cwd,
29615
+ detached: true
29511
29616
  });
29512
29617
  trackProcess(this.process);
29618
+ if (this.process.pid)
29619
+ trackLspPid(this.process.pid);
29513
29620
  this.process.stdout?.on("data", (chunk) => this.onData(chunk));
29514
29621
  this.process.on("exit", (code, signal) => {
29622
+ if (this.process?.pid)
29623
+ untrackLspPid(this.process.pid);
29515
29624
  this.process = null;
29516
29625
  if (code != null && code !== 0) {
29517
29626
  logBackgroundError(`LSP:${this.config.command}`, `exited with code ${code}`);
@@ -30012,9 +30121,25 @@ class StandaloneLspClient {
30012
30121
  this.notify("exit", null);
30013
30122
  } catch {}
30014
30123
  const proc = this.process;
30015
- if (proc) {
30016
- setTimeout(() => proc.kill("SIGKILL"), 2000);
30017
- proc.kill("SIGTERM");
30124
+ const pid = proc?.pid;
30125
+ if (pid) {
30126
+ try {
30127
+ process.kill(-pid, "SIGTERM");
30128
+ } catch {
30129
+ try {
30130
+ proc.kill("SIGTERM");
30131
+ } catch {}
30132
+ }
30133
+ setTimeout(() => {
30134
+ try {
30135
+ process.kill(-pid, "SIGKILL");
30136
+ } catch {
30137
+ try {
30138
+ proc.kill("SIGKILL");
30139
+ } catch {}
30140
+ }
30141
+ }, 2000);
30142
+ untrackLspPid(pid);
30018
30143
  }
30019
30144
  this.process = null;
30020
30145
  this.initialized = false;
@@ -30050,6 +30175,7 @@ var LANGUAGE_ID_MAP;
30050
30175
  var init_standalone_client = __esm(() => {
30051
30176
  init_errors();
30052
30177
  init_process_tracker();
30178
+ init_pid_tracker();
30053
30179
  init_protocol();
30054
30180
  LANGUAGE_ID_MAP = {
30055
30181
  typescript: "typescript",
@@ -30087,9 +30213,9 @@ var exports_lsp = {};
30087
30213
  __export(exports_lsp, {
30088
30214
  LspBackend: () => LspBackend
30089
30215
  });
30090
- import { existsSync as existsSync8, readdirSync as readdirSync3 } from "fs";
30216
+ import { existsSync as existsSync9, readdirSync as readdirSync3 } from "fs";
30091
30217
  import { readdir as readdir2, readFile as readFile4 } from "fs/promises";
30092
- import { dirname as dirname3, join as join10, resolve as resolve5 } from "path";
30218
+ import { dirname as dirname3, join as join11, resolve as resolve5 } from "path";
30093
30219
  function getNvimBridge() {
30094
30220
  if (_nvimBridge === null) {
30095
30221
  try {
@@ -30116,18 +30242,18 @@ class LspBackend {
30116
30242
  async findAnchorFile() {
30117
30243
  const candidates = ["src/index.ts", "src/main.ts", "src/app.ts", "index.ts", "main.ts", "src/index.js", "src/main.js", "index.js", "main.py", "src/main.py", "main.go", "src/main.go", "src/lib.rs", "src/main.rs"];
30118
30244
  for (const candidate of candidates) {
30119
- const full = join10(this.cwd, candidate);
30120
- if (existsSync8(full))
30245
+ const full = join11(this.cwd, candidate);
30246
+ if (existsSync9(full))
30121
30247
  return full;
30122
30248
  }
30123
- for (const dir of [join10(this.cwd, "src"), this.cwd]) {
30124
- if (!existsSync8(dir))
30249
+ for (const dir of [join11(this.cwd, "src"), this.cwd]) {
30250
+ if (!existsSync9(dir))
30125
30251
  continue;
30126
30252
  try {
30127
30253
  const files = await readdir2(dir);
30128
30254
  const source = files.find((f3) => f3.endsWith(".ts") || f3.endsWith(".js") || f3.endsWith(".py") || f3.endsWith(".go") || f3.endsWith(".rs"));
30129
30255
  if (source)
30130
- return join10(dir, source);
30256
+ return join11(dir, source);
30131
30257
  } catch {}
30132
30258
  }
30133
30259
  return null;
@@ -31278,7 +31404,7 @@ function findProjectRootForLanguage(file, language) {
31278
31404
  } catch {}
31279
31405
  }
31280
31406
  for (const marker of nameMarkers) {
31281
- if (existsSync8(join10(dir, marker)))
31407
+ if (existsSync9(join11(dir, marker)))
31282
31408
  return dir;
31283
31409
  }
31284
31410
  dir = dirname3(dir);
@@ -201914,7 +202040,7 @@ var require_path_browserify = __commonJS((exports, module2) => {
201914
202040
  assertPath(path);
201915
202041
  return path.length > 0 && path.charCodeAt(0) === 47;
201916
202042
  },
201917
- join: function join11() {
202043
+ join: function join12() {
201918
202044
  if (arguments.length === 0)
201919
202045
  return ".";
201920
202046
  var joined;
@@ -30797,6 +30797,26 @@ function isStatFile(stat2) {
30797
30797
  }
30798
30798
  }
30799
30799
 
30800
+ // src/core/llm/model-utils.ts
30801
+ function inferModelGroup(modelId) {
30802
+ const id = modelId.toLowerCase();
30803
+ if (id.startsWith("claude"))
30804
+ return "anthropic";
30805
+ if (id.startsWith("gpt") || id.startsWith("o1-") || id.startsWith("o3-") || id.startsWith("o4-") || id.startsWith("chatgpt"))
30806
+ return "openai";
30807
+ if (id.startsWith("gemini"))
30808
+ return "google";
30809
+ if (id.startsWith("grok"))
30810
+ return "xai";
30811
+ if (id.startsWith("llama") || id.startsWith("meta-"))
30812
+ return "meta";
30813
+ if (id.startsWith("mistral") || id.startsWith("codestral") || id.startsWith("pixtral"))
30814
+ return "mistral";
30815
+ if (id.startsWith("deepseek"))
30816
+ return "deepseek";
30817
+ return "other";
30818
+ }
30819
+
30800
30820
  // src/core/workers/io.worker.ts
30801
30821
  var MAX_READ_LINES = 2000;
30802
30822
  var MAX_LINE_LENGTH = 2000;
@@ -31067,24 +31087,6 @@ var handlers = {
31067
31087
  function tc(s) {
31068
31088
  return s.charAt(0).toUpperCase() + s.slice(1);
31069
31089
  }
31070
- function inferGroup(modelId) {
31071
- const id = modelId.toLowerCase();
31072
- if (id.startsWith("claude"))
31073
- return "anthropic";
31074
- if (id.startsWith("gpt") || id.startsWith("o1-") || id.startsWith("o3-") || id.startsWith("o4-") || id.startsWith("chatgpt"))
31075
- return "openai";
31076
- if (id.startsWith("gemini"))
31077
- return "google";
31078
- if (id.startsWith("grok"))
31079
- return "xai";
31080
- if (id.startsWith("llama") || id.startsWith("meta-"))
31081
- return "meta";
31082
- if (id.startsWith("mistral") || id.startsWith("codestral") || id.startsWith("pixtral"))
31083
- return "mistral";
31084
- if (id.startsWith("deepseek"))
31085
- return "deepseek";
31086
- return "other";
31087
- }
31088
31090
  try {
31089
31091
  const res = await fetch(url2, {
31090
31092
  headers
@@ -31117,11 +31119,11 @@ var handlers = {
31117
31119
  const si = m.id.indexOf("/");
31118
31120
  group = si >= 0 ? m.id.slice(0, si).toLowerCase() : "other";
31119
31121
  } else if (isLG) {
31120
- group = m.family?.toLowerCase() || inferGroup(m.id);
31122
+ group = m.family?.toLowerCase() || inferModelGroup(m.id);
31121
31123
  } else {
31122
31124
  if (m.type && m.type !== "language")
31123
31125
  continue;
31124
- group = m.owned_by ?? inferGroup(m.id);
31126
+ group = m.owned_by ?? inferModelGroup(m.id);
31125
31127
  }
31126
31128
  const arr = gMap[group] || [];
31127
31129
  gMap[group] = arr;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@proxysoul/soulforge",
3
- "version": "2.11.0",
3
+ "version": "2.12.1",
4
4
  "description": "Graph-powered code intelligence — multi-agent coding with codebase-aware AI",
5
5
  "repository": {
6
6
  "type": "git",
@@ -82,6 +82,7 @@
82
82
  "ai": "6.0.154",
83
83
  "ghostty-opentui": "1.4.10",
84
84
  "isbinaryfile": "6.0.0",
85
+ "jsonrepair": "^3.13.3",
85
86
  "linkedom": "0.18.12",
86
87
  "linkify-it": "5.0.0",
87
88
  "marked": "17.0.5",