npm - @jefuriiij/synthra - Versions diffs - 0.2.0 → 0.2.1 - Mend

@jefuriiij/synthra 0.2.0 → 0.2.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/CHANGELOG.md +16 -0
package/dist/cli/index.js +31 -3
package/dist/cli/index.js.map +1 -1
package/dist/dashboard/index.js +1 -1
package/dist/dashboard/index.js.map +1 -1
package/dist/server/index.js +30 -2
package/dist/server/index.js.map +1 -1
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -7,6 +7,22 @@ For older versions, see [GitHub Releases](https://github.com/jefuriiij/synthra/r
 ---
+## [0.2.1] — 2026-06-06
+### Changed
+- **Keyword retrieval is now IDF-weighted (BM25's term-rarity component).** A
+  query token that's rare across the repo counts for more than a common one, so
+  on a multi-term query the files matching the *specific* terms rank above those
+  matching generic ones — instead of every keyword match counting the same. The
+  weighting is normalized to the query's mean IDF, so a typical match scores the
+  same as before: overall ranking magnitude — and the confidence / Moat gating
+  that depends on it — is unchanged. Purely an in-repo ranking refinement, no API
+  or data-model change. (TF-saturation / length-norm parts of full BM25 don't
+  apply to the deduped top-N keyword representation.)
+---
 ## [0.2.0] — 2026-06-06
 ### Added

package/dist/cli/index.js CHANGED Viewed

@@ -18,7 +18,7 @@ var init_package = __esm({
   "package.json"() {
     package_default = {
       name: "@jefuriiij/synthra",
-      version: "0.2.0",
+      version: "0.2.1",
       publishConfig: {
         access: "public"
       },
@@ -3880,6 +3880,7 @@ import { appendFile as appendFile3, mkdir as mkdir9 } from "fs/promises";
 import { dirname as dirname10 } from "path";
 // src/graph/rank.ts
+var KW_BASE_WEIGHT = 2;
 var USAGE_BOOST_CAP_DEFAULT = 4;
 function usageBoostCap() {
   const env = Number(process.env.SYN_LEARN_BOOST_CAP);
@@ -3969,14 +3970,41 @@ function scoreFiles(inputs) {
   const importsFrom = indexImportEdges(inputs.graph);
   const seeds = new Set(inputs.sessionKnownPaths ?? []);
   for (const p of inputs.recentlyEditedPaths ?? []) seeds.add(p);
+  const corpusSize = inputs.candidates.length;
+  const queryDf = /* @__PURE__ */ new Map();
+  for (const f of inputs.candidates) {
+    for (const kw of f.keywords) {
+      if (qTokens.has(kw)) queryDf.set(kw, (queryDf.get(kw) ?? 0) + 1);
+    }
+  }
+  const idf = (token) => {
+    const n = queryDf.get(token) ?? 0;
+    if (n <= 0) return 0;
+    return Math.log(1 + (corpusSize - n + 0.5) / (n + 0.5));
+  };
+  let idfSum = 0;
+  let idfCount = 0;
+  for (const t of qTokens) {
+    const v = idf(t);
+    if (v > 0) {
+      idfSum += v;
+      idfCount += 1;
+    }
+  }
+  const refIdf = idfCount > 0 ? idfSum / idfCount : 1;
   const scored = [];
   for (const file of inputs.candidates) {
     const reasons = [];
     let score2 = 0;
     let kwHits = 0;
-    for (const kw of file.keywords) if (qTokens.has(kw)) kwHits += 1;
+    let kwScore = 0;
+    for (const kw of file.keywords) {
+      if (!qTokens.has(kw)) continue;
+      kwHits += 1;
+      kwScore += KW_BASE_WEIGHT * (idf(kw) / refIdf);
+    }
     if (kwHits) {
-      score2 += kwHits * 2;
+      score2 += kwScore;
       reasons.push(`kw=${kwHits}`);
     }
     const symbols = symbolsByFile.get(file.path) ?? [];