@jefuriiij/synthra 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,6 +7,22 @@ For older versions, see [GitHub Releases](https://github.com/jefuriiij/synthra/r
7
7
 
8
8
  ---
9
9
 
10
+ ## [0.2.1] — 2026-06-06
11
+
12
+ ### Changed
13
+
14
+ - **Keyword retrieval is now IDF-weighted (BM25's term-rarity component).** A
15
+ query token that's rare across the repo counts for more than a common one, so
16
+ on a multi-term query the files matching the *specific* terms rank above those
17
+ matching generic ones — instead of every keyword match counting the same. The
18
+ weighting is normalized to the mean IDF of the query tokens that match at least one file, so a typical match scores the
19
+ same as before: overall ranking magnitude — and the confidence / Moat gating
20
+ that depends on it — is unchanged. Purely an in-repo ranking refinement, no API
21
+ or data-model change. (TF-saturation / length-norm parts of full BM25 don't
22
+ apply to the deduped top-N keyword representation.)
23
+
24
+ ---
25
+
10
26
  ## [0.2.0] — 2026-06-06
11
27
 
12
28
  ### Added
package/dist/cli/index.js CHANGED
@@ -18,7 +18,7 @@ var init_package = __esm({
18
18
  "package.json"() {
19
19
  package_default = {
20
20
  name: "@jefuriiij/synthra",
21
- version: "0.2.0",
21
+ version: "0.2.1",
22
22
  publishConfig: {
23
23
  access: "public"
24
24
  },
@@ -3880,6 +3880,7 @@ import { appendFile as appendFile3, mkdir as mkdir9 } from "fs/promises";
3880
3880
  import { dirname as dirname10 } from "path";
3881
3881
 
3882
3882
  // src/graph/rank.ts
3883
+ var KW_BASE_WEIGHT = 2;
3883
3884
  var USAGE_BOOST_CAP_DEFAULT = 4;
3884
3885
  function usageBoostCap() {
3885
3886
  const env = Number(process.env.SYN_LEARN_BOOST_CAP);
@@ -3969,14 +3970,41 @@ function scoreFiles(inputs) {
3969
3970
  const importsFrom = indexImportEdges(inputs.graph);
3970
3971
  const seeds = new Set(inputs.sessionKnownPaths ?? []);
3971
3972
  for (const p of inputs.recentlyEditedPaths ?? []) seeds.add(p);
3973
+ const corpusSize = inputs.candidates.length;
3974
+ const queryDf = /* @__PURE__ */ new Map();
3975
+ for (const f of inputs.candidates) {
3976
+ for (const kw of f.keywords) {
3977
+ if (qTokens.has(kw)) queryDf.set(kw, (queryDf.get(kw) ?? 0) + 1);
3978
+ }
3979
+ }
3980
+ const idf = (token) => {
3981
+ const n = queryDf.get(token) ?? 0;
3982
+ if (n <= 0) return 0;
3983
+ return Math.log(1 + (corpusSize - n + 0.5) / (n + 0.5));
3984
+ };
3985
+ let idfSum = 0;
3986
+ let idfCount = 0;
3987
+ for (const t of qTokens) {
3988
+ const v = idf(t);
3989
+ if (v > 0) {
3990
+ idfSum += v;
3991
+ idfCount += 1;
3992
+ }
3993
+ }
3994
+ const refIdf = idfCount > 0 ? idfSum / idfCount : 1;
3972
3995
  const scored = [];
3973
3996
  for (const file of inputs.candidates) {
3974
3997
  const reasons = [];
3975
3998
  let score2 = 0;
3976
3999
  let kwHits = 0;
3977
- for (const kw of file.keywords) if (qTokens.has(kw)) kwHits += 1;
4000
+ let kwScore = 0;
4001
+ for (const kw of file.keywords) {
4002
+ if (!qTokens.has(kw)) continue;
4003
+ kwHits += 1;
4004
+ kwScore += KW_BASE_WEIGHT * (idf(kw) / refIdf);
4005
+ }
3978
4006
  if (kwHits) {
3979
- score2 += kwHits * 2;
4007
+ score2 += kwScore;
3980
4008
  reasons.push(`kw=${kwHits}`);
3981
4009
  }
3982
4010
  const symbols = symbolsByFile.get(file.path) ?? [];