kiri-mcp-server 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/README.md +58 -5
  2. package/config/default.example.yml +9 -0
  3. package/config/scoring-profiles.yml +11 -6
  4. package/dist/config/default.example.yml +9 -0
  5. package/dist/config/scoring-profiles.yml +11 -6
  6. package/dist/package.json +1 -1
  7. package/dist/server/context.js +0 -1
  8. package/dist/server/handlers.js +547 -79
  9. package/dist/server/scoring.js +8 -3
  10. package/dist/shared/duckdb.js +0 -2
  11. package/dist/shared/embedding.js +15 -2
  12. package/dist/shared/tokenizer.js +0 -1
  13. package/dist/shared/utils/simpleYaml.js +0 -1
  14. package/dist/src/server/handlers.d.ts.map +1 -1
  15. package/dist/src/server/handlers.js +234 -26
  16. package/dist/src/server/handlers.js.map +1 -1
  17. package/dist/src/server/rpc.d.ts.map +1 -1
  18. package/dist/src/server/rpc.js +9 -3
  19. package/dist/src/server/rpc.js.map +1 -1
  20. package/dist/src/server/scoring.d.ts +2 -0
  21. package/dist/src/server/scoring.d.ts.map +1 -1
  22. package/dist/src/server/scoring.js +13 -1
  23. package/dist/src/server/scoring.js.map +1 -1
  24. package/dist/src/shared/duckdb.d.ts +1 -0
  25. package/dist/src/shared/duckdb.d.ts.map +1 -1
  26. package/dist/src/shared/duckdb.js +54 -3
  27. package/dist/src/shared/duckdb.js.map +1 -1
  28. package/dist/src/shared/embedding.d.ts.map +1 -1
  29. package/dist/src/shared/embedding.js +2 -8
  30. package/dist/src/shared/embedding.js.map +1 -1
  31. package/dist/src/shared/tokenizer.d.ts +18 -0
  32. package/dist/src/shared/tokenizer.d.ts.map +1 -1
  33. package/dist/src/shared/tokenizer.js +35 -0
  34. package/dist/src/shared/tokenizer.js.map +1 -1
  35. package/package.json +1 -1
@@ -12,7 +12,7 @@ function validateWeights(weights, profileName) {
12
12
  if (typeof weights !== "object" || weights === null) {
13
13
  throw new Error(`Profile '${profileName}' must be an object`);
14
14
  }
15
- const required = ["textMatch", "editingPath", "dependency", "proximity", "structural"];
15
+ const required = ["textMatch", "pathMatch", "editingPath", "dependency", "proximity", "structural"];
16
16
  const obj = weights;
17
17
  for (const key of required) {
18
18
  const value = obj[key];
@@ -28,8 +28,9 @@ function loadProfilesFromConfig() {
28
28
  }
29
29
  try {
30
30
  // 環境変数でカスタムパスを指定可能
31
+ // 本番環境(npm install)では dist/config/ を、開発環境では config/ を参照
31
32
  const configPath = process.env.KIRI_SCORING_CONFIG ??
32
- join(fileURLToPath(import.meta.url), "../../../config/scoring-profiles.yml");
33
+ join(fileURLToPath(import.meta.url), "../../config/scoring-profiles.yml");
33
34
  const configContent = readFileSync(configPath, "utf-8");
34
35
  const parsed = parseSimpleYaml(configContent);
35
36
  // 必須プロファイルの検証とウェイトのバリデーション
@@ -56,6 +57,7 @@ function loadProfilesFromConfig() {
56
57
  profilesCache = {
57
58
  default: {
58
59
  textMatch: 1.0,
60
+ pathMatch: 1.5,
59
61
  editingPath: 2.0,
60
62
  dependency: 0.5,
61
63
  proximity: 0.25,
@@ -63,6 +65,7 @@ function loadProfilesFromConfig() {
63
65
  },
64
66
  bugfix: {
65
67
  textMatch: 1.0,
68
+ pathMatch: 1.5,
66
69
  editingPath: 1.8,
67
70
  dependency: 0.7,
68
71
  proximity: 0.35,
@@ -70,6 +73,7 @@ function loadProfilesFromConfig() {
70
73
  },
71
74
  testfail: {
72
75
  textMatch: 1.0,
76
+ pathMatch: 1.5,
73
77
  editingPath: 1.6,
74
78
  dependency: 0.85,
75
79
  proximity: 0.3,
@@ -77,6 +81,7 @@ function loadProfilesFromConfig() {
77
81
  },
78
82
  typeerror: {
79
83
  textMatch: 1.0,
84
+ pathMatch: 1.5,
80
85
  editingPath: 1.4,
81
86
  dependency: 0.6,
82
87
  proximity: 0.4,
@@ -84,6 +89,7 @@ function loadProfilesFromConfig() {
84
89
  },
85
90
  feature: {
86
91
  textMatch: 1.0,
92
+ pathMatch: 1.5,
87
93
  editingPath: 1.5,
88
94
  dependency: 0.45,
89
95
  proximity: 0.5,
@@ -108,4 +114,3 @@ export function loadScoringProfile(profileName) {
108
114
  }
109
115
  return profiles.default;
110
116
  }
111
- //# sourceMappingURL=scoring.js.map
@@ -20,7 +20,6 @@ function assertNoUndefined(sql, params) {
20
20
  }
21
21
  }
22
22
  export class DuckDBClient {
23
- database;
24
23
  constructor(path) {
25
24
  this.database = new duckdb.Database(path);
26
25
  }
@@ -118,4 +117,3 @@ export class DuckDBClient {
118
117
  });
119
118
  }
120
119
  }
121
- //# sourceMappingURL=duckdb.js.map
@@ -1,9 +1,23 @@
1
1
  import { createHash } from "node:crypto";
2
2
  const DEFAULT_DIMS = 64;
3
+ /**
4
+ * 埋め込み生成用のトークン化
5
+ * keyword extractionと同じ戦略を使用してハイフン区切り用語を保持
6
+ */
3
7
  function tokenize(text) {
8
+ const strategy = process.env.KIRI_TOKENIZATION_STRATEGY?.toLowerCase();
9
+ // レガシーモード: ハイフンも分割(従来の動作)
10
+ if (strategy === "legacy") {
11
+ return text
12
+ .toLowerCase()
13
+ .split(/[^\p{L}\p{N}_]+/u)
14
+ .map((token) => token.trim())
15
+ .filter((token) => token.length > 0);
16
+ }
17
+ // phrase-aware または hybrid モード: ハイフンを保持
4
18
  return text
5
19
  .toLowerCase()
6
- .split(/[^\p{L}\p{N}_]+/u)
20
+ .split(/[^\p{L}\p{N}_-]+/u)
7
21
  .map((token) => token.trim())
8
22
  .filter((token) => token.length > 0);
9
23
  }
@@ -82,4 +96,3 @@ export function structuralSimilarity(a, b) {
82
96
  /** @deprecated Use structuralSimilarity() instead. Kept for backward compatibility. */
83
97
  export const cosineSimilarity = structuralSimilarity;
84
98
  export const EMBEDDING_DIMS = DEFAULT_DIMS;
85
- //# sourceMappingURL=embedding.js.map
@@ -2,4 +2,3 @@ export function encode(text) {
2
2
  const codePoints = Array.from(text);
3
3
  return codePoints.map((_, index) => index);
4
4
  }
5
- //# sourceMappingURL=tokenizer.js.map
@@ -87,4 +87,3 @@ export function parseSimpleYaml(content) {
87
87
  }
88
88
  return root;
89
89
  }
90
- //# sourceMappingURL=simpleYaml.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"handlers.d.ts","sourceRoot":"","sources":["../../../src/server/handlers.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAInD,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAG7C,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,SAAS,GAAG,MAAM,GAAG,MAAM,CAAC;CAC7C;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,GAAG,EAAE,MAAM,GAAG,IAAI,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3B;AAED,MAAM,WAAW,sBAAsB;IACrC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,EAAE,sBAAsB,CAAC;IACnC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,SAAS,GAAG,MAAM,GAAG,MAAM,CAAC;IAC5C,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACxB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,GAAG,EAAE,MAAM,EAAE,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,iBAAiB,EAAE,CAAC;IAC7B,eAAe,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,4BAA4B;IAC3C,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,oBAAoB;IACnC,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,4BAA4B,EAAE,CAAC;IAC3C,CAAC,CAAC,EAAE,MAAM,CAAC;IACX,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAA
E,kBAAkB,EAAE,CAAC;CAClC;AAgDD,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,UAAU,GAAG,SAAS,CAAC;IACnC,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,UAAU,GAAG,SAAS,CAAC;IAClC,KAAK,EAAE,eAAe,EAAE,CAAC;IACzB,KAAK,EAAE,eAAe,EAAE,CAAC;CAC1B;AA4gBD,wBAAsB,WAAW,CAC/B,OAAO,EAAE,aAAa,EACtB,MAAM,EAAE,iBAAiB,GACxB,OAAO,CAAC,iBAAiB,EAAE,CAAC,CA+G9B;AAED,wBAAsB,WAAW,CAC/B,OAAO,EAAE,aAAa,EACtB,MAAM,EAAE,iBAAiB,GACxB,OAAO,CAAC,aAAa,CAAC,CA0GxB;AAED,wBAAsB,aAAa,CACjC,OAAO,EAAE,aAAa,EACtB,MAAM,EAAE,mBAAmB,GAC1B,OAAO,CAAC,mBAAmB,CAAC,CA+S9B;AAED,wBAAsB,cAAc,CAClC,OAAO,EAAE,aAAa,EACtB,MAAM,EAAE,oBAAoB,GAC3B,OAAO,CAAC,oBAAoB,CAAC,CA+E/B;AAED,wBAAsB,WAAW,CAC/B,OAAO,EAAE,aAAa,EACtB,MAAM,EAAE,iBAAiB,GACxB,OAAO,CAAC,iBAAiB,CAAC,CAuJ5B;AAED,wBAAsB,aAAa,CAAC,EAAE,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAqBvF"}
1
+ {"version":3,"file":"handlers.d.ts","sourceRoot":"","sources":["../../../src/server/handlers.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAInD,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAG7C,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,SAAS,GAAG,MAAM,GAAG,MAAM,CAAC;CAC7C;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,GAAG,EAAE,MAAM,GAAG,IAAI,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3B;AAED,MAAM,WAAW,sBAAsB;IACrC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,EAAE,sBAAsB,CAAC;IACnC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,SAAS,GAAG,MAAM,GAAG,MAAM,CAAC;IAC5C,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACxB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,GAAG,EAAE,MAAM,EAAE,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,iBAAiB,EAAE,CAAC;IAC7B,eAAe,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,4BAA4B;IAC3C,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,oBAAoB;IACnC,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,4BAA4B,EAAE,CAAC;IAC3C,CAAC,CAAC,EAAE,MAAM,CAAC;IACX,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAA
E,kBAAkB,EAAE,CAAC;CAClC;AAgDD,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,UAAU,GAAG,SAAS,CAAC;IACnC,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,UAAU,GAAG,SAAS,CAAC;IAClC,KAAK,EAAE,eAAe,EAAE,CAAC;IACzB,KAAK,EAAE,eAAe,EAAE,CAAC;CAC1B;AA6rBD,wBAAsB,WAAW,CAC/B,OAAO,EAAE,aAAa,EACtB,MAAM,EAAE,iBAAiB,GACxB,OAAO,CAAC,iBAAiB,EAAE,CAAC,CA+G9B;AAED,wBAAsB,WAAW,CAC/B,OAAO,EAAE,aAAa,EACtB,MAAM,EAAE,iBAAiB,GACxB,OAAO,CAAC,aAAa,CAAC,CA0GxB;AAED,wBAAsB,aAAa,CACjC,OAAO,EAAE,aAAa,EACtB,MAAM,EAAE,mBAAmB,GAC1B,OAAO,CAAC,mBAAmB,CAAC,CAkZ9B;AAED,wBAAsB,cAAc,CAClC,OAAO,EAAE,aAAa,EACtB,MAAM,EAAE,oBAAoB,GAC3B,OAAO,CAAC,oBAAoB,CAAC,CA+E/B;AAED,wBAAsB,WAAW,CAC/B,OAAO,EAAE,aAAa,EACtB,MAAM,EAAE,iBAAiB,GACxB,OAAO,CAAC,iBAAiB,CAAC,CAuJ5B;AAED,wBAAsB,aAAa,CAAC,EAAE,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAqBvF"}
@@ -1,6 +1,6 @@
1
1
  import path from "node:path";
2
2
  import { generateEmbedding, structuralSimilarity } from "../shared/embedding.js";
3
- import { encode as encodeGPT } from "../shared/tokenizer.js";
3
+ import { encode as encodeGPT, tokenizeText } from "../shared/tokenizer.js";
4
4
  import { coerceProfileName, loadScoringProfile } from "./scoring.js";
5
5
  const DEFAULT_SEARCH_LIMIT = 50;
6
6
  const DEFAULT_SNIPPET_WINDOW = 150;
@@ -73,22 +73,126 @@ function normalizeBundleLimit(limit) {
73
73
  }
74
74
  return Math.min(Math.max(1, Math.floor(limit)), MAX_BUNDLE_LIMIT);
75
75
  }
76
+ /**
77
+ * トークン化戦略を取得
78
+ * 環境変数またはデフォルト値から決定
79
+ */
80
+ function getTokenizationStrategy() {
81
+ const strategy = process.env.KIRI_TOKENIZATION_STRATEGY?.toLowerCase();
82
+ if (strategy === "legacy" || strategy === "hybrid") {
83
+ return strategy;
84
+ }
85
+ return "phrase-aware"; // デフォルト
86
+ }
87
+ /**
88
+ * 引用符で囲まれたフレーズを抽出
89
+ * 例: 'search "page-agent handler" test' → ["page-agent handler"]
90
+ */
91
+ function extractQuotedPhrases(text) {
92
+ const phrases = [];
93
+ const quotePattern = /"([^"]+)"|'([^']+)'/g;
94
+ let match;
95
+ let remaining = text;
96
+ // eslint-disable-next-line no-cond-assign
97
+ while ((match = quotePattern.exec(text)) !== null) {
98
+ const phrase = (match[1] || match[2] || "").trim().toLowerCase();
99
+ if (phrase.length >= 3) {
100
+ phrases.push(phrase);
101
+ }
102
+ remaining = remaining.replace(match[0], " ");
103
+ }
104
+ return { phrases, remaining };
105
+ }
106
+ /**
107
+ * 複合用語を抽出(ハイフンまたはアンダースコア区切り)
108
+ * Unicode文字に対応(日本語、中国語などの複合用語もサポート)
109
+ * 例: "page-agent lambda-handler" → ["page-agent", "lambda-handler"]
110
+ * 例: "user_profile file_embedding" → ["user_profile", "file_embedding"]
111
+ * 例: "app-日本語" → ["app-日本語"]
112
+ */
113
+ function extractCompoundTerms(text) {
114
+ // Unicode対応: ハイフン(-)とアンダースコア(_)の両方をサポート
115
+ // snake_case (Python/Rust) と kebab-case を同等に扱う
116
+ // 注: \b はアンダースコアを単語文字として扱うため、アンダースコアでは機能しない
117
+ // そのため、明示的な境界チェックを使用
118
+ const compoundPattern = /(?:^|\s|[^\p{L}\p{N}_-])([\p{L}\p{N}]+(?:[-_][\p{L}\p{N}]+)+)(?=\s|[^\p{L}\p{N}_-]|$)/giu;
119
+ const matches = Array.from(text.matchAll(compoundPattern)).map((m) => m[1]);
120
+ return matches
121
+ .map((term) => term.toLowerCase())
122
+ .filter((term) => term.length >= 3 && !STOP_WORDS.has(term));
123
+ }
124
+ /**
125
+ * パスライクな用語を抽出
126
+ * Unicode文字に対応
127
+ * 例: "lambda/page-agent/handler" → ["lambda", "page-agent", "handler"]
128
+ */
129
+ function extractPathSegments(text) {
130
+ // Unicode対応: パスセグメントでもUnicode文字をサポート
131
+ const pathPattern = /\b[\p{L}\p{N}_-]+(?:\/[\p{L}\p{N}_-]+)+\b/giu;
132
+ const matches = text.match(pathPattern) || [];
133
+ const segments = [];
134
+ for (const path of matches) {
135
+ const parts = path.toLowerCase().split("/");
136
+ for (const part of parts) {
137
+ if (part.length >= 3 && !STOP_WORDS.has(part) && !segments.includes(part)) {
138
+ segments.push(part);
139
+ }
140
+ }
141
+ }
142
+ return segments;
143
+ }
144
+ /**
145
+ * 通常の単語を抽出
146
+ * 共有トークン化ユーティリティを使用
147
+ */
148
+ function extractRegularWords(text, strategy) {
149
+ const words = tokenizeText(text, strategy).filter((word) => word.length >= 3 && !STOP_WORDS.has(word));
150
+ return words;
151
+ }
152
+ /**
153
+ * テキストからキーワード、フレーズ、パスセグメントを抽出
154
+ * トークン化戦略に基づいて、ハイフン区切り用語の処理方法を変更
155
+ */
76
156
  function extractKeywords(text) {
77
- const words = text
78
- .toLowerCase()
79
- .split(/[^a-z0-9_]+/iu)
80
- .map((word) => word.trim())
81
- .filter((word) => word.length >= 3 && !STOP_WORDS.has(word));
82
- const unique = [];
83
- for (const word of words) {
84
- if (!unique.includes(word)) {
85
- unique.push(word);
86
- if (unique.length >= MAX_KEYWORDS) {
157
+ const strategy = getTokenizationStrategy();
158
+ const result = {
159
+ phrases: [],
160
+ keywords: [],
161
+ pathSegments: [],
162
+ };
163
+ // Phase 1: 引用符で囲まれたフレーズを抽出
164
+ const { phrases: quotedPhrases, remaining: afterQuotes } = extractQuotedPhrases(text);
165
+ result.phrases.push(...quotedPhrases);
166
+ // Phase 2: パスセグメントを抽出
167
+ const pathSegments = extractPathSegments(afterQuotes);
168
+ result.pathSegments.push(...pathSegments);
169
+ // Phase 3: 複合用語を抽出(ハイフン/アンダースコア区切り)(phrase-aware または hybrid モード)
170
+ if (strategy === "phrase-aware" || strategy === "hybrid") {
171
+ const compoundTerms = extractCompoundTerms(afterQuotes);
172
+ result.phrases.push(...compoundTerms);
173
+ // hybrid モードの場合、複合用語を分割したキーワードも追加
174
+ if (strategy === "hybrid") {
175
+ for (const term of compoundTerms) {
176
+ // ハイフンとアンダースコアの両方で分割
177
+ const parts = term
178
+ .split(/[-_]/)
179
+ .filter((part) => part.length >= 3 && !STOP_WORDS.has(part));
180
+ result.keywords.push(...parts);
181
+ }
182
+ }
183
+ }
184
+ // Phase 4: 通常の単語を抽出
185
+ const regularWords = extractRegularWords(afterQuotes, strategy);
186
+ // 重複を除去しながら、最大キーワード数まで追加
187
+ for (const word of regularWords) {
188
+ if (!result.keywords.includes(word) && !result.phrases.includes(word)) {
189
+ result.keywords.push(word);
190
+ if (result.keywords.length >= MAX_KEYWORDS) {
87
191
  break;
88
192
  }
89
193
  }
90
194
  }
91
- return unique;
195
+ return result;
92
196
  }
93
197
  function ensureCandidate(map, filePath) {
94
198
  let candidate = map.get(filePath);
@@ -321,13 +425,41 @@ function applyFileTypeBoost(path, baseScore, profile = "default") {
321
425
  * @param row - ファイル情報(path, ext)
322
426
  * @param profile - ブーストプロファイル
323
427
  */
324
- function applyBoostProfile(candidate, row, profile) {
428
+ function applyBoostProfile(candidate, row, profile, extractedTerms, pathMatchWeight) {
325
429
  if (profile === "none") {
326
430
  return;
327
431
  }
328
432
  const { path, ext } = row;
329
433
  const lowerPath = path.toLowerCase();
330
434
  const fileName = path.split("/").pop() ?? "";
435
+ // パスベースのスコアリング: goalのキーワード/フレーズがファイルパスに含まれる場合にブースト
436
+ if (extractedTerms && pathMatchWeight && pathMatchWeight > 0) {
437
+ // フレーズがパスに完全一致する場合(最高の重み)
438
+ for (const phrase of extractedTerms.phrases) {
439
+ if (lowerPath.includes(phrase)) {
440
+ candidate.score += pathMatchWeight * 1.5; // 1.5倍のブースト
441
+ candidate.reasons.add(`path-phrase:${phrase}`);
442
+ break; // 最初のマッチのみ適用
443
+ }
444
+ }
445
+ // パスセグメントがマッチする場合(中程度の重み)
446
+ const pathParts = lowerPath.split("/");
447
+ for (const segment of extractedTerms.pathSegments) {
448
+ if (pathParts.includes(segment)) {
449
+ candidate.score += pathMatchWeight;
450
+ candidate.reasons.add(`path-segment:${segment}`);
451
+ break; // 最初のマッチのみ適用
452
+ }
453
+ }
454
+ // 通常のキーワードがパスに含まれる場合(低い重み)
455
+ for (const keyword of extractedTerms.keywords) {
456
+ if (lowerPath.includes(keyword)) {
457
+ candidate.score += pathMatchWeight * 0.5; // 0.5倍のブースト
458
+ candidate.reasons.add(`path-keyword:${keyword}`);
459
+ break; // 最初のマッチのみ適用
460
+ }
461
+ }
462
+ }
331
463
  // Blacklisted directories that are almost always irrelevant for code context
332
464
  const blacklistedDirs = [
333
465
  ".cursor/",
@@ -425,7 +557,7 @@ function applyBoostProfile(candidate, row, profile) {
425
557
  // Penalize documentation and other non-code files
426
558
  const docExtensions = [".md", ".yaml", ".yml", ".mdc", ".json"];
427
559
  if (docExtensions.some((docExt) => path.endsWith(docExt))) {
428
- candidate.score -= 1.0; // Strong penalty to overcome structural similarity
560
+ candidate.score -= 2.0; // Strong penalty to overcome doc-heavy keyword matches
429
561
  candidate.reasons.add("penalty:doc-file");
430
562
  }
431
563
  // Boost implementation files, with more specific paths getting higher scores
@@ -649,18 +781,25 @@ export async function contextBundle(context, params) {
649
781
  }
650
782
  const semanticSeed = keywordSources.join(" ");
651
783
  const queryEmbedding = generateEmbedding(semanticSeed)?.values ?? null;
652
- let keywords = extractKeywords(semanticSeed);
653
- if (keywords.length === 0 && artifacts.editing_path) {
784
+ const extractedTerms = extractKeywords(semanticSeed);
785
+ // フォールバック: editing_pathからキーワードを抽出
786
+ if (extractedTerms.phrases.length === 0 &&
787
+ extractedTerms.keywords.length === 0 &&
788
+ artifacts.editing_path) {
654
789
  const pathSegments = artifacts.editing_path
655
790
  .split(/[/_.-]/)
656
791
  .map((segment) => segment.toLowerCase())
657
792
  .filter((segment) => segment.length >= 3 && !STOP_WORDS.has(segment));
658
- keywords = pathSegments.slice(0, MAX_KEYWORDS);
793
+ extractedTerms.pathSegments.push(...pathSegments.slice(0, MAX_KEYWORDS));
659
794
  }
660
795
  const candidates = new Map();
661
796
  const stringMatchSeeds = new Set();
662
797
  const fileCache = new Map();
663
- for (const keyword of keywords) {
798
+ // フレーズマッチング(高い重み: textMatch × 2)- 統合クエリでパフォーマンス改善
799
+ if (extractedTerms.phrases.length > 0) {
800
+ const phrasePlaceholders = extractedTerms.phrases
801
+ .map(() => "b.content ILIKE '%' || ? || '%'")
802
+ .join(" OR ");
664
803
  const rows = await db.all(`
665
804
  SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
666
805
  FROM file f
@@ -670,21 +809,90 @@ export async function contextBundle(context, params) {
670
809
  AND fe.path = f.path
671
810
  WHERE f.repo_id = ?
672
811
  AND f.is_binary = FALSE
673
- AND b.content ILIKE '%' || ? || '%'
812
+ AND (${phrasePlaceholders})
674
813
  ORDER BY f.path
675
814
  LIMIT ?
676
- `, [repoId, keyword, MAX_MATCHES_PER_KEYWORD]);
815
+ `, [repoId, ...extractedTerms.phrases, MAX_MATCHES_PER_KEYWORD * extractedTerms.phrases.length]);
816
+ const boostProfile = params.boost_profile ?? "default";
677
817
  for (const row of rows) {
678
818
  if (row.content === null) {
679
819
  continue;
680
820
  }
821
+ // どのフレーズにマッチしたかをチェック
822
+ const lowerContent = row.content.toLowerCase();
823
+ const matchedPhrases = extractedTerms.phrases.filter((phrase) => lowerContent.includes(phrase));
824
+ if (matchedPhrases.length === 0) {
825
+ continue; // Should not happen, but defensive check
826
+ }
681
827
  const candidate = ensureCandidate(candidates, row.path);
682
- candidate.score += weights.textMatch;
683
- candidate.reasons.add(`text:${keyword}`);
684
- // Apply boost profile to prioritize/penalize files based on type and location
685
- const boostProfile = params.boost_profile ?? "default";
686
- applyBoostProfile(candidate, row, boostProfile);
687
- const { line } = buildPreview(row.content, keyword);
828
+ // 各マッチしたフレーズに対してスコアリング
829
+ for (const phrase of matchedPhrases) {
830
+ // フレーズマッチは通常の2倍のスコア
831
+ candidate.score += weights.textMatch * 2.0;
832
+ candidate.reasons.add(`phrase:${phrase}`);
833
+ }
834
+ // Apply boost profile once per file
835
+ applyBoostProfile(candidate, row, boostProfile, extractedTerms, weights.pathMatch);
836
+ // Use first matched phrase for preview (guaranteed to exist due to length check above)
837
+ const { line } = buildPreview(row.content, matchedPhrases[0]);
838
+ candidate.matchLine =
839
+ candidate.matchLine === null ? line : Math.min(candidate.matchLine, line);
840
+ candidate.content ??= row.content;
841
+ candidate.lang ??= row.lang;
842
+ candidate.ext ??= row.ext;
843
+ candidate.totalLines ??= row.content.length === 0 ? 0 : row.content.split(/\r?\n/).length;
844
+ candidate.embedding ??= parseEmbedding(row.vector_json ?? null, row.vector_dims ?? null);
845
+ stringMatchSeeds.add(row.path);
846
+ if (!fileCache.has(row.path)) {
847
+ fileCache.set(row.path, {
848
+ content: row.content,
849
+ lang: row.lang,
850
+ ext: row.ext,
851
+ totalLines: candidate.totalLines ?? 0,
852
+ embedding: candidate.embedding,
853
+ });
854
+ }
855
+ }
856
+ }
857
+ // キーワードマッチング(通常の重み)- 統合クエリでパフォーマンス改善
858
+ if (extractedTerms.keywords.length > 0) {
859
+ const keywordPlaceholders = extractedTerms.keywords
860
+ .map(() => "b.content ILIKE '%' || ? || '%'")
861
+ .join(" OR ");
862
+ const rows = await db.all(`
863
+ SELECT f.path, f.lang, f.ext, f.is_binary, b.content, fe.vector_json, fe.dims AS vector_dims
864
+ FROM file f
865
+ JOIN blob b ON b.hash = f.blob_hash
866
+ LEFT JOIN file_embedding fe
867
+ ON fe.repo_id = f.repo_id
868
+ AND fe.path = f.path
869
+ WHERE f.repo_id = ?
870
+ AND f.is_binary = FALSE
871
+ AND (${keywordPlaceholders})
872
+ ORDER BY f.path
873
+ LIMIT ?
874
+ `, [repoId, ...extractedTerms.keywords, MAX_MATCHES_PER_KEYWORD * extractedTerms.keywords.length]);
875
+ const boostProfile = params.boost_profile ?? "default";
876
+ for (const row of rows) {
877
+ if (row.content === null) {
878
+ continue;
879
+ }
880
+ // どのキーワードにマッチしたかをチェック
881
+ const lowerContent = row.content.toLowerCase();
882
+ const matchedKeywords = extractedTerms.keywords.filter((keyword) => lowerContent.includes(keyword));
883
+ if (matchedKeywords.length === 0) {
884
+ continue; // Should not happen, but defensive check
885
+ }
886
+ const candidate = ensureCandidate(candidates, row.path);
887
+ // 各マッチしたキーワードに対してスコアリング
888
+ for (const keyword of matchedKeywords) {
889
+ candidate.score += weights.textMatch;
890
+ candidate.reasons.add(`text:${keyword}`);
891
+ }
892
+ // Apply boost profile once per file
893
+ applyBoostProfile(candidate, row, boostProfile, extractedTerms, weights.pathMatch);
894
+ // Use first matched keyword for preview (guaranteed to exist due to length check above)
895
+ const { line } = buildPreview(row.content, matchedKeywords[0]);
688
896
  candidate.matchLine =
689
897
  candidate.matchLine === null ? line : Math.min(candidate.matchLine, line);
690
898
  candidate.content ??= row.content;