kiri-mcp-server 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -5
- package/config/default.example.yml +9 -0
- package/config/scoring-profiles.yml +11 -6
- package/dist/config/default.example.yml +9 -0
- package/dist/config/scoring-profiles.yml +11 -6
- package/dist/package.json +1 -1
- package/dist/server/context.js +0 -1
- package/dist/server/handlers.js +547 -79
- package/dist/server/scoring.js +8 -3
- package/dist/shared/duckdb.js +0 -2
- package/dist/shared/embedding.js +15 -2
- package/dist/shared/tokenizer.js +0 -1
- package/dist/shared/utils/simpleYaml.js +0 -1
- package/dist/src/server/handlers.d.ts.map +1 -1
- package/dist/src/server/handlers.js +234 -26
- package/dist/src/server/handlers.js.map +1 -1
- package/dist/src/server/rpc.d.ts.map +1 -1
- package/dist/src/server/rpc.js +9 -3
- package/dist/src/server/rpc.js.map +1 -1
- package/dist/src/server/scoring.d.ts +2 -0
- package/dist/src/server/scoring.d.ts.map +1 -1
- package/dist/src/server/scoring.js +13 -1
- package/dist/src/server/scoring.js.map +1 -1
- package/dist/src/shared/duckdb.d.ts +1 -0
- package/dist/src/shared/duckdb.d.ts.map +1 -1
- package/dist/src/shared/duckdb.js +54 -3
- package/dist/src/shared/duckdb.js.map +1 -1
- package/dist/src/shared/embedding.d.ts.map +1 -1
- package/dist/src/shared/embedding.js +2 -8
- package/dist/src/shared/embedding.js.map +1 -1
- package/dist/src/shared/tokenizer.d.ts +18 -0
- package/dist/src/shared/tokenizer.d.ts.map +1 -1
- package/dist/src/shared/tokenizer.js +35 -0
- package/dist/src/shared/tokenizer.js.map +1 -1
- package/package.json +1 -1
|
@@ -2,4 +2,39 @@ export function encode(text) {
|
|
|
2
2
|
const codePoints = Array.from(text);
|
|
3
3
|
return codePoints.map((_, index) => index);
|
|
4
4
|
}
|
|
5
|
+
/**
 * Resolve the tokenization strategy from the environment.
 *
 * Reads `KIRI_TOKENIZATION_STRATEGY`, ignoring letter case and surrounding
 * whitespace. Only "legacy" and "hybrid" are recognized explicitly; an unset
 * variable or any other value yields the default, "phrase-aware".
 *
 * @returns {"legacy" | "hybrid" | "phrase-aware"} The strategy to use.
 */
export function getTokenizationStrategy() {
  // Trim before comparing so values such as "legacy " (trailing whitespace
  // from a .env file or shell quoting) are not silently treated as unknown.
  const strategy = process.env.KIRI_TOKENIZATION_STRATEGY?.trim().toLowerCase();
  if (strategy === "legacy" || strategy === "hybrid") {
    return strategy;
  }
  return "phrase-aware"; // default
}
|
|
15
|
+
/**
 * Split text into lowercase tokens.
 *
 * Unicode-aware: any run of characters that are not letters (\p{L}),
 * numbers (\p{N}) or underscores acts as a delimiter. How hyphens are
 * treated depends on the strategy:
 *
 * - "legacy": hyphens are delimiters too ("page-agent" -> "page", "agent").
 * - any other strategy ("phrase-aware", "hybrid"): hyphens inside a token are
 *   kept ("page-agent" stays one token).
 *
 * @param {string} text - Text to tokenize.
 * @param {string} [strategy] - Tokenization strategy; when omitted it is
 *   resolved via getTokenizationStrategy().
 * @returns {string[]} Non-empty tokens, in order of appearance.
 */
export function tokenizeText(text, strategy) {
  const effectiveStrategy = strategy ?? getTokenizationStrategy();
  const lowered = text.toLowerCase();

  // Legacy mode: split on hyphens as well (the historical behavior).
  // No trim step is needed: every whitespace character is itself a delimiter,
  // so a token can never carry leading or trailing whitespace.
  if (effectiveStrategy === "legacy") {
    return lowered.split(/[^\p{L}\p{N}_]+/u).filter((token) => token.length > 0);
  }

  // Hyphen-preserving modes: keep hyphens that join parts of a compound term,
  // but strip hyphens at the token edges so that a free-standing "-" / "--"
  // or a prefix such as "--flag" does not leak punctuation into the tokens.
  return lowered
    .split(/[^\p{L}\p{N}_-]+/u)
    .map((token) => token.replace(/^-+|-+$/g, ""))
    .filter((token) => token.length > 0);
}
|
|
5
40
|
//# sourceMappingURL=tokenizer.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../../src/shared/tokenizer.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,MAAM,CAAC,IAAY;IACjC,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACpC,OAAO,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC;AAC7C,CAAC"}
|
|
1
|
+
{"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../../src/shared/tokenizer.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,MAAM,CAAC,IAAY;IACjC,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACpC,OAAO,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC;AAC7C,CAAC;AASD;;GAEG;AACH,MAAM,UAAU,uBAAuB;IACrC,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,0BAA0B,EAAE,WAAW,EAAE,CAAC;IACvE,IAAI,QAAQ,KAAK,QAAQ,IAAI,QAAQ,KAAK,QAAQ,EAAE,CAAC;QACnD,OAAO,QAAQ,CAAC;IAClB,CAAC;IACD,OAAO,cAAc,CAAC,CAAC,QAAQ;AACjC,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,QAA+B;IACxE,MAAM,iBAAiB,GAAG,QAAQ,IAAI,uBAAuB,EAAE,CAAC;IAEhE,0BAA0B;IAC1B,IAAI,iBAAiB,KAAK,QAAQ,EAAE,CAAC;QACnC,OAAO,IAAI;aACR,WAAW,EAAE;aACb,KAAK,CAAC,kBAAkB,CAAC;aACzB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;aAC5B,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACzC,CAAC;IAED,uCAAuC;IACvC,OAAO,IAAI;SACR,WAAW,EAAE;SACb,KAAK,CAAC,mBAAmB,CAAC;SAC1B,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;SAC5B,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACzC,CAAC"}
|