@osqi/cmcc-tariff-rag 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -0
- package/config.example.yaml +18 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +37 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +5 -0
- package/dist/config.js +9 -0
- package/dist/config.js.map +1 -0
- package/dist/database.d.ts +9 -0
- package/dist/database.js +64 -0
- package/dist/database.js.map +1 -0
- package/dist/embedding.d.ts +1 -0
- package/dist/embedding.js +39 -0
- package/dist/embedding.js.map +1 -0
- package/dist/import.d.ts +11 -0
- package/dist/import.js +100 -0
- package/dist/import.js.map +1 -0
- package/dist/mcp-server.d.ts +1 -0
- package/dist/mcp-server.js +22 -0
- package/dist/mcp-server.js.map +1 -0
- package/dist/search.d.ts +16 -0
- package/dist/search.js +74 -0
- package/dist/search.js.map +1 -0
- package/dist/tests/import-search.test.d.ts +1 -0
- package/dist/tests/import-search.test.js +37 -0
- package/dist/tests/import-search.test.js.map +1 -0
- package/dist/types.d.ts +46 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +32 -0
package/README.md
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# CMCC Tariff RAG for Hermes Agent
|
|
2
|
+
|
|
3
|
+
本项目把广东移动资费 JSONL 导入本地 LanceDB,并通过只读 MCP 工具提供给 Hermes Agent。无需 Docker、Python 或云数据库。
|
|
4
|
+
|
|
5
|
+
## 安装与首次导入
|
|
6
|
+
|
|
7
|
+
在 PowerShell 中使用 `npm.cmd` 可避开部分 Windows 的 PowerShell 脚本执行策略:
|
|
8
|
+
|
|
9
|
+
```powershell
|
|
10
|
+
cd D:\codex\cmcc\hermes-rag
|
|
11
|
+
npm.cmd install
|
|
12
|
+
npm.cmd run build
|
|
13
|
+
npm.cmd link
|
|
14
|
+
npx.cmd @osqi/cmcc-tariff-rag import D:\codex\cmcc\广东移动资费_RAG.jsonl
|
|
15
|
+
npx.cmd @osqi/cmcc-tariff-rag status
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
首次导入会从 Hugging Face 下载中文模型 `bge-small-zh-v1.5` 到 `models` 目录。该模型生成 512 维向量,后续查询可离线运行。
|
|
19
|
+
|
|
20
|
+
## 更新数据
|
|
21
|
+
|
|
22
|
+
```powershell
|
|
23
|
+
npx.cmd @osqi/cmcc-tariff-rag import D:\codex\cmcc\广东移动资费_RAG.jsonl
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
相同记录跳过向量生成;新增和内容变化的记录才重新生成向量。新版缺失的旧方案保留并标记为非最新版。
|
|
27
|
+
|
|
28
|
+
全量重建会先备份数据库:
|
|
29
|
+
|
|
30
|
+
```powershell
|
|
31
|
+
npx.cmd @osqi/cmcc-tariff-rag rebuild D:\codex\cmcc\广东移动资费_RAG.jsonl
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
从 0.1.x 升级到 0.2.x 时必须执行一次 `rebuild`:旧版使用 384 维 multilingual-e5-small,新版使用 512 维中文 BGE,两个向量库不能混用。
|
|
35
|
+
|
|
36
|
+
## Hermes 配置
|
|
37
|
+
|
|
38
|
+
将 `config.example.yaml` 中的 `mcp_servers` 合并到 Hermes 的 `config.yaml`。如果包只在本机 `npm link`,建议将命令改成绝对路径,避免 Hermes 的 PATH 不同:
|
|
39
|
+
|
|
40
|
+
```yaml
|
|
41
|
+
command: "D:\\Program Files\\nodejs\\npx.cmd"
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
保存后在 Hermes 中执行 `/reload-mcp` 或重启。可先询问:“告诉我当前有哪些 MCP 工具”。
|
|
45
|
+
|
|
46
|
+
## 测试查询
|
|
47
|
+
|
|
48
|
+
```powershell
|
|
49
|
+
npx.cmd @osqi/cmcc-tariff-rag search "广东现在可办的1000M宽带优惠"
|
|
50
|
+
npm.cmd test
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
回答应引用资费名称、方案编号和最新确认时间;当前搜索固定排除过期或非最新版方案。
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
mcp_servers:
|
|
2
|
+
cmcc_tariffs:
|
|
3
|
+
command: "npx"
|
|
4
|
+
args:
|
|
5
|
+
- "--yes"
|
|
6
|
+
- "@osqi/cmcc-tariff-rag"
|
|
7
|
+
- "serve"
|
|
8
|
+
env:
|
|
9
|
+
CMCC_RAG_DB: "D:\\codex\\cmcc\\hermes-rag\\data\\lancedb"
|
|
10
|
+
CMCC_RAG_MODELS: "D:\\codex\\cmcc\\hermes-rag\\models"
|
|
11
|
+
CMCC_RAG_MODEL: "Xenova/bge-small-zh-v1.5"
|
|
12
|
+
tools:
|
|
13
|
+
include:
|
|
14
|
+
- search_current_tariffs
|
|
15
|
+
- search_historical_tariffs
|
|
16
|
+
- get_tariff_by_plan_code
|
|
17
|
+
resources: false
|
|
18
|
+
prompts: false
|
package/dist/cli.d.ts
ADDED
package/dist/cli.js
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { importJsonl } from "./import.js";
|
|
4
|
+
import { DB_PATH } from "./config.js";
|
|
5
|
+
import { readAllRows } from "./database.js";
|
|
6
|
+
import { searchTariffs } from "./search.js";
|
|
7
|
+
import { serveMcp } from "./mcp-server.js";
|
|
8
|
+
/**
 * Print the one-line CLI usage message to stderr and terminate the process
 * with exit code 2. This function does not return.
 */
function usage() {
    const usageLine = "用法:cmcc-tariff-rag <import|rebuild|status|search|serve> [参数]";
    console.error(usageLine);
    process.exit(2);
}
|
|
12
|
+
/**
 * CLI entry point. Dispatches on the first command-line argument:
 *
 * - `import` / `rebuild`: import the JSONL file named by the next argument
 *   (rebuild passes `true` as the second argument of importJsonl) and print
 *   the resulting summary as JSON.
 * - `status`: print the database path, total row count, number of rows with a
 *   truthy `is_latest`, and a per-status row count.
 * - `search`: join the remaining arguments with spaces and print the results
 *   of a "current"-mode search limited to 5 hits.
 * - `serve`: start the MCP server.
 *
 * Anything else, or a missing required argument, prints usage and exits.
 */
async function main() {
    const [command, ...args] = process.argv.slice(2);
    switch (command) {
        case "import":
        case "rebuild": {
            const [file] = args;
            if (!file)
                usage();
            const summary = await importJsonl(path.resolve(file), command === "rebuild");
            console.log(JSON.stringify(summary, null, 2));
            break;
        }
        case "status": {
            const rows = await readAllRows();
            const statusCounts = {};
            let latest = 0;
            for (const row of rows) {
                statusCounts[row.status] = (statusCounts[row.status] ?? 0) + 1;
                if (row.is_latest)
                    latest += 1;
            }
            const report = { database: DB_PATH, total: rows.length, latest, status_counts: statusCounts };
            console.log(JSON.stringify(report, null, 2));
            break;
        }
        case "search": {
            if (!args.length)
                usage();
            const hits = await searchTariffs(args.join(" "), { mode: "current", limit: 5 });
            console.log(JSON.stringify(hits, null, 2));
            break;
        }
        case "serve":
            await serveMcp();
            break;
        default:
            usage();
    }
}
|
|
36
|
+
// Run the CLI. On any failure print the stack trace (falling back to the
// message, or the stringified value for non-Error rejections) and exit with 1.
main().catch((error) => {
    const detail = error instanceof Error ? error.stack ?? error.message : String(error);
    console.error(detail);
    process.exit(1);
});
|
|
37
|
+
//# sourceMappingURL=cli.js.map
|
package/dist/cli.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC1C,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AACtC,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAE3C,SAAS,KAAK;IACZ,OAAO,CAAC,KAAK,CAAC,8DAA8D,CAAC,CAAC;IAC9E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,KAAK,UAAU,IAAI;IACjB,MAAM,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACjD,IAAI,OAAO,KAAK,QAAQ,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;QAClD,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;YAAE,KAAK,EAAE,CAAC;QACtB,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,OAAO,KAAK,SAAS,CAAC,CAAC;QAChF,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAChD,CAAC;SAAM,IAAI,OAAO,KAAK,QAAQ,EAAE,CAAC;QAChC,MAAM,IAAI,GAAG,MAAM,WAAW,EAAE,CAAC;QACjC,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAyB,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,GAAG,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC1I,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,MAAM,EAAE,aAAa,EAAE,YAAY,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACnK,CAAC;SAAM,IAAI,OAAO,KAAK,QAAQ,EAAE,CAAC;QAChC,IAAI,CAAC,IAAI,CAAC,MAAM;YAAE,KAAK,EAAE,CAAC;QAC1B,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAC3G,CAAC;SAAM,IAAI,OAAO,KAAK,OAAO,EAAE,CAAC;QAC/B,MAAM,QAAQ,EAAE,CAAC;IACnB,CAAC;;QAAM,KAAK,EAAE,CAAC;AACjB,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAA
C,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC"}
|
package/dist/config.d.ts
ADDED
package/dist/config.js
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import { fileURLToPath } from "node:url";
|
|
3
|
+
// Package root: one directory above the directory containing this module.
const moduleDir = path.dirname(fileURLToPath(import.meta.url));
const packageRoot = path.resolve(moduleDir, "..");
/** Name of the table that holds the tariff rows. */
export const TABLE_NAME = "tariffs";
/** Embedding model id; a non-empty CMCC_RAG_MODEL environment variable overrides the default. */
export const EMBEDDING_MODEL = process.env.CMCC_RAG_MODEL
    ? process.env.CMCC_RAG_MODEL
    : "Xenova/bge-small-zh-v1.5";
/** Expected embedding dimension. */
export const VECTOR_SIZE = 512;
/** Absolute database directory; CMCC_RAG_DB overrides `<package root>/data/lancedb`. */
export const DB_PATH = path.resolve(process.env.CMCC_RAG_DB
    ? process.env.CMCC_RAG_DB
    : path.join(packageRoot, "data", "lancedb"));
/** Absolute model cache directory; CMCC_RAG_MODELS overrides `<package root>/models`. */
export const MODEL_CACHE = path.resolve(process.env.CMCC_RAG_MODELS
    ? process.env.CMCC_RAG_MODELS
    : path.join(packageRoot, "models"));
|
|
9
|
+
//# sourceMappingURL=config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAEzC,MAAM,WAAW,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;AAErF,MAAM,CAAC,MAAM,UAAU,GAAG,SAAS,CAAC;AACpC,MAAM,CAAC,MAAM,eAAe,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,IAAI,0BAA0B,CAAC;AACxF,MAAM,CAAC,MAAM,WAAW,GAAG,GAAG,CAAC;AAC/B,MAAM,CAAC,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC;AAC1G,MAAM,CAAC,MAAM,WAAW,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import * as lancedb from "@lancedb/lancedb";
|
|
2
|
+
import type { TariffRow } from "./types.js";
|
|
3
|
+
export declare function connectDb(): Promise<lancedb.Connection>;
|
|
4
|
+
export declare function tableExists(db: Awaited<ReturnType<typeof connectDb>>): Promise<boolean>;
|
|
5
|
+
export declare function readAllRows(): Promise<TariffRow[]>;
|
|
6
|
+
export declare function replaceRows(rows: TariffRow[]): Promise<void>;
|
|
7
|
+
export declare function openTariffTable(): Promise<lancedb.Table>;
|
|
8
|
+
export declare function backupDatabase(): Promise<string | null>;
|
|
9
|
+
export declare function clearDatabase(): Promise<void>;
|
package/dist/database.js
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import * as lancedb from "@lancedb/lancedb";
|
|
4
|
+
import { DB_PATH, TABLE_NAME } from "./config.js";
|
|
5
|
+
/**
 * Ensure the database directory exists, then open a LanceDB connection to it.
 *
 * @returns {Promise<object>} The connection returned by `lancedb.connect`.
 */
export async function connectDb() {
    const directoryOptions = { recursive: true };
    await fs.mkdir(DB_PATH, directoryOptions);
    const connection = lancedb.connect(DB_PATH);
    return connection;
}
|
|
9
|
+
/**
 * Report whether the tariff table is present in the given connection.
 *
 * @param {object} db - Connection exposing an async `tableNames()` method.
 * @returns {Promise<boolean>} True when the table name is listed.
 */
export async function tableExists(db) {
    const names = await db.tableNames();
    return names.includes(TABLE_NAME);
}
|
|
12
|
+
/**
 * Load every row of the tariff table.
 *
 * Returns an empty array when the table does not exist. Each returned row is a
 * shallow copy of the stored row whose `vector` has been converted to a plain
 * array, whichever container the query returned it in.
 *
 * @returns {Promise<object[]>} All rows, each with an array-valued `vector`.
 */
export async function readAllRows() {
    const db = await connectDb();
    const hasTable = await tableExists(db);
    if (!hasTable)
        return [];
    const table = await db.openTable(TABLE_NAME);
    const records = await table.query().toArray();
    // Three container shapes are handled: objects with toArray(), objects with
    // get(index) plus a numeric length, and anything Array.from can iterate.
    const toPlainVector = (raw) => {
        if (typeof raw?.toArray === "function")
            return Array.from(raw.toArray());
        if (typeof raw?.get === "function" && typeof raw.length === "number")
            return Array.from({ length: raw.length }, (_, position) => raw.get(position));
        return Array.from(raw);
    };
    return records.map((record) => {
        const vector = toPlainVector(record.vector);
        return { ...record, vector };
    });
}
|
|
30
|
+
/**
 * Overwrite the tariff table with the given rows.
 *
 * Every row must carry a `vector` with a numeric `length`; vectors are copied
 * into plain arrays before the table is created in "overwrite" mode.
 *
 * @param {object[]} rows - Rows to store; must be non-empty.
 * @returns {Promise<void>}
 * @throws {Error} When `rows` is empty or a row lacks a usable vector.
 */
export async function replaceRows(rows) {
    if (!rows.length)
        throw new Error("Cannot create an empty tariff database");
    const normalized = [];
    for (const row of rows) {
        const hasVector = Boolean(row.vector) && typeof row.vector.length === "number";
        if (!hasVector)
            throw new Error(`方案 ${row.plan_code} 缺少有效向量`);
        normalized.push({ ...row, vector: Array.from(row.vector) });
    }
    // Validation finishes before the connection is opened, so a bad row never
    // reaches the table.
    const db = await connectDb();
    await db.createTable(TABLE_NAME, normalized, { mode: "overwrite" });
}
|
|
42
|
+
/**
 * Open the tariff table, failing with a user-facing message when nothing has
 * been imported yet.
 *
 * @returns {Promise<object>} The table handle returned by `db.openTable`.
 * @throws {Error} When the table does not exist.
 */
export async function openTariffTable() {
    const db = await connectDb();
    const imported = await tableExists(db);
    if (imported)
        return db.openTable(TABLE_NAME);
    throw new Error("资费库尚未导入,请先运行 import 命令");
}
|
|
48
|
+
/**
 * Copy the database directory into a timestamped sibling folder.
 *
 * The copy lands in `<parent of DB_PATH>/backups/<ISO timestamp>`, where the
 * `:` and `.` characters of the timestamp are replaced by `-`.
 *
 * @returns {Promise<string | null>} The backup path, or null when the database
 *   directory cannot be accessed.
 */
export async function backupDatabase() {
    const reachable = await fs.access(DB_PATH).then(() => true, () => false);
    if (!reachable)
        return null;
    const stamp = new Date().toISOString().replace(/[:.]/g, "-");
    const backupsDir = path.join(path.dirname(DB_PATH), "backups");
    const backup = path.join(backupsDir, stamp);
    await fs.mkdir(path.dirname(backup), { recursive: true });
    await fs.cp(DB_PATH, backup, { recursive: true });
    return backup;
}
|
|
61
|
+
/**
 * Recursively delete the database directory. A missing directory is not an
 * error because removal is forced.
 *
 * @returns {Promise<void>}
 */
export async function clearDatabase() {
    const removal = { recursive: true, force: true };
    await fs.rm(DB_PATH, removal);
}
|
|
64
|
+
//# sourceMappingURL=database.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"database.js","sourceRoot":"","sources":["../src/database.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,KAAK,OAAO,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGlD,MAAM,CAAC,KAAK,UAAU,SAAS;IAC7B,MAAM,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC7C,OAAO,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;AAClC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,EAAyC;IACzE,OAAO,CAAC,MAAM,EAAE,CAAC,UAAU,EAAE,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;AACtD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW;IAC/B,MAAM,EAAE,GAAG,MAAM,SAAS,EAAE,CAAC;IAC7B,IAAI,CAAC,CAAC,MAAM,WAAW,CAAC,EAAE,CAAC,CAAC;QAAE,OAAO,EAAE,CAAC;IACxC,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IAC7C,MAAM,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,KAAK,EAAE,CAAC,OAAO,EAAE,CAAsD,CAAC;IAClG,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QACtB,MAAM,KAAK,GAAG,GAAG,CAAC,MAAiG,CAAC;QACpH,IAAI,MAAgB,CAAC;QACrB,IAAI,OAAO,KAAK,EAAE,OAAO,KAAK,UAAU;YAAE,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;aAC1E,IAAI,OAAO,KAAK,EAAE,GAAG,KAAK,UAAU,IAAI,OAAO,KAAK,CAAC,MAAM,KAAK,QAAQ;YAAE,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,GAAI,CAAC,KAAK,CAAC,CAAC,CAAC;;YACzJ,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,KAA0B,CAAC,CAAC;QACrD,OAAO,EAAE,GAAG,GAAG,EAAE,MAAM,EAAe,CAAC;IACzC,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,IAAiB;IACjD,IAAI,CAAC,IAAI,CAAC,MAAM;QAAE,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;IAC5E,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QAClC,IAAI,CAAC,GAAG,CAAC,MAAM,IAAI,OAAQ,GAAG,CAAC,MAAyC,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;YAC7F,MAAM,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,SAAS,SAAS,CAAC,CAAC;QAChD,CAAC;QACD,OAAO,EAAE,GAAG,GAAG,EAAE,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,MAAsC,CAAC,EAAE,CAAC;IACpF,CAAC,CAAC,CAAC;IACH,MAAM,EAAE,GAAG,MAAM,SAAS,EAAE,CAAC;IAC7B,MAAM,EAAE,CAAC,WAAW,CAAC,UAAU,EAAE,UAAkD,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC;AAC9G,CAAC;AAED,MAAM
,CAAC,KAAK,UAAU,eAAe;IACnC,MAAM,EAAE,GAAG,MAAM,SAAS,EAAE,CAAC;IAC7B,IAAI,CAAC,CAAC,MAAM,WAAW,CAAC,EAAE,CAAC,CAAC;QAAE,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;IACxE,OAAO,EAAE,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;AAClC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc;IAClC,IAAI,CAAC;QACH,MAAM,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC3B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,KAAK,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IAC7D,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,SAAS,EAAE,KAAK,CAAC,CAAC;IAClE,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC1D,MAAM,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAClD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa;IACjC,MAAM,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;AACzD,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function embedTexts(texts: string[], kind: "query" | "passage"): Promise<number[][]>;
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { EMBEDDING_MODEL, MODEL_CACHE, VECTOR_SIZE } from "./config.js";
|
|
3
|
+
let extractorPromise;
|
|
4
|
+
/**
 * Build the feature extractor used for embeddings.
 *
 * When CMCC_RAG_EMBEDDER is "mock", returns a deterministic stand-in that
 * derives each vector from the SHA-256 digest of the text, with no model
 * involved. Otherwise loads the configured model through the
 * `@huggingface/transformers` feature-extraction pipeline, using MODEL_CACHE as
 * the cache directory.
 *
 * @returns {Promise<Function>} An async extractor taking a string or an array
 *   of strings.
 */
async function loadExtractor() {
    if (process.env.CMCC_RAG_EMBEDDER !== "mock") {
        const { pipeline, env } = await import("@huggingface/transformers");
        env.cacheDir = MODEL_CACHE;
        return await pipeline("feature-extraction", EMBEDDING_MODEL, { dtype: "q8" });
    }
    return async (input) => {
        const texts = Array.isArray(input) ? input : [input];
        const values = new Float32Array(texts.length * VECTOR_SIZE);
        let offset = 0;
        for (const text of texts) {
            const digest = createHash("sha256").update(text).digest();
            // Cycle through the digest bytes, mapping each byte 0..255 onto [-1, 1].
            for (let i = 0; i < VECTOR_SIZE; i += 1)
                values[offset + i] = (digest[i % digest.length] - 127.5) / 127.5;
            offset += VECTOR_SIZE;
        }
        return { data: values, dims: [texts.length, VECTOR_SIZE] };
    };
}
|
|
21
|
+
/**
 * Return the shared extractor, loading it on first use.
 *
 * The in-flight load promise is cached so concurrent callers share one load.
 * If the load fails, the cache is cleared so that the next call retries
 * instead of replaying the same rejection for the lifetime of the process.
 *
 * @returns {Promise<Function>} The extractor produced by loadExtractor.
 * @throws Whatever loadExtractor rejects with.
 */
async function getExtractor() {
    if (!extractorPromise) {
        extractorPromise = loadExtractor().catch((error) => {
            // Drop the failed attempt; a later call starts a fresh load.
            extractorPromise = undefined;
            throw error;
        });
    }
    return extractorPromise;
}
|
|
25
|
+
/**
 * Embed a batch of texts and return one vector per text.
 *
 * Query texts are prefixed with a fixed retrieval instruction before
 * embedding; passage texts are embedded as-is. The extractor is called with
 * CLS pooling and normalization enabled.
 *
 * @param {string[]} texts - Texts to embed; an empty array yields an empty result.
 * @param {"query" | "passage"} kind - Whether the texts are queries or passages.
 * @returns {Promise<number[][]>} One vector per input text, in input order.
 * @throws {Error} When the extractor's last dimension differs from VECTOR_SIZE.
 */
export async function embedTexts(texts, kind) {
    if (!texts.length)
        return [];
    const extractor = await getExtractor();
    const isQuery = kind === "query";
    const prepared = texts.map((text) => {
        if (isQuery)
            return `为这个句子生成表示以用于检索相关文章:${text}`;
        return text;
    });
    const output = await extractor(prepared, { pooling: "cls", normalize: true });
    const dimensions = output.dims.at(-1) ?? VECTOR_SIZE;
    if (dimensions !== VECTOR_SIZE)
        throw new Error(`向量维度不匹配:模型返回 ${dimensions},期望 ${VECTOR_SIZE}`);
    // The extractor returns one flat buffer; cut it into per-text vectors.
    const flat = Array.from(output.data);
    const vectors = [];
    for (let start = 0; vectors.length < prepared.length; start += dimensions)
        vectors.push(flat.slice(start, start + dimensions));
    return vectors;
}
|
|
39
|
+
//# sourceMappingURL=embedding.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedding.js","sourceRoot":"","sources":["../src/embedding.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,eAAe,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAGxE,IAAI,gBAAgD,CAAC;AAErD,KAAK,UAAU,aAAa;IAC1B,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,KAAK,MAAM,EAAE,CAAC;QAC7C,OAAO,KAAK,EAAE,KAAK,EAAE,EAAE;YACrB,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YACrD,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,MAAM,GAAG,WAAW,CAAC,CAAC;YAC5D,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE;gBAC1B,MAAM,MAAM,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC;gBAC1D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,IAAI,CAAC;oBAAE,MAAM,CAAC,GAAG,GAAG,WAAW,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC,GAAG,KAAK,CAAC;YACvH,CAAC,CAAC,CAAC;YACH,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,WAAW,CAAC,EAAE,CAAC;QAC7D,CAAC,CAAC;IACJ,CAAC;IACD,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAAC;IACpE,GAAG,CAAC,QAAQ,GAAG,WAAW,CAAC;IAC3B,OAAO,MAAM,QAAQ,CAAC,oBAAoB,EAAE,eAAe,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,CAAyB,CAAC;AACxG,CAAC;AAED,KAAK,UAAU,YAAY;IACzB,gBAAgB,KAAK,aAAa,EAAE,CAAC;IACrC,OAAO,gBAAgB,CAAC;AAC1B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,KAAe,EAAE,IAAyB;IACzE,IAAI,CAAC,KAAK,CAAC,MAAM;QAAE,OAAO,EAAE,CAAC;IAC7B,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;IACvC,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,OAAO;QACnD,CAAC,CAAC,sBAAsB,IAAI,EAAE;QAC9B,CAAC,CAAC,IAAI,CAAC,CAAC;IACV,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC9E,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,WAAW,CAAC;IACrD,IAAI,UAAU,KAAK,WAAW;QAAE,MAAM,IAAI,KAAK,CAAC,gBAAgB,UAAU,OAAO,WAAW,EAAE,CAAC,CAAC;IAChG,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACrC,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC
,KAAK,GAAG,UAAU,EAAE,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC;AAC9F,CAAC"}
|
package/dist/import.d.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
export interface ImportSummary {
|
|
2
|
+
total: number;
|
|
3
|
+
added: number;
|
|
4
|
+
updated: number;
|
|
5
|
+
unchanged: number;
|
|
6
|
+
missing: number;
|
|
7
|
+
failed: number;
|
|
8
|
+
errors: string[];
|
|
9
|
+
backup?: string | null;
|
|
10
|
+
}
|
|
11
|
+
export declare function importJsonl(file: string, rebuild?: boolean): Promise<ImportSummary>;
|
package/dist/import.js
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import fs from "node:fs";
|
|
3
|
+
import readline from "node:readline";
|
|
4
|
+
import { backupDatabase, clearDatabase, readAllRows, replaceRows } from "./database.js";
|
|
5
|
+
import { embedTexts } from "./embedding.js";
|
|
6
|
+
/** Coerce a value to a string, mapping null and undefined to the empty string. */
function stringValue(value) {
    return String(value ?? "");
}
|
|
7
|
+
/**
 * Compute the change-detection fingerprint of a document: the hex SHA-256 of
 * the JSON serialization of its title, content, metadata and raw_fields, in
 * that key order. Other properties of the document do not affect the hash.
 *
 * @param {object} doc - Parsed JSONL document.
 * @returns {string} Hex-encoded SHA-256 digest.
 */
function contentHash(doc) {
    const { title, content, metadata, raw_fields } = doc;
    const fingerprint = JSON.stringify({ title, content, metadata, raw_fields });
    const hasher = createHash("sha256");
    hasher.update(fingerprint);
    return hasher.digest("hex");
}
|
|
10
|
+
/**
 * Read a JSONL file and split its lines into valid documents and per-line
 * error messages.
 *
 * Blank lines are skipped. A line is accepted when it parses as JSON and has a
 * non-empty metadata.plan_code, title, content, metadata.status and
 * metadata.confirmed_at. Every other line yields an error entry prefixed with
 * its 1-based line number; reading continues with the next line.
 *
 * @param {string} file - Path of the UTF-8 JSONL file.
 * @returns {Promise<{ docs: object[], errors: string[] }>} Accepted documents
 *   in file order, and one message per rejected line.
 */
async function readJsonl(file) {
    const docs = [];
    const errors = [];
    const lines = readline.createInterface({ input: fs.createReadStream(file, { encoding: "utf8" }), crlfDelay: Infinity });
    let lineNumber = 0;
    for await (const line of lines) {
        lineNumber += 1;
        // trim() also removes a leading UTF-8 byte-order mark (U+FEFF), which
        // JSON.parse rejects; without this the first record of a BOM-prefixed
        // file would be reported as a parse error.
        const text = line.trim();
        if (!text)
            continue;
        try {
            const doc = JSON.parse(text);
            const code = stringValue(doc.metadata?.plan_code);
            if (!code || !doc.title || !doc.content || !doc.metadata?.status || !doc.metadata?.confirmed_at)
                throw new Error("缺少 plan_code/title/content/status/confirmed_at");
            docs.push(doc);
        }
        catch (error) {
            errors.push(`第 ${lineNumber} 行:${error instanceof Error ? error.message : String(error)}`);
        }
    }
    return { docs, errors };
}
|
|
32
|
+
/**
 * Assemble the stored row for one document.
 *
 * Scalar metadata fields are stringified (null/undefined become ""), while
 * channel_tags, the full metadata and raw_fields are stored as JSON text. The
 * row is always marked as the latest version.
 *
 * @param {object} doc - Validated JSONL document.
 * @param {number[]} vector - Embedding to store with the row.
 * @param {string} now - Timestamp written to last_seen_at.
 * @param {string} batch - Value written to import_batch.
 * @param {string} firstSeen - Timestamp written to first_seen_at.
 * @returns {object} The row object.
 */
function buildRow(doc, vector, now, batch, firstSeen) {
    const { metadata } = doc;
    const text = (field) => stringValue(metadata[field]);
    // Key order is kept exactly as before in case the storage layer derives
    // column order from it.
    return {
        plan_code: text("plan_code"),
        document_id: stringValue(doc.document_id),
        title: doc.title,
        content: doc.content,
        vector,
        status: text("status"),
        province: text("province"),
        category_l1: text("category_l1"),
        category_l2: text("category_l2"),
        channel_tags: JSON.stringify(metadata.channel_tags ?? []),
        online_date: text("online_date"),
        offline_date: text("offline_date"),
        confirmed_at: text("confirmed_at"),
        price_raw: text("price_raw"),
        content_hash: contentHash(doc),
        first_seen_at: firstSeen,
        last_seen_at: now,
        import_batch: batch,
        is_latest: true,
        metadata_json: JSON.stringify(metadata),
        raw_fields_json: JSON.stringify(doc.raw_fields ?? {}),
    };
}
|
|
45
|
+
/**
 * Import a JSONL file into the tariff table.
 *
 * Incremental mode (`rebuild` false) re-embeds only new documents and
 * documents whose content hash changed; unchanged documents keep their stored
 * vector and first_seen_at. Stored rows whose plan code is absent from the
 * file are kept and flagged `is_latest: false`.
 *
 * Rebuild mode ignores existing rows, embeds everything, then backs up and
 * deletes the database directory before writing the new table.
 *
 * Nothing in the database is modified until the input has been parsed,
 * validated and embedded. A missing file, a file with no valid records, or an
 * embedding failure therefore leaves the existing data untouched. This covers
 * both the rebuild wipe and the "mark missing as non-latest" step, which an
 * empty or fully invalid file would otherwise apply to every stored plan.
 *
 * @param {string} file - Path of the JSONL file to import.
 * @param {boolean} [rebuild=false] - Recreate the database from scratch.
 * @returns {Promise<object>} Summary with total, added, updated, unchanged,
 *   missing, failed, errors and (rebuild only) the backup path or null.
 * @throws {Error} When the file contains no valid record.
 */
export async function importJsonl(file, rebuild = false) {
    const { docs, errors } = await readJsonl(file);
    // Keep the first occurrence of each plan code; later duplicates are reported.
    const unique = new Map();
    for (const doc of docs) {
        const code = stringValue(doc.metadata.plan_code);
        if (unique.has(code))
            errors.push(`重复方案编号:${code}`);
        else
            unique.set(code, doc);
    }
    if (!unique.size)
        throw new Error("没有可导入的有效记录");
    const existingRows = rebuild ? [] : await readAllRows();
    const existing = new Map(existingRows.map((row) => [row.plan_code, row]));
    const now = new Date().toISOString();
    const batch = now.slice(0, 10);
    // Classify each incoming document against what is already stored.
    const changed = [];
    let added = 0;
    let updated = 0;
    let unchanged = 0;
    for (const doc of unique.values()) {
        const old = existing.get(stringValue(doc.metadata.plan_code));
        if (!old) {
            added += 1;
            changed.push(doc);
        }
        else if (old.content_hash !== contentHash(doc)) {
            updated += 1;
            changed.push(doc);
        }
        else
            unchanged += 1;
    }
    // Embed only new or changed documents, in batches.
    const vectors = [];
    const batchSize = 32;
    for (let i = 0; i < changed.length; i += batchSize) {
        vectors.push(...await embedTexts(changed.slice(i, i + batchSize).map((doc) => doc.content), "passage"));
        const completed = Math.min(i + batchSize, changed.length);
        if (completed === changed.length || completed % 320 === 0)
            console.error(`向量化进度:${completed}/${changed.length}`);
    }
    const vectorByCode = new Map(changed.map((doc, index) => [stringValue(doc.metadata.plan_code), vectors[index]]));
    const rows = [];
    for (const doc of unique.values()) {
        const code = stringValue(doc.metadata.plan_code);
        const old = existing.get(code);
        // Unchanged documents have no fresh vector and reuse the stored one.
        rows.push(buildRow(doc, vectorByCode.get(code) ?? old.vector, now, batch, old?.first_seen_at ?? now));
    }
    const incomingCodes = new Set(unique.keys());
    const missingRows = existingRows.filter((row) => !incomingCodes.has(row.plan_code)).map((row) => ({ ...row, is_latest: false }));
    rows.push(...missingRows);
    // Destructive steps come last, once the replacement rows are ready.
    let backup;
    if (rebuild) {
        backup = await backupDatabase();
        await clearDatabase();
    }
    await replaceRows(rows);
    return { total: unique.size, added, updated, unchanged, missing: missingRows.length, failed: errors.length, errors, backup };
}
|
|
100
|
+
//# sourceMappingURL=import.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"import.js","sourceRoot":"","sources":["../src/import.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,QAAQ,MAAM,eAAe,CAAC;AACrC,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AACxF,OAAO,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAc5C,SAAS,WAAW,CAAC,KAAc,IAAY,OAAO,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;AAC3F,SAAS,WAAW,CAAC,GAAgB;IACnC,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,GAAG,CAAC,OAAO,EAAE,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,UAAU,EAAE,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AACnK,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,IAAY;IACnC,MAAM,IAAI,GAAkB,EAAE,CAAC;IAC/B,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,KAAK,GAAG,QAAQ,CAAC,eAAe,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,gBAAgB,CAAC,IAAI,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,CAAC;IACxH,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC/B,UAAU,IAAI,CAAC,CAAC;QAChB,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YAAE,SAAS;QAC3B,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAgB,CAAC;YAC5C,MAAM,IAAI,GAAG,WAAW,CAAC,GAAG,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YAClD,IAAI,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,GAAG,CAAC,OAAO,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,YAAY;gBAAE,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;YACnK,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACjB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,IAAI,CAAC,KAAK,UAAU,MAAM,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAC7F,CAAC;IACH,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;AAC1B,CAAC;AAED,SAAS,QAAQ,CAAC,GAAgB,EAAE,MAAgB,EAAE,GAAW,EAAE,KAAa,EAAE,SAAiB;IACjG,MAAM,CAAC,GAAG,GAAG,CAAC,QAAQ,CAAC;IACvB,OAAO;QACL,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,WAAW,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK;QAChG,OAAO,EAAE,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,
MAAM,EAAE,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC;QAC9F,WAAW,EAAE,WAAW,CAAC,CAAC,CAAC,WAAW,CAAC,EAAE,WAAW,EAAE,WAAW,CAAC,CAAC,CAAC,WAAW,CAAC;QAChF,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,YAAY,IAAI,EAAE,CAAC,EAAE,WAAW,EAAE,WAAW,CAAC,CAAC,CAAC,WAAW,CAAC;QAC3F,YAAY,EAAE,WAAW,CAAC,CAAC,CAAC,YAAY,CAAC,EAAE,YAAY,EAAE,WAAW,CAAC,CAAC,CAAC,YAAY,CAAC;QACpF,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,EAAE,YAAY,EAAE,WAAW,CAAC,GAAG,CAAC,EAAE,aAAa,EAAE,SAAS;QAC7F,YAAY,EAAE,GAAG,EAAE,YAAY,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI;QACvD,aAAa,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,eAAe,EAAE,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,UAAU,IAAI,EAAE,CAAC;KACxF,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,IAAY,EAAE,OAAO,GAAG,KAAK;IAC7D,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,MAAM,cAAc,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IAC5D,IAAI,OAAO;QAAE,MAAM,aAAa,EAAE,CAAC;IACnC,MAAM,YAAY,GAAG,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,WAAW,EAAE,CAAC;IACxD,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC;IAC1E,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC;IAC/C,MAAM,MAAM,GAAG,IAAI,GAAG,EAAuB,CAAC;IAC9C,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;QACjD,IAAI,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,MAAM,CAAC,IAAI,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC;;YAC/C,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACrC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC/B,MAAM,OAAO,GAAkB,EAAE,CAAC;IAClC,IAAI,KAAK,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC,EAAE,SAAS,GAAG,CAAC,CAAC;IAC1C,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;QAClC,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;QAC9D,IAAI,CAAC,GAAG,EAAE,CAAC;YAAC,KAAK,IAAI,CAAC,CAAC;YAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAAC,CAAC;aACvC,IAAI,GAAG,CAAC,YAAY,KAAK,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC;YAAC,OAAO,IAAI,CAAC,CA
AC;YAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAAC,CAAC;;YAC/E,SAAS,IAAI,CAAC,CAAC;IACtB,CAAC;IAED,MAAM,OAAO,GAAe,EAAE,CAAC;IAC/B,MAAM,SAAS,GAAG,EAAE,CAAC;IACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;QACnD,OAAO,CAAC,IAAI,CAAC,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC;QACxG,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;QAC1D,IAAI,SAAS,KAAK,OAAO,CAAC,MAAM,IAAI,SAAS,GAAG,GAAG,KAAK,CAAC;YAAE,OAAO,CAAC,KAAK,CAAC,SAAS,SAAS,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACnH,CAAC;IACD,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACjH,MAAM,IAAI,GAAgB,EAAE,CAAC;IAC7B,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;QAClC,MAAM,IAAI,GAAG,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;QACjD,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAC/B,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,GAAI,CAAC,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,aAAa,IAAI,GAAG,CAAC,CAAC,CAAC;IACzG,CAAC;IACD,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;IAC7C,MAAM,WAAW,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,GAAG,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC;IACjI,IAAI,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,CAAC;IAC1B,IAAI,CAAC,IAAI,CAAC,MAAM;QAAE,MAAM,IAAI,KAAK,CAAC,YAAY,CAAC,CAAC;IAChD,MAAM,WAAW,CAAC,IAAI,CAAC,CAAC;IACxB,OAAO,EAAE,KAAK,EAAE,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC;AAC/H,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function serveMcp(): Promise<void>;
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
3
|
+
import { z } from "zod";
|
|
4
|
+
import { getByPlanCode, searchTariffs } from "./search.js";
|
|
5
|
+
/**
 * Wrap a value as an MCP tool result holding a single text item with the
 * value pretty-printed as JSON (2-space indent).
 *
 * @param {unknown} value - Value to serialize.
 * @returns {{ content: { type: string, text: string }[] }} Tool result.
 */
function response(value) {
    const text = JSON.stringify(value, null, 2);
    const item = { type: "text", text };
    return { content: [item] };
}
|
|
8
|
+
/**
 * Create the MCP server, register the three tariff tools and connect it to a
 * stdio transport.
 *
 * Registered tools, in order:
 *  - `search_current_tariffs`    -> `searchTariffs(query, { ...args, mode: "current" })`
 *  - `search_historical_tariffs` -> `searchTariffs(query, { ...args, mode: "history" })`
 *  - `get_tariff_by_plan_code`   -> `getByPlanCode(plan_code)`, or an error object when it returns null/undefined
 *
 * @returns {Promise<void>} Settles once `server.connect` has settled.
 */
export async function serveMcp() {
  const server = new McpServer({ name: "@osqi/cmcc-tariff-rag", version: "0.2.0" });

  // Input schema shared by both search tools.
  const common = {
    query: z.string().min(1).describe("用户的资费查询"),
    region: z.string().optional().describe("省份,例如广东"),
    category: z.string().optional().describe("一级或二级分类"),
    channel: z.string().optional().describe("渠道,例如线上或线下"),
    limit: z.number().int().min(1).max(20).default(5),
  };

  // The two search tools differ only in name, description and search mode.
  const searchTools = [
    ["search_current_tariffs", "搜索当前有效且最新版的广东移动资费。必须引用资费名称、方案编号和确认时间。", "current"],
    ["search_historical_tariffs", "搜索已下线或非最新版的历史广东移动资费。结果不得作为当前可办理方案推荐。", "history"],
  ];
  for (const [toolName, description, mode] of searchTools) {
    server.registerTool(toolName, { description, inputSchema: common }, async (args) => {
      const hits = await searchTariffs(args.query, { ...args, mode });
      return response(hits);
    });
  }

  server.registerTool(
    "get_tariff_by_plan_code",
    { description: "根据唯一方案编号精确查询资费及办理、退订、违约信息。", inputSchema: { plan_code: z.string().min(1) } },
    async ({ plan_code }) => {
      const found = await getByPlanCode(plan_code);
      return response(found ?? { error: "未找到该方案编号" });
    },
  );

  const transport = new StdioServerTransport();
  await server.connect(transport);
}
|
|
22
|
+
//# sourceMappingURL=mcp-server.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mcp-server.js","sourceRoot":"","sources":["../src/mcp-server.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE3D,SAAS,QAAQ,CAAC,KAAc;IAC9B,OAAO,EAAE,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC;AACxF,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,QAAQ;IAC5B,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,EAAE,IAAI,EAAE,uBAAuB,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC;IAClF,MAAM,MAAM,GAAG;QACb,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC;QAC5C,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC;QACjD,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC;QACnD,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC;QACrD,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;KAClD,CAAC;IACF,MAAM,CAAC,YAAY,CAAC,wBAAwB,EAAE,EAAE,WAAW,EAAE,uCAAuC,EAAE,WAAW,EAAE,MAAM,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM,aAAa,CAAC,IAAI,CAAC,KAAK,EAAE,EAAE,GAAG,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC;IACtN,MAAM,CAAC,YAAY,CAAC,2BAA2B,EAAE,EAAE,WAAW,EAAE,sCAAsC,EAAE,WAAW,EAAE,MAAM,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM,aAAa,CAAC,IAAI,CAAC,KAAK,EAAE,EAAE,GAAG,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC;IACxN,MAAM,CAAC,YAAY,CAAC,yBAAyB,EAAE,EAAE,WAAW,EAAE,4BAA4B,EAAE,WAAW,EAAE,EAAE,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM,aAAa,CAAC,SAAS,CAAC,IAAI,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC;IACzO,MAAM,MAAM,CAAC,OAAO,CAAC,IAAI,oBAAoB,EAAE,CAAC,CAAC;AACnD,CAAC"}
|
package/dist/search.d.ts
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { SearchOptions } from "./types.js";
|
|
2
|
+
export interface SearchResult {
|
|
3
|
+
title: string;
|
|
4
|
+
plan_code: string;
|
|
5
|
+
status: string;
|
|
6
|
+
online_date: string;
|
|
7
|
+
offline_date: string;
|
|
8
|
+
confirmed_at: string;
|
|
9
|
+
content: string;
|
|
10
|
+
applicable_scope: unknown;
|
|
11
|
+
cancellation: unknown;
|
|
12
|
+
breach_liability: unknown;
|
|
13
|
+
score: number;
|
|
14
|
+
}
|
|
15
|
+
export declare function searchTariffs(query: string, options: SearchOptions): Promise<SearchResult[]>;
|
|
16
|
+
export declare function getByPlanCode(planCode: string): Promise<SearchResult | null>;
|
package/dist/search.js
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import { embedTexts } from "./embedding.js";
|
|
2
|
+
import { readAllRows } from "./database.js";
|
|
3
|
+
/**
 * Decide whether a stored row passes the search filters.
 *
 * Status rule:
 *  - mode "current": the row must be `status === "active"` AND `is_latest === true`.
 *  - any other mode: the row must be `status === "expired"` OR `is_latest === false`.
 *
 * Each optional filter (`region`, `category`, `channel`) is applied only when it
 * is truthy. `category` may equal either `category_l1` or `category_l2`;
 * `channel` is tested with `row.channel_tags.includes(...)`.
 *
 * @param {object} row - Stored tariff row.
 * @param {object} options - Search options (`mode`, `region`, `category`, `channel`).
 * @returns {boolean} True when the row satisfies every active filter.
 */
function matches(row, options) {
  let statusOk;
  if (options.mode === "current") {
    statusOk = row.status === "active" && row.is_latest === true;
  } else {
    statusOk = row.status === "expired" || row.is_latest === false;
  }
  if (!statusOk) {
    return false;
  }

  const { region, category, channel } = options;
  if (region && row.province !== region) {
    return false;
  }
  if (category && row.category_l1 !== category && row.category_l2 !== category) {
    return false;
  }
  if (channel && !row.channel_tags.includes(channel)) {
    return false;
  }
  return true;
}
|
|
12
|
+
/**
 * Split a query into distinct keyword tokens, in order of first appearance.
 *
 * A token is one of:
 *  - a run of ASCII letters optionally followed by digits,
 *  - a number (optionally decimal) optionally followed by GB / MB / M / 元,
 *  - a run of two or more characters in the U+4E00–U+9FFF range.
 *
 * De-duplication is case-sensitive; callers wanting case-insensitive tokens
 * must normalize the query first.
 *
 * @param {string} query - Text to tokenize.
 * @returns {string[]} Unique tokens; an empty array when nothing matches.
 */
function tokens(query) {
  const pattern = /[A-Za-z]+\d*|\d+(?:\.\d+)?(?:GB|MB|M|元)?|[\u4e00-\u9fff]{2,}/gi;
  const unique = new Set();
  for (const [hit] of query.matchAll(pattern)) {
    unique.add(hit);
  }
  return Array.from(unique);
}
|
|
15
|
+
/**
 * Compute the lexical (keyword) relevance of a row for a query.
 *
 * Two components are added together:
 *  1. Token score: for every distinct lower-cased query token, +8 when it equals
 *     the plan code, +4 when the title contains it, +1 when the content contains
 *     it. The sum is divided by `tokenCount * 4`.
 *  2. Exact-quantity score: every distinct number-with-unit token
 *     (GB / MB / M / 元) found in the title or content adds 1; when the query
 *     has more than one such distinct token and all are found, a further 2 is
 *     added.
 *
 * Quantity tokens are de-duplicated case-insensitively, so repeating the same
 * quantity ("20GB 20gb") neither counts twice nor triggers the multi-token bonus.
 *
 * @param {string} query - Raw user query.
 * @param {{ title: string, content: string, plan_code: string }} row - Row to score.
 * @returns {number} Non-negative score; 0 when the query yields no tokens.
 */
function keywordScore(query, row) {
  // tokens() receives the lower-cased query, so every token is already lower-case.
  const words = tokens(query.toLowerCase());
  if (!words.length) {
    return 0;
  }

  const title = row.title.toLowerCase();
  const content = row.content.toLowerCase();
  const code = row.plan_code.toLowerCase();

  let score = 0;
  for (const word of words) {
    if (code === word) {
      score += 8;
    }
    if (title.includes(word)) {
      score += 4;
    }
    if (content.includes(word)) {
      score += 1;
    }
  }
  let normalized = score / (words.length * 4);

  // Distinct quantity tokens only: duplicates must not inflate the score.
  const quantityMatches = query.match(/\d+(?:\.\d+)?(?:GB|MB|M|元)/gi) ?? [];
  const exactTokens = [...new Set(quantityMatches.map((token) => token.toLowerCase()))];
  const haystack = `${title}\n${content}`;
  const exactHits = exactTokens.filter((token) => haystack.includes(token)).length;
  normalized += exactHits;
  if (exactTokens.length > 1 && exactHits === exactTokens.length) {
    normalized += 2;
  }
  return normalized;
}
|
|
39
|
+
/**
 * Convert a stored row plus its score into the public search-result shape.
 *
 * `raw_fields_json` is parsed (an empty/missing value is treated as `{}`) and
 * three of its entries are surfaced: "适用范围", "退订方式" and "违约责任". A missing
 * entry is reported as "资料未说明". If the JSON parses to something that is not
 * an object (for example `null`), it is treated as an empty record instead of
 * throwing on property access.
 *
 * @param {object} row - Stored row; reads title, plan_code, status, online_date,
 *   offline_date, confirmed_at, content and raw_fields_json.
 * @param {number} score - Relevance score; rounded to 4 decimal places.
 * @returns {object} Result with the copied fields, the three raw fields and `score`.
 * @throws {SyntaxError} When `raw_fields_json` is non-empty but not valid JSON.
 */
function present(row, score) {
  const parsed = JSON.parse(row.raw_fields_json || "{}");
  // JSON such as `null` is valid but cannot be indexed; fall back to an empty record.
  const raw = parsed !== null && typeof parsed === "object" ? parsed : {};
  const missing = "资料未说明";
  return {
    title: row.title,
    plan_code: row.plan_code,
    status: row.status,
    online_date: row.online_date,
    offline_date: row.offline_date,
    confirmed_at: row.confirmed_at,
    content: row.content,
    applicable_scope: raw["适用范围"] ?? missing,
    cancellation: raw["退订方式"] ?? missing,
    breach_liability: raw["违约责任"] ?? missing,
    score: Number(score.toFixed(4)),
  };
}
|
|
48
|
+
/**
 * Hybrid (semantic + keyword) search over the stored tariff rows.
 *
 * Steps:
 *  1. Reject a blank query.
 *  2. Embed the query and load all rows concurrently (the two calls do not
 *     depend on each other).
 *  3. Keep only rows accepted by `matches(row, options)`.
 *  4. Score each row as `0.6 * max(0, cosine(queryVector, row.vector)) + 0.4 * keywordScore`.
 *  5. Sort by score descending and return the top `limit` rows via `present`.
 *
 * `options.limit` defaults to 5 and is clamped to the range 1..20. A limit that
 * evaluates to NaN falls back to 5 instead of silently producing no results.
 *
 * @param {string} query - User query; must contain non-whitespace characters.
 * @param {object} options - Search options (`mode`, optional filters, optional `limit`).
 * @returns {Promise<object[]>} Presented results, best match first.
 * @throws {Error} "query 不能为空" when the query is blank.
 */
export async function searchTariffs(query, options) {
  if (!query.trim()) {
    throw new Error("query 不能为空");
  }

  // Embedding and the table read are independent, so run them in parallel.
  const [[queryVector], allRows] = await Promise.all([
    embedTexts([query], "query"),
    readAllRows(),
  ]);
  const rows = allRows.filter((row) => matches(row, options));

  // Cosine similarity over the shared prefix of both vectors; 0 when either norm is 0.
  const cosine = (a, b) => {
    const length = Math.min(a.length, b.length);
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < length; i += 1) {
      dot += a[i] * b[i];
      normA += a[i] * a[i];
      normB += b[i] * b[i];
    }
    return normA && normB ? dot / Math.sqrt(normA * normB) : 0;
  };

  // slice(0, NaN) would return nothing, so a NaN limit uses the default instead.
  const clamped = Math.min(Math.max(options.limit ?? 5, 1), 20);
  const limit = Number.isNaN(clamped) ? 5 : clamped;

  return rows
    .map((row) => ({
      row,
      semantic: Math.max(0, cosine(queryVector, row.vector)),
      keyword: keywordScore(query, row),
    }))
    .map((item) => ({ ...item, score: item.semantic * 0.6 + item.keyword * 0.4 }))
    .sort((a, b) => b.score - a.score)
    .slice(0, limit)
    .map((item) => present(item.row, item.score));
}
|
|
70
|
+
/**
 * Look up a single tariff by its plan code.
 *
 * All rows are loaded and the first one whose `plan_code` is strictly equal to
 * `planCode` is returned through `present` with a fixed score of 1.
 *
 * @param {string} planCode - Plan code to match exactly.
 * @returns {Promise<object|null>} The presented row, or null when none matches.
 */
export async function getByPlanCode(planCode) {
  const allRows = await readAllRows();
  const hit = allRows.find(({ plan_code }) => plan_code === planCode);
  if (!hit) {
    return null;
  }
  return present(hit, 1);
}
|
|
74
|
+
//# sourceMappingURL=search.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"search.js","sourceRoot":"","sources":["../src/search.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAC5C,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAG5C,SAAS,OAAO,CAAC,GAAc,EAAE,OAAsB;IACrD,MAAM,WAAW,GAAG,OAAO,CAAC,IAAI,KAAK,SAAS;QAC5C,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,SAAS,KAAK,IAAI;QACnD,CAAC,CAAC,GAAG,CAAC,MAAM,KAAK,SAAS,IAAI,GAAG,CAAC,SAAS,KAAK,KAAK,CAAC;IACxD,OAAO,WAAW;WACb,CAAC,CAAC,OAAO,CAAC,MAAM,IAAI,GAAG,CAAC,QAAQ,KAAK,OAAO,CAAC,MAAM,CAAC;WACpD,CAAC,CAAC,OAAO,CAAC,QAAQ,IAAI,GAAG,CAAC,WAAW,KAAK,OAAO,CAAC,QAAQ,IAAI,GAAG,CAAC,WAAW,KAAK,OAAO,CAAC,QAAQ,CAAC;WACnG,CAAC,CAAC,OAAO,CAAC,OAAO,IAAI,GAAG,CAAC,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;AACxE,CAAC;AAED,SAAS,MAAM,CAAC,KAAa;IAC3B,OAAO,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,gEAAgE,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;AAC3G,CAAC;AAED,SAAS,YAAY,CAAC,KAAa,EAAE,GAAc;IACjD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC;IAC1C,IAAI,CAAC,KAAK,CAAC,MAAM;QAAE,OAAO,CAAC,CAAC;IAC5B,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,WAAW,EAAE,EAAE,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,IAAI,GAAG,GAAG,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC;IAC/G,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,CAAC,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAC7B,IAAI,IAAI,KAAK,CAAC;YAAE,KAAK,IAAI,CAAC,CAAC;QAC3B,IAAI,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC;YAAE,KAAK,IAAI,CAAC,CAAC;QAClC,IAAI,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;YAAE,KAAK,IAAI,CAAC,CAAC;IACtC,CAAC;IACD,IAAI,UAAU,GAAG,KAAK,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC5C,MAAM,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,8BAA8B,CAAC,IAAI,EAAE,CAAC;IACtE,MAAM,QAAQ,GAAG,GAAG,KAAK,KAAK,OAAO,EAAE,CAAC;IACxC,MAAM,SAAS,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;IAC/F,UAAU,IAAI,SAAS,CAAC;IACxB,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,IAAI,SAAS,KAAK,WAAW,CAAC,MAAM;QAAE,UAAU,IAAI,CAAC,CAAC;IAChF,OAAO,UAAU,CAAC;AACpB,CAAC;AAQD,SAAS,OAAO,CAAC,GAAuC,EAAE,KAAa;IACrE,MAAM,GAAG,GAAG,IAAI,CAAC,KA
AK,CAAC,GAAG,CAAC,eAAe,IAAI,IAAI,CAA4B,CAAC;IAC/E,OAAO;QACL,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,SAAS,EAAE,GAAG,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5F,YAAY,EAAE,GAAG,CAAC,YAAY,EAAE,YAAY,EAAE,GAAG,CAAC,YAAY,EAAE,OAAO,EAAE,GAAG,CAAC,OAAO;QACpF,gBAAgB,EAAE,GAAG,CAAC,MAAM,CAAC,IAAI,OAAO,EAAE,YAAY,EAAE,GAAG,CAAC,MAAM,CAAC,IAAI,OAAO;QAC9E,gBAAgB,EAAE,GAAG,CAAC,MAAM,CAAC,IAAI,OAAO,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;KAC1E,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,KAAa,EAAE,OAAsB;IACvE,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE;QAAE,MAAM,IAAI,KAAK,CAAC,YAAY,CAAC,CAAC;IACjD,MAAM,CAAC,WAAW,CAAC,GAAG,MAAM,UAAU,CAAC,CAAC,KAAK,CAAC,EAAE,OAAO,CAAC,CAAC;IACzD,MAAM,IAAI,GAAG,CAAC,MAAM,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,CAAC;IAC1E,MAAM,MAAM,GAAG,CAAC,CAAW,EAAE,CAA+B,EAAU,EAAE;QACtE,IAAI,GAAG,GAAG,CAAC,EAAE,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,CAAC,CAAC;QAClC,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC;QAC5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YAAC,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YAAC,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAAC,CAAC;QACvG,OAAO,KAAK,IAAI,KAAK,CAAC,CAAC,CAAC,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7D,CAAC,CAAC;IACF,OAAO,IAAI;SACR,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC,EAAE,OAAO,EAAE,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;SAClH,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,QAAQ,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,GAAG,GAAG,EAAE,CAAC,CAAC;SAC7E,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;SACjC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,IAAI,CAAC,EAA
E,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;SACvD,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;AAClD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,QAAgB;IAClD,MAAM,GAAG,GAAG,CAAC,MAAM,WAAW,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAC;IAC9E,OAAO,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AACtC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import fs from "node:fs/promises";
|
|
3
|
+
import os from "node:os";
|
|
4
|
+
import path from "node:path";
|
|
5
|
+
import test from "node:test";
|
|
6
|
+
process.env.CMCC_RAG_EMBEDDER = "mock";
|
|
7
|
+
process.env.CMCC_RAG_DB = await fs.mkdtemp(path.join(os.tmpdir(), "cmcc-rag-test-"));
|
|
8
|
+
const { importJsonl } = await import("../import.js");
|
|
9
|
+
const { getByPlanCode, searchTariffs } = await import("../search.js");
|
|
10
|
+
const { embedTexts } = await import("../embedding.js");
|
|
11
|
+
/**
 * Build a fixture document for the import/search tests.
 *
 * The document id and title are derived from `code`; the offline date is
 * "2026-01-01" for status "expired" and "2030-01-01" otherwise. Property order
 * is kept stable because the result is serialized with JSON.stringify.
 *
 * @param {string} code - Plan code.
 * @param {string} status - Tariff status, e.g. "active" or "expired".
 * @param {string} content - Document body text.
 * @returns {object} Fixture with `document_id`, `title`, `content`, `metadata`, `raw_fields`.
 */
function doc(code, status, content) {
  const offlineDate = status === "expired" ? "2026-01-01" : "2030-01-01";
  const metadata = {
    plan_code: code,
    status,
    province: "广东",
    category_l1: "套餐",
    category_l2: "个人业务",
    channel_tags: ["线上"],
    confirmed_at: "2026-07-01 00:00:00",
    online_date: "2025-01-01",
    offline_date: offlineDate,
    price_raw: "59元/月",
  };
  const rawFields = { 适用范围: "广东客户", 退订方式: "10086", 违约责任: "无" };
  return {
    document_id: `id-${code}`,
    title: `${code}资费`,
    content,
    metadata,
    raw_fields: rawFields,
  };
}
|
|
14
|
+
/**
 * Write documents to a JSON Lines file (UTF-8), one JSON value per line, with a
 * trailing newline. An empty list produces a file containing a single newline.
 *
 * @param {string} file - Destination path.
 * @param {unknown[]} docs - Values to serialize with JSON.stringify.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function writeJsonl(file, docs) {
  const lines = [];
  for (const item of docs) {
    lines.push(JSON.stringify(item));
  }
  const payload = `${lines.join("\n")}\n`;
  await fs.writeFile(file, payload, "utf8");
}
|
|
15
|
+
test("BGE embeddings use 512 dimensions", async () => {
|
|
16
|
+
const [vector] = await embedTexts(["广东宽带优惠"], "query");
|
|
17
|
+
assert.equal(vector.length, 512);
|
|
18
|
+
});
|
|
19
|
+
test("imports, incrementally updates, soft-expires and searches", async () => {
|
|
20
|
+
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "cmcc-jsonl-"));
|
|
21
|
+
const first = path.join(dir, "first.jsonl"), second = path.join(dir, "second.jsonl");
|
|
22
|
+
await writeJsonl(first, [doc("A1", "active", "广东1000M宽带优惠"), doc("B1", "expired", "历史20GB流量")]);
|
|
23
|
+
const initial = await importJsonl(first);
|
|
24
|
+
assert.deepEqual({ added: initial.added, updated: initial.updated, unchanged: initial.unchanged }, { added: 2, updated: 0, unchanged: 0 });
|
|
25
|
+
const repeat = await importJsonl(first);
|
|
26
|
+
assert.equal(repeat.unchanged, 2);
|
|
27
|
+
await writeJsonl(second, [doc("A1", "active", "广东1000M宽带优惠,升级版"), doc("C1", "active", "59元20GB套餐")]);
|
|
28
|
+
const changed = await importJsonl(second);
|
|
29
|
+
assert.deepEqual({ added: changed.added, updated: changed.updated, missing: changed.missing }, { added: 1, updated: 1, missing: 1 });
|
|
30
|
+
const exact = await getByPlanCode("A1");
|
|
31
|
+
assert.match(exact.content, /升级版/);
|
|
32
|
+
const current = await searchTariffs("1000M宽带", { mode: "current" });
|
|
33
|
+
assert.equal(current[0].plan_code, "A1");
|
|
34
|
+
const history = await searchTariffs("历史20GB", { mode: "history" });
|
|
35
|
+
assert.equal(history[0].plan_code, "B1");
|
|
36
|
+
});
|
|
37
|
+
//# sourceMappingURL=import-search.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"import-search.test.js","sourceRoot":"","sources":["../../src/tests/import-search.test.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,oBAAoB,CAAC;AACxC,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,MAAM,CAAC;AACvC,OAAO,CAAC,GAAG,CAAC,WAAW,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,gBAAgB,CAAC,CAAC,CAAC;AAErF,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC;AACrD,MAAM,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC;AACtE,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;AAEvD,SAAS,GAAG,CAAC,IAAY,EAAE,MAAc,EAAE,OAAe;IACxD,OAAO,EAAE,WAAW,EAAE,MAAM,IAAI,EAAE,EAAE,KAAK,EAAE,GAAG,IAAI,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,IAAI,CAAC,EAAE,YAAY,EAAE,qBAAqB,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,YAAY,EAAE,SAAS,EAAE,OAAO,EAAE,EAAE,UAAU,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC;AACvY,CAAC;AAED,KAAK,UAAU,UAAU,CAAC,IAAY,EAAE,IAAe,IAAI,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC;AAE1J,IAAI,CAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;IACnD,MAAM,CAAC,MAAM,CAAC,GAAG,MAAM,UAAU,CAAC,CAAC,QAAQ,CAAC,EAAE,OAAO,CAAC,CAAC;IACvD,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;AACnC,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,2DAA2D,EAAE,KAAK,IAAI,EAAE;IAC3E,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,aAAa,CAAC,CAAC,CAAC;IACpE,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC;IACrF,MAAM,UAAU,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,QAAQ,EAAE,aAAa,CAAC,EAAE,GAAG,CAAC,IAAI,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,
CAAC;IAChG,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,KAAK,CAAC,CAAC;IACzC,MAAM,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO,CAAC,SAAS,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;IAC3I,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,KAAK,CAAC,CAAC;IACxC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;IAClC,MAAM,UAAU,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,QAAQ,EAAE,iBAAiB,CAAC,EAAE,GAAG,CAAC,IAAI,EAAE,QAAQ,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC;IACrG,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,MAAM,CAAC,CAAC;IAC1C,MAAM,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC;IACrI,MAAM,KAAK,GAAG,MAAM,aAAa,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,CAAC,KAAK,CAAC,KAAM,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;IACpC,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,CAAC;IACpE,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IACzC,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,CAAC;IACnE,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;AAC3C,CAAC,CAAC,CAAC"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
export interface RagDocument {
|
|
2
|
+
document_id: string;
|
|
3
|
+
title: string;
|
|
4
|
+
content: string;
|
|
5
|
+
metadata: Record<string, unknown> & {
|
|
6
|
+
plan_code?: string;
|
|
7
|
+
status?: string;
|
|
8
|
+
province?: string | null;
|
|
9
|
+
category_l1?: string;
|
|
10
|
+
category_l2?: string;
|
|
11
|
+
confirmed_at?: string;
|
|
12
|
+
online_date?: string | null;
|
|
13
|
+
offline_date?: string | null;
|
|
14
|
+
};
|
|
15
|
+
raw_fields: Record<string, unknown>;
|
|
16
|
+
}
|
|
17
|
+
export interface TariffRow {
|
|
18
|
+
plan_code: string;
|
|
19
|
+
document_id: string;
|
|
20
|
+
title: string;
|
|
21
|
+
content: string;
|
|
22
|
+
vector: number[];
|
|
23
|
+
status: string;
|
|
24
|
+
province: string;
|
|
25
|
+
category_l1: string;
|
|
26
|
+
category_l2: string;
|
|
27
|
+
channel_tags: string;
|
|
28
|
+
online_date: string;
|
|
29
|
+
offline_date: string;
|
|
30
|
+
confirmed_at: string;
|
|
31
|
+
price_raw: string;
|
|
32
|
+
content_hash: string;
|
|
33
|
+
first_seen_at: string;
|
|
34
|
+
last_seen_at: string;
|
|
35
|
+
import_batch: string;
|
|
36
|
+
is_latest: boolean;
|
|
37
|
+
metadata_json: string;
|
|
38
|
+
raw_fields_json: string;
|
|
39
|
+
}
|
|
40
|
+
export interface SearchOptions {
|
|
41
|
+
mode: "current" | "history";
|
|
42
|
+
region?: string;
|
|
43
|
+
category?: string;
|
|
44
|
+
channel?: string;
|
|
45
|
+
limit?: number;
|
|
46
|
+
}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
|
package/package.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@osqi/cmcc-tariff-rag",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "Local LanceDB RAG and MCP server for Guangdong CMCC tariffs",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"cmcc-tariff-rag": "./dist/cli.js"
|
|
8
|
+
},
|
|
9
|
+
"files": ["dist", "README.md", "config.example.yaml"],
|
|
10
|
+
"publishConfig": {
|
|
11
|
+
"access": "public"
|
|
12
|
+
},
|
|
13
|
+
"scripts": {
|
|
14
|
+
"build": "tsc -p tsconfig.json",
|
|
15
|
+
"start": "node dist/cli.js serve",
|
|
16
|
+
"test": "npm run build && node --test dist/tests/*.test.js"
|
|
17
|
+
},
|
|
18
|
+
"engines": {
|
|
19
|
+
"node": ">=20"
|
|
20
|
+
},
|
|
21
|
+
"dependencies": {
|
|
22
|
+
"@huggingface/transformers": "4.2.0",
|
|
23
|
+
"@lancedb/lancedb": "0.30.0",
|
|
24
|
+
"@modelcontextprotocol/sdk": "1.29.0",
|
|
25
|
+
"zod": "4.4.3"
|
|
26
|
+
},
|
|
27
|
+
"devDependencies": {
|
|
28
|
+
"@types/node": "^24.0.0",
|
|
29
|
+
"typescript": "6.0.3"
|
|
30
|
+
},
|
|
31
|
+
"license": "MIT"
|
|
32
|
+
}
|