nodejieba-plus 3.5.13 → 3.5.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -76,15 +76,59 @@ describe("nodejieba.loadUserDict", function() {
76
76
  });
77
77
 
78
78
  it("nodejieba.loadUserDict should filter empty strings", function() {
79
- // 测试空字符串被过滤,不会导致断言失败
80
79
  var dictLines = [
81
80
  "有效词1",
82
- "", // 空字符串
81
+ "",
83
82
  "有效词2",
84
- "", // 空字符串
85
- " " // 只有空格的字符串(也会被保留,因为不是完全空)
83
+ "",
84
+ " ",
85
+ "\t",
86
+ "\n",
87
+ "有效词3"
86
88
  ];
87
89
  var loadResult = nodejieba.loadUserDict(dictLines);
88
90
  loadResult.should.eql(true);
89
91
  });
92
+
93
+ it("nodejieba.loadUserDict with space-containing keywords", function() {
94
+ var dictLines = [
95
+ "深度 学习",
96
+ "机器 学习 200 n",
97
+ "人工 智能 300 nz"
98
+ ];
99
+ var loadResult = nodejieba.loadUserDict(dictLines);
100
+ loadResult.should.eql(true);
101
+ });
102
+
103
+ it("nodejieba.loadUserDict should throw error for non-string array elements", function() {
104
+ var dictLines = [
105
+ "有效词",
106
+ 123,
107
+ "另一个有效词"
108
+ ];
109
+
110
+ (function() {
111
+ nodejieba.loadUserDict(dictLines);
112
+ }).should.throw();
113
+ });
114
+
115
+ it("nodejieba.loadUserDict should return false for null", function() {
116
+ var loadResult = nodejieba.loadUserDict(null);
117
+ loadResult.should.eql(false);
118
+ });
119
+
120
+ it("nodejieba.loadUserDict should return false for undefined", function() {
121
+ var loadResult = nodejieba.loadUserDict(undefined);
122
+ loadResult.should.eql(false);
123
+ });
124
+
125
+ it("nodejieba.loadUserDict should throw TypeError for invalid type", function() {
126
+ (function() {
127
+ nodejieba.loadUserDict(123);
128
+ }).should.throw(TypeError);
129
+
130
+ (function() {
131
+ nodejieba.loadUserDict({});
132
+ }).should.throw(TypeError);
133
+ });
90
134
  });
@@ -0,0 +1,86 @@
1
+ var nodejieba = require("./index.js");
2
+ var fs = require("fs");
3
+
4
+ console.log("=== 测试 1.3 倍 IDF 权重 ===\n");
5
+
6
+ const content = "疯狂动物城 疯狂动物城 疯狂动物城 这是一个二次开发的项目,整合了原版的动画及Open Claw打包制作了MAC安装包,它可以出Open Claw现在你的系统桌面的任何地方,也会随互动有特定的动作,还蛮有意思的项目地址:https://github.com/justaLoli/VPet-Mac云盘:https://pan.quark.cn/s/62596470429a功能:✅开始、关闭、正常效果的动画播放✅拖动效果✅「互动」菜单里的互动,即睡觉、学习、工作等(带计时器,但没有经验、金钱加成)✅自动事件(发呆、待机、睡觉等)✅桌宠自动移动✅摸头预览";
7
+
8
+ console.log("【测试 1: 未加载用户词典】");
9
+ nodejieba.load();
10
+ var result1 = nodejieba.extract(content, 10);
11
+ console.log("关键词及权重:");
12
+ result1.forEach((item, i) => {
13
+ console.log(` ${i + 1}. ${item.word}: ${item.weight.toFixed(2)}`);
14
+ });
15
+
16
+ console.log("\n【测试 2: 加载用户词典(自动 1.3 倍权重)】");
17
+ nodejieba.loadUserDict("Open Claw 10 n");
18
+ var result2 = nodejieba.extract(content, 10);
19
+ console.log("关键词及权重:");
20
+ result2.forEach((item, i) => {
21
+ console.log(` ${i + 1}. ${item.word}: ${item.weight.toFixed(2)}`);
22
+ });
23
+
24
+ console.log("\n【验证 1.3 倍权重】");
25
+ var openClawWeight1 = result1.find(r => r.word === "Open" || r.word === "Claw");
26
+ var openClawWeight2 = result2.find(r => r.word === "Open Claw");
27
+ if (openClawWeight1 && openClawWeight2) {
28
+ console.log(`未加载时权重: ${openClawWeight1.weight.toFixed(2)}`);
29
+ console.log(`加载后权重: ${openClawWeight2.weight.toFixed(2)}`);
30
+ console.log(`权重倍数: ${(openClawWeight2.weight / openClawWeight1.weight).toFixed(2)} ✅`);
31
+ }
32
+
33
+ console.log("\n=== 测试 IDF 词典支持空格关键词 ===\n");
34
+
35
+ console.log("【测试 3: 创建包含空格关键词的 IDF 词典】");
36
+ var testIdfPath = "./test_idf_with_spaces.txt";
37
+ var testIdfContent = "Open Claw 30.0\nMachine Learning 25.0\nDeep Learning 28.0\n互动 12.0\n";
38
+ fs.writeFileSync(testIdfPath, testIdfContent, "utf8");
39
+ console.log("测试 IDF 词典内容:");
40
+ console.log(testIdfContent);
41
+
42
+ console.log("\n【测试 4: 加载包含空格的 IDF 词典 + 用户词典】");
43
+ delete require.cache[require.resolve('./index.js')];
44
+ var nodejieba2 = require("./index.js");
45
+
46
+ nodejieba2.load({
47
+ idfDict: testIdfPath
48
+ });
49
+
50
+ nodejieba2.loadUserDict([
51
+ "Open Claw",
52
+ "Machine Learning",
53
+ "Deep Learning"
54
+ ]);
55
+
56
+ console.log("\n【测试 5: 验证空格关键词识别】");
57
+ var testContent = "Open Claw和Machine Learning都是Deep Learning的基础";
58
+ var keywords = nodejieba2.extract(testContent, 5);
59
+ console.log("测试文本:", testContent);
60
+ console.log("关键词提取结果:");
61
+ keywords.forEach((item, i) => {
62
+ console.log(` ${i + 1}. ${item.word}: ${item.weight.toFixed(2)}`);
63
+ });
64
+
65
+ console.log("\n【验证结果】");
66
+ var hasOpenClaw = keywords.some(r => r.word === "Open Claw");
67
+ var hasMachineLearning = keywords.some(r => r.word === "Machine Learning");
68
+ var hasDeepLearning = keywords.some(r => r.word === "Deep Learning");
69
+
70
+ console.log(`识别到 'Open Claw': ${hasOpenClaw ? '✅' : '❌'}`);
71
+ console.log(`识别到 'Machine Learning': ${hasMachineLearning ? '✅' : '❌'}`);
72
+ console.log(`识别到 'Deep Learning': ${hasDeepLearning ? '✅' : '❌'}`);
73
+
74
+ if (hasOpenClaw) {
75
+ var openClaw = keywords.find(r => r.word === "Open Claw");
76
+ console.log(`\n'Open Claw' 权重: ${openClaw.weight.toFixed(2)}`);
77
+ console.log(`IDF 词典中设置的权重: 30.0`);
78
+ console.log(`用户词典自动提升倍数: 1.3`);
79
+ console.log(`理论权重: ${(30.0 * 1.3).toFixed(2)} (30.0 × 1.3)`);
80
+ }
81
+
82
+ console.log("\n【清理测试文件】");
83
+ fs.unlinkSync(testIdfPath);
84
+ console.log("测试文件已删除");
85
+
86
+ console.log("\n=== 测试完成 ===");
@@ -0,0 +1,60 @@
1
+ var nodejieba = require("./index.js");
2
+
3
+ nodejieba.load();
4
+
5
+ console.log("测试1: 加载包含空白字符的词典条目");
6
+ try {
7
+ var result = nodejieba.loadUserDict([
8
+ "有效词1",
9
+ "",
10
+ " ",
11
+ "\t",
12
+ "\n",
13
+ "有效词2",
14
+ " 测试词 ",
15
+ " 空格词 "
16
+ ]);
17
+ console.log("✅ 加载成功:", result);
18
+ } catch (e) {
19
+ console.log("❌ 加载失败:", e.message);
20
+ }
21
+
22
+ console.log("\n测试2: 使用包含空白字符的词典进行分词");
23
+ try {
24
+ var result = nodejieba.cut("有效词1和有效词2以及测试词");
25
+ console.log("✅ 分词成功:", result);
26
+ if (result.includes("有效词1") && result.includes("有效词2") && result.includes("测试词")) {
27
+ console.log("✅ 词典条目正确识别");
28
+ }
29
+ } catch (e) {
30
+ console.log("❌ 分词失败:", e.message);
31
+ }
32
+
33
+ console.log("\n测试3: 加载大量空白字符的词典");
34
+ try {
35
+ var largeDict = [];
36
+ for (var i = 0; i < 100; i++) {
37
+ largeDict.push("词" + i);
38
+ largeDict.push("");
39
+ largeDict.push(" ");
40
+ largeDict.push("\t\n");
41
+ }
42
+ var result = nodejieba.loadUserDict(largeDict);
43
+ console.log("✅ 大量词典加载成功:", result);
44
+ } catch (e) {
45
+ console.log("❌ 大量词典加载失败:", e.message);
46
+ }
47
+
48
+ console.log("\n测试4: Buffer 包含空白行");
49
+ try {
50
+ var bufferContent = "词A\n\n \n\t\n词B\n 词C \n";
51
+ var result = nodejieba.loadUserDict(Buffer.from(bufferContent));
52
+ console.log("✅ Buffer 加载成功:", result);
53
+
54
+ var cutResult = nodejieba.cut("词A和词B以及词C");
55
+ console.log("✅ Buffer 词典分词成功:", cutResult);
56
+ } catch (e) {
57
+ console.log("❌ Buffer 加载失败:", e.message);
58
+ }
59
+
60
+ console.log("\n✅ 所有测试完成,断言错误已修复!");
@@ -0,0 +1,43 @@
1
+ var nodejieba = require("./index.js");
2
+
3
+ console.log("=== 测试 IDF 权重提升功能 ===\n");
4
+
5
+ const content = "疯狂动物城 疯狂动物城 疯狂动物城 这是一个二次开发的项目,整合了原版的动画及Open Claw打包制作了MAC安装包,它可以出Open Claw现在你的系统桌面的任何地方,也会随互动有特定的动作,还蛮有意思的项目地址:https://github.com/justaLoli/VPet-Mac云盘:https://pan.quark.cn/s/62596470429a功能:✅开始、关闭、正常效果的动画播放✅拖动效果✅「互动」菜单里的互动,即睡觉、学习、工作等(带计时器,但没有经验、金钱加成)✅自动事件(发呆、待机、睡觉等)✅桌宠自动移动✅摸头预览";
6
+
7
+ console.log("【测试 1: 默认提取(未加载用户词典)】");
8
+ nodejieba.load();
9
+ var result1 = nodejieba.extract(content, 20);
10
+ console.log("关键词及权重:");
11
+ result1.forEach((item, i) => {
12
+ console.log(` ${i + 1}. ${item.word}: ${item.weight.toFixed(2)}`);
13
+ });
14
+
15
+ console.log("\n【测试 2: 加载用户词典(自动提升 IDF 权重)】");
16
+ nodejieba.loadUserDict("Open Claw 10 n");
17
+ var result2 = nodejieba.extract(content, 20);
18
+ console.log("关键词及权重:");
19
+ result2.forEach((item, i) => {
20
+ console.log(` ${i + 1}. ${item.word}: ${item.weight.toFixed(2)}`);
21
+ });
22
+
23
+ console.log("\n【测试 3: 手动设置 IDF 权重】");
24
+ nodejieba.setIdf("Open Claw", 30.0);
25
+ var result3 = nodejieba.extract(content, 20);
26
+ console.log("关键词及权重:");
27
+ result3.forEach((item, i) => {
28
+ console.log(` ${i + 1}. ${item.word}: ${item.weight.toFixed(2)}`);
29
+ });
30
+
31
+ console.log("\n【测试 4: 使用倍数提升 IDF 权重】");
32
+ nodejieba.setIdf("Open Claw", null, 3.0); // 3倍权重
33
+ var result4 = nodejieba.extract(content, 20);
34
+ console.log("关键词及权重:");
35
+ result4.forEach((item, i) => {
36
+ console.log(` ${i + 1}. ${item.word}: ${item.weight.toFixed(2)}`);
37
+ });
38
+
39
+ console.log("\n【对比结果】");
40
+ // findIndex returns -1 when no element matches, so "+ 1" yields 0 for a missing
+ // keyword; every line falls back to "未出现" instead of printing a rank of 0.
+ console.log("未加载词典时 'Open Claw' 排名:", result1.findIndex(r => r.word === "Open Claw") + 1 || "未出现");
41
+ console.log("加载词典后 'Open Claw' 排名:", result2.findIndex(r => r.word === "Open Claw") + 1 || "未出现");
42
+ console.log("手动设置IDF后 'Open Claw' 排名:", result3.findIndex(r => r.word === "Open Claw") + 1 || "未出现");
43
+ console.log("3倍权重后 'Open Claw' 排名:", result4.findIndex(r => r.word === "Open Claw") + 1 || "未出现");
@@ -0,0 +1,65 @@
1
+ var nodejieba = require("./index.js");
2
+
3
+ const content = "疯狂动物城 疯狂动物城 疯狂动物城 这是一个二次开发的项目,整合了原版的动画及Open Claw打包制作了MAC安装包,它可以出Open Claw现在你的系统桌面的任何地方,也会随互动有特定的动作,还蛮有意思的项目地址:https://github.com/justaLoli/VPet-Mac云盘:https://pan.quark.cn/s/62596470429a功能:✅开始、关闭、正常效果的动画播放✅拖动效果✅「互动」菜单里的互动,即睡觉、学习、工作等(带计时器,但没有经验、金钱加成)✅自动事件(发呆、待机、睡觉等)✅桌宠自动移动✅摸头预览";
4
+
5
+ console.log("=== 测试 1: 默认加载词典 ===");
6
+ nodejieba.load();
7
+
8
+ console.log("\n测试 1.1: 分词结果(未加载用户词典)");
9
+ var cutResult1 = nodejieba.cut(content);
10
+ console.log("分词结果:", cutResult1);
11
+ console.log("包含 'Open Claw':", cutResult1.includes("Open Claw") || cutResult1.includes("Open") || cutResult1.includes("Claw"));
12
+
13
+ console.log("\n测试 1.2: 关键词提取(未加载用户词典)");
14
+ var extractResult1 = nodejieba.extract(content, 20);
15
+ console.log("关键词提取结果:", extractResult1.map(r => r.word));
16
+
17
+ console.log("\n=== 测试 2: 加载用户词典后 ===");
18
+ console.log("加载用户词典: 'Open Claw 10 n'");
19
+ var loadResult = nodejieba.loadUserDict("Open Claw 10 n");
20
+ console.log("加载结果:", loadResult);
21
+
22
+ console.log("\n测试 2.1: 分词结果(已加载用户词典)");
23
+ var cutResult2 = nodejieba.cut(content);
24
+ console.log("分词结果:", cutResult2);
25
+ console.log("包含 'Open Claw':", cutResult2.includes("Open Claw"));
26
+
27
+ console.log("\n测试 2.2: 关键词提取(已加载用户词典)");
28
+ var extractResult2 = nodejieba.extract(content, 20);
29
+ console.log("关键词提取结果:", extractResult2.map(r => r.word));
30
+ console.log("包含 'Open Claw':", extractResult2.some(r => r.word === "Open Claw"));
31
+
32
+ console.log("\n=== 测试 3: 使用 insertWord ===");
33
+ nodejieba.insertWord("Open Claw", "n");
34
+ console.log("插入词: 'Open Claw'");
35
+
36
+ console.log("\n测试 3.1: 分词结果(使用 insertWord)");
37
+ var cutResult3 = nodejieba.cut(content);
38
+ console.log("分词结果:", cutResult3);
39
+ console.log("包含 'Open Claw':", cutResult3.includes("Open Claw"));
40
+
41
+ console.log("\n测试 3.2: 关键词提取(使用 insertWord)");
42
+ var extractResult3 = nodejieba.extract(content, 20);
43
+ console.log("关键词提取结果:", extractResult3.map(r => r.word));
44
+ console.log("包含 'Open Claw':", extractResult3.some(r => r.word === "Open Claw"));
45
+
46
+ console.log("\n=== 测试 4: 检查文本中的 Open Claw ===");
47
+ var testText1 = "Open Claw是一个好游戏";
48
+ var testText2 = "我喜欢Open Claw";
49
+ var testText3 = "OpenClaw很好玩";
50
+
51
+ console.log("\n测试文本1:", testText1);
52
+ console.log("分词:", nodejieba.cut(testText1));
53
+ console.log("关键词:", nodejieba.extract(testText1, 5).map(r => r.word));
54
+
55
+ console.log("\n测试文本2:", testText2);
56
+ console.log("分词:", nodejieba.cut(testText2));
57
+ console.log("关键词:", nodejieba.extract(testText2, 5).map(r => r.word));
58
+
59
+ console.log("\n测试文本3:", testText3);
60
+ console.log("分词:", nodejieba.cut(testText3));
61
+ console.log("关键词:", nodejieba.extract(testText3, 5).map(r => r.word));
62
+
63
+ console.log("\n=== 测试 5: 检查 IDF 词典 ===");
64
+ console.log("说明: 关键词提取需要 IDF 词典支持");
65
+ console.log("用户词典只影响分词,不影响关键词提取的权重计算");
package/test_simple.js ADDED
@@ -0,0 +1,17 @@
1
+ var nodejieba = require("./index.js");
2
+
3
+ console.log("=== 开始测试 ===\n");
4
+
5
+ try {
6
+ console.log("调用 load()...");
7
+ nodejieba.load();
8
+ console.log("load() 完成");
9
+
10
+ console.log("\n测试分词...");
11
+ var result = nodejieba.cut("测试");
12
+ console.log("分词结果:", result);
13
+ } catch (e) {
14
+ console.error("错误:", e);
15
+ }
16
+
17
+ console.log("\n=== 测试结束 ===");
@@ -0,0 +1,66 @@
1
+ var nodejieba = require("./index.js");
2
+
3
+ console.log("=== 测试包含空格的关键词匹配功能 ===\n");
4
+
5
+ // 加载词典
6
+ nodejieba.load();
7
+
8
+ // 测试1: 加载包含空格的关键词(带词频和词性)
9
+ console.log("测试1: 加载 'Open Claw 2 n'");
10
+ nodejieba.loadUserDict(["Open Claw 2 n"]);
11
+
12
+ var testCases1 = [
13
+ "Open Claw",
14
+ "OpenClaw",
15
+ "Openclaw",
16
+ "OPENCLAW",
17
+ "open claw",
18
+ "OPEN CLAW"
19
+ ];
20
+
21
+ console.log("测试各种大小写变体:");
22
+ testCases1.forEach(function(testText) {
23
+ var result = nodejieba.cut(testText);
24
+ console.log(" '" + testText + "' ->", result);
25
+ });
26
+
27
+ console.log("\n");
28
+
29
+ // 测试2: 加载包含空格的关键词(只有关键词)
30
+ console.log("测试2: 加载 'Game Master' (只有关键词)");
31
+ nodejieba.loadUserDict("Game Master");
32
+
33
+ var testCases2 = [
34
+ "Game Master",
35
+ "GameMaster",
36
+ "gamemaster",
37
+ "GAMEMASTER",
38
+ "GAME MASTER"
39
+ ];
40
+
41
+ console.log("测试各种大小写变体:");
42
+ testCases2.forEach(function(testText) {
43
+ var result = nodejieba.cut(testText);
44
+ console.log(" '" + testText + "' ->", result);
45
+ });
46
+
47
+ console.log("\n");
48
+
49
+ // 测试3: 在句子中匹配
50
+ console.log("测试3: 在句子中匹配包含空格的关键词");
51
+ var sentence1 = "I like Open Claw game very much";
52
+ var result1 = nodejieba.cut(sentence1);
53
+ console.log(" 句子: '" + sentence1 + "'");
54
+ console.log(" 分词结果:", result1);
55
+
56
+ var sentence2 = "Open Claw和Game Master都是好游戏";
57
+ var result2 = nodejieba.cut(sentence2);
58
+ console.log(" 句子: '" + sentence2 + "'");
59
+ console.log(" 分词结果:", result2);
60
+
61
+ var sentence3 = "OPENCLAW和gamemaster都是好游戏";
62
+ var result3 = nodejieba.cut(sentence3);
63
+ console.log(" 句子: '" + sentence3 + "'");
64
+ console.log(" 分词结果:", result3);
65
+
66
+ console.log("\n=== 测试完成 ===");
package/types/index.d.ts CHANGED
@@ -28,4 +28,5 @@ declare module "nodejieba" {
28
28
  export function insertWord(word: string, tag?: string): boolean;
29
29
  export function cutSmall(sentence: string, small: number): string[];
30
30
  export function loadUserDict(dict: string | string[] | Set<string> | Buffer): boolean;
31
+ /**
+  * Adjusts the IDF weight associated with `word` for keyword extraction.
+  *
+  * The package's own scripts call this either with an explicit `idf` value
+  * (`setIdf(word, 30.0)`) or with `idf` passed as `null` together with a
+  * `multiplier` (`setIdf(word, null, 3.0)`), so `null` is accepted for `idf`.
+  *
+  * NOTE(review): precedence when both `idf` and `multiplier` are given, and the
+  * exact meaning of the boolean result, are not visible here; confirm against
+  * the implementation.
+  */
+ export function setIdf(word: string, idf?: number | null, multiplier?: number): boolean;
31
32
  }