nodejieba-plus 3.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. package/.github/FUNDING.yml +12 -0
  2. package/.github/workflows/github_release.yml +61 -0
  3. package/.github/workflows/npm_publish.yml +24 -0
  4. package/.github/workflows/stale-issues.yml +24 -0
  5. package/.github/workflows/test.yml +42 -0
  6. package/.gitmodules +3 -0
  7. package/.npmignore +15 -0
  8. package/CHANGELOG.md +360 -0
  9. package/CONTRIBUTING.md +78 -0
  10. package/LICENSE +21 -0
  11. package/README.md +349 -0
  12. package/binding.gyp +63 -0
  13. package/index.js +77 -0
  14. package/lib/index.cpp +3 -0
  15. package/lib/nodejieba.cpp +218 -0
  16. package/lib/nodejieba.h +28 -0
  17. package/lib/utils.h +47 -0
  18. package/package.json +48 -0
  19. package/submodules/cppjieba/.github/workflows/cmake.yml +51 -0
  20. package/submodules/cppjieba/.github/workflows/stale-issues.yml +24 -0
  21. package/submodules/cppjieba/.gitmodules +3 -0
  22. package/submodules/cppjieba/CHANGELOG.md +305 -0
  23. package/submodules/cppjieba/CMakeLists.txt +42 -0
  24. package/submodules/cppjieba/LICENSE +20 -0
  25. package/submodules/cppjieba/README.md +280 -0
  26. package/submodules/cppjieba/deps/limonp/.github/workflows/cmake.yml +43 -0
  27. package/submodules/cppjieba/deps/limonp/.gitmodules +0 -0
  28. package/submodules/cppjieba/deps/limonp/CHANGELOG.md +160 -0
  29. package/submodules/cppjieba/deps/limonp/CMakeLists.txt +61 -0
  30. package/submodules/cppjieba/deps/limonp/LICENSE +20 -0
  31. package/submodules/cppjieba/deps/limonp/README.md +38 -0
  32. package/submodules/cppjieba/deps/limonp/include/limonp/ArgvContext.hpp +70 -0
  33. package/submodules/cppjieba/deps/limonp/include/limonp/Closure.hpp +206 -0
  34. package/submodules/cppjieba/deps/limonp/include/limonp/Colors.hpp +31 -0
  35. package/submodules/cppjieba/deps/limonp/include/limonp/Condition.hpp +38 -0
  36. package/submodules/cppjieba/deps/limonp/include/limonp/Config.hpp +103 -0
  37. package/submodules/cppjieba/deps/limonp/include/limonp/ForcePublic.hpp +7 -0
  38. package/submodules/cppjieba/deps/limonp/include/limonp/LocalVector.hpp +139 -0
  39. package/submodules/cppjieba/deps/limonp/include/limonp/Logging.hpp +90 -0
  40. package/submodules/cppjieba/deps/limonp/include/limonp/NonCopyable.hpp +21 -0
  41. package/submodules/cppjieba/deps/limonp/include/limonp/StdExtension.hpp +157 -0
  42. package/submodules/cppjieba/deps/limonp/include/limonp/StringUtil.hpp +386 -0
  43. package/submodules/cppjieba/deps/limonp/test/CMakeLists.txt +8 -0
  44. package/submodules/cppjieba/deps/limonp/test/demo.cpp +40 -0
  45. package/submodules/cppjieba/deps/limonp/test/testdata/1.conf +5 -0
  46. package/submodules/cppjieba/deps/limonp/test/testdata/StdExtension.data +3 -0
  47. package/submodules/cppjieba/deps/limonp/test/testdata/dict.gbk +50 -0
  48. package/submodules/cppjieba/deps/limonp/test/testdata/dict.utf8 +50 -0
  49. package/submodules/cppjieba/deps/limonp/test/testdata/io_testfile +2 -0
  50. package/submodules/cppjieba/deps/limonp/test/testdata/jieba.dict.0.1.utf8 +93 -0
  51. package/submodules/cppjieba/deps/limonp/test/testdata/jieba.dict.0.utf8 +93 -0
  52. package/submodules/cppjieba/deps/limonp/test/testdata/jieba.dict.1.utf8 +67 -0
  53. package/submodules/cppjieba/deps/limonp/test/testdata/jieba.dict.2.utf8 +64 -0
  54. package/submodules/cppjieba/deps/limonp/test/unittest/CMakeLists.txt +30 -0
  55. package/submodules/cppjieba/deps/limonp/test/unittest/TArgvContext.cpp +16 -0
  56. package/submodules/cppjieba/deps/limonp/test/unittest/TCastFloat.cpp +19 -0
  57. package/submodules/cppjieba/deps/limonp/test/unittest/TClosure.cpp +85 -0
  58. package/submodules/cppjieba/deps/limonp/test/unittest/TColorPrint.cpp +20 -0
  59. package/submodules/cppjieba/deps/limonp/test/unittest/TConfig.cpp +17 -0
  60. package/submodules/cppjieba/deps/limonp/test/unittest/TLocalVector.cpp +41 -0
  61. package/submodules/cppjieba/deps/limonp/test/unittest/TLogging.cpp +12 -0
  62. package/submodules/cppjieba/deps/limonp/test/unittest/TStdExtension.cpp +95 -0
  63. package/submodules/cppjieba/deps/limonp/test/unittest/TStringUtil.cpp +183 -0
  64. package/submodules/cppjieba/deps/limonp/test/unittest/gtest_main.cpp +39 -0
  65. package/submodules/cppjieba/dict/README.md +31 -0
  66. package/submodules/cppjieba/dict/hmm_model.utf8 +34 -0
  67. package/submodules/cppjieba/dict/idf.utf8 +258826 -0
  68. package/submodules/cppjieba/dict/jieba.dict.utf8 +348982 -0
  69. package/submodules/cppjieba/dict/pos_dict/char_state_tab.utf8 +6653 -0
  70. package/submodules/cppjieba/dict/pos_dict/prob_emit.utf8 +166 -0
  71. package/submodules/cppjieba/dict/pos_dict/prob_start.utf8 +259 -0
  72. package/submodules/cppjieba/dict/pos_dict/prob_trans.utf8 +5222 -0
  73. package/submodules/cppjieba/dict/stop_words.utf8 +1534 -0
  74. package/submodules/cppjieba/dict/user.dict.utf8 +4 -0
  75. package/submodules/cppjieba/include/cppjieba/DictTrie.hpp +381 -0
  76. package/submodules/cppjieba/include/cppjieba/FullSegment.hpp +93 -0
  77. package/submodules/cppjieba/include/cppjieba/HMMModel.hpp +129 -0
  78. package/submodules/cppjieba/include/cppjieba/HMMSegment.hpp +190 -0
  79. package/submodules/cppjieba/include/cppjieba/Jieba.hpp +169 -0
  80. package/submodules/cppjieba/include/cppjieba/KeywordExtractor.hpp +153 -0
  81. package/submodules/cppjieba/include/cppjieba/MPSegment.hpp +137 -0
  82. package/submodules/cppjieba/include/cppjieba/MixSegment.hpp +109 -0
  83. package/submodules/cppjieba/include/cppjieba/PosTagger.hpp +77 -0
  84. package/submodules/cppjieba/include/cppjieba/PreFilter.hpp +54 -0
  85. package/submodules/cppjieba/include/cppjieba/QuerySegment.hpp +89 -0
  86. package/submodules/cppjieba/include/cppjieba/SegmentBase.hpp +48 -0
  87. package/submodules/cppjieba/include/cppjieba/SegmentTagged.hpp +23 -0
  88. package/submodules/cppjieba/include/cppjieba/TextRankExtractor.hpp +190 -0
  89. package/submodules/cppjieba/include/cppjieba/Trie.hpp +200 -0
  90. package/submodules/cppjieba/include/cppjieba/Unicode.hpp +231 -0
  91. package/submodules/cppjieba/test/CMakeLists.txt +4 -0
  92. package/submodules/cppjieba/test/load_test.cpp +54 -0
  93. package/submodules/cppjieba/test/testdata/curl.res +1 -0
  94. package/submodules/cppjieba/test/testdata/extra_dict/jieba.dict.small.utf8 +109750 -0
  95. package/submodules/cppjieba/test/testdata/gbk_dict/hmm_model.gbk +34 -0
  96. package/submodules/cppjieba/test/testdata/gbk_dict/jieba.dict.gbk +348982 -0
  97. package/submodules/cppjieba/test/testdata/jieba.dict.0.1.utf8 +93 -0
  98. package/submodules/cppjieba/test/testdata/jieba.dict.0.utf8 +93 -0
  99. package/submodules/cppjieba/test/testdata/jieba.dict.1.utf8 +67 -0
  100. package/submodules/cppjieba/test/testdata/jieba.dict.2.utf8 +64 -0
  101. package/submodules/cppjieba/test/testdata/load_test.urls +2 -0
  102. package/submodules/cppjieba/test/testdata/review.100 +100 -0
  103. package/submodules/cppjieba/test/testdata/review.100.res +200 -0
  104. package/submodules/cppjieba/test/testdata/server.conf +19 -0
  105. package/submodules/cppjieba/test/testdata/testlines.gbk +9 -0
  106. package/submodules/cppjieba/test/testdata/testlines.utf8 +8 -0
  107. package/submodules/cppjieba/test/testdata/userdict.2.utf8 +1 -0
  108. package/submodules/cppjieba/test/testdata/userdict.english +2 -0
  109. package/submodules/cppjieba/test/testdata/userdict.utf8 +8 -0
  110. package/submodules/cppjieba/test/testdata/weicheng.utf8 +247 -0
  111. package/submodules/cppjieba/test/unittest/CMakeLists.txt +33 -0
  112. package/submodules/cppjieba/test/unittest/gtest_main.cpp +39 -0
  113. package/submodules/cppjieba/test/unittest/jieba_test.cpp +166 -0
  114. package/submodules/cppjieba/test/unittest/keyword_extractor_test.cpp +79 -0
  115. package/submodules/cppjieba/test/unittest/pos_tagger_test.cpp +41 -0
  116. package/submodules/cppjieba/test/unittest/pre_filter_test.cpp +43 -0
  117. package/submodules/cppjieba/test/unittest/segments_test.cpp +256 -0
  118. package/submodules/cppjieba/test/unittest/textrank_test.cpp +86 -0
  119. package/submodules/cppjieba/test/unittest/trie_test.cpp +177 -0
  120. package/submodules/cppjieba/test/unittest/unicode_test.cpp +43 -0
  121. package/test/debug_split +0 -0
  122. package/test/debug_split2 +0 -0
  123. package/test/debug_split3 +0 -0
  124. package/test/load_dict_test.js +14 -0
  125. package/test/missing_binding_test.js +42 -0
  126. package/test/test.js +366 -0
  127. package/test/testdata/userdict.utf8 +1 -0
  128. package/tsconfig.json +59 -0
  129. package/types/index.d.ts +30 -0
  130. package/typescript_demo.ts +38 -0
@@ -0,0 +1,86 @@
1
+ #include "cppjieba/TextRankExtractor.hpp"
2
+ #include "gtest/gtest.h"
3
+
4
+ using namespace cppjieba;
5
+
6
+ TEST(TextRankExtractorTest, Test1) { // Checks TextRankExtractor::Extract output for three inputs; the extractor is built from three paths (small dictionary, HMM model, stop words).
7
+ TextRankExtractor Extractor(
8
+ "../test/testdata/extra_dict/jieba.dict.small.utf8",
9
+ "../dict/hmm_model.utf8",
10
+ "../dict/stop_words.utf8");
11
+ {
12
+ string s("你好世界世界而且而且");
13
+ string res; // shared by the three sub-blocks below; each one compares it against only its own output
14
+ size_t topN = 5; // passed as the third argument to Extract; only two words are expected back for this input
15
+
16
+ {
17
+ vector<string> words; // Extract overload that fills plain words
18
+ Extractor.Extract(s, words, topN);
19
+ res << words; // serializes the container into res; presumably replaces the previous contents -- confirm in limonp/StdExtension.hpp
20
+ ASSERT_EQ(res, "[\"世界\", \"你好\"]");
21
+ }
22
+
23
+ {
24
+ vector<pair<string, double> > words; // Extract overload that fills (word, weight) pairs
25
+ Extractor.Extract(s, words, topN);
26
+ res << words;
27
+ ASSERT_EQ(res, "[世界:1, 你好:0.519787]"); // weights are compared through their printed text, not numerically
28
+ }
29
+
30
+ {
31
+ vector<TextRankExtractor::Word> words; // Extract overload that fills Word records (word, offsets, weight)
32
+ Extractor.Extract(s, words, topN);
33
+ res << words;
34
+ ASSERT_EQ(res, "[{\"word\": \"世界\", \"offset\": [6, 12], \"weight\": 1}, {\"word\": \"你好\", \"offset\": [0], \"weight\": 0.519787}]"); // the word that occurs twice in s is expected with two offsets (6 and 12)
35
+ }
36
+ }
37
+
38
+ {
39
+ string s("\xe6\x88\x91\xe6\x98\xaf\xe6\x8b\x96\xe6\x8b\x89\xe6\x9c\xba\xe5\xad\xa6\xe9\x99\xa2\xe6\x89\x8b\xe6\x89\xb6\xe6\x8b\x96\xe6\x8b\x89\xe6\x9c\xba\xe4\xb8\x93\xe4\xb8\x9a\xe7\x9a\x84\xe3\x80\x82\xe4\xb8\x8d\xe7\x94\xa8\xe5\xa4\x9a\xe4\xb9\x85\xef\xbc\x8c\xe6\x88\x91\xe5\xb0\xb1\xe4\xbc\x9a\xe5\x8d\x87\xe8\x81\x8c\xe5\x8a\xa0\xe8\x96\xaa\xef\xbc\x8c\xe5\xbd\x93\xe4\xb8\x8a CEO\xef\xbc\x8c\xe8\xb5\xb0\xe4\xb8\x8a\xe4\xba\xba\xe7\x94\x9f\xe5\xb7\x85\xe5\xb3\xb0"); // input sentence written as hex-escaped bytes with a literal " CEO" in the middle
40
+ string res;
41
+ vector<TextRankExtractor::Word> wordweights;
42
+ size_t topN = 5; // exactly five entries are expected in the result below
43
+ Extractor.Extract(s, wordweights, topN);
44
+ res << wordweights;
45
+ ASSERT_EQ(res, "[{\"word\": \"当上\", \"offset\": [87], \"weight\": 1}, {\"word\": \"不用\", \"offset\": [48], \"weight\": 0.989848}, {\"word\": \"多久\", \"offset\": [54], \"weight\": 0.985126}, {\"word\": \"加薪\", \"offset\": [78], \"weight\": 0.983046}, {\"word\": \"升职\", \"offset\": [72], \"weight\": 0.980278}]");
46
+ //ASSERT_EQ(res, "[{\"word\": \"专业\", \"offset\": [36], \"weight\": 1}, {\"word\": \"CEO\", \"offset\": [94], \"weight\": 0.95375}, {\"word\": \"手扶拖拉机\", \"offset\": [21], \"weight\": 0.801701}, {\"word\": \"当上\", \"offset\": [87], \"weight\": 0.798968}, {\"word\": \"走上\", \"offset\": [100], \"weight\": 0.775505}]");
47
+ }
48
+
49
+ {
50
+ string s("一部iPhone6"); // mixed CJK and ASCII input
51
+ string res;
52
+ vector<TextRankExtractor::Word> wordweights;
53
+ size_t topN = 5;
54
+ Extractor.Extract(s, wordweights, topN);
55
+ res << wordweights;
56
+ ASSERT_EQ(res, "[{\"word\": \"一部\", \"offset\": [0], \"weight\": 1}, {\"word\": \"iPhone6\", \"offset\": [6], \"weight\": 0.996126}]"); // "iPhone6" is expected as a single word at offset 6
57
+ }
58
+ }
59
+
60
+ TEST(TextRankExtractorTest, Test2) { // Same kind of checks as Test1, but the extractor is built with a fourth path pointing at a user dictionary.
61
+ TextRankExtractor Extractor(
62
+ "../test/testdata/extra_dict/jieba.dict.small.utf8",
63
+ "../dict/hmm_model.utf8",
64
+ "../dict/stop_words.utf8",
65
+ "../test/testdata/userdict.utf8"); // the only constructor argument that differs from Test1
66
+
67
+ {
68
+ string s("\xe8\x93\x9d\xe7\xbf\x94\xe4\xbc\x98\xe7\xa7\x80\xe6\xaf\x95\xe4\xb8\x9a\xe7\x94\x9f"); // input written as hex-escaped bytes (21 bytes)
69
+ string res;
70
+ vector<TextRankExtractor::Word> wordweights;
71
+ size_t topN = 5; // three words are expected back for this input
72
+ Extractor.Extract(s, wordweights, topN);
73
+ res << wordweights;
74
+ ASSERT_EQ(res, "[{\"word\": \"蓝翔\", \"offset\": [0], \"weight\": 1}, {\"word\": \"毕业生\", \"offset\": [12], \"weight\": 0.996685}, {\"word\": \"优秀\", \"offset\": [6], \"weight\": 0.992994}]"); // entries are listed by descending weight, not by offset
75
+ }
76
+
77
+ {
78
+ string s("一部iPhone6"); // same input and expected output as the last case of Test1
79
+ string res;
80
+ vector<TextRankExtractor::Word> wordweights;
81
+ size_t topN = 5;
82
+ Extractor.Extract(s, wordweights, topN);
83
+ res << wordweights;
84
+ ASSERT_EQ(res, "[{\"word\": \"一部\", \"offset\": [0], \"weight\": 1}, {\"word\": \"iPhone6\", \"offset\": [6], \"weight\": 0.996126}]");
85
+ }
86
+ }
@@ -0,0 +1,177 @@
1
+ #include "cppjieba/DictTrie.hpp"
2
+ #include "cppjieba/MPSegment.hpp"
3
+ #include "gtest/gtest.h"
4
+
5
+ using namespace cppjieba;
6
+
7
+ static const char* const DICT_FILE = "../test/testdata/extra_dict/jieba.dict.small.utf8"; // relative path of the dictionary every DictTrie test in this file loads
8
+
9
+ TEST(TrieTest, Empty) { // Builds a Trie from empty key/value vectors; there are no assertions, the construction itself is the test.
10
+ vector<Unicode> keys;
11
+ vector<const DictUnit*> values;
12
+ Trie trie(keys, values);
13
+ }
14
+
15
+ TEST(TrieTest, Construct) { // Builds a Trie from a single key; there are no assertions, the construction itself is the test.
16
+ vector<Unicode> keys;
17
+ vector<const DictUnit*> values;
18
+ keys.push_back(DecodeUTF8RunesInString("你"));
19
+ values.push_back((const DictUnit*)(NULL)); // the key is paired with a null DictUnit pointer
20
+ Trie trie(keys, values);
21
+ }
22
+
23
+ TEST(DictTrieTest, NewAndDelete) { // Heap-allocates a DictTrie from DICT_FILE and deletes it again; there are no assertions.
24
+ DictTrie * trie;
25
+ trie = new DictTrie(DICT_FILE);
26
+ delete trie;
27
+ }
28
+
29
+ TEST(DictTrieTest, Test1) { // Checks the minimum weight, a single-word lookup, and that the DAG built for a phrase agrees with per-prefix lookups.
30
+ string s1, s2;
31
+ DictTrie trie(DICT_FILE);
32
+ ASSERT_LT(trie.GetMinWeight() + 15.6479, 0.001); // one-sided check: holds for any minimum weight below -15.6469
33
+ string word("来到");
34
+ cppjieba::RuneStrArray uni;
35
+ ASSERT_TRUE(DecodeUTF8RunesInString(word, uni));
36
+ //DictUnit nodeInfo;
37
+ //nodeInfo.word = uni;
38
+ //nodeInfo.tag = "v";
39
+ //nodeInfo.weight = -8.87033;
40
+ //s1 << nodeInfo;
41
+ //s2 << (*trie.Find(uni.begin(), uni.end()));
42
+ const DictUnit* du = trie.Find(uni.begin(), uni.end());
43
+ ASSERT_TRUE(du != NULL);
44
+ ASSERT_EQ(2u, du->word.size());
45
+ ASSERT_EQ(26469u, du->word[0]); // 26469 == U+6765
46
+ ASSERT_EQ(21040u, du->word[1]); // 21040 == U+5230
47
+ ASSERT_EQ("v", du->tag);
48
+ ASSERT_NEAR(-8.870, du->weight, 0.001);
49
+
50
+ //EXPECT_EQ("[\"26469\", \"21040\"] v -8.870", s2);
51
+ word = "清华大学";
52
+ LocalVector<pair<size_t, const DictUnit*> > res; // expected (last rune index, dictionary entry) pairs for the words starting at position 0
53
+ const char * words[] = {"清", "清华", "清华大学"}; // three prefixes of the phrase assigned to word above
54
+ for (size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) {
55
+ ASSERT_TRUE(DecodeUTF8RunesInString(words[i], uni));
56
+ res.push_back(make_pair(uni.size() - 1, trie.Find(uni.begin(), uni.end())));
57
+ //resMap[uni.size() - 1] = trie.Find(uni.begin(), uni.end());
58
+ }
59
+ vector<pair<size_t, const DictUnit*> > vec; // NOTE(review): vec is not used anywhere else in this test
60
+ vector<struct Dag> dags;
61
+ ASSERT_TRUE(DecodeUTF8RunesInString(word, uni));
62
+ trie.Find(uni.begin(), uni.end(), dags); // overload that fills one Dag per input rune (size checked on the next line)
63
+ ASSERT_EQ(dags.size(), uni.size());
64
+ ASSERT_NE(dags.size(), 0u); // guards the dags[0] access below
65
+ s1 << res;
66
+ s2 << dags[0].nexts;
67
+ ASSERT_EQ(s1, s2); // the two containers are compared through their serialized text
68
+
69
+ }
70
+
71
+ TEST(DictTrieTest, UserDict) { // Loads DICT_FILE plus a user dictionary and checks tag/weight of three looked-up words.
72
+ DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8");
73
+ string word = "云计算";
74
+ cppjieba::RuneStrArray unicode;
75
+ ASSERT_TRUE(DecodeUTF8RunesInString(word, unicode));
76
+ const DictUnit * unit = trie.Find(unicode.begin(), unicode.end());
77
+ ASSERT_TRUE(unit != NULL);
78
+ ASSERT_NEAR(unit->weight, -14.100, 0.001); // same word is expected at -2.975 when WordWeightMax is passed (see UserDictWithMaxWeight)
79
+
80
+ word = "蓝翔";
81
+ ASSERT_TRUE(DecodeUTF8RunesInString(word, unicode)); // unicode is reused for each word in this test
82
+ unit = trie.Find(unicode.begin(), unicode.end());
83
+ ASSERT_TRUE(unit != NULL);
84
+ ASSERT_EQ(unit->tag, "nz");
85
+ ASSERT_NEAR(unit->weight, -14.100, 0.001);
86
+
87
+ word = "区块链";
88
+ ASSERT_TRUE(DecodeUTF8RunesInString(word, unicode));
89
+ unit = trie.Find(unicode.begin(), unicode.end());
90
+ ASSERT_TRUE(unit != NULL);
91
+ ASSERT_EQ(unit->tag, "nz");
92
+ ASSERT_NEAR(unit->weight, -15.6478, 0.001); // this word is expected with a different weight than the two above
93
+ }
94
+
95
+ TEST(DictTrieTest, UserDictWithMaxWeight) { // Same lookup as the first case of UserDict, with DictTrie::WordWeightMax passed as the third constructor argument.
96
+ DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8", DictTrie::WordWeightMax);
97
+ string word = "云计算";
98
+ cppjieba::RuneStrArray unicode;
99
+ ASSERT_TRUE(DecodeUTF8RunesInString(word, unicode));
100
+ const DictUnit * unit = trie.Find(unicode.begin(), unicode.end());
101
+ ASSERT_TRUE(unit);
102
+ ASSERT_NEAR(unit->weight, -2.975, 0.001); // the UserDict test expects -14.100 for the same word without WordWeightMax
103
+ }
104
+
105
+ TEST(DictTrieTest, Dag) { // For several phrases, checks how many `nexts` entries the DAG holds at each rune position.
106
+ DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8");
107
+
108
+ {
109
+ string word = "清华大学";
110
+ cppjieba::RuneStrArray unicode;
111
+ ASSERT_TRUE(DecodeUTF8RunesInString(word, unicode));
112
+ vector<struct Dag> res;
113
+ trie.Find(unicode.begin(), unicode.end(), res);
114
+
115
+ size_t nexts_sizes[] = {3, 2, 2, 1}; // expected nexts.size() for each position of the phrase
116
+ ASSERT_EQ(res.size(), sizeof(nexts_sizes)/sizeof(nexts_sizes[0])); // also guarantees the indexed reads below stay in bounds
117
+ for (size_t i = 0; i < res.size(); i++) {
118
+ ASSERT_EQ(res[i].nexts.size(), nexts_sizes[i]);
119
+ }
120
+ }
121
+
122
+ {
123
+ string word = "北京邮电大学";
124
+ cppjieba::RuneStrArray unicode;
125
+ ASSERT_TRUE(DecodeUTF8RunesInString(word, unicode));
126
+ vector<struct Dag> res;
127
+ trie.Find(unicode.begin(), unicode.end(), res);
128
+
129
+ size_t nexts_sizes[] = {3, 1, 2, 2, 2, 1}; // expected nexts.size() for each of the six positions
130
+ ASSERT_EQ(res.size(), sizeof(nexts_sizes)/sizeof(nexts_sizes[0]));
131
+ for (size_t i = 0; i < res.size(); i++) {
132
+ ASSERT_EQ(res[i].nexts.size(), nexts_sizes[i]);
133
+ }
134
+ }
135
+
136
+ {
137
+ string word = "长江大桥"; // this phrase is used three times: once without and twice with a fourth Find argument
138
+ cppjieba::RuneStrArray unicode;
139
+ ASSERT_TRUE(DecodeUTF8RunesInString(word, unicode));
140
+ vector<struct Dag> res;
141
+ trie.Find(unicode.begin(), unicode.end(), res);
142
+
143
+ size_t nexts_sizes[] = {3, 1, 2, 1};
144
+ ASSERT_EQ(res.size(), sizeof(nexts_sizes)/sizeof(nexts_sizes[0]));
145
+ for (size_t i = 0; i < res.size(); i++) {
146
+ ASSERT_EQ(res[i].nexts.size(), nexts_sizes[i]);
147
+ }
148
+ }
149
+
150
+ {
151
+ string word = "长江大桥";
152
+ cppjieba::RuneStrArray unicode;
153
+ ASSERT_TRUE(DecodeUTF8RunesInString(word, unicode));
154
+ vector<struct Dag> res;
155
+ trie.Find(unicode.begin(), unicode.end(), res, 3); // extra argument 3: presumably a maximum word length -- confirm in DictTrie::Find
156
+
157
+ size_t nexts_sizes[] = {2, 1, 2, 1}; // first position drops from 3 to 2 compared with the call without the extra argument
158
+ ASSERT_EQ(res.size(), sizeof(nexts_sizes)/sizeof(nexts_sizes[0]));
159
+ for (size_t i = 0; i < res.size(); i++) {
160
+ ASSERT_EQ(res[i].nexts.size(), nexts_sizes[i]);
161
+ }
162
+ }
163
+
164
+ {
165
+ string word = "长江大桥";
166
+ cppjieba::RuneStrArray unicode;
167
+ ASSERT_TRUE(DecodeUTF8RunesInString(word, unicode));
168
+ vector<struct Dag> res;
169
+ trie.Find(unicode.begin(), unicode.end(), res, 4); // extra argument 4 equals the number of characters in the phrase
170
+
171
+ size_t nexts_sizes[] = {3, 1, 2, 1}; // same expectation as the call without the extra argument
172
+ ASSERT_EQ(res.size(), sizeof(nexts_sizes)/sizeof(nexts_sizes[0]));
173
+ for (size_t i = 0; i < res.size(); i++) {
174
+ ASSERT_EQ(res[i].nexts.size(), nexts_sizes[i]);
175
+ }
176
+ }
177
+ }
@@ -0,0 +1,43 @@
1
+ #include "cppjieba/Unicode.hpp"
2
+ #include "limonp/StdExtension.hpp"
3
+ #include "gtest/gtest.h"
4
+
5
+ using namespace cppjieba;
6
+ using namespace std;
7
+
8
+ TEST(UnicodeTest, Test1) { // Decodes a four-character string and compares the serialized runes (code point, offset, len) with a fixed string.
9
+ string s = "你好世界";
10
+ RuneStrArray runes;
11
+ ASSERT_TRUE(DecodeUTF8RunesInString(s, runes));
12
+ string actual;
13
+ string expected = "[\"{\"rune\": \"20320\", \"offset\": 0, \"len\": 3}\", \"{\"rune\": \"22909\", \"offset\": 3, \"len\": 3}\", \"{\"rune\": \"19990\", \"offset\": 6, \"len\": 3}\", \"{\"rune\": \"30028\", \"offset\": 9, \"len\": 3}\"]"; // each rune is expected with len 3, so offsets advance 0, 3, 6, 9
14
+ actual << runes; // serializes runes into actual via the limonp stream-style operator
15
+ ASSERT_EQ(expected, actual);
16
+ }
17
+
18
+ TEST(UnicodeTest, Illegal) { // Decoding input that ends in a stray 0x80 byte must fail and leave no runes behind.
19
+ string s = "123\x80"; // three ASCII digits followed by the byte 0x80
20
+ RuneStrArray runes;
21
+ ASSERT_FALSE(DecodeUTF8RunesInString(s, runes));
22
+ string actual;
23
+ string expected = "[]"; // even the leading "123" is expected to be absent from runes after the failure
24
+ actual << runes;
25
+ ASSERT_EQ(expected, actual);
26
+ }
27
+
28
+ TEST(UnicodeTest, Rand) { // Feeds randomly filled strings to DecodeUTF8RunesInString; there are no assertions and the return value is ignored.
29
+ const size_t ITERATION = 1024; // number of random strings tried
30
+ const size_t MAX_LEN = 256; // lengths are drawn from 0 .. MAX_LEN - 1
31
+ string s;
32
+ srand(time(NULL)); // seeded from the clock, so the inputs differ from run to run
33
+
34
+ for (size_t i = 0; i < ITERATION; i++) {
35
+ size_t len = rand() % MAX_LEN;
36
+ s.resize(len);
37
+ for (size_t j = 0; j < len; j++) { // not entered when len is 0, so the modulo below never sees a zero divisor
38
+ s[rand() % len] = rand(); // writes to a random index, not s[j]; positions that are never picked keep whatever they held before
39
+ }
40
+ RuneStrArray runes;
41
+ DecodeUTF8RunesInString(s, runes);
42
+ }
43
+ }
Binary file
Binary file
Binary file
@@ -0,0 +1,14 @@
1
+ var should = require("should");
2
+ var nodejieba = require("../index.js");
3
+
4
+ describe("nodejieba", function() { // Loads a user dictionary and checks that cut() keeps one phrase as a single token.
5
+ var userDict = __dirname + '/testdata/userdict.utf8'; // path built relative to this test file's directory
6
+ nodejieba.load({ // called directly in the describe callback, not inside a hook or an it()
7
+ userDict: userDict,
8
+ });
9
+ it('nodejieba.cut("红掌拨清波")', function() {
10
+ nodejieba.cut("红掌拨清波").should.eql([ // presumably the phrase is an entry in the user dictionary -- confirm in testdata/userdict.utf8
11
+ '红掌拨清波',
12
+ ]);
13
+ });
14
+ });
@@ -0,0 +1,42 @@
1
+ var fs = require("fs");
2
+ var os = require("os");
3
+ var path = require("path");
4
+ var should = require("should");
5
+
6
+ describe("nodejieba missing binding", function() { // Checks that a copy of index.js can be required on its own and only fails when cut() is first called.
7
+ it("loads without the native binding until first use", function() {
8
+ var fixtureDir = fs.mkdtempSync(path.join(os.tmpdir(), "nodejieba-missing-binding-")); // fresh temporary directory, removed in the finally block below
9
+ try {
10
+ var fixtureIndex = path.join(fixtureDir, "index.js");
11
+
12
+ fs.copyFileSync(path.join(__dirname, "..", "index.js"), fixtureIndex); // only index.js is copied; presumably this is what makes the native binding unresolvable -- confirm against index.js
13
+
14
+ var nodejieba = require(fixtureIndex); // requiring the copy must not throw
15
+
16
+ should(nodejieba.DEFAULT_DICT).type("string");
17
+ should(nodejieba.cut).type("function");
18
+
19
+ var firstError;
20
+ var secondError;
21
+
22
+ try {
23
+ nodejieba.cut("南京市长江大桥");
24
+ } catch (err) {
25
+ firstError = err;
26
+ }
27
+
28
+ try {
29
+ nodejieba.cut("南京市长江大桥"); // same call repeated to capture what a second use throws
30
+ } catch (err) {
31
+ secondError = err;
32
+ }
33
+
34
+ should(firstError).be.an.Error(); // also fails the test if the first cut() did not throw at all
35
+ firstError.message.should.match(/nodejieba native binding was not found/);
36
+ firstError.code.should.equal("BINDING_NOT_FOUND");
37
+ should(secondError).equal(firstError); // the second call is expected to throw the very same error value
38
+ } finally {
39
+ fs.rmSync(fixtureDir, { recursive: true, force: true }); // cleanup runs whether the assertions pass or throw
40
+ }
41
+ });
42
+ });