npm - nodejieba-plus - Versions diffs - 3.5.13 → 3.5.17 - Mend

nodejieba-plus 3.5.13 → 3.5.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/README.md +130 -93
package/analyze_weight.js +57 -0
package/build/Release/nodejieba.node +0 -0
package/diagnose_priority.js +71 -0
package/index.js +9 -1
package/lib/nodejieba.cpp +145 -13
package/lib/nodejieba.h +1 -0
package/package.json +1 -1
package/submodules/cppjieba/include/cppjieba/DictTrie.hpp +169 -30
package/submodules/cppjieba/include/cppjieba/Jieba.hpp +8 -0
package/submodules/cppjieba/include/cppjieba/KeywordExtractor.hpp +29 -8
package/submodules/cppjieba/include/cppjieba/SegmentBase.hpp +1 -1
package/submodules/cppjieba/include/cppjieba/Trie.hpp +10 -13
package/submodules/cppjieba/include/cppjieba/Unicode.hpp +52 -0
package/test/load_user_dict_test.js +48 -4
package/test_1_3x_weight.js +86 -0
package/test_assertion_fix.js +60 -0
package/test_idf_feature.js +43 -0
package/test_open_claw.js +65 -0
package/test_simple.js +17 -0
package/test_space_keyword.js +66 -0
package/types/index.d.ts +1 -0

package/lib/nodejieba.cpp CHANGED Viewed

@@ -7,6 +7,7 @@
 #include "cppjieba/TextRankExtractor.hpp"
 #include <sstream>
+#include <cctype>
 NodeJieba::NodeJieba(Napi::Env env, Napi::Object exports) {
   DefineAddon(exports, {
@@ -20,7 +21,8 @@ NodeJieba::NodeJieba(Napi::Env env, Napi::Object exports) {
     InstanceMethod("extract", &NodeJieba::extract),
     InstanceMethod("textRankExtract", &NodeJieba::textRankExtract),
     InstanceMethod("insertWord", &NodeJieba::insertWord),
-    InstanceMethod("loadUserDict", &NodeJieba::loadUserDict)
+    InstanceMethod("loadUserDict", &NodeJieba::loadUserDict),
+    InstanceMethod("setIdf", &NodeJieba::setIdf)
   });
 }
@@ -52,6 +54,44 @@ Napi::Value NodeJieba::load(const Napi::CallbackInfo& info) {
   return Napi::Boolean::New(info.Env(), true);
 }
+Napi::Value NodeJieba::setIdf(const Napi::CallbackInfo& info) {  // setIdf(word[, idf[, multiplier]]): returns a JS boolean, true when an IDF entry was written
+  if (info.Length() < 1) {  // called with no arguments: nothing to do
+    return Napi::Boolean::New(info.Env(), false);
+  }
+  if (!_jieba_handle) {  // no Jieba instance yet: raise a JS error and report failure
+    Napi::Error::New(info.Env(), "Before calling any other function you have to call load() first").ThrowAsJavaScriptException();
+    return Napi::Boolean::New(info.Env(), false);
+  }
+  std::string word;
+  double idf = 0.0;
+  double multiplier = 2.0;  // multiplier used when the caller does not pass a numeric third argument
+  if (info[0].IsString()) {
+    word = info[0].As<Napi::String>().Utf8Value();
+  } else {
+    return Napi::Boolean::New(info.Env(), false);  // first argument must be a string
+  }
+  if (info.Length() >= 2) {
+    if (info[1].IsNumber()) {  // explicit IDF given: store it and return; a third argument is ignored on this path
+      idf = info[1].As<Napi::Number>().DoubleValue();
+      _jieba_handle->SetIdfForWord(word, idf);
+      return Napi::Boolean::New(info.Env(), true);
+    }
+  }
+  if (info.Length() >= 3) {  // reached only when the second argument was absent or not a number
+    if (info[2].IsNumber()) {
+      multiplier = info[2].As<Napi::Number>().DoubleValue();
+    }
+  }
+  _jieba_handle->SetIdfForWordWithMultiplier(word, multiplier);  // no explicit IDF: delegate to the multiplier-based setter
+  return Napi::Boolean::New(info.Env(), true);
+}
 Napi::Value NodeJieba::insertWord(const Napi::CallbackInfo& info) {
   if(info.Length() < 1) {
     return Napi::Boolean::New(info.Env(), false);
@@ -229,43 +269,135 @@ Napi::Value NodeJieba::loadUserDict(const Napi::CallbackInfo& info) {
     Napi::Error::New(info.Env(), "Before calling any other function you have to call load() first").ThrowAsJavaScriptException();
   }
-  // 支持传入字符串数组、单个字符串或 Buffer
+  auto isBlankString = [](const std::string& str) -> bool {  // true when str is empty or every byte satisfies std::isspace
+    for (char c : str) {
+      if (!std::isspace(static_cast<unsigned char>(c))) {  // cast keeps std::isspace defined for bytes >= 0x80
+        return false;
+      }
+    }
+    return true;
+  };
+  auto trimString = [](std::string& str) -> void {  // strips leading and trailing std::isspace bytes from str in place
+    size_t start = 0;
+    size_t end = str.length();
+    while (start < end && std::isspace(static_cast<unsigned char>(str[start]))) {  // advance past leading whitespace
+      start++;
+    }
+    while (end > start && std::isspace(static_cast<unsigned char>(str[end - 1]))) {  // back up over trailing whitespace
+      end--;
+    }
+    str = str.substr(start, end - start);  // an all-whitespace input becomes the empty string
+  };
+  auto extractKeyword = [](const std::string& line) -> std::string {  // derives the keyword part of one dictionary line; returns "" when the line has no tokens
+    std::istringstream iss(line);
+    std::vector<std::string> parts;
+    std::string part;
+    while (iss >> part) {  // split on runs of whitespace
+      parts.push_back(part);
+    }
+    if (parts.empty()) {
+      return "";
+    }
+    if (parts.size() == 1) {  // a single token is the keyword itself
+      return parts[0];
+    }
+    if (parts.size() == 2) {  // NOTE(review): every path through this branch returns parts[0], so the stoi check has no effect
+      size_t pos;
+      try {
+        std::stoi(parts[1], &pos);
+        if (pos == parts[1].length()) {
+          return parts[0];
+        }
+      } catch (...) {  // std::stoi threw (not a number, or out of int range): fall through
+      }
+      return parts[0];  // NOTE(review): for a two-token line with a non-numeric second token this yields only the first token; verify it matches the word the dictionary loader registers
+    }
+    if (parts.size() >= 3) {
+      size_t pos;
+      try {
+        std::stoi(parts[parts.size() - 2], &pos);
+        if (pos == parts[parts.size() - 2].length()) {  // second-to-last token parsed fully as an int: drop the last two tokens
+          std::string keyword;
+          for (size_t i = 0; i < parts.size() - 2; i++) {  // re-join the remaining tokens with single spaces
+            if (i > 0) keyword += " ";
+            keyword += parts[i];
+          }
+          return keyword;
+        }
+      } catch (...) {  // std::stoi threw: treat the second-to-last token as part of the keyword
+      }
+      std::string keyword;
+      for (size_t i = 0; i < parts.size() - 1; i++) {  // otherwise drop only the last token
+        if (i > 0) keyword += " ";
+        keyword += parts[i];
+      }
+      return keyword;
+    }
+    return parts[0];  // unreachable: sizes 0, 1, 2 and >= 3 all return above
+  };
+  auto setDefaultIdf = [&](const std::vector<std::string>& dictLines) {  // applies a 1.3 IDF multiplier to the keyword extracted from each dictionary line
+    for (const auto& line : dictLines) {
+      std::string keyword = extractKeyword(line);
+      if (!keyword.empty()) {  // lines that yield no keyword are skipped
+        _jieba_handle->SetIdfForWordWithMultiplier(keyword, 1.3);  // NOTE(review): loading the same line again calls this again for the same keyword
+      }
+    }
+  };
   if (info[0].IsArray()) {
     Napi::Array arr = info[0].As<Napi::Array>();
     std::vector<std::string> buf;
     for (size_t i = 0; i < arr.Length(); i++) {
       Napi::Value val = arr[i];
-      if (val.IsString()) {
-        std::string line = val.As<Napi::String>().Utf8Value();
-        // 过滤空字符串，避免断言失败
-        if (!line.empty()) {
-          buf.push_back(line);
-        }
+      if (!val.IsString()) {
+        Napi::TypeError::New(info.Env(), "Array elements must be strings")
+          .ThrowAsJavaScriptException();
+        return Napi::Boolean::New(info.Env(), false);
+      }
+      std::string line = val.As<Napi::String>().Utf8Value();
+      trimString(line);
+      if (!line.empty() && !isBlankString(line)) {
+        buf.push_back(line);
       }
     }
     _jieba_handle->LoadUserDict(buf);
+    setDefaultIdf(buf);
   } else if (info[0].IsString()) {
-    // 支持传入单个词典条目字符串
     std::string line = info[0].As<Napi::String>().Utf8Value();
+    trimString(line);
     std::vector<std::string> buf;
-    // 过滤空字符串
-    if (!line.empty()) {
+    if (!line.empty() && !isBlankString(line)) {
       buf.push_back(line);
       _jieba_handle->LoadUserDict(buf);
+      setDefaultIdf(buf);
     }
   } else if (info[0].IsBuffer()) {
-    // 支持传入 Buffer，将其转换为字符串并按行分割
     Napi::Buffer<char> buffer = info[0].As<Napi::Buffer<char>>();
     std::string content(buffer.Data(), buffer.Length());
     std::vector<std::string> buf;
     std::istringstream iss(content);
     std::string line;
     while (std::getline(iss, line)) {
-      if (!line.empty()) {
+      trimString(line);
+      if (!line.empty() && !isBlankString(line)) {
         buf.push_back(line);
       }
     }
     _jieba_handle->LoadUserDict(buf);
+    setDefaultIdf(buf);
   } else {
     return Napi::Boolean::New(info.Env(), false);
   }

package/lib/nodejieba.h CHANGED Viewed

@@ -21,6 +21,7 @@ private:
   Napi::Value textRankExtract(const Napi::CallbackInfo& info);
   Napi::Value insertWord(const Napi::CallbackInfo& info);
   Napi::Value loadUserDict(const Napi::CallbackInfo& info);
+  Napi::Value setIdf(const Napi::CallbackInfo& info);
   cppjieba::Jieba* _jieba_handle{nullptr};
   cppjieba::TextRankExtractor* _text_rank_extractor_handle{nullptr};

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "nodejieba-plus",
   "description": "chinese word segmentation for node",
-  "version": "3.5.13",
+  "version": "3.5.17",
   "author": "Yanyi Wu <wuyanyi09@foxmail.com>",
   "maintainers": [
     "Yanyi Wu <wuyanyi09@foxmail.com>"

package/submodules/cppjieba/include/cppjieba/DictTrie.hpp CHANGED Viewed

@@ -10,6 +10,7 @@
 #include <stdint.h>
 #include <cmath>
 #include <limits>
+#include <algorithm>
 #include "limonp/StringUtil.hpp"
 #include "limonp/Logging.hpp"
 #include "Unicode.hpp"
@@ -32,7 +33,7 @@ class DictTrie {
     WordWeightMax,
   }; // enum UserWordWeightOption
-  DictTrie(const string& dict_path, const string& user_dict_paths = "", UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
+  DictTrie(const string& dict_path, const string& user_dict_paths = "", UserWordWeightOption user_word_weight_opt = WordWeightMedian) : trie_(NULL) {  // trie_ is NULL-initialized before Init() runs
     Init(dict_path, user_dict_paths, user_word_weight_opt);
   }
@@ -41,23 +42,84 @@ class DictTrie {
   }
   bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {  // inserts word and its derived variants into the trie with user_word_default_weight_
-    DictUnit node_info;
-    if (!MakeNodeInfo(node_info, word, user_word_default_weight_, tag)) {
-      return false;
+    std::set<string> insertedWords;  // the set de-duplicates the variants
+    insertedWords.insert(word);
+    bool hasSpace = (word.find(' ') != string::npos);
+    if (hasSpace) {  // variant with every ' ' removed
+      string wordNoSpace = word;
+      wordNoSpace.erase(remove(wordNoSpace.begin(), wordNoSpace.end(), ' '), wordNoSpace.end());
+      if (!wordNoSpace.empty() && wordNoSpace != word) {
+        insertedWords.insert(wordNoSpace);
+      }
+    }
+    string wordLower = ToLowerString(word);  // lowercased variant of the word as given
+    if (wordLower != word) {
+      insertedWords.insert(wordLower);
+    }
+    if (hasSpace) {  // recomputes the space-stripped form in order to add its lowercased variant
+      string wordNoSpace = word;
+      wordNoSpace.erase(remove(wordNoSpace.begin(), wordNoSpace.end(), ' '), wordNoSpace.end());
+      if (!wordNoSpace.empty()) {
+        string wordNoSpaceLower = ToLowerString(wordNoSpace);
+        if (wordNoSpaceLower != wordNoSpace) {
+          insertedWords.insert(wordNoSpaceLower);
+        }
+      }
+    }
+    for (std::set<string>::const_iterator it = insertedWords.begin(); it != insertedWords.end(); ++it) {
+      DictUnit node_info;
+      if (!MakeNodeInfo(node_info, *it, user_word_default_weight_, tag)) {
+        continue;  // a variant that MakeNodeInfo rejects is skipped
+      }
+      active_node_infos_.push_back(node_info);
+      trie_->InsertNode(node_info.word, &active_node_infos_.back());  // the trie is given the address of the element just stored
     }
-    active_node_infos_.push_back(node_info);
-    trie_->InsertNode(node_info.word, &active_node_infos_.back());
     return true;
   }
   bool InsertUserWord(const string& word,int freq, const string& tag = UNKNOWN_TAG) {  // as the two-argument overload, but the weight is derived from freq
-    DictUnit node_info;
-    double weight = freq ? log(1.0 * freq / freq_sum_) : user_word_default_weight_ ;
-    if (!MakeNodeInfo(node_info, word, weight , tag)) {
-      return false;
+    double weight = freq ? log(1.0 * freq / freq_sum_) : user_word_default_weight_;  // freq == 0 falls back to the default user weight
+    std::set<string> insertedWords;  // the set de-duplicates the variants
+    insertedWords.insert(word);
+    bool hasSpace = (word.find(' ') != string::npos);
+    if (hasSpace) {  // variant with every ' ' removed
+      string wordNoSpace = word;
+      wordNoSpace.erase(remove(wordNoSpace.begin(), wordNoSpace.end(), ' '), wordNoSpace.end());
+      if (!wordNoSpace.empty() && wordNoSpace != word) {
+        insertedWords.insert(wordNoSpace);
+      }
+    }
+    string wordLower = ToLowerString(word);  // lowercased variant of the word as given
+    if (wordLower != word) {
+      insertedWords.insert(wordLower);
+    }
+    if (hasSpace) {  // recomputes the space-stripped form in order to add its lowercased variant
+      string wordNoSpace = word;
+      wordNoSpace.erase(remove(wordNoSpace.begin(), wordNoSpace.end(), ' '), wordNoSpace.end());
+      if (!wordNoSpace.empty()) {
+        string wordNoSpaceLower = ToLowerString(wordNoSpace);
+        if (wordNoSpaceLower != wordNoSpace) {
+          insertedWords.insert(wordNoSpaceLower);
+        }
+      }
+    }
+    for (std::set<string>::const_iterator it = insertedWords.begin(); it != insertedWords.end(); ++it) {
+      DictUnit node_info;
+      if (!MakeNodeInfo(node_info, *it, weight, tag)) {
+        continue;  // a variant that MakeNodeInfo rejects is skipped
+      }
+      active_node_infos_.push_back(node_info);
+      trie_->InsertNode(node_info.word, &active_node_infos_.back());  // the trie is given the address of the element just stored
     }
-    active_node_infos_.push_back(node_info);
-    trie_->InsertNode(node_info.word, &active_node_infos_.back());
     return true;
   }
@@ -112,26 +174,93 @@ class DictTrie {
     vector<string> buf;
     DictUnit node_info;
     Split(line, buf, " ");
-    if(buf.size() == 1){
-          MakeNodeInfo(node_info,
-                buf[0],
-                user_word_default_weight_,
-                UNKNOWN_TAG);
-        } else if (buf.size() == 2) {
-          MakeNodeInfo(node_info,
-                buf[0],
-                user_word_default_weight_,
-                buf[1]);
-        } else if (buf.size() == 3) {
-          int freq = atoi(buf[1].c_str());
-          assert(freq_sum_ > 0.0);
-          double weight = log(1.0 * freq / freq_sum_);
-          MakeNodeInfo(node_info, buf[0], weight, buf[2]);
+    string word;
+    string tag = UNKNOWN_TAG;
+    double weight = user_word_default_weight_;
+    bool hasSpace = false;
+    if (buf.size() == 1) {
+      word = buf[0];
+    } else if (buf.size() == 2) {
+      int freq = atoi(buf[1].c_str());
+      if (freq > 0) {
+        assert(freq_sum_ > 0.0);
+        weight = log(1.0 * freq / freq_sum_);
+        word = buf[0];
+      } else {
+        word = line;
+      }
+    } else if (buf.size() >= 3) {
+      bool isFreq = true;
+      for (char c : buf[buf.size() - 2]) {
+        if (!isdigit(c)) {
+          isFreq = false;
+          break;
         }
-        static_node_infos_.push_back(node_info);
-        if (node_info.word.size() == 1) {
-          user_dict_single_chinese_word_.insert(node_info.word[0]);
+      }
+      if (isFreq) {
+        int freq = atoi(buf[buf.size() - 2].c_str());
+        assert(freq_sum_ > 0.0);
+        weight = log(1.0 * freq / freq_sum_);
+        for (size_t i = 0; i < buf.size() - 2; ++i) {
+          if (i > 0) word += " ";
+          word += buf[i];
+        }
+        tag = buf[buf.size() - 1];
+      } else {
+        word = line;
+      }
+    }
+    hasSpace = (word.find(' ') != string::npos);
+    std::set<string> insertedWords;
+    insertedWords.insert(word);
+    if (hasSpace) {
+      string wordNoSpace = word;
+      wordNoSpace.erase(remove(wordNoSpace.begin(), wordNoSpace.end(), ' '), wordNoSpace.end());
+      if (!wordNoSpace.empty() && wordNoSpace != word) {
+        insertedWords.insert(wordNoSpace);
+      }
+    }
+    string wordLower = ToLowerString(word);
+    if (wordLower != word) {
+      insertedWords.insert(wordLower);
+    }
+    if (hasSpace) {
+      string wordNoSpace = word;
+      wordNoSpace.erase(remove(wordNoSpace.begin(), wordNoSpace.end(), ' '), wordNoSpace.end());
+      if (!wordNoSpace.empty()) {
+        string wordNoSpaceLower = ToLowerString(wordNoSpace);
+        if (wordNoSpaceLower != wordNoSpace) {
+          insertedWords.insert(wordNoSpaceLower);
+        }
+      }
+    }
+    for (std::set<string>::const_iterator it = insertedWords.begin(); it != insertedWords.end(); ++it) {
+      DictUnit temp_node_info;
+      if (MakeNodeInfo(temp_node_info, *it, weight, tag)) {
+        if (trie_) {
+          active_node_infos_.push_back(temp_node_info);
+          trie_->InsertNode(active_node_infos_.back().word, &active_node_infos_.back());
+          if (active_node_infos_.back().word.size() == 1) {
+            user_dict_single_chinese_word_.insert(active_node_infos_.back().word[0]);
+          }
+        } else {
+          static_node_infos_.push_back(temp_node_info);
+          if (temp_node_info.word.size() == 1) {
+            user_dict_single_chinese_word_.insert(temp_node_info.word[0]);
+          }
         }
+      }
+    }
   }
   void LoadUserDict(const vector<string>& buf) {
@@ -206,6 +335,16 @@ class DictTrie {
     return true;
   }
+  bool MakeNodeInfo(DictUnit& node_info,  // overload for a word already held as Unicode: fills node_info and always returns true
+        const Unicode& word,
+        double weight,
+        const string& tag) {
+    node_info.word = word;
+    node_info.weight = weight;
+    node_info.tag = tag;
+    return true;
+  }
   void LoadDict(const string& filePath) {
     ifstream ifs(filePath.c_str());
     XCHECK(ifs.is_open()) << "open " << filePath << " failed.";

package/submodules/cppjieba/include/cppjieba/Jieba.hpp CHANGED Viewed

@@ -116,6 +116,14 @@ class Jieba {
     dict_trie_.LoadUserDict(path);
   }
+  void SetIdfForWord(const string& word, double idf) {  // forwards to extractor.SetIdf(word, idf)
+    extractor.SetIdf(word, idf);
+  }
+  void SetIdfForWordWithMultiplier(const string& word, double multiplier = 2.0) {  // forwards to extractor.SetIdfWithMultiplier; multiplier defaults to 2.0
+    extractor.SetIdfWithMultiplier(word, multiplier);
+  }
  private:
   static string pathJoin(const string& dir, const string& filename) {
     if (dir.empty()) {

package/submodules/cppjieba/include/cppjieba/KeywordExtractor.hpp CHANGED Viewed

@@ -39,6 +39,19 @@ class KeywordExtractor {
   ~KeywordExtractor() {
   }
+  void SetIdf(const string& word, double idf) {  // inserts or overwrites the idfMap_ entry for word
+    idfMap_[word] = idf;
+  }
+  void SetIdfWithMultiplier(const string& word, double multiplier = 2.0) {  // sets word's IDF to multiplier times its current value, or times idfAverage_ if it has none
+    unordered_map<string, double>::const_iterator cit = idfMap_.find(word);
+    if (cit != idfMap_.end()) {
+      idfMap_[word] = cit->second * multiplier;  // NOTE(review): a repeated call for the same word multiplies the already-scaled value again
+    } else {
+      idfMap_[word] = idfAverage_ * multiplier;  // word not in idfMap_: start from idfAverage_
+    }
+  }
   void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
     vector<Word> topWords;
     Extract(sentence, topWords, topN);
@@ -96,25 +109,33 @@ class KeywordExtractor {
     ifstream ifs(idfPath.c_str());
     XCHECK(ifs.is_open()) << "open " << idfPath << " failed";
     string line ;
-    vector<string> buf;
     double idf = 0.0;
     double idfSum = 0.0;
     size_t lineno = 0;
     for (; getline(ifs, line); lineno++) {
-      buf.clear();
       if (line.empty()) {
         XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
         continue;
       }
-      Split(line, buf, " ");
-      if (buf.size() != 2) {
-        XLOG(ERROR) << "line: " << line << ", lineno: " << lineno << " empty. skipped.";
+      size_t lastSpace = line.find_last_of(" \t");
+      if (lastSpace == string::npos) {
+        XLOG(ERROR) << "line: " << line << ", lineno: " << lineno << " format error. skipped.";
+        continue;
+      }
+      string word = line.substr(0, lastSpace);
+      string idfStr = line.substr(lastSpace + 1);
+      char* endptr;
+      idf = strtod(idfStr.c_str(), &endptr);
+      if (endptr == idfStr.c_str()) {
+        XLOG(ERROR) << "line: " << line << ", lineno: " << lineno << " idf format error. skipped.";
         continue;
       }
-      idf = atof(buf[1].c_str());
-      idfMap_[buf[0]] = idf;
+      idfMap_[word] = idf;
       idfSum += idf;
     }
     assert(lineno);

package/submodules/cppjieba/include/cppjieba/SegmentBase.hpp CHANGED Viewed

@@ -8,7 +8,7 @@
 namespace cppjieba {
-const char* const SPECIAL_SEPARATORS = " \t\n\xEF\xBC\x8C\xE3\x80\x82";
+const char* const SPECIAL_SEPARATORS = "\t\n\xEF\xBC\x8C\xE3\x80\x82";  // tab, newline, U+FF0C (fullwidth comma), U+3002 (ideographic full stop) as UTF-8; the ASCII space is not in this list
 using namespace limonp;

package/submodules/cppjieba/include/cppjieba/Trie.hpp CHANGED Viewed

@@ -69,7 +69,8 @@ class Trie {
       if (NULL == ptNode->next) {
         return NULL;
       }
-      citer = ptNode->next->find(it->rune);
+      Rune searchRune = ToLowerRune(it->rune);
+      citer = ptNode->next->find(searchRune);
       if (ptNode->next->end() == citer) {
         return NULL;
       }
@@ -90,7 +91,7 @@ class Trie {
     for (size_t i = 0; i < size_t(end - begin); i++) {
       res[i].runestr = *(begin + i);
-      if (root_->next != NULL && root_->next->end() != (citer = root_->next->find(res[i].runestr.rune))) {
+      if (root_->next != NULL && root_->next->end() != (citer = root_->next->find(ToLowerRune(res[i].runestr.rune)))) {
         ptNode = citer->second;
       } else {
         ptNode = NULL;
@@ -105,7 +106,7 @@ class Trie {
         if (ptNode == NULL || ptNode->next == NULL) {
           break;
         }
-        citer = ptNode->next->find((begin + j)->rune);
+        citer = ptNode->next->find(ToLowerRune((begin + j)->rune));
         if (ptNode->next->end() == citer) {
           break;
         }
@@ -128,11 +129,12 @@ class Trie {
       if (NULL == ptNode->next) {
         ptNode->next = new TrieNode::NextMap;
       }
-      kmIter = ptNode->next->find(*citer);
+      Rune insertRune = ToLowerRune(*citer);
+      kmIter = ptNode->next->find(insertRune);
       if (ptNode->next->end() == kmIter) {
         TrieNode *nextNode = new TrieNode;
-        ptNode->next->insert(make_pair(*citer, nextNode));
+        ptNode->next->insert(make_pair(insertRune, nextNode));
         ptNode = nextNode;
       } else {
         ptNode = kmIter->second;
@@ -145,23 +147,18 @@ class Trie {
       if (key.begin() == key.end()) {
         return;
       }
-      //定义一个NextMap迭代器
       TrieNode::NextMap::const_iterator kmIter;
-      //定义一个指向root的TrieNode指针
       TrieNode *ptNode = root_;
       for (Unicode::const_iterator citer = key.begin(); citer != key.end(); ++citer) {
-        //链表不存在元素
         if (NULL == ptNode->next) {
           return;
         }
-        kmIter = ptNode->next->find(*citer);
-        //如果map中不存在,跳出循环
+        Rune deleteRune = ToLowerRune(*citer);
+        kmIter = ptNode->next->find(deleteRune);
         if (ptNode->next->end() == kmIter) {
               break;
         }
-        //从unordered_map中擦除该项
-        ptNode->next->erase(*citer);
-        //删除该node
+        ptNode->next->erase(deleteRune);
         ptNode = kmIter->second;
         delete ptNode;
         break;

package/submodules/cppjieba/include/cppjieba/Unicode.hpp CHANGED Viewed

@@ -222,6 +222,58 @@ inline void GetStringsFromWords(const vector<Word>& words, vector<string>& strs)
   }
 }
+inline Rune ToLowerRune(Rune r) {  // maps 'A'..'Z' to 'a'..'z'; every other rune is returned unchanged
+  if (r >= 'A' && r <= 'Z') {
+    return r + ('a' - 'A');
+  }
+  return r;
+}
+inline Rune ToUpperRune(Rune r) {  // maps 'a'..'z' to 'A'..'Z'; every other rune is returned unchanged
+  if (r >= 'a' && r <= 'z') {
+    return r - ('a' - 'A');
+  }
+  return r;
+}
+inline Unicode ToLowerUnicode(const Unicode& unicode) {  // returns a copy of unicode with ToLowerRune applied to each element
+  Unicode result;
+  result.reserve(unicode.size());  // output has the same length as the input
+  for (size_t i = 0; i < unicode.size(); i++) {
+    result.push_back(ToLowerRune(unicode[i]));
+  }
+  return result;
+}
+inline Unicode ToUpperUnicode(const Unicode& unicode) {  // returns a copy of unicode with ToUpperRune applied to each element
+  Unicode result;
+  result.reserve(unicode.size());  // output has the same length as the input
+  for (size_t i = 0; i < unicode.size(); i++) {
+    result.push_back(ToUpperRune(unicode[i]));
+  }
+  return result;
+}
+inline string ToLowerString(const string& s) {  // returns a copy of s with bytes 'A'..'Z' lowercased; all other bytes are left as they are
+  string result = s;
+  for (size_t i = 0; i < result.size(); i++) {
+    if (result[i] >= 'A' && result[i] <= 'Z') {  // only these 26 byte values are changed
+      result[i] = result[i] + ('a' - 'A');
+    }
+  }
+  return result;
+}
+inline string ToUpperString(const string& s) {  // returns a copy of s with bytes 'a'..'z' uppercased; all other bytes are left as they are
+  string result = s;
+  for (size_t i = 0; i < result.size(); i++) {
+    if (result[i] >= 'a' && result[i] <= 'z') {  // only these 26 byte values are changed
+      result[i] = result[i] - ('a' - 'A');
+    }
+  }
+  return result;
+}
 } // namespace cppjieba
 #endif // CPPJIEBA_UNICODE_H