jieba_rb 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.gitmodules +3 -0
- data/.travis.yml +6 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +51 -0
- data/Rakefile +11 -0
- data/ext/cppjieba/.gitignore +17 -0
- data/ext/cppjieba/.travis.yml +22 -0
- data/ext/cppjieba/CMakeLists.txt +27 -0
- data/ext/cppjieba/ChangeLog.md +81 -0
- data/ext/cppjieba/Dockerfile +11 -0
- data/ext/cppjieba/LICENSE +20 -0
- data/ext/cppjieba/README.md +359 -0
- data/ext/cppjieba/conf/CMakeLists.txt +1 -0
- data/ext/cppjieba/conf/server.conf +16 -0
- data/ext/cppjieba/dict/CMakeLists.txt +1 -0
- data/ext/cppjieba/dict/README.md +31 -0
- data/ext/cppjieba/dict/extra_dict/jieba.dict.small.utf8 +109750 -0
- data/ext/cppjieba/dict/gbk_dict/hmm_model.gbk +34 -0
- data/ext/cppjieba/dict/gbk_dict/jieba.dict.gbk +348982 -0
- data/ext/cppjieba/dict/hmm_model.utf8 +34 -0
- data/ext/cppjieba/dict/idf.utf8 +258826 -0
- data/ext/cppjieba/dict/jieba.dict.utf8 +348982 -0
- data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +6653 -0
- data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +166 -0
- data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +259 -0
- data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +5222 -0
- data/ext/cppjieba/dict/stop_words.utf8 +1534 -0
- data/ext/cppjieba/dict/user.dict.utf8 +3 -0
- data/ext/cppjieba/script/CMakeLists.txt +1 -0
- data/ext/cppjieba/script/cjserver.start +12 -0
- data/ext/cppjieba/script/cjserver.stop +13 -0
- data/ext/cppjieba/server/CMakeLists.txt +9 -0
- data/ext/cppjieba/server/Husky/HttpReqInfo.hpp +294 -0
- data/ext/cppjieba/server/Husky/IRequestHandler.hpp +18 -0
- data/ext/cppjieba/server/Husky/ThreadPoolServer.hpp +108 -0
- data/ext/cppjieba/server/Husky/WorkerThread.hpp +133 -0
- data/ext/cppjieba/server/server.cpp +91 -0
- data/ext/cppjieba/src/DictTrie.hpp +211 -0
- data/ext/cppjieba/src/FullSegment.hpp +153 -0
- data/ext/cppjieba/src/HMMSegment.hpp +394 -0
- data/ext/cppjieba/src/ISegment.hpp +17 -0
- data/ext/cppjieba/src/KeywordExtractor.hpp +173 -0
- data/ext/cppjieba/src/Limonp/ArgvContext.hpp +84 -0
- data/ext/cppjieba/src/Limonp/BlockingQueue.hpp +128 -0
- data/ext/cppjieba/src/Limonp/BoundedQueue.hpp +73 -0
- data/ext/cppjieba/src/Limonp/CastFloat.hpp +90 -0
- data/ext/cppjieba/src/Limonp/Condition.hpp +48 -0
- data/ext/cppjieba/src/Limonp/Config.hpp +118 -0
- data/ext/cppjieba/src/Limonp/HandyMacro.hpp +31 -0
- data/ext/cppjieba/src/Limonp/InitOnOff.hpp +21 -0
- data/ext/cppjieba/src/Limonp/LocalVector.hpp +171 -0
- data/ext/cppjieba/src/Limonp/Logger.hpp +74 -0
- data/ext/cppjieba/src/Limonp/Md5.hpp +432 -0
- data/ext/cppjieba/src/Limonp/MutexLock.hpp +57 -0
- data/ext/cppjieba/src/Limonp/MysqlClient.hpp +125 -0
- data/ext/cppjieba/src/Limonp/NonCopyable.hpp +22 -0
- data/ext/cppjieba/src/Limonp/StdExtension.hpp +139 -0
- data/ext/cppjieba/src/Limonp/StringUtil.hpp +349 -0
- data/ext/cppjieba/src/Limonp/Thread.hpp +50 -0
- data/ext/cppjieba/src/Limonp/ThreadPool.hpp +105 -0
- data/ext/cppjieba/src/MPSegment.hpp +148 -0
- data/ext/cppjieba/src/MixSegment.hpp +121 -0
- data/ext/cppjieba/src/PosTagger.hpp +109 -0
- data/ext/cppjieba/src/QuerySegment.hpp +123 -0
- data/ext/cppjieba/src/SegmentBase.hpp +78 -0
- data/ext/cppjieba/src/TransCode.hpp +63 -0
- data/ext/cppjieba/src/Trie.hpp +298 -0
- data/ext/cppjieba/test/CMakeLists.txt +7 -0
- data/ext/cppjieba/test/keyword_demo.cpp +16 -0
- data/ext/cppjieba/test/load_test.cpp +56 -0
- data/ext/cppjieba/test/segment_demo.cpp +59 -0
- data/ext/cppjieba/test/servertest/go_load_test.sh +2 -0
- data/ext/cppjieba/test/servertest/load_test.py +91 -0
- data/ext/cppjieba/test/servertest/run_curl.sh +11 -0
- data/ext/cppjieba/test/tagging_demo.cpp +12 -0
- data/ext/cppjieba/test/testdata/curl.res +1 -0
- data/ext/cppjieba/test/testdata/jieba.dict.0.1.utf8 +93 -0
- data/ext/cppjieba/test/testdata/jieba.dict.0.utf8 +93 -0
- data/ext/cppjieba/test/testdata/jieba.dict.1.utf8 +67 -0
- data/ext/cppjieba/test/testdata/jieba.dict.2.utf8 +64 -0
- data/ext/cppjieba/test/testdata/load_test.urls +2 -0
- data/ext/cppjieba/test/testdata/review.100 +100 -0
- data/ext/cppjieba/test/testdata/review.100.res +200 -0
- data/ext/cppjieba/test/testdata/server.conf +13 -0
- data/ext/cppjieba/test/testdata/testlines.gbk +9 -0
- data/ext/cppjieba/test/testdata/testlines.utf8 +8 -0
- data/ext/cppjieba/test/testdata/userdict.utf8 +6 -0
- data/ext/cppjieba/test/testdata/weicheng.utf8 +247 -0
- data/ext/cppjieba/test/unittest/CMakeLists.txt +28 -0
- data/ext/cppjieba/test/unittest/TKeywordExtractor.cpp +18 -0
- data/ext/cppjieba/test/unittest/TPosTagger.cpp +43 -0
- data/ext/cppjieba/test/unittest/TSegments.cpp +187 -0
- data/ext/cppjieba/test/unittest/TTrie.cpp +80 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest-death-test.h +283 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest-message.h +230 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest-param-test.h +1421 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest-param-test.h.pump +487 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest-printers.h +796 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest-spi.h +232 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest-test-part.h +176 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest-typed-test.h +259 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest.h +2155 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest_pred_impl.h +358 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest_prod.h +58 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-death-test-internal.h +308 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-filepath.h +210 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-internal.h +1226 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-linked_ptr.h +233 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-param-util-generated.h +4822 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-param-util-generated.h.pump +301 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-param-util.h +619 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-port.h +1788 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-string.h +350 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-tuple.h +968 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-tuple.h.pump +336 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-type-util.h +3330 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-type-util.h.pump +296 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/src/.deps/.dirstamp +0 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/src/.deps/gtest-all.Plo +681 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/src/.deps/gtest_main.Plo +509 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/src/.dirstamp +0 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest-all.cc +48 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest-death-test.cc +1234 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest-filepath.cc +380 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest-internal-inl.h +1038 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest-port.cc +746 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest-printers.cc +356 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest-test-part.cc +110 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest-typed-test.cc +110 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest.cc +4898 -0
- data/ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest_main.cc +39 -0
- data/ext/cppjieba/test/unittest/gtest_main.cpp +39 -0
- data/ext/jieba/extconf.rb +26 -0
- data/ext/jieba/jieba.c +9 -0
- data/ext/jieba/jieba.h +9 -0
- data/ext/jieba/segment.cc +88 -0
- data/ext/jieba/segment.h +17 -0
- data/jieba_rb.gemspec +51 -0
- data/lib/jieba_rb/version.rb +3 -0
- data/lib/jieba_rb.rb +28 -0
- data/test/test_segment.rb +32 -0
- metadata +246 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
// Copyright 2006, Google Inc.
|
2
|
+
// All rights reserved.
|
3
|
+
//
|
4
|
+
// Redistribution and use in source and binary forms, with or without
|
5
|
+
// modification, are permitted provided that the following conditions are
|
6
|
+
// met:
|
7
|
+
//
|
8
|
+
// * Redistributions of source code must retain the above copyright
|
9
|
+
// notice, this list of conditions and the following disclaimer.
|
10
|
+
// * Redistributions in binary form must reproduce the above
|
11
|
+
// copyright notice, this list of conditions and the following disclaimer
|
12
|
+
// in the documentation and/or other materials provided with the
|
13
|
+
// distribution.
|
14
|
+
// * Neither the name of Google Inc. nor the names of its
|
15
|
+
// contributors may be used to endorse or promote products derived from
|
16
|
+
// this software without specific prior written permission.
|
17
|
+
//
|
18
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
19
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
20
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
21
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
22
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
23
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
24
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
25
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
26
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
27
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
|
30
|
+
#include <iostream>
|
31
|
+
|
32
|
+
#include "gtest/gtest.h"
|
33
|
+
|
34
|
+
// Test-runner entry point: prints a banner, initializes Google Test with the
// process arguments, and returns the result of RUN_ALL_TESTS() as the exit
// status.
GTEST_API_ int main(int argc, char **argv) {
  std::cout << "Running main() from gtest_main.cc\n";

  testing::InitGoogleTest(&argc, argv);
  const int exit_status = RUN_ALL_TESTS();
  return exit_status;
}
|
@@ -0,0 +1,39 @@
|
|
1
|
+
// Copyright 2006, Google Inc.
|
2
|
+
// All rights reserved.
|
3
|
+
//
|
4
|
+
// Redistribution and use in source and binary forms, with or without
|
5
|
+
// modification, are permitted provided that the following conditions are
|
6
|
+
// met:
|
7
|
+
//
|
8
|
+
// * Redistributions of source code must retain the above copyright
|
9
|
+
// notice, this list of conditions and the following disclaimer.
|
10
|
+
// * Redistributions in binary form must reproduce the above
|
11
|
+
// copyright notice, this list of conditions and the following disclaimer
|
12
|
+
// in the documentation and/or other materials provided with the
|
13
|
+
// distribution.
|
14
|
+
// * Neither the name of Google Inc. nor the names of its
|
15
|
+
// contributors may be used to endorse or promote products derived from
|
16
|
+
// this software without specific prior written permission.
|
17
|
+
//
|
18
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
19
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
20
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
21
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
22
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
23
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
24
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
25
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
26
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
27
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
|
30
|
+
#include <iostream>
|
31
|
+
|
32
|
+
#include "gtest/gtest.h"
|
33
|
+
|
34
|
+
// Test-runner entry point: prints a banner, initializes Google Test with the
// process arguments, and returns the result of RUN_ALL_TESTS() as the exit
// status.
GTEST_API_ int main(int argc, char **argv) {
  std::cout << "Running main() from gtest_main.cc\n";

  testing::InitGoogleTest(&argc, argv);
  const int exit_status = RUN_ALL_TESTS();
  return exit_status;
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require "mkmf"
|
2
|
+
# Absolute path of the directory that contains this extconf.rb.
abs = File.expand_path File.dirname(__FILE__)

LIBDIR = RbConfig::CONFIG['libdir']
INCLUDEDIR = RbConfig::CONFIG['includedir']

# Default header search path: Ruby's include dir plus the bundled cppjieba
# sources, which live next to this extension directory.
HEADER_DIRS = [
  INCLUDEDIR,
  "#{abs}/../cppjieba/src"
]

# Default library search path.
LIB_DIRS = [
  LIBDIR
]

dir_config('cppjieba_src', HEADER_DIRS, LIB_DIRS)

# Interpolate instead of using += so a missing (nil) CXXFLAGS entry does not
# raise NoMethodError; for a non-nil value the resulting string is identical.
CONFIG["CXXFLAGS"] = "#{CONFIG['CXXFLAGS']} -std=c++0x -O3"
$CXXFLAGS = "#{$CXXFLAGS} -std=c++0x -O3"
create_makefile 'jieba'

# respect header changes
# The headers are looked up in the extension's source directory rather than
# the current working directory, so the dependency rule is also populated
# when extconf.rb is run from a separate build directory. The names are
# written relative to $(srcdir), the variable mkmf defines in the Makefile
# for the source directory.
header_names = Dir.chdir(abs) { Dir.glob('*.{hpp,h}') }
headers = header_names.map { |name| "$(srcdir)/#{name}" }.join ' '
File.open 'Makefile', 'a' do |f|
  f.puts "\n$(OBJS): #{headers}"
end
|
data/ext/jieba/jieba.c
ADDED
data/ext/jieba/jieba.h
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
#include "segment.h"
|
2
|
+
#include <ruby/encoding.h>
|
3
|
+
#include <MPSegment.hpp>
|
4
|
+
#include <HMMSegment.hpp>
|
5
|
+
#include <MixSegment.hpp>
|
6
|
+
|
7
|
+
static rb_encoding* u8_enc;
|
8
|
+
|
9
|
+
struct SegWrapper{
|
10
|
+
CppJieba::ISegment * segp;
|
11
|
+
};
|
12
|
+
static void seg_free(void *p){
|
13
|
+
delete ((SegWrapper*) p) -> segp;
|
14
|
+
delete (SegWrapper*)p;
|
15
|
+
}
|
16
|
+
|
17
|
+
// Allocation function for the class: creates a fresh SegWrapper and wraps it
// in a Ruby object of `klass`, with no mark function and seg_free as the
// release callback.
static VALUE allocate(VALUE klass)
{
    SegWrapper* payload = new SegWrapper();
    VALUE wrapped = Data_Wrap_Struct(klass, NULL, seg_free, payload);
    return wrapped;
}
|
22
|
+
|
23
|
+
static void seg_init(VALUE self,
|
24
|
+
VALUE type_rb_sym,
|
25
|
+
VALUE jieba_dict_rbs,
|
26
|
+
VALUE hmm_dict_rbs,
|
27
|
+
VALUE user_dict_rbs)
|
28
|
+
{
|
29
|
+
SegWrapper* seg_wrapper;
|
30
|
+
Data_Get_Struct(self, SegWrapper, seg_wrapper);
|
31
|
+
|
32
|
+
Check_Type(jieba_dict_rbs, T_STRING);
|
33
|
+
Check_Type(hmm_dict_rbs, T_STRING);
|
34
|
+
Check_Type(user_dict_rbs, T_STRING);
|
35
|
+
|
36
|
+
std::string jieba_dict = StringValueCStr(jieba_dict_rbs);
|
37
|
+
std::string hmm_dict = StringValueCStr(hmm_dict_rbs);
|
38
|
+
std::string user_dict = StringValueCStr(user_dict_rbs);
|
39
|
+
|
40
|
+
ID type = SYM2ID(type_rb_sym);
|
41
|
+
if ( type == rb_intern("mix") )
|
42
|
+
{
|
43
|
+
seg_wrapper->segp = new CppJieba::MixSegment(jieba_dict, hmm_dict, user_dict);
|
44
|
+
}
|
45
|
+
else if ( type == rb_intern("hmm") )
|
46
|
+
{
|
47
|
+
seg_wrapper->segp = new CppJieba::HMMSegment(hmm_dict);
|
48
|
+
}
|
49
|
+
else if ( type == rb_intern("mp"))
|
50
|
+
{
|
51
|
+
seg_wrapper->segp = new CppJieba::MPSegment(jieba_dict);
|
52
|
+
}
|
53
|
+
}
|
54
|
+
|
55
|
+
// Native implementation of the "cut" method (arity 1).
//
// Passes the given Ruby String to the wrapped segmenter's cut() and returns
// the resulting words as a Ruby Array of Strings tagged with the encoding
// stored in u8_enc.
//
// Raises TypeError when `text_rbs` is not a String and RuntimeError when the
// object has no segmenter yet (i.e. "_init" was never run on it), instead of
// dereferencing a NULL pointer.
static VALUE seg_cut(VALUE self, VALUE text_rbs)
{
    Check_Type(text_rbs, T_STRING);

    SegWrapper* seg_wrapper;
    Data_Get_Struct(self, SegWrapper, seg_wrapper);
    if (seg_wrapper->segp == NULL)
    {
        rb_raise(rb_eRuntimeError, "Segment has not been initialized");
    }

    // Fetch the C string before creating any C++ object: StringValueCStr can
    // raise, and a Ruby raise would skip C++ destructors.
    const char* text_c = StringValueCStr(text_rbs);

    std::vector<std::string> words;
    {
        std::string text(text_c);
        seg_wrapper->segp->cut(text, words);
    }

    volatile VALUE arr = rb_ary_new();
    for (std::vector<std::string>::const_iterator j = words.begin(); j != words.end(); ++j)
    {
        rb_ary_push(arr, rb_enc_str_new(j->c_str(), j->length(), u8_enc));
    }
    return arr;
}
|
75
|
+
|
76
|
+
#define DEF(k,n,f,c) rb_define_method(k,n,RUBY_METHOD_FUNC(f),c)
|
77
|
+
|
78
|
+
extern "C" {
|
79
|
+
void Init_segment()
|
80
|
+
{
|
81
|
+
VALUE cSegment = rb_define_class_under(mJieba, "Segment", rb_cObject);
|
82
|
+
u8_enc = rb_utf8_encoding();
|
83
|
+
rb_define_alloc_func(cSegment, allocate);
|
84
|
+
DEF(cSegment, "_init",seg_init,4);
|
85
|
+
DEF(cSegment, "cut",seg_cut,1);
|
86
|
+
}
|
87
|
+
|
88
|
+
}
|
data/ext/jieba/segment.h
ADDED
data/jieba_rb.gemspec
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'jieba_rb/version'
|
5
|
+
|
6
|
+
# Gem specification for jieba_rb. Besides the files tracked by this
# repository, the file list also includes the files tracked by each git
# submodule, so that they are packaged into the gem as well.
Gem::Specification.new do |spec|
  spec.name          = "jieba_rb"
  spec.version       = JiebaRb::VERSION
  spec.authors       = ["Chris Li"]
  spec.email         = ["liqi8822@gmail.com"]
  spec.summary       = "cppjieba binding for ruby"
  spec.description   = "cppjieba binding for ruby"
  spec.homepage      = "https://github.com/altkatz/jieba_rb"
  spec.required_ruby_version = ">=1.9.2"
  spec.license       = "MIT"
  spec.extensions    = ["ext/jieba/extconf.rb"]

  spec.files         = `git ls-files -z`.split("\x0")
  relative_path = File.expand_path("../", __FILE__) + '/'

  # Split git's output on line boundaries. The previous `split($\)` passed
  # the *output* record separator (nil by default), which splits on any
  # whitespace and therefore breaks paths that contain spaces. chomp also
  # strips a trailing "\r" from "\r\n" line endings.
  submodule_dirs = `git submodule --quiet foreach pwd`.each_line.map(&:chomp).reject(&:empty?)
  submodule_dirs.each do |submodule_path|
    if (ENV['OS'] == 'Windows_NT') && submodule_path[0] == '/'
      # Detect if cygwin path is being used by git
      submodule_path = submodule_path[1..-1]
      submodule_path.insert(1, ':')
    end
    # for each submodule, change working directory to that submodule
    Dir.chdir(submodule_path) do
      # Make the submodule path relative; the prefix is escaped so regex
      # metacharacters in the checkout path are matched literally.
      submodule_path = submodule_path.gsub(/#{Regexp.escape(relative_path)}/i, '')
      # issue git ls-files in submodule's directory
      submodule_files = `git ls-files`.each_line.map(&:chomp).reject(&:empty?)

      # prepend the submodule path to create relative file paths
      submodule_files_paths = submodule_files.map do |filename|
        File.join(submodule_path, filename)
      end

      # add relative paths to gem.files
      spec.files += submodule_files_paths
    end
  end

  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.5"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rake-compiler"
  spec.add_development_dependency "minitest"
end
|
data/lib/jieba_rb.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require "jieba_rb/version"
|
2
|
+
require "jieba"
|
3
|
+
module JiebaRb
  # Ruby-side constructor for Segment: validates the options and hands the
  # segmenter type and dictionary paths to the private _init method.
  class Segment
    lib_dir = File.expand_path File.dirname(__FILE__)

    # Dictionary files shipped under ext/cppjieba, used as defaults.
    EXT_BASE = "#{lib_dir}/../ext/cppjieba/"
    JIEBA_DICT_FILE = EXT_BASE + "dict/jieba.dict.utf8"
    HMM_DICT_FILE = EXT_BASE + "dict/hmm_model.utf8"
    USER_DICT_FILE = EXT_BASE + "dict/user.dict.utf8"

    private :_init

    # opts[:type]       - :mix (default), :hmm or :mp
    # opts[:jieba_dict] - main dictionary path (default JIEBA_DICT_FILE)
    # opts[:hmm_dict]   - HMM model path (default HMM_DICT_FILE)
    # opts[:user_dict]  - user dictionary path, or :default for
    #                     USER_DICT_FILE; an empty string when omitted
    #
    # Raises RuntimeError when :type is given but is not a supported value.
    def initialize opts = {}
      # A missing (nil/false) type falls back to :mix, which is always valid.
      type = opts[:type] || :mix
      unless [:mix, :hmm, :mp].include? type
        raise "Type must be one of :mix :hmm :mp"
      end

      jieba_dict = opts[:jieba_dict] || JIEBA_DICT_FILE
      hmm_dict = opts[:hmm_dict] || HMM_DICT_FILE

      requested_user_dict = opts[:user_dict] || ""
      user_dict = requested_user_dict == :default ? USER_DICT_FILE : requested_user_dict

      _init type, jieba_dict, hmm_dict, user_dict
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'minitest/autorun'
|
3
|
+
require 'jieba_rb'
|
4
|
+
# Checks the word lists returned by JiebaRb::Segment#cut for each segmenter
# type, with and without the bundled user dictionary.
class JiebaTest < Minitest::Test
  # Default (:mix) segmenter without a user dictionary.
  def test_mix_segment
    segmenter = JiebaRb::Segment.new

    result = segmenter.cut("我来到南京市长江大桥")
    assert_equal %w(我 来到 南京市 长江大桥), result

    result = segmenter.cut("令狐冲是云计算行业的专家")
    assert_equal %w(令狐冲 是 云 计算 行业 的 专家), result
  end

  # Default (:mix) segmenter with the bundled user dictionary.
  def test_mix_segment_with_user_dict
    segmenter = JiebaRb::Segment.new(user_dict: :default)
    result = segmenter.cut("令狐冲是云计算行业的专家")
    assert_equal %w(令狐冲 是 云计算 行业 的 专家), result
  end

  # :hmm segmenter.
  def test_hmm_segment
    segmenter = JiebaRb::Segment.new(type: :hmm)
    result = segmenter.cut("令狐冲是云计算行业的专家")
    assert_equal %w(令狐冲 是 云计算 行业 的 专家), result
  end

  # :mp segmenter.
  def test_max_prob_segment
    segmenter = JiebaRb::Segment.new(type: :mp)
    result = segmenter.cut("令狐冲是云计算行业的专家")
    assert_equal %w(令狐冲 是 云 计算 行业 的 专家), result
  end

end
|
metadata
ADDED
@@ -0,0 +1,246 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jieba_rb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Chris Li
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-12-21 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.5'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.5'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake-compiler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: cppjieba binding for ruby
|
70
|
+
email:
|
71
|
+
- liqi8822@gmail.com
|
72
|
+
executables: []
|
73
|
+
extensions:
|
74
|
+
- ext/jieba/extconf.rb
|
75
|
+
extra_rdoc_files: []
|
76
|
+
files:
|
77
|
+
- .gitignore
|
78
|
+
- .gitmodules
|
79
|
+
- .travis.yml
|
80
|
+
- Gemfile
|
81
|
+
- LICENSE.txt
|
82
|
+
- README.md
|
83
|
+
- Rakefile
|
84
|
+
- ext/jieba/extconf.rb
|
85
|
+
- ext/jieba/jieba.c
|
86
|
+
- ext/jieba/jieba.h
|
87
|
+
- ext/jieba/segment.cc
|
88
|
+
- ext/jieba/segment.h
|
89
|
+
- jieba_rb.gemspec
|
90
|
+
- lib/jieba_rb.rb
|
91
|
+
- lib/jieba_rb/version.rb
|
92
|
+
- test/test_segment.rb
|
93
|
+
- ext/cppjieba/.gitignore
|
94
|
+
- ext/cppjieba/.travis.yml
|
95
|
+
- ext/cppjieba/CMakeLists.txt
|
96
|
+
- ext/cppjieba/ChangeLog.md
|
97
|
+
- ext/cppjieba/Dockerfile
|
98
|
+
- ext/cppjieba/LICENSE
|
99
|
+
- ext/cppjieba/README.md
|
100
|
+
- ext/cppjieba/conf/CMakeLists.txt
|
101
|
+
- ext/cppjieba/conf/server.conf
|
102
|
+
- ext/cppjieba/dict/CMakeLists.txt
|
103
|
+
- ext/cppjieba/dict/README.md
|
104
|
+
- ext/cppjieba/dict/extra_dict/jieba.dict.small.utf8
|
105
|
+
- ext/cppjieba/dict/gbk_dict/hmm_model.gbk
|
106
|
+
- ext/cppjieba/dict/gbk_dict/jieba.dict.gbk
|
107
|
+
- ext/cppjieba/dict/hmm_model.utf8
|
108
|
+
- ext/cppjieba/dict/idf.utf8
|
109
|
+
- ext/cppjieba/dict/jieba.dict.utf8
|
110
|
+
- ext/cppjieba/dict/pos_dict/char_state_tab.utf8
|
111
|
+
- ext/cppjieba/dict/pos_dict/prob_emit.utf8
|
112
|
+
- ext/cppjieba/dict/pos_dict/prob_start.utf8
|
113
|
+
- ext/cppjieba/dict/pos_dict/prob_trans.utf8
|
114
|
+
- ext/cppjieba/dict/stop_words.utf8
|
115
|
+
- ext/cppjieba/dict/user.dict.utf8
|
116
|
+
- ext/cppjieba/script/CMakeLists.txt
|
117
|
+
- ext/cppjieba/script/cjserver.start
|
118
|
+
- ext/cppjieba/script/cjserver.stop
|
119
|
+
- ext/cppjieba/server/CMakeLists.txt
|
120
|
+
- ext/cppjieba/server/Husky/HttpReqInfo.hpp
|
121
|
+
- ext/cppjieba/server/Husky/IRequestHandler.hpp
|
122
|
+
- ext/cppjieba/server/Husky/ThreadPoolServer.hpp
|
123
|
+
- ext/cppjieba/server/Husky/WorkerThread.hpp
|
124
|
+
- ext/cppjieba/server/server.cpp
|
125
|
+
- ext/cppjieba/src/DictTrie.hpp
|
126
|
+
- ext/cppjieba/src/FullSegment.hpp
|
127
|
+
- ext/cppjieba/src/HMMSegment.hpp
|
128
|
+
- ext/cppjieba/src/ISegment.hpp
|
129
|
+
- ext/cppjieba/src/KeywordExtractor.hpp
|
130
|
+
- ext/cppjieba/src/Limonp/ArgvContext.hpp
|
131
|
+
- ext/cppjieba/src/Limonp/BlockingQueue.hpp
|
132
|
+
- ext/cppjieba/src/Limonp/BoundedQueue.hpp
|
133
|
+
- ext/cppjieba/src/Limonp/CastFloat.hpp
|
134
|
+
- ext/cppjieba/src/Limonp/Condition.hpp
|
135
|
+
- ext/cppjieba/src/Limonp/Config.hpp
|
136
|
+
- ext/cppjieba/src/Limonp/HandyMacro.hpp
|
137
|
+
- ext/cppjieba/src/Limonp/InitOnOff.hpp
|
138
|
+
- ext/cppjieba/src/Limonp/LocalVector.hpp
|
139
|
+
- ext/cppjieba/src/Limonp/Logger.hpp
|
140
|
+
- ext/cppjieba/src/Limonp/Md5.hpp
|
141
|
+
- ext/cppjieba/src/Limonp/MutexLock.hpp
|
142
|
+
- ext/cppjieba/src/Limonp/MysqlClient.hpp
|
143
|
+
- ext/cppjieba/src/Limonp/NonCopyable.hpp
|
144
|
+
- ext/cppjieba/src/Limonp/StdExtension.hpp
|
145
|
+
- ext/cppjieba/src/Limonp/StringUtil.hpp
|
146
|
+
- ext/cppjieba/src/Limonp/Thread.hpp
|
147
|
+
- ext/cppjieba/src/Limonp/ThreadPool.hpp
|
148
|
+
- ext/cppjieba/src/MPSegment.hpp
|
149
|
+
- ext/cppjieba/src/MixSegment.hpp
|
150
|
+
- ext/cppjieba/src/PosTagger.hpp
|
151
|
+
- ext/cppjieba/src/QuerySegment.hpp
|
152
|
+
- ext/cppjieba/src/SegmentBase.hpp
|
153
|
+
- ext/cppjieba/src/TransCode.hpp
|
154
|
+
- ext/cppjieba/src/Trie.hpp
|
155
|
+
- ext/cppjieba/test/CMakeLists.txt
|
156
|
+
- ext/cppjieba/test/keyword_demo.cpp
|
157
|
+
- ext/cppjieba/test/load_test.cpp
|
158
|
+
- ext/cppjieba/test/segment_demo.cpp
|
159
|
+
- ext/cppjieba/test/servertest/go_load_test.sh
|
160
|
+
- ext/cppjieba/test/servertest/load_test.py
|
161
|
+
- ext/cppjieba/test/servertest/run_curl.sh
|
162
|
+
- ext/cppjieba/test/tagging_demo.cpp
|
163
|
+
- ext/cppjieba/test/testdata/curl.res
|
164
|
+
- ext/cppjieba/test/testdata/jieba.dict.0.1.utf8
|
165
|
+
- ext/cppjieba/test/testdata/jieba.dict.0.utf8
|
166
|
+
- ext/cppjieba/test/testdata/jieba.dict.1.utf8
|
167
|
+
- ext/cppjieba/test/testdata/jieba.dict.2.utf8
|
168
|
+
- ext/cppjieba/test/testdata/load_test.urls
|
169
|
+
- ext/cppjieba/test/testdata/review.100
|
170
|
+
- ext/cppjieba/test/testdata/review.100.res
|
171
|
+
- ext/cppjieba/test/testdata/server.conf
|
172
|
+
- ext/cppjieba/test/testdata/testlines.gbk
|
173
|
+
- ext/cppjieba/test/testdata/testlines.utf8
|
174
|
+
- ext/cppjieba/test/testdata/userdict.utf8
|
175
|
+
- ext/cppjieba/test/testdata/weicheng.utf8
|
176
|
+
- ext/cppjieba/test/unittest/CMakeLists.txt
|
177
|
+
- ext/cppjieba/test/unittest/TKeywordExtractor.cpp
|
178
|
+
- ext/cppjieba/test/unittest/TPosTagger.cpp
|
179
|
+
- ext/cppjieba/test/unittest/TSegments.cpp
|
180
|
+
- ext/cppjieba/test/unittest/TTrie.cpp
|
181
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest-death-test.h
|
182
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest-message.h
|
183
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest-param-test.h
|
184
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest-param-test.h.pump
|
185
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest-printers.h
|
186
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest-spi.h
|
187
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest-test-part.h
|
188
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest-typed-test.h
|
189
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest.h
|
190
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest_pred_impl.h
|
191
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/gtest_prod.h
|
192
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-death-test-internal.h
|
193
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-filepath.h
|
194
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-internal.h
|
195
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-linked_ptr.h
|
196
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-param-util-generated.h
|
197
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-param-util-generated.h.pump
|
198
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-param-util.h
|
199
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-port.h
|
200
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-string.h
|
201
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-tuple.h
|
202
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-tuple.h.pump
|
203
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-type-util.h
|
204
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/include/gtest/internal/gtest-type-util.h.pump
|
205
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/src/.deps/.dirstamp
|
206
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/src/.deps/gtest-all.Plo
|
207
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/src/.deps/gtest_main.Plo
|
208
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/src/.dirstamp
|
209
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest-all.cc
|
210
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest-death-test.cc
|
211
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest-filepath.cc
|
212
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest-internal-inl.h
|
213
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest-port.cc
|
214
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest-printers.cc
|
215
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest-test-part.cc
|
216
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest-typed-test.cc
|
217
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest.cc
|
218
|
+
- ext/cppjieba/test/unittest/gtest-1.6.0/src/gtest_main.cc
|
219
|
+
- ext/cppjieba/test/unittest/gtest_main.cpp
|
220
|
+
homepage: https://github.com/altkatz/jieba_rb
|
221
|
+
licenses:
|
222
|
+
- MIT
|
223
|
+
metadata: {}
|
224
|
+
post_install_message:
|
225
|
+
rdoc_options: []
|
226
|
+
require_paths:
|
227
|
+
- lib
|
228
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
229
|
+
requirements:
|
230
|
+
- - '>='
|
231
|
+
- !ruby/object:Gem::Version
|
232
|
+
version: 1.9.2
|
233
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
234
|
+
requirements:
|
235
|
+
- - '>='
|
236
|
+
- !ruby/object:Gem::Version
|
237
|
+
version: '0'
|
238
|
+
requirements: []
|
239
|
+
rubyforge_project:
|
240
|
+
rubygems_version: 2.1.11
|
241
|
+
signing_key:
|
242
|
+
specification_version: 4
|
243
|
+
summary: cppjieba binding for ruby
|
244
|
+
test_files:
|
245
|
+
- test/test_segment.rb
|
246
|
+
has_rdoc:
|