kytea 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 950e979e65cd669a8f1bdd21d57d77650866a569
4
+ data.tar.gz: b45f60d9924e12d146bbbf1b01cafb01ce3b1069
5
+ SHA512:
6
+ metadata.gz: 143e6b0eafc9785203dc610e3d68feee84b0314e3be81619966e400c1fdb4764169bdf24a318f9eae1d44d27b76d53bff1edfef4c3052ea1623b7af7587a6e49
7
+ data.tar.gz: dadb7e2235d8561b6675d41122e9f0243ff893d1de1baf1dfa55495f907b3544da1e4edd83db60a285ff519f7a7c54c03ba6f32b4e11f801bfff09b05bb5fc22
data/.gitignore ADDED
@@ -0,0 +1,12 @@
1
+ *.o
2
+ *.bundle
3
+ Makefile
4
+ /.bundle/
5
+ /.yardoc
6
+ /Gemfile.lock
7
+ /_yardoc/
8
+ /coverage/
9
+ /doc/
10
+ /pkg/
11
+ /spec/reports/
12
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,16 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.2
4
+
5
+ before_install:
6
+ - sudo apt-get install swig
7
+ - wget http://www.phontron.com/kytea/download/kytea-0.4.7.tar.gz
8
+ - tar zxf kytea-0.4.7.tar.gz
9
+ - pushd kytea-0.4.7 && ./configure && make && sudo make install && popd
10
+ - rm -r kytea-0.4.7
11
+ - sudo ldconfig
12
+
13
+ install:
14
+ - pushd ext && swig -Wall -c++ -ruby -I/usr/local/include mykytea.i && ruby extconf.rb && make && popd
15
+ - bundle install
16
+ - bundle exec rake install
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in kytea.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Michiaki Ariga
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,61 @@
1
+ KyTea wrapper for ruby
2
+ ==========================
3
+
4
+ Mykytea-ruby is a Ruby wrapper module for KyTea, a general text analysis toolkit.
5
+ KyTea is developed by the KyTea Development Team.
6
+
7
+ Detailed information about KyTea can be found at
8
+ http://www.phontron.com/kytea
9
+
10
+ Install Dependencies
11
+ --------------------
12
+
13
+ You need to install KyTea before building this gem.
14
+
15
+ To build Mykytea-ruby, run
16
+ --------------------
17
+
18
+ % ruby ext/extconf.rb
19
+ % make
20
+
21
+ (If you want to install, run)
22
+
23
+ % sudo make install
24
+
25
+ (If the build fails, install SWIG and regenerate the wrapper by running)
26
+
27
+ % swig -c++ -ruby -I/usr/local/include ext/mykytea.i
28
+
29
+ How to use?
30
+ --------------------
31
+
32
+ ```
33
+ require 'kytea'
34
+
35
+ # You can pass the original kytea command-line options as a string
36
+ opt = "-model /usr/local/share/kytea/model.bin"
37
+ kytea = KyTea.new(opt)
38
+
39
+ str = "今日はいい天気です。"
40
+
41
+ kytea.segment(str)
42
+ #=> ["今日", "は", "い", "い", "天気", "で", "す", "。"]
43
+
44
+ kytea.tag_info_of(str)
45
+ #=> "今日/名詞/きょう は/助詞/は い/形容詞/い い/語尾/い 天気/名詞/てんき で/助動詞/で す/語尾/す 。/補助記号/。 "
46
+
47
+ kytea.tags_of(str)
48
+ #=> [{:surface=>"今日", :tags=>[[{:tag=>"名詞", :val=>3.610404674503611}], [{:tag=>"きょう", :val=>1.0726515803715995}]]},
49
+ # {:surface=>"は", :tags=>[[{:tag=>"助詞", :val=>3.5500698037485963}], [{:tag=>"は", :val=>100.0}]]},
50
+ # {:surface=>"い", :tags=>[[{:tag=>"形容詞", :val=>2.5966088884369523}], [{:tag=>"い", :val=>100.0}]]},
51
+ # {:surface=>"い", :tags=>[[{:tag=>"語尾", :val=>2.7064013574728385}], [{:tag=>"い", :val=>100.0}]]},
52
+ # {:surface=>"天気", :tags=>[[{:tag=>"名詞", :val=>4.220721634732509}], [{:tag=>"てんき", :val=>100.0}]]},
53
+ # {:surface=>"で", :tags=>[[{:tag=>"助動詞", :val=>2.9093304720685786}], [{:tag=>"で", :val=>0.99994530321086}]]},
54
+ # {:surface=>"す", :tags=>[[{:tag=>"語尾", :val=>2.5160490891753264}], [{:tag=>"す", :val=>0.9998735552127426}]]},
55
+ # {:surface=>"。", :tags=>[[{:tag=>"補助記号", :val=>3.070959942739055}], [{:tag=>"。", :val=>100.0}]]}]
56
+ ```
57
+
58
+
59
+ License
60
+ --------------------
61
+ MIT License
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+ require 'rake/clean'
4
+
5
+ CLEAN.include('ext/**/*{.o,.log,.so,.bundle}') # Build products under ext/ removed by the rake/clean `clean` task.
6
+ CLEAN.include('ext/**/Makefile') # Makefiles under ext/ are also treated as removable build output.
7
+ CLOBBER.include('lib/*{.so,.bundle}') # Compiled extension binaries in lib/ are removed only by the `clobber` task.
8
+ # Define the `spec` task using RSpec's rake integration.
9
+ RSpec::Core::RakeTask.new(:spec)
10
+ # Running `rake` with no arguments runs the specs.
11
+ task :default => :spec
data/ext/extconf.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'mkmf' # Ruby's Makefile generator for native extensions.
2
+ $libs += " -lstdc++ -lkytea" # Append linker flags for the C++ standard library and libkytea.
3
+ create_makefile("Mykytea") # Write a Makefile for the extension named "Mykytea".
data/ext/mykytea.cpp ADDED
@@ -0,0 +1,134 @@
1
+ // mykytea.cpp
2
+ #include <iostream>
3
+ #include <cstring>
4
+ #include "mykytea.hpp"
5
+
6
+ const int MAX_LEN = 256; // Upper bound on the number of option tokens split_argv() stores after configs[0].
7
+ // Splits the space-delimited string `input` into an argv-style array: configs[0] is set to "" and each token is stored from configs[1] onward. Returns the number of entries filled, counting configs[0].
8
+ int split_argv(char* input, const char* configs[]){
9
+ int len;
10
+ const char *delim = " ";
11
+ // Tokenize a private copy because strtok() writes terminators into the buffer it scans. NOTE(review): malloc()'s result is not checked, and no free() of this buffer appears in this file; the pointers stored in configs point into it.
12
+ char *cp = (char *)malloc(strlen(input) + 1);
13
+ strcpy(cp, input);
14
+ configs[0] = ""; // Empty placeholder in slot 0; presumably stands in for the program name an argv-style parser expects -- confirm.
15
+ for(len = 0; len < MAX_LEN; len++){ // Highest index written is configs[MAX_LEN], so the caller's array needs at least MAX_LEN + 1 slots.
16
+ if((configs[len + 1] = std::strtok(cp, delim)) == NULL )
17
+ break;
18
+ cp = NULL; // After the first call, strtok() must be given NULL to keep scanning the same string.
19
+ }
20
+ return len + 1;
21
+ }
22
+
23
+ Mykytea::Mykytea(char* str) // Builds the analyzer from the space-separated option string `str` and loads the model file reported by the parsed configuration.
24
+ {
25
+ const char* configs[MAX_LEN + 1]; // Room for configs[0] plus up to MAX_LEN tokens, the most split_argv() writes.
26
+ int len = split_argv(str, configs);
27
+ // Create the configuration and hand the tokenized options to parseRunCommandLine().
28
+ config = new KyteaConfig;
29
+ config->setDebug(0);
30
+ config->setOnTraining(false);
31
+ config->parseRunCommandLine(len, configs);
32
+ // Create the Kytea instance with that configuration and read the model it names.
33
+ kytea = new Kytea(config);
34
+ kytea->readModel(config->getModelFile().c_str());
35
+ util = kytea->getStringUtil(); // Kept for the string conversions done in the get* methods.
36
+ }
37
+
38
+ Mykytea::~Mykytea() // Deletes the Kytea instance allocated in the constructor.
39
+ {
40
+ if(kytea != NULL) delete kytea; // NOTE(review): `config` (allocated with new in the constructor) is not deleted here; confirm whether Kytea's destructor releases it.
41
+ }
42
+
43
+ vector<string>* Mykytea::getWS(string str){ // Runs word segmentation on `str` and returns the surface string of each word in a newly allocated vector.
44
+ vector<string>* vec = new vector<string>; // Allocated with new and returned as-is; this function never deletes it.
45
+ KyteaString surface_string = util->mapString(str);
46
+ KyteaSentence sentence(surface_string, util->normalize(surface_string));
47
+ kytea->calculateWS(sentence);
48
+ // The words read below are presumably filled in by calculateWS() above -- confirm against the KyTea API.
49
+ const KyteaSentence::Words & words = sentence.words;
50
+
51
+ for(int i = 0; i < (int)words.size(); i++) {
52
+ (*vec).push_back(util->showString(words[i].surface)); // showString() yields the value stored as a std::string element.
53
+ }
54
+ return vec;
55
+ }
56
+
57
+ vector<Tags>* Mykytea::getTags(string str){ // Segments and tags `str`; for every word returns its surface plus only the first candidate of each tag entry.
58
+ vector<Tags>* ret_words = new vector<Tags>; // Allocated with new and returned as-is; this function never deletes it.
59
+
60
+ KyteaString surface_string = util->mapString(str);
61
+ KyteaSentence sentence(surface_string, util->normalize(surface_string));
62
+ kytea->calculateWS(sentence);
63
+ // Run tagging once for each tag index the configuration reports.
64
+ for(int i = 0; i < config->getNumTags(); i++)
65
+ kytea->calculateTags(sentence,i);
66
+
67
+ const KyteaSentence::Words & words = sentence.words;
68
+ // Convert each word into a Tags record (surface + per-entry candidate list).
69
+ for(int i = 0; i < (int)words.size(); i++) {
70
+ tags vec_tag;
71
+ for(int j = 0; j < (int)words[i].tags.size(); j++) {
72
+ vector< pair<string, double> > vec_tmp;
73
+ for(int k = 0; k < 1; k++) { // Single pass: only candidate 0 is taken. NOTE(review): tags[j][0] is read without checking that tags[j] is non-empty.
74
+ vec_tmp.push_back( make_pair(util->showString(words[i].tags[j][k].first), words[i].tags[j][k].second) );
75
+ }
76
+ vec_tag.push_back( vec_tmp );
77
+ }
78
+ struct Tags t = { util->showString(words[i].surface), vec_tag };
79
+ (*ret_words).push_back( t );
80
+ }
81
+ return ret_words;
82
+ }
83
+
84
+ vector<Tags>* Mykytea::getAllTags(string str){ // Segments and tags `str`; for every word returns its surface plus every candidate of each tag entry.
85
+ vector<Tags>* ret_words = new vector<Tags>; // Allocated with new and returned as-is; this function never deletes it.
86
+
87
+ KyteaString surface_string = util->mapString(str);
88
+ KyteaSentence sentence(surface_string, util->normalize(surface_string));
89
+ kytea->calculateWS(sentence);
90
+ // Run tagging once for each tag index the configuration reports.
91
+ for(int i = 0; i < config->getNumTags(); i++)
92
+ kytea->calculateTags(sentence,i);
93
+
94
+ const KyteaSentence::Words & words = sentence.words;
95
+ // Convert each word into a Tags record (surface + per-entry candidate list).
96
+ for(int i = 0; i < (int)words.size(); i++) {
97
+ tags vec_tag;
98
+ for(int j = 0; j < (int)words[i].tags.size(); j++) {
99
+ vector< pair<string, double> > vec_tmp;
100
+ for(int k = 0; k < (int)words[i].tags[j].size(); k++) { // Unlike getTags(), every candidate in tags[j] is copied.
101
+ vec_tmp.push_back( make_pair(util->showString(words[i].tags[j][k].first), words[i].tags[j][k].second) );
102
+ }
103
+ vec_tag.push_back( vec_tmp );
104
+ }
105
+ struct Tags t = { util->showString(words[i].surface), vec_tag };
106
+ (*ret_words).push_back( t );
107
+ }
108
+ return ret_words;
109
+ }
110
+
111
+ string Mykytea::getTagsToString(string str) // Segments and tags `str` and renders the result as text: each word is written as surface/tag/tag... followed by one space.
112
+ {
113
+ KyteaString surface_string = util->mapString(str);
114
+ KyteaSentence sentence(surface_string, util->normalize(surface_string));
115
+ kytea->calculateWS(sentence);
116
+ // Run tagging once for each tag index the configuration reports.
117
+ for(int i = 0; i < config->getNumTags(); i++)
118
+ kytea->calculateTags(sentence,i);
119
+
120
+ const KyteaSentence::Words & words = sentence.words;
121
+ // Build the output one word at a time.
122
+ string ret_str;
123
+ for(int i = 0; i < (int)words.size(); i++) {
124
+ ret_str += util->showString(words[i].surface);
125
+ for(int j = 0; j < (int)words[i].tags.size(); j++) {
126
+ for(int k = 0; k < 1; k++) { // Single pass: only candidate 0 is written. NOTE(review): tags[j][0] is read without checking that tags[j] is non-empty.
127
+ ret_str += "/";
128
+ ret_str += util->showString(words[i].tags[j][k].first);
129
+ }
130
+ }
131
+ ret_str += " "; // A separator is appended after every word, so the result ends with a trailing space.
132
+ }
133
+ return ret_str;
134
+ }
data/ext/mykytea.hpp ADDED
@@ -0,0 +1,52 @@
1
+ // mykytea.hpp
2
+ #ifndef MYKYTEA_HPP
3
+ #define MYKYTEA_HPP
4
+
5
+ #include <vector>
6
+ #include <string>
7
+ #include <map>
8
+ #include <cstdlib>
9
+
10
+ #include <kytea/kytea.h>
11
+ #include <kytea/kytea-struct.h>
12
+ #include <kytea/string-util.h>
13
+
14
+ using namespace std;
15
+ using namespace kytea;
16
+
17
+ typedef vector< vector< pair<string, double> > > tags; // Per-word tag data: the outer vector mirrors a word's tag entries, each inner vector holds (tag string, numeric value) pairs.
18
+ // One analyzed word: its surface text together with the tag data collected for it.
19
+ struct Tags{
20
+ string surface;
21
+ tags tag;
22
+ };
23
+
24
+ class Mykytea // Wrapper that owns a Kytea instance and exposes segmentation/tagging through std::string and standard containers.
25
+ {
26
+ public:
27
+
28
+ Mykytea(char* str); // `str` is an option string; mykytea.cpp splits it on spaces before parsing.
29
+ ~Mykytea();
30
+
31
+ // Get the word boundaries (surface string of each segmented word).
32
+
33
+ vector<string>* getWS(string str);
34
+
35
+ // Get the most likely tags (first candidate of each tag entry).
36
+
37
+ vector<Tags>* getTags(string str);
38
+
39
+ // Get all tags (every candidate of each tag entry).
40
+
41
+ vector<Tags>* getAllTags(string str);
42
+
43
+ // Get the tags as a single string.
44
+
45
+ string getTagsToString(string str);
46
+
47
+ private:
48
+ Kytea* kytea; // Allocated in the constructor, deleted in the destructor.
49
+ StringUtil* util; // Obtained from kytea->getStringUtil() in the constructor.
50
+ KyteaConfig* config; // Allocated in the constructor and passed to Kytea.
51
+ };
52
+ #endif
data/ext/mykytea.i ADDED
@@ -0,0 +1,37 @@
1
+ %module Mykytea
2
+ %include "stl.i"
3
+ %include "exception.i"
4
+
5
+ // Code between %{ and %} is copied verbatim into the generated wrapper source.
6
+ %{
7
+ #include "mykytea.hpp"
8
+ %}
9
+
10
+ // Instantiate wrappers for the container types used in the Mykytea interface.
11
+ namespace std {
12
+ %template(StringVector) vector<string>;
13
+ %template(Pairsd) pair<string, double>;
14
+ %template(PairVector) vector< pair<string, double> >;
15
+ %template(PairVectorVector) vector< vector< pair<string, double> > >;
16
+ %template(TagsVector) vector<Tags>;
17
+
18
+ }
19
+ // These methods return objects allocated with new; %newobject tells SWIG the target language owns the result.
20
+ %newobject getWS;
21
+ %newobject getTags;
22
+ %newobject getAllTags;
23
+ // Wrap every call so C++ exceptions are reported through SWIG_exception instead of escaping the wrapper.
24
+ %exception{
25
+ try{
26
+ $action
27
+ } catch (const std::exception &e){
28
+ SWIG_exception(SWIG_RuntimeError, e.what() );
29
+ } catch (...) {
30
+ SWIG_exception(SWIG_UnknownError, "Unknown exception");
31
+ }
32
+ }
33
+ // Headers whose declarations SWIG parses to generate wrappers.
34
+ %include kytea/kytea.h
35
+ %include kytea/kytea-struct.h
36
+ %include mykytea.hpp
37
+