kytea 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 950e979e65cd669a8f1bdd21d57d77650866a569
4
+ data.tar.gz: b45f60d9924e12d146bbbf1b01cafb01ce3b1069
5
+ SHA512:
6
+ metadata.gz: 143e6b0eafc9785203dc610e3d68feee84b0314e3be81619966e400c1fdb4764169bdf24a318f9eae1d44d27b76d53bff1edfef4c3052ea1623b7af7587a6e49
7
+ data.tar.gz: dadb7e2235d8561b6675d41122e9f0243ff893d1de1baf1dfa55495f907b3544da1e4edd83db60a285ff519f7a7c54c03ba6f32b4e11f801bfff09b05bb5fc22
data/.gitignore ADDED
@@ -0,0 +1,12 @@
1
+ *.o
2
+ *.bundle
3
+ Makefile
4
+ /.bundle/
5
+ /.yardoc
6
+ /Gemfile.lock
7
+ /_yardoc/
8
+ /coverage/
9
+ /doc/
10
+ /pkg/
11
+ /spec/reports/
12
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,16 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.2
4
+
5
+ before_install:
6
+ - sudo apt-get install swig
7
+ - wget http://www.phontron.com/kytea/download/kytea-0.4.7.tar.gz
8
+ - tar zxf kytea-0.4.7.tar.gz
9
+ - pushd kytea-0.4.7 && ./configure && make && sudo make install && popd
10
+ - rm -r kytea-0.4.7
11
+ - sudo ldconfig
12
+
13
+ install:
14
+ - pushd ext && swig -Wall -c++ -ruby -I/usr/local/include mykytea.i && ruby extconf.rb && make && popd
15
+ - bundle install
16
+ - bundle exec rake install
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in kytea.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Michiaki Ariga
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,61 @@
1
+ KyTea wrapper for Ruby
2
+ ==========================
3
+
4
+ Mykytea-ruby is a Ruby wrapper module for KyTea, a general text analysis toolkit.
5
+ KyTea is developed by the KyTea Development Team.
6
+
7
+ Detailed information about KyTea can be found at
8
+ http://www.phontron.com/kytea
9
+
10
+ Install Dependencies
11
+ --------------------
12
+
13
+ You need to install KyTea before building this gem.
14
+
15
+ To build Mykytea-ruby, run
16
+ --------------------
17
+
18
+ % ruby ext/extconf.rb
19
+ % make
20
+
21
+ (If you want to install, run)
22
+
23
+ % sudo make install
24
+
25
+ (If the build fails, install SWIG and regenerate the wrapper code with)
26
+
27
+ % swig -c++ -ruby -I/usr/local/include ext/mykytea.i
28
+
29
+ How to use?
30
+ --------------------
31
+
32
+ ```
33
+ require 'kytea'
34
+
35
+ # Pass KyTea's own command-line options as a single string
36
+ opt = "-model /usr/local/share/kytea/model.bin"
37
+ kytea = KyTea.new(opt)
38
+
39
+ str = "今日はいい天気です。"
40
+
41
+ kytea.segment(str)
42
+ #=> ["今日", "は", "い", "い", "天気", "で", "す", "。"]
43
+
44
+ kytea.tag_info_of(str)
45
+ #=> "今日/名詞/きょう は/助詞/は い/形容詞/い い/語尾/い 天気/名詞/てんき で/助動詞/で す/語尾/す 。/補助記号/。 "
46
+
47
+ kytea.tags_of(str)
48
+ #=> [{:surface=>"今日", :tags=>[[{:tag=>"名詞", :val=>3.610404674503611}], [{:tag=>"きょう", :val=>1.0726515803715995}]]},
49
+ # {:surface=>"は", :tags=>[[{:tag=>"助詞", :val=>3.5500698037485963}], [{:tag=>"は", :val=>100.0}]]},
50
+ # {:surface=>"い", :tags=>[[{:tag=>"形容詞", :val=>2.5966088884369523}], [{:tag=>"い", :val=>100.0}]]},
51
+ # {:surface=>"い", :tags=>[[{:tag=>"語尾", :val=>2.7064013574728385}], [{:tag=>"い", :val=>100.0}]]},
52
+ # {:surface=>"天気", :tags=>[[{:tag=>"名詞", :val=>4.220721634732509}], [{:tag=>"てんき", :val=>100.0}]]},
53
+ # {:surface=>"で", :tags=>[[{:tag=>"助動詞", :val=>2.9093304720685786}], [{:tag=>"で", :val=>0.99994530321086}]]},
54
+ # {:surface=>"す", :tags=>[[{:tag=>"語尾", :val=>2.5160490891753264}], [{:tag=>"す", :val=>0.9998735552127426}]]},
55
+ # {:surface=>"。", :tags=>[[{:tag=>"補助記号", :val=>3.070959942739055}], [{:tag=>"。", :val=>100.0}]]}]
56
+ ```
57
+
58
+
59
+ License
60
+ --------------------
61
+ MIT License
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+ require 'rake/clean'
4
+
5
# Build products of the native extension that `rake clean` removes.
CLEAN.include('ext/**/*{.o,.log,.so,.bundle}', 'ext/**/Makefile')

# Compiled libraries copied into lib/ that `rake clobber` removes as well.
CLOBBER.include('lib/*{.so,.bundle}')

# `rake spec` runs the RSpec suite; it is also the default task.
RSpec::Core::RakeTask.new(:spec)

task default: :spec
data/ext/extconf.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'mkmf'
2
# Link the extension against the C++ runtime and the KyTea library.
$libs = "#{$libs} -lstdc++ -lkytea"

# Write the Makefile that builds the "Mykytea" extension.
create_makefile("Mykytea")
data/ext/mykytea.cpp ADDED
@@ -0,0 +1,134 @@
1
+ // mykytea.cpp
2
+ #include <iostream>
3
+ #include <cstring>
4
+ #include "mykytea.hpp"
5
+
6
// Maximum number of option tokens split_argv() stores. Callers must pass an
// array with room for MAX_LEN + 1 entries (the extra slot is configs[0]).
const int MAX_LEN = 256;

// Splits a space-separated option string into an argv-style array.
//
// input   - NUL-terminated option string. It is copied, never modified.
//           NULL is treated like an empty string.
// configs - output array with room for at least MAX_LEN + 1 entries.
//           configs[0] is set to "" (the slot a program name would occupy)
//           and the tokens follow from configs[1]. When fewer than MAX_LEN
//           tokens are found, the entry after the last token is set to NULL.
//
// Returns the number of entries filled in (token count + 1), usable as argc.
// At most MAX_LEN tokens are stored; anything after that is ignored.
//
// Ownership: the tokens point into a private heap copy of `input`. That copy
// is intentionally not released while any token refers to it, so the returned
// pointers stay valid for the rest of the process. It is freed immediately
// when the input contains no token at all.
//
// Throws std::bad_alloc if the copy cannot be allocated.
int split_argv(char* input, const char* configs[]){
    const char *delim = " ";
    int len = 0;

    configs[0] = "";
    if(input == NULL){
        configs[1] = NULL;
        return 1;
    }

    // new[] reports allocation failure by throwing, whereas an unchecked
    // malloc() would hand a NULL pointer straight to strcpy().
    char *buffer = new char[std::strlen(input) + 1];
    std::strcpy(buffer, input);

    // strspn/strcspn give the same tokenisation as strtok (runs of delimiters
    // collapse, no empty tokens) without strtok's hidden global state.
    char *cursor = buffer;
    while(len < MAX_LEN){
        cursor += std::strspn(cursor, delim);
        if(*cursor == '\0')
            break;
        configs[++len] = cursor;
        cursor += std::strcspn(cursor, delim);
        if(*cursor != '\0')
            *cursor++ = '\0';
    }

    // Terminate the list only when there is a free slot left in the array.
    if(len < MAX_LEN)
        configs[len + 1] = NULL;

    // Nothing points into the copy when no token was found.
    if(len == 0)
        delete[] buffer;

    return len + 1;
}
22
+
23
+ Mykytea::Mykytea(char* str)
24
+ {
25
+ const char* configs[MAX_LEN + 1];
26
+ int len = split_argv(str, configs);
27
+
28
+ config = new KyteaConfig;
29
+ config->setDebug(0);
30
+ config->setOnTraining(false);
31
+ config->parseRunCommandLine(len, configs);
32
+
33
+ kytea = new Kytea(config);
34
+ kytea->readModel(config->getModelFile().c_str());
35
+ util = kytea->getStringUtil();
36
+ }
37
+
38
+ Mykytea::~Mykytea()
39
+ {
40
+ if(kytea != NULL) delete kytea;
41
+ }
42
+
43
+ vector<string>* Mykytea::getWS(string str){
44
+ vector<string>* vec = new vector<string>;
45
+ KyteaString surface_string = util->mapString(str);
46
+ KyteaSentence sentence(surface_string, util->normalize(surface_string));
47
+ kytea->calculateWS(sentence);
48
+
49
+ const KyteaSentence::Words & words = sentence.words;
50
+
51
+ for(int i = 0; i < (int)words.size(); i++) {
52
+ (*vec).push_back(util->showString(words[i].surface));
53
+ }
54
+ return vec;
55
+ }
56
+
57
+ vector<Tags>* Mykytea::getTags(string str){
58
+ vector<Tags>* ret_words = new vector<Tags>;
59
+
60
+ KyteaString surface_string = util->mapString(str);
61
+ KyteaSentence sentence(surface_string, util->normalize(surface_string));
62
+ kytea->calculateWS(sentence);
63
+
64
+ for(int i = 0; i < config->getNumTags(); i++)
65
+ kytea->calculateTags(sentence,i);
66
+
67
+ const KyteaSentence::Words & words = sentence.words;
68
+
69
+ for(int i = 0; i < (int)words.size(); i++) {
70
+ tags vec_tag;
71
+ for(int j = 0; j < (int)words[i].tags.size(); j++) {
72
+ vector< pair<string, double> > vec_tmp;
73
+ for(int k = 0; k < 1; k++) {
74
+ vec_tmp.push_back( make_pair(util->showString(words[i].tags[j][k].first), words[i].tags[j][k].second) );
75
+ }
76
+ vec_tag.push_back( vec_tmp );
77
+ }
78
+ struct Tags t = { util->showString(words[i].surface), vec_tag };
79
+ (*ret_words).push_back( t );
80
+ }
81
+ return ret_words;
82
+ }
83
+
84
+ vector<Tags>* Mykytea::getAllTags(string str){
85
+ vector<Tags>* ret_words = new vector<Tags>;
86
+
87
+ KyteaString surface_string = util->mapString(str);
88
+ KyteaSentence sentence(surface_string, util->normalize(surface_string));
89
+ kytea->calculateWS(sentence);
90
+
91
+ for(int i = 0; i < config->getNumTags(); i++)
92
+ kytea->calculateTags(sentence,i);
93
+
94
+ const KyteaSentence::Words & words = sentence.words;
95
+
96
+ for(int i = 0; i < (int)words.size(); i++) {
97
+ tags vec_tag;
98
+ for(int j = 0; j < (int)words[i].tags.size(); j++) {
99
+ vector< pair<string, double> > vec_tmp;
100
+ for(int k = 0; k < (int)words[i].tags[j].size(); k++) {
101
+ vec_tmp.push_back( make_pair(util->showString(words[i].tags[j][k].first), words[i].tags[j][k].second) );
102
+ }
103
+ vec_tag.push_back( vec_tmp );
104
+ }
105
+ struct Tags t = { util->showString(words[i].surface), vec_tag };
106
+ (*ret_words).push_back( t );
107
+ }
108
+ return ret_words;
109
+ }
110
+
111
+ string Mykytea::getTagsToString(string str)
112
+ {
113
+ KyteaString surface_string = util->mapString(str);
114
+ KyteaSentence sentence(surface_string, util->normalize(surface_string));
115
+ kytea->calculateWS(sentence);
116
+
117
+ for(int i = 0; i < config->getNumTags(); i++)
118
+ kytea->calculateTags(sentence,i);
119
+
120
+ const KyteaSentence::Words & words = sentence.words;
121
+
122
+ string ret_str;
123
+ for(int i = 0; i < (int)words.size(); i++) {
124
+ ret_str += util->showString(words[i].surface);
125
+ for(int j = 0; j < (int)words[i].tags.size(); j++) {
126
+ for(int k = 0; k < 1; k++) {
127
+ ret_str += "/";
128
+ ret_str += util->showString(words[i].tags[j][k].first);
129
+ }
130
+ }
131
+ ret_str += " ";
132
+ }
133
+ return ret_str;
134
+ }
data/ext/mykytea.hpp ADDED
@@ -0,0 +1,52 @@
1
+ // mykytea.hpp
2
+ #ifndef MYKYTEA_HPP
3
+ #define MYKYTEA_HPP
4
+
5
+ #include <vector>
6
+ #include <string>
7
+ #include <map>
8
+ #include <cstdlib>
9
+
10
+ #include <kytea/kytea.h>
11
+ #include <kytea/kytea-struct.h>
12
+ #include <kytea/string-util.h>
13
+
14
+ using namespace std;
15
+ using namespace kytea;
16
+
17
// Tag data for one word: the outer vector has one entry per tag level, and
// each inner vector holds (tag string, numeric value) pairs for that level.
typedef std::vector< std::vector< std::pair<std::string, double> > > tags;

// One analysed word together with its tags.
struct Tags{
    std::string surface;  // the word as it appears in the text
    tags tag;             // tag data for the word, indexed by tag level
};
23
+
24
+ class Mykytea
25
+ {
26
+ public:
27
+
28
+ Mykytea(char* str);
29
+ ~Mykytea();
30
+
31
+ //単語境界を取得する
32
+
33
+ vector<string>* getWS(string str);
34
+
35
+ //尤もらしいタグを取得する
36
+
37
+ vector<Tags>* getTags(string str);
38
+
39
+ //すべてのタグを取得する
40
+
41
+ vector<Tags>* getAllTags(string str);
42
+
43
+ //タグを文字列で取得する
44
+
45
+ string getTagsToString(string str);
46
+
47
+ private:
48
+ Kytea* kytea;
49
+ StringUtil* util;
50
+ KyteaConfig* config;
51
+ };
52
+ #endif
data/ext/mykytea.i ADDED
@@ -0,0 +1,37 @@
1
+ %module Mykytea
2
+ %include "stl.i"
3
+ %include "exception.i"
4
+
5
+
6
+ %{
7
+ #include "mykytea.hpp"
8
+ %}
9
+
10
+
11
+ namespace std {
12
+ %template(StringVector) vector<string>;
13
+ %template(Pairsd) pair<string, double>;
14
+ %template(PairVector) vector< pair<string, double> >;
15
+ %template(PairVectorVector) vector< vector< pair<string, double> > >;
16
+ %template(TagsVector) vector<Tags>;
17
+
18
+ }
19
+
20
+ %newobject getWS;
21
+ %newobject getTags;
22
+ %newobject getAllTags;
23
+
24
+ %exception{
25
+ try{
26
+ $action
27
+ } catch (const std::exception &e){
28
+ SWIG_exception(SWIG_RuntimeError, e.what() );
29
+ } catch (...) {
30
+ SWIG_exception(SWIG_UnknownError, "Unknown exception");
31
+ }
32
+ }
33
+
34
+ %include kytea/kytea.h
35
+ %include kytea/kytea-struct.h
36
+ %include mykytea.hpp
37
+