kytea 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.rspec +2 -0
- data/.travis.yml +16 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +61 -0
- data/Rakefile +11 -0
- data/ext/extconf.rb +3 -0
- data/ext/mykytea.cpp +134 -0
- data/ext/mykytea.hpp +52 -0
- data/ext/mykytea.i +37 -0
- data/ext/mykytea_wrap.cxx +19160 -0
- data/kytea.gemspec +26 -0
- data/lib/kytea/version.rb +3 -0
- data/lib/kytea.rb +36 -0
- metadata +103 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 950e979e65cd669a8f1bdd21d57d77650866a569
|
4
|
+
data.tar.gz: b45f60d9924e12d146bbbf1b01cafb01ce3b1069
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 143e6b0eafc9785203dc610e3d68feee84b0314e3be81619966e400c1fdb4764169bdf24a318f9eae1d44d27b76d53bff1edfef4c3052ea1623b7af7587a6e49
|
7
|
+
data.tar.gz: dadb7e2235d8561b6675d41122e9f0243ff893d1de1baf1dfa55495f907b3544da1e4edd83db60a285ff519f7a7c54c03ba6f32b4e11f801bfff09b05bb5fc22
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 2.2.2
|
4
|
+
|
5
|
+
before_install:
|
6
|
+
- sudo apt-get install swig
|
7
|
+
- wget http://www.phontron.com/kytea/download/kytea-0.4.7.tar.gz
|
8
|
+
- tar zxf kytea-0.4.7.tar.gz
|
9
|
+
- pushd kytea-0.4.7 && ./configure && make && sudo make install && popd
|
10
|
+
- rm -r kytea-0.4.7
|
11
|
+
- sudo ldconfig
|
12
|
+
|
13
|
+
install:
|
14
|
+
- pushd ext && swig -Wall -c++ -ruby -I/usr/local/include mykytea.i && ruby extconf.rb && make && popd
|
15
|
+
- bundle install
|
16
|
+
- bundle exec rake install
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Michiaki Ariga
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
KyTea wrapper for ruby
|
2
|
+
==========================
|
3
|
+
|
4
|
+
Mykytea-ruby is a Ruby wrapper module for KyTea, a general text analysis toolkit.
|
5
|
+
KyTea is developed by the KyTea Development Team.
|
6
|
+
|
7
|
+
Detailed information about KyTea can be found at
|
8
|
+
http://www.phontron.com/kytea
|
9
|
+
|
10
|
+
Install Dependencies
|
11
|
+
--------------------
|
12
|
+
|
13
|
+
You need to install KyTea before building.
|
14
|
+
|
15
|
+
To build Mykytea-ruby, run
|
16
|
+
--------------------
|
17
|
+
|
18
|
+
% ruby ext/extconf.rb
|
19
|
+
% make
|
20
|
+
|
21
|
+
(If you want to install, run)
|
22
|
+
|
23
|
+
% sudo make install
|
24
|
+
|
25
|
+
(If the build fails, install SWIG and regenerate the wrapper by running)
|
26
|
+
|
27
|
+
% swig -c++ -ruby -I/usr/local/include ext/mykytea.i
|
28
|
+
|
29
|
+
How to use?
|
30
|
+
--------------------
|
31
|
+
|
32
|
+
```
|
33
|
+
require 'kytea'
|
34
|
+
|
35
|
+
# You can pass the same options as the original kytea command
|
36
|
+
opt = "-model /usr/local/share/kytea/model.bin"
|
37
|
+
kytea = KyTea.new(opt)
|
38
|
+
|
39
|
+
str = "今日はいい天気です。"
|
40
|
+
|
41
|
+
kytea.segment(str)
|
42
|
+
#=> ["今日", "は", "い", "い", "天気", "で", "す", "。"]
|
43
|
+
|
44
|
+
kytea.tag_info_of(str)
|
45
|
+
#=> "今日/名詞/きょう は/助詞/は い/形容詞/い い/語尾/い 天気/名詞/てんき で/助動詞/で す/語尾/す 。/補助記号/。 "
|
46
|
+
|
47
|
+
kytea.tags_of(str)
|
48
|
+
#=> [{:surface=>"今日", :tags=>[[{:tag=>"名詞", :val=>3.610404674503611}], [{:tag=>"きょう", :val=>1.0726515803715995}]]},
|
49
|
+
# {:surface=>"は", :tags=>[[{:tag=>"助詞", :val=>3.5500698037485963}], [{:tag=>"は", :val=>100.0}]]},
|
50
|
+
# {:surface=>"い", :tags=>[[{:tag=>"形容詞", :val=>2.5966088884369523}], [{:tag=>"い", :val=>100.0}]]},
|
51
|
+
# {:surface=>"い", :tags=>[[{:tag=>"語尾", :val=>2.7064013574728385}], [{:tag=>"い", :val=>100.0}]]},
|
52
|
+
# {:surface=>"天気", :tags=>[[{:tag=>"名詞", :val=>4.220721634732509}], [{:tag=>"てんき", :val=>100.0}]]},
|
53
|
+
# {:surface=>"で", :tags=>[[{:tag=>"助動詞", :val=>2.9093304720685786}], [{:tag=>"で", :val=>0.99994530321086}]]},
|
54
|
+
# {:surface=>"す", :tags=>[[{:tag=>"語尾", :val=>2.5160490891753264}], [{:tag=>"す", :val=>0.9998735552127426}]]},
|
55
|
+
# {:surface=>"。", :tags=>[[{:tag=>"補助記号", :val=>3.070959942739055}], [{:tag=>"。", :val=>100.0}]]}]
|
56
|
+
```
|
57
|
+
|
58
|
+
|
59
|
+
License
|
60
|
+
--------------------
|
61
|
+
MIT License
|
data/Rakefile
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rspec/core/rake_task"
|
3
|
+
require 'rake/clean'
|
4
|
+
|
5
|
+
CLEAN.include('ext/**/*{.o,.log,.so,.bundle}')
|
6
|
+
CLEAN.include('ext/**/Makefile')
|
7
|
+
CLOBBER.include('lib/*{.so,.bundle}')
|
8
|
+
|
9
|
+
RSpec::Core::RakeTask.new(:spec)
|
10
|
+
|
11
|
+
task :default => :spec
|
data/ext/extconf.rb
ADDED
data/ext/mykytea.cpp
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
// mykytea.cpp
|
2
|
+
#include <iostream>
|
3
|
+
#include <cstring>
|
4
|
+
#include "mykytea.hpp"
|
5
|
+
|
6
|
+
// Maximum number of option tokens split_argv() will store. Callers must size
// their array as MAX_LEN + 1 to leave room for the placeholder in slot 0.
const int MAX_LEN = 256;

// Splits a space-separated option string into an argv-style token array.
//
// configs[0] is set to "" (a stand-in for the program name) and the tokens
// follow from configs[1]. When fewer than MAX_LEN tokens are found, the slot
// after the last token is set to NULL. At most MAX_LEN tokens are stored;
// any further tokens are dropped.
//
// input   - NUL-terminated option string; it is not modified. A NULL pointer
//           is treated like an empty string.
// configs - output array with room for at least MAX_LEN + 1 entries.
//
// Returns the number of entries filled in, counting the configs[0]
// placeholder, so the result is always >= 1.
//
// The tokens point into a heap copy of `input`. That copy is deliberately not
// released when at least one token was found, because the caller reads the
// tokens after this function returns. The allocation throws std::bad_alloc
// on failure instead of yielding a null buffer.
//
// Not reentrant: tokenising relies on std::strtok's internal state.
int split_argv(char* input, const char* configs[]){
  const char *delim = " ";

  configs[0] = "";
  if(input == NULL){
    configs[1] = NULL;
    return 1;
  }

  // Tokenise a private copy so the caller's buffer is left untouched.
  char *cp = new char[std::strlen(input) + 1];
  std::strcpy(cp, input);

  int len;
  for(len = 0; len < MAX_LEN; len++){
    // Only the first strtok call receives the buffer; later calls continue
    // from where the previous one stopped.
    configs[len + 1] = std::strtok(len == 0 ? cp : NULL, delim);
    if(configs[len + 1] == NULL)
      break;
  }

  // No token refers to the copy, so it can be released right away.
  if(len == 0)
    delete[] cp;

  return len + 1;
}
|
22
|
+
|
23
|
+
// Builds a KyTea analyzer from a command-line style option string.
//
// str - space-separated KyTea options, e.g. "-model /path/to/model.bin".
//       It is tokenised by split_argv() and handed to
//       KyteaConfig::parseRunCommandLine().
//
// The model file named by the parsed configuration is read immediately.
//
// A destructor does not run for an object whose constructor throws, so any
// exception raised during set-up is intercepted here, the objects created so
// far are released, and the exception is rethrown unchanged.
Mykytea::Mykytea(char* str)
  : kytea(NULL), util(NULL), config(NULL)
{
  const char* configs[MAX_LEN + 1];
  int len = split_argv(str, configs);

  config = new KyteaConfig;
  try {
    config->setDebug(0);
    config->setOnTraining(false);
    config->parseRunCommandLine(len, configs);
  } catch (...) {
    // Nothing else holds the configuration yet, so it is safe to free it.
    delete config;
    config = NULL;
    throw;
  }

  kytea = new Kytea(config);
  try {
    kytea->readModel(config->getModelFile().c_str());
    util = kytea->getStringUtil();
  } catch (...) {
    // NOTE(review): config is left to the Kytea object here, mirroring the
    // destructor, which only deletes kytea — confirm that Kytea owns it.
    delete kytea;
    kytea = NULL;
    throw;
  }
}
|
37
|
+
|
38
|
+
// Releases the KyTea analyzer created by the constructor.
//
// NOTE(review): `config` is not deleted here — presumably the Kytea object
// takes ownership of it; confirm against the KyTea headers. `util` is obtained
// from kytea->getStringUtil() and is likewise not deleted separately.
Mykytea::~Mykytea()
{
  // Deleting a null pointer is a no-op, so no explicit check is required.
  delete kytea;
}
|
42
|
+
|
43
|
+
// Segments `str` into words.
//
// str - text to analyse; it is converted with StringUtil::mapString() and
//       normalised before word segmentation is computed.
//
// Returns a heap-allocated vector holding the surface string of every word,
// in sentence order. The caller owns the vector and must delete it.
vector<string>* Mykytea::getWS(string str){
  KyteaString surface_string = util->mapString(str);
  KyteaSentence sentence(surface_string, util->normalize(surface_string));
  kytea->calculateWS(sentence);

  const KyteaSentence::Words & words = sentence.words;

  // Collect into a local vector first so that nothing is leaked if a KyTea
  // call throws part-way through.
  vector<string> surfaces;
  surfaces.reserve(words.size());
  for(size_t i = 0; i < words.size(); i++) {
    surfaces.push_back(util->showString(words[i].surface));
  }

  // Hand the collected words to the heap result without copying them.
  vector<string>* vec = new vector<string>;
  vec->swap(surfaces);
  return vec;
}
|
56
|
+
|
57
|
+
// Segments `str` and returns, for every word, the first candidate of each
// tag level (the header describes this as the most plausible tag).
//
// str - text to analyse.
//
// Returns a heap-allocated vector with one Tags entry per word. Each entry
// carries the word surface and, per tag level, a list holding the first
// (tag, score) candidate. The list is empty when that level produced no
// candidate. The caller owns the vector and must delete it.
vector<Tags>* Mykytea::getTags(string str){
  KyteaString surface_string = util->mapString(str);
  KyteaSentence sentence(surface_string, util->normalize(surface_string));
  kytea->calculateWS(sentence);

  for(int i = 0; i < config->getNumTags(); i++)
    kytea->calculateTags(sentence,i);

  const KyteaSentence::Words & words = sentence.words;

  // Build the result locally so it is not leaked if a KyTea call throws.
  vector<Tags> collected;
  collected.reserve(words.size());
  for(size_t i = 0; i < words.size(); i++) {
    tags vec_tag;
    for(size_t j = 0; j < words[i].tags.size(); j++) {
      vector< pair<string, double> > vec_tmp;
      // Guard the [0] access: a level without candidates must not be indexed.
      if(words[i].tags[j].size() > 0) {
        vec_tmp.push_back( make_pair(util->showString(words[i].tags[j][0].first), words[i].tags[j][0].second) );
      }
      vec_tag.push_back( vec_tmp );
    }
    Tags t = { util->showString(words[i].surface), vec_tag };
    collected.push_back( t );
  }

  vector<Tags>* ret_words = new vector<Tags>;
  ret_words->swap(collected);
  return ret_words;
}
|
83
|
+
|
84
|
+
// Segments `str` and returns, for every word, all candidates of each tag
// level.
//
// str - text to analyse.
//
// Returns a heap-allocated vector with one Tags entry per word. Each entry
// carries the word surface and, per tag level, every (tag, score) candidate
// in the order KyTea stores them. The caller owns the vector and must
// delete it.
vector<Tags>* Mykytea::getAllTags(string str){
  KyteaString surface_string = util->mapString(str);
  KyteaSentence sentence(surface_string, util->normalize(surface_string));
  kytea->calculateWS(sentence);

  for(int i = 0; i < config->getNumTags(); i++)
    kytea->calculateTags(sentence,i);

  const KyteaSentence::Words & words = sentence.words;

  // Build the result locally so it is not leaked if a KyTea call throws.
  vector<Tags> collected;
  collected.reserve(words.size());
  for(size_t i = 0; i < words.size(); i++) {
    tags vec_tag;
    for(size_t j = 0; j < words[i].tags.size(); j++) {
      vector< pair<string, double> > vec_tmp;
      for(size_t k = 0; k < words[i].tags[j].size(); k++) {
        vec_tmp.push_back( make_pair(util->showString(words[i].tags[j][k].first), words[i].tags[j][k].second) );
      }
      vec_tag.push_back( vec_tmp );
    }
    Tags t = { util->showString(words[i].surface), vec_tag };
    collected.push_back( t );
  }

  vector<Tags>* ret_words = new vector<Tags>;
  ret_words->swap(collected);
  return ret_words;
}
|
110
|
+
|
111
|
+
// Segments and tags `str`, rendering the result as a single string.
//
// str - text to analyse.
//
// Each word is written as its surface followed by "/<tag>" for every tag
// level, using the first candidate of that level, and is terminated by a
// single space (so the result ends with a space when there is at least one
// word). A level with no candidate contributes a bare "/" so that the
// remaining fields keep their positions.
string Mykytea::getTagsToString(string str)
{
  KyteaString surface_string = util->mapString(str);
  KyteaSentence sentence(surface_string, util->normalize(surface_string));
  kytea->calculateWS(sentence);

  for(int i = 0; i < config->getNumTags(); i++)
    kytea->calculateTags(sentence,i);

  const KyteaSentence::Words & words = sentence.words;

  string ret_str;
  for(size_t i = 0; i < words.size(); i++) {
    ret_str += util->showString(words[i].surface);
    for(size_t j = 0; j < words[i].tags.size(); j++) {
      ret_str += "/";
      // Guard the [0] access: a level without candidates must not be indexed.
      if(words[i].tags[j].size() > 0)
        ret_str += util->showString(words[i].tags[j][0].first);
    }
    ret_str += " ";
  }
  return ret_str;
}
|
data/ext/mykytea.hpp
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
// mykytea.hpp
|
2
|
+
#ifndef MYKYTEA_HPP
|
3
|
+
#define MYKYTEA_HPP
|
4
|
+
|
5
|
+
#include <vector>
|
6
|
+
#include <string>
|
7
|
+
#include <map>
|
8
|
+
#include <cstdlib>
|
9
|
+
|
10
|
+
#include <kytea/kytea.h>
|
11
|
+
#include <kytea/kytea-struct.h>
|
12
|
+
#include <kytea/string-util.h>
|
13
|
+
|
14
|
+
using namespace std;
|
15
|
+
using namespace kytea;
|
16
|
+
|
17
|
+
// Tag candidates of one word: the outer vector has one entry per tag level,
// the inner vector lists that level's (tag text, score) pairs.
typedef std::vector< std::vector< std::pair<std::string, double> > > tags;

// One analysed word: its surface form together with its tag candidates.
struct Tags{
  std::string surface;  // the word as it appears in the text
  tags tag;             // candidates, indexed by tag level
};
|
23
|
+
|
24
|
+
class Mykytea
|
25
|
+
{
|
26
|
+
public:
|
27
|
+
|
28
|
+
Mykytea(char* str);
|
29
|
+
~Mykytea();
|
30
|
+
|
31
|
+
//単語境界を取得する
|
32
|
+
|
33
|
+
vector<string>* getWS(string str);
|
34
|
+
|
35
|
+
//尤もらしいタグを取得する
|
36
|
+
|
37
|
+
vector<Tags>* getTags(string str);
|
38
|
+
|
39
|
+
//すべてのタグを取得する
|
40
|
+
|
41
|
+
vector<Tags>* getAllTags(string str);
|
42
|
+
|
43
|
+
//タグを文字列で取得する
|
44
|
+
|
45
|
+
string getTagsToString(string str);
|
46
|
+
|
47
|
+
private:
|
48
|
+
Kytea* kytea;
|
49
|
+
StringUtil* util;
|
50
|
+
KyteaConfig* config;
|
51
|
+
};
|
52
|
+
#endif
|
data/ext/mykytea.i
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
%module Mykytea
|
2
|
+
%include "stl.i"
|
3
|
+
%include "exception.i"
|
4
|
+
|
5
|
+
|
6
|
+
%{
|
7
|
+
#include "mykytea.hpp"
|
8
|
+
%}
|
9
|
+
|
10
|
+
|
11
|
+
namespace std {
|
12
|
+
%template(StringVector) vector<string>;
|
13
|
+
%template(Pairsd) pair<string, double>;
|
14
|
+
%template(PairVector) vector< pair<string, double> >;
|
15
|
+
%template(PairVectorVector) vector< vector< pair<string, double> > >;
|
16
|
+
%template(TagsVector) vector<Tags>;
|
17
|
+
|
18
|
+
}
|
19
|
+
|
20
|
+
%newobject getWS;
|
21
|
+
%newobject getTags;
|
22
|
+
%newobject getAllTags;
|
23
|
+
|
24
|
+
%exception{
|
25
|
+
try{
|
26
|
+
$action
|
27
|
+
} catch (const std::exception &e){
|
28
|
+
SWIG_exception(SWIG_RuntimeError, e.what() );
|
29
|
+
} catch (...) {
|
30
|
+
SWIG_exception(SWIG_UnknownError, "Unknown exception");
|
31
|
+
}
|
32
|
+
}
|
33
|
+
|
34
|
+
%include kytea/kytea.h
|
35
|
+
%include kytea/kytea-struct.h
|
36
|
+
%include mykytea.hpp
|
37
|
+
|