bayon 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +9 -0
- data/ext/bayonext.cpp +37 -6
- data/lib/bayon.rb +15 -1
- metadata +2 -2
data/README
CHANGED
@@ -31,6 +31,15 @@ gem install bayon
|
|
31
31
|
result.each do |labels|
|
32
32
|
puts labels.join(', ')
|
33
33
|
end
|
34
|
+
|
35
|
+
docs.output_similairty_point = true
|
36
|
+
result = docs.do_clustering
|
37
|
+
|
38
|
+
result.each do |label_points|
|
39
|
+
puts label_points.map {|label, point|
|
40
|
+
"#{label}(#{point})"
|
41
|
+
}.join(', ')
|
42
|
+
end
|
34
43
|
|
35
44
|
== Project Page
|
36
45
|
|
data/ext/bayonext.cpp
CHANGED
@@ -85,6 +85,8 @@ VALUE CBayonDocument::rb_cBayonDocument = Qnil;
|
|
85
85
|
class CBayonAnalyzer {
|
86
86
|
bayon::Analyzer* analyzer_;
|
87
87
|
|
88
|
+
bool output_similairty_point_;
|
89
|
+
|
88
90
|
static void free(CBayonAnalyzer *p) {
|
89
91
|
if (p->analyzer_) {
|
90
92
|
delete p->analyzer_;
|
@@ -107,6 +109,7 @@ class CBayonAnalyzer {
|
|
107
109
|
|
108
110
|
Data_Get_Struct(self, CBayonAnalyzer, p);
|
109
111
|
p->analyzer_ = new bayon::Analyzer;
|
112
|
+
p->output_similairty_point_ = false;
|
110
113
|
|
111
114
|
return Qnil;
|
112
115
|
}
|
@@ -144,6 +147,24 @@ class CBayonAnalyzer {
|
|
144
147
|
return Qnil;
|
145
148
|
}
|
146
149
|
|
150
|
+
/**
 * Ruby-visible setter for the analyzer's output_similairty_point_ flag.
 *
 * When the flag is true, get_next_result pushes [doc_id, point] pairs
 * instead of bare document ids.
 *
 * @param self      the wrapped CBayonAnalyzer receiver
 * @param v_output  Ruby `true` or `false`; anything else raises TypeError
 * @return Qnil
 */
static VALUE set_output_similairty_point(VALUE self, VALUE v_output) {
  CBayonAnalyzer *p;
  bool output = false;

  if (TYPE(v_output) == T_TRUE) {
    output = true;
  } else if (TYPE(v_output) == T_FALSE) {
    output = false;
  } else {
    // The format string has a %s conversion, so the offending argument's
    // class name must be supplied; omitting it is undefined behaviour.
    rb_raise(rb_eTypeError, "wrong argument type %s (expected boolean value)", rb_obj_classname(v_output));
  }

  Data_Get_Struct(self, CBayonAnalyzer, p);
  p->output_similairty_point_ = output;

  return Qnil;
}
|
167
|
+
|
147
168
|
static VALUE do_clustering(VALUE self, VALUE v_method) {
|
148
169
|
CBayonAnalyzer *p;
|
149
170
|
|
@@ -156,18 +177,27 @@ class CBayonAnalyzer {
|
|
156
177
|
}
|
157
178
|
|
158
179
|
static VALUE get_next_result(VALUE self) {
|
180
|
+
typedef std::vector< std::pair<bayon::Document *, double> > documents;
|
159
181
|
CBayonAnalyzer *p;
|
160
182
|
bayon::Cluster cluster;
|
161
183
|
|
162
184
|
Data_Get_Struct(self, CBayonAnalyzer, p);
|
163
185
|
|
164
186
|
if(p->analyzer_->get_next_result(cluster)) {
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
187
|
+
std::vector< std::pair<bayon::Document *, double> > pairs;
|
188
|
+
cluster.sorted_documents(pairs);
|
189
|
+
VALUE docids = rb_ary_new2(pairs.size());
|
190
|
+
|
191
|
+
for(documents::const_iterator i = pairs.begin(); i != pairs.end(); i++) {
|
192
|
+
bayon::Document* doc = i->first;
|
193
|
+
double point = i->second;
|
194
|
+
|
195
|
+
if (p->output_similairty_point_) {
|
196
|
+
VALUE docid_points = rb_ary_new3(2, LONG2NUM(doc->id()), rb_float_new(point));
|
197
|
+
rb_ary_push(docids, docid_points);
|
198
|
+
} else {
|
199
|
+
rb_ary_push(docids, LONG2NUM(doc->id()));
|
200
|
+
}
|
171
201
|
}
|
172
202
|
|
173
203
|
return docids;
|
@@ -185,6 +215,7 @@ public:
|
|
185
215
|
rb_define_method(rb_cBayonAnalyzer, "add_document", __F(&add_document), 1);
|
186
216
|
rb_define_method(rb_cBayonAnalyzer, "set_cluster_size_limit", __F(&set_cluster_size_limit), 1);
|
187
217
|
rb_define_method(rb_cBayonAnalyzer, "set_eval_limit", __F(&set_eval_limit), 1);
|
218
|
+
rb_define_method(rb_cBayonAnalyzer, "set_output_similairty_point", __F(&set_output_similairty_point), 1);
|
188
219
|
rb_define_method(rb_cBayonAnalyzer, "do_clustering", __F(&do_clustering), 1);
|
189
220
|
rb_define_method(rb_cBayonAnalyzer, "get_next_result", __F(&get_next_result), 0);
|
190
221
|
|
data/lib/bayon.rb
CHANGED
@@ -6,6 +6,7 @@ module Bayon
|
|
6
6
|
@documents = []
|
7
7
|
@cluster_size_limit = nil
|
8
8
|
@eval_limit = nil
|
9
|
+
@output_similairty_point = nil
|
9
10
|
end
|
10
11
|
|
11
12
|
def cluster_size_limit=(limit)
|
@@ -24,6 +25,14 @@ module Bayon
|
|
24
25
|
@eval_limit = limit
|
25
26
|
end
|
26
27
|
|
28
|
+
# Enables or disables similarity points in the clustering result.
#
# output:: must be exactly +true+ or +false+.
#
# Raises TypeError for any other value. The message names the class of the
# rejected argument.
def output_similairty_point=(output)
  unless output.instance_of?(TrueClass) || output.instance_of?(FalseClass)
    # Report the class of the argument actually received; the previous
    # message referenced an undefined local and raised NameError instead.
    raise TypeError, "wrong argument type #{output.class} (expected boolean value)"
  end

  @output_similairty_point = output
end
|
35
|
+
|
27
36
|
def add_document(label, features)
|
28
37
|
unless features.kind_of?(Hash)
|
29
38
|
raise TypeError, "wrong argument type #{limit.class} (expected Hash)"
|
@@ -40,6 +49,7 @@ module Bayon
|
|
40
49
|
analyzer = Analyzer.new
|
41
50
|
analyzer.set_cluster_size_limit(@cluster_size_limit) if @cluster_size_limit
|
42
51
|
analyzer.set_eval_limit(@eval_limit) if @eval_limit
|
52
|
+
analyzer.set_output_similairty_point(@output_similairty_point) if @output_similairty_point
|
43
53
|
|
44
54
|
feature_set = []
|
45
55
|
|
@@ -59,7 +69,11 @@ module Bayon
|
|
59
69
|
result = []
|
60
70
|
|
61
71
|
while (cluster = analyzer.get_next_result)
|
62
|
-
|
72
|
+
if @output_similairty_point
|
73
|
+
result << cluster.map {|doc_id, point| [@documents[doc_id][0], point] }
|
74
|
+
else
|
75
|
+
result << cluster.map {|doc_id| @documents[doc_id][0] }
|
76
|
+
end
|
63
77
|
end
|
64
78
|
|
65
79
|
return result
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bayon
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- winebarrel
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-06-
|
12
|
+
date: 2009-06-24 00:00:00 +09:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|