bayon 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +9 -0
- data/ext/bayonext.cpp +37 -6
- data/lib/bayon.rb +15 -1
- metadata +2 -2
data/README
CHANGED
|
@@ -31,6 +31,15 @@ gem install bayon
|
|
|
31
31
|
result.each do |labels|
|
|
32
32
|
puts labels.join(', ')
|
|
33
33
|
end
|
|
34
|
+
|
|
35
|
+
docs.output_similairty_point = true
|
|
36
|
+
result = docs.do_clustering
|
|
37
|
+
|
|
38
|
+
result.each do |label_points|
|
|
39
|
+
puts label_points.map {|label, point|
|
|
40
|
+
"#{label}(#{point})"
|
|
41
|
+
}.join(', ')
|
|
42
|
+
end
|
|
34
43
|
|
|
35
44
|
== Project Page
|
|
36
45
|
|
data/ext/bayonext.cpp
CHANGED
|
@@ -85,6 +85,8 @@ VALUE CBayonDocument::rb_cBayonDocument = Qnil;
|
|
|
85
85
|
class CBayonAnalyzer {
|
|
86
86
|
bayon::Analyzer* analyzer_;
|
|
87
87
|
|
|
88
|
+
bool output_similairty_point_;
|
|
89
|
+
|
|
88
90
|
static void free(CBayonAnalyzer *p) {
|
|
89
91
|
if (p->analyzer_) {
|
|
90
92
|
delete p->analyzer_;
|
|
@@ -107,6 +109,7 @@ class CBayonAnalyzer {
|
|
|
107
109
|
|
|
108
110
|
Data_Get_Struct(self, CBayonAnalyzer, p);
|
|
109
111
|
p->analyzer_ = new bayon::Analyzer;
|
|
112
|
+
p->output_similairty_point_ = false;
|
|
110
113
|
|
|
111
114
|
return Qnil;
|
|
112
115
|
}
|
|
@@ -144,6 +147,24 @@ class CBayonAnalyzer {
|
|
|
144
147
|
return Qnil;
|
|
145
148
|
}
|
|
146
149
|
|
|
150
|
+
/**
 * Ruby-visible setter for the analyzer's "output similarity point" flag.
 *
 * @param self      Ruby object wrapping a CBayonAnalyzer.
 * @param v_output  Must be Ruby `true` or `false`.
 * @return Qnil.
 *
 * Raises TypeError (via rb_raise, which does not return) when v_output is
 * anything other than true/false. The flag is stored in
 * output_similairty_point_ on the wrapped struct.
 */
static VALUE set_output_similairty_point(VALUE self, VALUE v_output) {
  CBayonAnalyzer *p;
  bool output = false;

  if (TYPE(v_output) == T_TRUE) {
    output = true;
  } else if (TYPE(v_output) == T_FALSE) {
    output = false;
  } else {
    // The format string contains a %s conversion, so the offending object's
    // class name has to be passed explicitly; omitting it makes the variadic
    // call read an argument that was never supplied.
    rb_raise(rb_eTypeError, "wrong argument type %s (expected boolean value)", rb_obj_classname(v_output));
  }

  Data_Get_Struct(self, CBayonAnalyzer, p);
  p->output_similairty_point_ = output;

  return Qnil;
}
|
|
167
|
+
|
|
147
168
|
static VALUE do_clustering(VALUE self, VALUE v_method) {
|
|
148
169
|
CBayonAnalyzer *p;
|
|
149
170
|
|
|
@@ -156,18 +177,27 @@ class CBayonAnalyzer {
|
|
|
156
177
|
}
|
|
157
178
|
|
|
158
179
|
static VALUE get_next_result(VALUE self) {
|
|
180
|
+
typedef std::vector< std::pair<bayon::Document *, double> > documents;
|
|
159
181
|
CBayonAnalyzer *p;
|
|
160
182
|
bayon::Cluster cluster;
|
|
161
183
|
|
|
162
184
|
Data_Get_Struct(self, CBayonAnalyzer, p);
|
|
163
185
|
|
|
164
186
|
if(p->analyzer_->get_next_result(cluster)) {
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
187
|
+
std::vector< std::pair<bayon::Document *, double> > pairs;
|
|
188
|
+
cluster.sorted_documents(pairs);
|
|
189
|
+
VALUE docids = rb_ary_new2(pairs.size());
|
|
190
|
+
|
|
191
|
+
for(documents::const_iterator i = pairs.begin(); i != pairs.end(); i++) {
|
|
192
|
+
bayon::Document* doc = i->first;
|
|
193
|
+
double point = i->second;
|
|
194
|
+
|
|
195
|
+
if (p->output_similairty_point_) {
|
|
196
|
+
VALUE docid_points = rb_ary_new3(2, LONG2NUM(doc->id()), rb_float_new(point));
|
|
197
|
+
rb_ary_push(docids, docid_points);
|
|
198
|
+
} else {
|
|
199
|
+
rb_ary_push(docids, LONG2NUM(doc->id()));
|
|
200
|
+
}
|
|
171
201
|
}
|
|
172
202
|
|
|
173
203
|
return docids;
|
|
@@ -185,6 +215,7 @@ public:
|
|
|
185
215
|
rb_define_method(rb_cBayonAnalyzer, "add_document", __F(&add_document), 1);
|
|
186
216
|
rb_define_method(rb_cBayonAnalyzer, "set_cluster_size_limit", __F(&set_cluster_size_limit), 1);
|
|
187
217
|
rb_define_method(rb_cBayonAnalyzer, "set_eval_limit", __F(&set_eval_limit), 1);
|
|
218
|
+
rb_define_method(rb_cBayonAnalyzer, "set_output_similairty_point", __F(&set_output_similairty_point), 1);
|
|
188
219
|
rb_define_method(rb_cBayonAnalyzer, "do_clustering", __F(&do_clustering), 1);
|
|
189
220
|
rb_define_method(rb_cBayonAnalyzer, "get_next_result", __F(&get_next_result), 0);
|
|
190
221
|
|
data/lib/bayon.rb
CHANGED
|
@@ -6,6 +6,7 @@ module Bayon
|
|
|
6
6
|
@documents = []
|
|
7
7
|
@cluster_size_limit = nil
|
|
8
8
|
@eval_limit = nil
|
|
9
|
+
@output_similairty_point = nil
|
|
9
10
|
end
|
|
10
11
|
|
|
11
12
|
def cluster_size_limit=(limit)
|
|
@@ -24,6 +25,14 @@ module Bayon
|
|
|
24
25
|
@eval_limit = limit
|
|
25
26
|
end
|
|
26
27
|
|
|
28
|
+
# Sets the flag that is stored in @output_similairty_point.
#
# output - must be exactly true or false.
#
# Raises TypeError for any other argument; the message names the class of
# the rejected value.
def output_similairty_point=(output)
  unless output.instance_of?(TrueClass) or output.instance_of?(FalseClass)
    # Report the class of the argument actually received. (`limit` is not a
    # variable in this method; referencing it raised NameError instead.)
    raise TypeError, "wrong argument type #{output.class} (expected boolean value)"
  end

  @output_similairty_point = output
end
|
|
35
|
+
|
|
27
36
|
def add_document(label, features)
|
|
28
37
|
unless features.kind_of?(Hash)
|
|
29
38
|
raise TypeError, "wrong argument type #{limit.class} (expected Hash)"
|
|
@@ -40,6 +49,7 @@ module Bayon
|
|
|
40
49
|
analyzer = Analyzer.new
|
|
41
50
|
analyzer.set_cluster_size_limit(@cluster_size_limit) if @cluster_size_limit
|
|
42
51
|
analyzer.set_eval_limit(@eval_limit) if @eval_limit
|
|
52
|
+
analyzer.set_output_similairty_point(@output_similairty_point) if @output_similairty_point
|
|
43
53
|
|
|
44
54
|
feature_set = []
|
|
45
55
|
|
|
@@ -59,7 +69,11 @@ module Bayon
|
|
|
59
69
|
result = []
|
|
60
70
|
|
|
61
71
|
while (cluster = analyzer.get_next_result)
|
|
62
|
-
|
|
72
|
+
if @output_similairty_point
|
|
73
|
+
result << cluster.map {|doc_id, point| [@documents[doc_id][0], point] }
|
|
74
|
+
else
|
|
75
|
+
result << cluster.map {|doc_id| @documents[doc_id][0] }
|
|
76
|
+
end
|
|
63
77
|
end
|
|
64
78
|
|
|
65
79
|
return result
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bayon
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 0.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- winebarrel
|
|
@@ -9,7 +9,7 @@ autorequire:
|
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
11
|
|
|
12
|
-
date: 2009-06-
|
|
12
|
+
date: 2009-06-24 00:00:00 +09:00
|
|
13
13
|
default_executable:
|
|
14
14
|
dependencies: []
|
|
15
15
|
|