bayon 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. data/README +9 -0
  2. data/ext/bayonext.cpp +37 -6
  3. data/lib/bayon.rb +15 -1
  4. metadata +2 -2
data/README CHANGED
@@ -31,6 +31,15 @@ gem install bayon
31
31
  result.each do |labels|
32
32
  puts labels.join(', ')
33
33
  end
34
+
35
+ docs.output_similairty_point = true
36
+ result = docs.do_clustering
37
+
38
+ result.each do |label_points|
39
+ puts label_points.map {|label, point|
40
+ "#{label}(#{point})"
41
+ }.join(', ')
42
+ end
34
43
 
35
44
  == Project Page
36
45
 
data/ext/bayonext.cpp CHANGED
@@ -85,6 +85,8 @@ VALUE CBayonDocument::rb_cBayonDocument = Qnil;
85
85
  class CBayonAnalyzer {
86
86
  bayon::Analyzer* analyzer_;
87
87
 
88
+ bool output_similairty_point_;
89
+
88
90
  static void free(CBayonAnalyzer *p) {
89
91
  if (p->analyzer_) {
90
92
  delete p->analyzer_;
@@ -107,6 +109,7 @@ class CBayonAnalyzer {
107
109
 
108
110
  Data_Get_Struct(self, CBayonAnalyzer, p);
109
111
  p->analyzer_ = new bayon::Analyzer;
112
+ p->output_similairty_point_ = false;
110
113
 
111
114
  return Qnil;
112
115
  }
@@ -144,6 +147,24 @@ class CBayonAnalyzer {
144
147
  return Qnil;
145
148
  }
146
149
 
150
+ static VALUE set_output_similairty_point(VALUE self, VALUE v_output) {
151
+ CBayonAnalyzer *p;
152
+ bool output = false;
153
+
154
+ if (TYPE(v_output) == T_TRUE) {
155
+ output = true;
156
+ } else if (TYPE(v_output) == T_FALSE) {
157
+ output = false;
158
+ } else {
159
+ rb_raise(rb_eTypeError, "wrong argument type %s (expected boolean value)");
160
+ }
161
+
162
+ Data_Get_Struct(self, CBayonAnalyzer, p);
163
+ p->output_similairty_point_ = output;
164
+
165
+ return Qnil;
166
+ }
167
+
147
168
  static VALUE do_clustering(VALUE self, VALUE v_method) {
148
169
  CBayonAnalyzer *p;
149
170
 
@@ -156,18 +177,27 @@ class CBayonAnalyzer {
156
177
  }
157
178
 
158
179
  static VALUE get_next_result(VALUE self) {
180
+ typedef std::vector< std::pair<bayon::Document *, double> > documents;
159
181
  CBayonAnalyzer *p;
160
182
  bayon::Cluster cluster;
161
183
 
162
184
  Data_Get_Struct(self, CBayonAnalyzer, p);
163
185
 
164
186
  if(p->analyzer_->get_next_result(cluster)) {
165
- const std::vector<bayon::Document *> documents = cluster.documents();
166
- VALUE docids = rb_ary_new2(documents.size());
167
-
168
- for(std::vector<bayon::Document *>::const_iterator i = documents.begin(); i != documents.end(); i++) {
169
- bayon::Document* doc = *i;
170
- rb_ary_push(docids, LONG2NUM(doc->id()));
187
+ std::vector< std::pair<bayon::Document *, double> > pairs;
188
+ cluster.sorted_documents(pairs);
189
+ VALUE docids = rb_ary_new2(pairs.size());
190
+
191
+ for(documents::const_iterator i = pairs.begin(); i != pairs.end(); i++) {
192
+ bayon::Document* doc = i->first;
193
+ double point = i->second;
194
+
195
+ if (p->output_similairty_point_) {
196
+ VALUE docid_points = rb_ary_new3(2, LONG2NUM(doc->id()), rb_float_new(point));
197
+ rb_ary_push(docids, docid_points);
198
+ } else {
199
+ rb_ary_push(docids, LONG2NUM(doc->id()));
200
+ }
171
201
  }
172
202
 
173
203
  return docids;
@@ -185,6 +215,7 @@ public:
185
215
  rb_define_method(rb_cBayonAnalyzer, "add_document", __F(&add_document), 1);
186
216
  rb_define_method(rb_cBayonAnalyzer, "set_cluster_size_limit", __F(&set_cluster_size_limit), 1);
187
217
  rb_define_method(rb_cBayonAnalyzer, "set_eval_limit", __F(&set_eval_limit), 1);
218
+ rb_define_method(rb_cBayonAnalyzer, "set_output_similairty_point", __F(&set_output_similairty_point), 1);
188
219
  rb_define_method(rb_cBayonAnalyzer, "do_clustering", __F(&do_clustering), 1);
189
220
  rb_define_method(rb_cBayonAnalyzer, "get_next_result", __F(&get_next_result), 0);
190
221
 
data/lib/bayon.rb CHANGED
@@ -6,6 +6,7 @@ module Bayon
6
6
  @documents = []
7
7
  @cluster_size_limit = nil
8
8
  @eval_limit = nil
9
+ @output_similairty_point = nil
9
10
  end
10
11
 
11
12
  def cluster_size_limit=(limit)
@@ -24,6 +25,14 @@ module Bayon
24
25
  @eval_limit = limit
25
26
  end
26
27
 
28
# Sets whether clustering results should carry a point value next to
# each label.
#
# output - must be exactly +true+ or +false+.
#
# Stores the flag in @output_similairty_point.
# Raises TypeError when +output+ is not a boolean.
def output_similairty_point=(output)
  unless output.instance_of?(TrueClass) or output.instance_of?(FalseClass)
    # Report the class of the argument actually received; the method has no
    # local named `limit`, so interpolating it would raise NameError instead
    # of the intended TypeError.
    raise TypeError, "wrong argument type #{output.class} (expected boolean value)"
  end

  @output_similairty_point = output
end
35
+
27
36
  def add_document(label, features)
28
37
  unless features.kind_of?(Hash)
29
38
  raise TypeError, "wrong argument type #{limit.class} (expected Hash)"
@@ -40,6 +49,7 @@ module Bayon
40
49
  analyzer = Analyzer.new
41
50
  analyzer.set_cluster_size_limit(@cluster_size_limit) if @cluster_size_limit
42
51
  analyzer.set_eval_limit(@eval_limit) if @eval_limit
52
+ analyzer.set_output_similairty_point(@output_similairty_point) if @output_similairty_point
43
53
 
44
54
  feature_set = []
45
55
 
@@ -59,7 +69,11 @@ module Bayon
59
69
  result = []
60
70
 
61
71
  while (cluster = analyzer.get_next_result)
62
- result << cluster.map {|doc_id| @documents[doc_id][0] }
72
+ if @output_similairty_point
73
+ result << cluster.map {|doc_id, point| [@documents[doc_id][0], point] }
74
+ else
75
+ result << cluster.map {|doc_id| @documents[doc_id][0] }
76
+ end
63
77
  end
64
78
 
65
79
  return result
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bayon
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - winebarrel
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-14 00:00:00 +09:00
12
+ date: 2009-06-24 00:00:00 +09:00
13
13
  default_executable:
14
14
  dependencies: []
15
15