bayon 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. data/README +9 -0
  2. data/ext/bayonext.cpp +37 -6
  3. data/lib/bayon.rb +15 -1
  4. metadata +2 -2
data/README CHANGED
@@ -31,6 +31,15 @@ gem install bayon
31
31
  result.each do |labels|
32
32
  puts labels.join(', ')
33
33
  end
34
+
35
+ docs.output_similairty_point = true
36
+ result = docs.do_clustering
37
+
38
+ result.each do |label_points|
39
+ puts label_points.map {|label, point|
40
+ "#{label}(#{point})"
41
+ }.join(', ')
42
+ end
34
43
 
35
44
  == Project Page
36
45
 
data/ext/bayonext.cpp CHANGED
@@ -85,6 +85,8 @@ VALUE CBayonDocument::rb_cBayonDocument = Qnil;
85
85
  class CBayonAnalyzer {
86
86
  bayon::Analyzer* analyzer_;
87
87
 
88
+ bool output_similairty_point_;
89
+
88
90
  static void free(CBayonAnalyzer *p) {
89
91
  if (p->analyzer_) {
90
92
  delete p->analyzer_;
@@ -107,6 +109,7 @@ class CBayonAnalyzer {
107
109
 
108
110
  Data_Get_Struct(self, CBayonAnalyzer, p);
109
111
  p->analyzer_ = new bayon::Analyzer;
112
+ p->output_similairty_point_ = false;
110
113
 
111
114
  return Qnil;
112
115
  }
@@ -144,6 +147,24 @@ class CBayonAnalyzer {
144
147
  return Qnil;
145
148
  }
146
149
 
150
+ static VALUE set_output_similairty_point(VALUE self, VALUE v_output) {
151
+ CBayonAnalyzer *p;
152
+ bool output = false;
153
+
154
+ if (TYPE(v_output) == T_TRUE) {
155
+ output = true;
156
+ } else if (TYPE(v_output) == T_FALSE) {
157
+ output = false;
158
+ } else {
159
+ rb_raise(rb_eTypeError, "wrong argument type %s (expected boolean value)");
160
+ }
161
+
162
+ Data_Get_Struct(self, CBayonAnalyzer, p);
163
+ p->output_similairty_point_ = output;
164
+
165
+ return Qnil;
166
+ }
167
+
147
168
  static VALUE do_clustering(VALUE self, VALUE v_method) {
148
169
  CBayonAnalyzer *p;
149
170
 
@@ -156,18 +177,27 @@ class CBayonAnalyzer {
156
177
  }
157
178
 
158
179
  static VALUE get_next_result(VALUE self) {
180
+ typedef std::vector< std::pair<bayon::Document *, double> > documents;
159
181
  CBayonAnalyzer *p;
160
182
  bayon::Cluster cluster;
161
183
 
162
184
  Data_Get_Struct(self, CBayonAnalyzer, p);
163
185
 
164
186
  if(p->analyzer_->get_next_result(cluster)) {
165
- const std::vector<bayon::Document *> documents = cluster.documents();
166
- VALUE docids = rb_ary_new2(documents.size());
167
-
168
- for(std::vector<bayon::Document *>::const_iterator i = documents.begin(); i != documents.end(); i++) {
169
- bayon::Document* doc = *i;
170
- rb_ary_push(docids, LONG2NUM(doc->id()));
187
+ std::vector< std::pair<bayon::Document *, double> > pairs;
188
+ cluster.sorted_documents(pairs);
189
+ VALUE docids = rb_ary_new2(pairs.size());
190
+
191
+ for(documents::const_iterator i = pairs.begin(); i != pairs.end(); i++) {
192
+ bayon::Document* doc = i->first;
193
+ double point = i->second;
194
+
195
+ if (p->output_similairty_point_) {
196
+ VALUE docid_points = rb_ary_new3(2, LONG2NUM(doc->id()), rb_float_new(point));
197
+ rb_ary_push(docids, docid_points);
198
+ } else {
199
+ rb_ary_push(docids, LONG2NUM(doc->id()));
200
+ }
171
201
  }
172
202
 
173
203
  return docids;
@@ -185,6 +215,7 @@ public:
185
215
  rb_define_method(rb_cBayonAnalyzer, "add_document", __F(&add_document), 1);
186
216
  rb_define_method(rb_cBayonAnalyzer, "set_cluster_size_limit", __F(&set_cluster_size_limit), 1);
187
217
  rb_define_method(rb_cBayonAnalyzer, "set_eval_limit", __F(&set_eval_limit), 1);
218
+ rb_define_method(rb_cBayonAnalyzer, "set_output_similairty_point", __F(&set_output_similairty_point), 1);
188
219
  rb_define_method(rb_cBayonAnalyzer, "do_clustering", __F(&do_clustering), 1);
189
220
  rb_define_method(rb_cBayonAnalyzer, "get_next_result", __F(&get_next_result), 0);
190
221
 
data/lib/bayon.rb CHANGED
@@ -6,6 +6,7 @@ module Bayon
6
6
  @documents = []
7
7
  @cluster_size_limit = nil
8
8
  @eval_limit = nil
9
+ @output_similairty_point = nil
9
10
  end
10
11
 
11
12
  def cluster_size_limit=(limit)
@@ -24,6 +25,14 @@ module Bayon
24
25
  @eval_limit = limit
25
26
  end
26
27
 
28
# Turns similarity-point output on or off for later clustering runs.
#
# The value is stored in @output_similairty_point. do_clustering reads it to
# decide whether each cluster entry is a bare label or a [label, point] pair.
# The method name keeps the historical "similairty" spelling for
# compatibility with existing callers.
#
# output:: must be exactly +true+ or +false+.
#
# Raises TypeError when +output+ is anything other than +true+ or +false+.
def output_similairty_point=(output)
  unless output.instance_of?(TrueClass) || output.instance_of?(FalseClass)
    # Report the class of the rejected argument itself; referring to an
    # undefined local here would raise NameError instead of TypeError.
    raise TypeError, "wrong argument type #{output.class} (expected boolean value)"
  end

  @output_similairty_point = output
end
35
+
27
36
  def add_document(label, features)
28
37
  unless features.kind_of?(Hash)
29
38
  raise TypeError, "wrong argument type #{limit.class} (expected Hash)"
@@ -40,6 +49,7 @@ module Bayon
40
49
  analyzer = Analyzer.new
41
50
  analyzer.set_cluster_size_limit(@cluster_size_limit) if @cluster_size_limit
42
51
  analyzer.set_eval_limit(@eval_limit) if @eval_limit
52
+ analyzer.set_output_similairty_point(@output_similairty_point) if @output_similairty_point
43
53
 
44
54
  feature_set = []
45
55
 
@@ -59,7 +69,11 @@ module Bayon
59
69
  result = []
60
70
 
61
71
  while (cluster = analyzer.get_next_result)
62
- result << cluster.map {|doc_id| @documents[doc_id][0] }
72
+ if @output_similairty_point
73
+ result << cluster.map {|doc_id, point| [@documents[doc_id][0], point] }
74
+ else
75
+ result << cluster.map {|doc_id| @documents[doc_id][0] }
76
+ end
63
77
  end
64
78
 
65
79
  return result
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bayon
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - winebarrel
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-14 00:00:00 +09:00
12
+ date: 2009-06-24 00:00:00 +09:00
13
13
  default_executable:
14
14
  dependencies: []
15
15