sooth 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 27347b90bbbfd21db684190405f633f9932cff4a
4
- data.tar.gz: 122ebc71eefabd6d1d95ad8ef15712033bcb9b9f
3
+ metadata.gz: 1bd89c2dd37bdeec58eb72ecca31a1075dd28350
4
+ data.tar.gz: 8bc31931e77993880f3ff5f516a362933c061f8d
5
5
  SHA512:
6
- metadata.gz: 4462abb25b5f6c0a719be89b2e20cfe477f990757a54ae69f427cc3280938178f89cdfab01b2e9e605d414b83d17702e6a764c524669fec0b7204a85efad7db0
7
- data.tar.gz: fccf92fb019587081ed45645cd2effe4028e81bdd69ee3e38b1528b5e1eca20d1f2d4dc455f2c17b915345c4e3fbaf7f340b627e6fc2f2fe014a58a2c42b7cc5
6
+ metadata.gz: 3c79c5b70b6ee7df2e90b2b1fcaabaeae98da17dd8bc0c0338292b5b5ca00bcdd25b9640e1ed6599be982fce7704e1d79803adaded651dd417cb03e9227fc521
7
+ data.tar.gz: 9cb14586481824dc2d8b6662d0e5d31c27c5b00c1a9932fa1c4ccc4cd5ddf5753b65d403a27c604a518fa11c5af4206be0ca7fcfbf5ad823784ead1f97d1496c
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.0
1
+ 0.5.0
@@ -35,6 +35,12 @@ void method_sooth_native_deallocate(void * predictor);
35
35
  * def select(bigram, limit)
36
36
  * # (native code)
37
37
  * end
38
+ * def uncertainty(bigram)
39
+ * # (native code)
40
+ * end
41
+ * def surprise(bigram, symbol)
42
+ * # (native code)
43
+ * end
38
44
  * end
39
45
  * end
40
46
  *
@@ -106,6 +112,32 @@ VALUE method_sooth_native_count(VALUE self, VALUE bigram);
106
112
  */
107
113
  VALUE method_sooth_native_select(VALUE self, VALUE bigram, VALUE limit);
108
114
 
115
+ /*
116
+ * Return a number indicating how uncertain the predictor is about which symbol
117
+ * is likely to be observed after the given bigram. Note that nil will be
118
+ * returned if the bigram has never been observed.
119
+ *
120
+ * @param [Array] bigram A pair of symbols.
121
+ * @return [Float, nil] The uncertainty, which is calculated to be the Shannon entropy
122
+ * of the probability distribution over the alphabet of symbols
123
+ * in the context of the bigram.
124
+ */
125
+ VALUE method_sooth_native_uncertainty(VALUE self, VALUE bigram);
126
+
127
+ /*
128
+ * Return a number indicating the surprise received by the predictor when it
129
+ * observed the given symbol after the given bigram. Note that nil will be
130
+ * returned if the symbol has never been observed after the bigram.
131
+ *
132
+ * @param [Array] bigram A pair of symbols.
133
+ * @param [Fixnum] symbol The symbol that has been observed.
134
+ * @return [Float, nil] The surprise, which is calculated to be the Shannon
135
+ * self-information (-log2 of the relative frequency) of the symbol
136
+ * according to the probability distribution over the alphabet of
137
+ * symbols in the context of the bigram.
138
+ */
139
+ VALUE method_sooth_native_surprise(VALUE self, VALUE bigram, VALUE symbol);
140
+
109
141
  //------------------------------------------------------------------------------
110
142
 
111
143
  void Init_sooth_native()
@@ -123,6 +155,8 @@ void Init_sooth_native()
123
155
  rb_define_method(SoothNative, "observe", method_sooth_native_observe, 2);
124
156
  rb_define_method(SoothNative, "count", method_sooth_native_count, 1);
125
157
  rb_define_method(SoothNative, "select", method_sooth_native_select, 2);
158
+ rb_define_method(SoothNative, "uncertainty", method_sooth_native_uncertainty, 1);
159
+ rb_define_method(SoothNative, "surprise", method_sooth_native_surprise, 2);
126
160
  }
127
161
 
128
162
  //------------------------------------------------------------------------------
@@ -258,4 +292,51 @@ method_sooth_native_select(VALUE self, VALUE bigram, VALUE limit)
258
292
  return UINT2NUM(symbol);
259
293
  }
260
294
 
295
+ //------------------------------------------------------------------------------
296
+
297
+ VALUE
298
+ method_sooth_native_uncertainty(VALUE self, VALUE bigram)
299
+ {
300
+ SoothPredictor * predictor = NULL;
301
+ Check_Type(bigram, T_ARRAY);
302
+ if (RARRAY_LEN(bigram) != 2)
303
+ {
304
+ rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
305
+ }
306
+ Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
307
+ Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
308
+ Data_Get_Struct(self, SoothPredictor, predictor);
309
+ uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
310
+ double uncertainty = sooth_predictor_uncertainty(predictor, c_bigram);
311
+ if (uncertainty < 0)
312
+ {
313
+ return Qnil;
314
+ }
315
+ return DBL2NUM(uncertainty);
316
+ }
317
+
318
+ //------------------------------------------------------------------------------
319
+
320
+ VALUE
321
+ method_sooth_native_surprise(VALUE self, VALUE bigram, VALUE symbol)
322
+ {
323
+ SoothPredictor * predictor = NULL;
324
+ Check_Type(symbol, T_FIXNUM);
325
+ Check_Type(bigram, T_ARRAY);
326
+ if (RARRAY_LEN(bigram) != 2)
327
+ {
328
+ rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
329
+ }
330
+ Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
331
+ Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
332
+ Data_Get_Struct(self, SoothPredictor, predictor);
333
+ uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
334
+ double surprise = sooth_predictor_surprise(predictor, c_bigram, NUM2UINT(symbol));
335
+ if (surprise < 0)
336
+ {
337
+ return Qnil;
338
+ }
339
+ return DBL2NUM(surprise);
340
+ }
341
+
261
342
  //==============================================================================
@@ -3,6 +3,7 @@
3
3
  #include <stdio.h>
4
4
  #include <stdlib.h>
5
5
  #include <string.h>
6
+ #include <math.h>
6
7
 
7
8
  #include "sooth_predictor.h"
8
9
 
@@ -337,4 +338,52 @@ sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint32_t
337
338
  return predictor->error_symbol;
338
339
  }
339
340
 
341
+ //------------------------------------------------------------------------------
342
+
343
+ double
344
+ sooth_predictor_uncertainty(SoothPredictor * predictor, uint32_t bigram[2])
345
+ {
346
+ SoothContext * context = sooth_predictor_find_context(predictor, bigram);
347
+
348
+ if (context == NULL || context->count == 0)
349
+ {
350
+ return -1;
351
+ }
352
+
353
+ double uncertainty = 0.0;
354
+ for (uint32_t i = 0; i < context->statistics_size; ++i)
355
+ {
356
+ if (context->statistics[i].count > 0)
357
+ {
358
+ double frequency = (double)context->statistics[i].count / (double)context->count;
359
+ uncertainty -= frequency * log2(frequency);
360
+ }
361
+ }
362
+
363
+ return uncertainty;
364
+ }
365
+
366
+ //------------------------------------------------------------------------------
367
+
368
+ double
369
+ sooth_predictor_surprise(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol)
370
+ {
371
+ SoothContext * context = sooth_predictor_find_context(predictor, bigram);
372
+
373
+ if (context == NULL || context->count == 0)
374
+ {
375
+ return -1;
376
+ }
377
+
378
+ SoothStatistic * statistic = sooth_predictor_find_statistic(context, symbol);
379
+
380
+ if (statistic == NULL || statistic->count == 0)
381
+ {
382
+ return -1;
383
+ }
384
+
385
+ double frequency = (double)statistic->count / (double)context->count;
386
+ return -log2(frequency);
387
+ }
388
+
340
389
  //==============================================================================
@@ -26,6 +26,8 @@ bool sooth_predictor_save(const char * const filename, SoothPredictor * predicto
26
26
  uint32_t sooth_predictor_observe(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol);
27
27
  uint32_t sooth_predictor_count(SoothPredictor * predictor, uint32_t bigram[2]);
28
28
  uint32_t sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint32_t limit);
29
+ double sooth_predictor_uncertainty(SoothPredictor * predictor, uint32_t bigram[2]);
30
+ double sooth_predictor_surprise(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol);
29
31
 
30
32
  //==============================================================================
31
33
 
data/sooth.gemspec CHANGED
@@ -2,12 +2,12 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: sooth 0.4.0 ruby lib
5
+ # stub: sooth 0.5.0 ruby lib
6
6
  # stub: ext/sooth_native/extconf.rb
7
7
 
8
8
  Gem::Specification.new do |s|
9
9
  s.name = "sooth"
10
- s.version = "0.4.0"
10
+ s.version = "0.5.0"
11
11
 
12
12
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
13
13
  s.require_paths = ["lib"]
@@ -134,4 +134,45 @@ describe Sooth::Predictor do
134
134
  end
135
135
 
136
136
  end
137
+
138
+ describe "#uncertainty" do
139
+
140
+ it "has no uncertainty for a new context" do
141
+ expect(predictor.uncertainty([1, 2])).to be_nil
142
+ expect(predictor.count([1, 2])).to eq(0)
143
+ expect(predictor.uncertainty([1, 2])).to be_nil
144
+ end
145
+
146
+ it "has zero uncertainty for a lone context" do
147
+ predictor.observe([1, 2], 3)
148
+ expect(predictor.uncertainty([1, 2])).to eq(0)
149
+ end
150
+
151
+ it "has maximal uncertainty for a uniform distribution" do
152
+ (1..256).each { |i| predictor.observe([1, 2], i) }
153
+ expect(predictor.uncertainty([1, 2])).to eq(8)
154
+ end
155
+
156
+ end
157
+
158
+ describe "#surprise" do
159
+
160
+ it "has no surprise for a new context or symbol" do
161
+ expect(predictor.surprise([1, 2], 3)).to be_nil
162
+ expect(predictor.count([1, 2])).to eq(0)
163
+ expect(predictor.surprise([1, 2], 3)).to be_nil
164
+ end
165
+
166
+ it "has zero surprise for a lone context" do
167
+ predictor.observe([1, 2], 3)
168
+ expect(predictor.surprise([1, 2], 3)).to eq(0)
169
+ end
170
+
171
+ it "has uniform surprise for a uniform distribution" do
172
+ (1..256).each { |i| predictor.observe([1, 2], i) }
173
+ expect(predictor.surprise([1, 2], 3)).to eq(8)
174
+ end
175
+
176
+ end
177
+
137
178
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sooth
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jason Hutchens