sooth 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 27347b90bbbfd21db684190405f633f9932cff4a
4
- data.tar.gz: 122ebc71eefabd6d1d95ad8ef15712033bcb9b9f
3
+ metadata.gz: 1bd89c2dd37bdeec58eb72ecca31a1075dd28350
4
+ data.tar.gz: 8bc31931e77993880f3ff5f516a362933c061f8d
5
5
  SHA512:
6
- metadata.gz: 4462abb25b5f6c0a719be89b2e20cfe477f990757a54ae69f427cc3280938178f89cdfab01b2e9e605d414b83d17702e6a764c524669fec0b7204a85efad7db0
7
- data.tar.gz: fccf92fb019587081ed45645cd2effe4028e81bdd69ee3e38b1528b5e1eca20d1f2d4dc455f2c17b915345c4e3fbaf7f340b627e6fc2f2fe014a58a2c42b7cc5
6
+ metadata.gz: 3c79c5b70b6ee7df2e90b2b1fcaabaeae98da17dd8bc0c0338292b5b5ca00bcdd25b9640e1ed6599be982fce7704e1d79803adaded651dd417cb03e9227fc521
7
+ data.tar.gz: 9cb14586481824dc2d8b6662d0e5d31c27c5b00c1a9932fa1c4ccc4cd5ddf5753b65d403a27c604a518fa11c5af4206be0ca7fcfbf5ad823784ead1f97d1496c
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.0
1
+ 0.5.0
@@ -35,6 +35,12 @@ void method_sooth_native_deallocate(void * predictor);
35
35
  * def select(bigram, limit)
36
36
  * # (native code)
37
37
  * end
38
+ * def uncertainty(bigram)
39
+ * # (native code)
40
+ * end
41
+ * def surprise(bigram, symbol)
42
+ * # (native code)
43
+ * end
38
44
  * end
39
45
  * end
40
46
  *
@@ -106,6 +112,32 @@ VALUE method_sooth_native_count(VALUE self, VALUE bigram);
106
112
  */
107
113
  VALUE method_sooth_native_select(VALUE self, VALUE bigram, VALUE limit);
108
114
 
115
+ /*
116
+ * Return a number indicating how uncertain the predictor is about which symbol
117
+ * is likely to be observed after the given bigram. Note that nil will be
118
+ * returned if the bigram has never been observed.
119
+ *
120
+ * @param [Array] bigram A pair of symbols.
121
+ * @return [Float] The uncertainty, which is calculated to be the Shannon entropy
122
+ * of the probability distribution over the alphabet of symbols
123
+ * in the context of the bigram.
124
+ */
125
+ VALUE method_sooth_native_uncertainty(VALUE self, VALUE bigram);
126
+
127
+ /*
128
+ * Return a number indicating the surprise received by the predictor when it
129
+ * observed the given symbol after the given bigram. Note that nil will be
130
+ * returned if the symbol has never been observed after the bigram.
131
+ *
132
+ * @param [Array] bigram A pair of symbols.
133
+ * @param [Fixnum] symbol The symbol that has been observed.
134
+ * @return [Float] The surprise, which is calculated to be the Shannon
135
+ * self-information (surprisal) of the symbol according to the probability
136
+ * distribution over the alphabet of symbols in the context of
137
+ * the bigram.
138
+ */
139
+ VALUE method_sooth_native_surprise(VALUE self, VALUE bigram, VALUE limit);
140
+
109
141
  //------------------------------------------------------------------------------
110
142
 
111
143
  void Init_sooth_native()
@@ -123,6 +155,8 @@ void Init_sooth_native()
123
155
  rb_define_method(SoothNative, "observe", method_sooth_native_observe, 2);
124
156
  rb_define_method(SoothNative, "count", method_sooth_native_count, 1);
125
157
  rb_define_method(SoothNative, "select", method_sooth_native_select, 2);
158
+ rb_define_method(SoothNative, "uncertainty", method_sooth_native_uncertainty, 1);
159
+ rb_define_method(SoothNative, "surprise", method_sooth_native_surprise, 2);
126
160
  }
127
161
 
128
162
  //------------------------------------------------------------------------------
@@ -258,4 +292,51 @@ method_sooth_native_select(VALUE self, VALUE bigram, VALUE limit)
258
292
  return UINT2NUM(symbol);
259
293
  }
260
294
 
295
+ //------------------------------------------------------------------------------
296
+
297
+ VALUE
298
+ method_sooth_native_uncertainty(VALUE self, VALUE bigram)
299
+ {
300
+ SoothPredictor * predictor = NULL;
301
+ Check_Type(bigram, T_ARRAY);
302
+ if (RARRAY_LEN(bigram) != 2)
303
+ {
304
+ rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
305
+ }
306
+ Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
307
+ Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
308
+ Data_Get_Struct(self, SoothPredictor, predictor);
309
+ uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
310
+ double uncertainty = sooth_predictor_uncertainty(predictor, c_bigram);
311
+ if (uncertainty < 0)
312
+ {
313
+ return Qnil;
314
+ }
315
+ return DBL2NUM(uncertainty);
316
+ }
317
+
318
+ //------------------------------------------------------------------------------
319
+
320
+ VALUE
321
+ method_sooth_native_surprise(VALUE self, VALUE bigram, VALUE symbol)
322
+ {
323
+ SoothPredictor * predictor = NULL;
324
+ Check_Type(symbol, T_FIXNUM);
325
+ Check_Type(bigram, T_ARRAY);
326
+ if (RARRAY_LEN(bigram) != 2)
327
+ {
328
+ rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
329
+ }
330
+ Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
331
+ Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
332
+ Data_Get_Struct(self, SoothPredictor, predictor);
333
+ uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
334
+ double surprise = sooth_predictor_surprise(predictor, c_bigram, NUM2UINT(symbol));
335
+ if (surprise < 0)
336
+ {
337
+ return Qnil;
338
+ }
339
+ return DBL2NUM(surprise);
340
+ }
341
+
261
342
  //==============================================================================
@@ -3,6 +3,7 @@
3
3
  #include <stdio.h>
4
4
  #include <stdlib.h>
5
5
  #include <string.h>
6
+ #include <math.h>
6
7
 
7
8
  #include "sooth_predictor.h"
8
9
 
@@ -337,4 +338,52 @@ sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint32_t
337
338
  return predictor->error_symbol;
338
339
  }
339
340
 
341
+ //------------------------------------------------------------------------------
342
+
343
+ double
344
+ sooth_predictor_uncertainty(SoothPredictor * predictor, uint32_t bigram[2])
345
+ {
346
+ SoothContext * context = sooth_predictor_find_context(predictor, bigram);
347
+
348
+ if (context == NULL || context->count == 0)
349
+ {
350
+ return -1;
351
+ }
352
+
353
+ double uncertainty = 0.0;
354
+ for (uint32_t i = 0; i < context->statistics_size; ++i)
355
+ {
356
+ if (context->statistics[i].count > 0)
357
+ {
358
+ double frequency = (double)context->statistics[i].count / (double)context->count;
359
+ uncertainty -= frequency * log2(frequency);
360
+ }
361
+ }
362
+
363
+ return uncertainty;
364
+ }
365
+
366
+ //------------------------------------------------------------------------------
367
+
368
+ double
369
+ sooth_predictor_surprise(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol)
370
+ {
371
+ SoothContext * context = sooth_predictor_find_context(predictor, bigram);
372
+
373
+ if (context == NULL || context->count == 0)
374
+ {
375
+ return -1;
376
+ }
377
+
378
+ SoothStatistic * statistic = sooth_predictor_find_statistic(context, symbol);
379
+
380
+ if (statistic == NULL || statistic->count == 0)
381
+ {
382
+ return -1;
383
+ }
384
+
385
+ double frequency = (double)statistic->count / (double)context->count;
386
+ return -log2(frequency);
387
+ }
388
+
340
389
  //==============================================================================
@@ -26,6 +26,8 @@ bool sooth_predictor_save(const char * const filename, SoothPredictor * predicto
26
26
  uint32_t sooth_predictor_observe(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol);
27
27
  uint32_t sooth_predictor_count(SoothPredictor * predictor, uint32_t bigram[2]);
28
28
  uint32_t sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint32_t limit);
29
+ double sooth_predictor_uncertainty(SoothPredictor * predictor, uint32_t bigram[2]);
30
+ double sooth_predictor_surprise(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol);
29
31
 
30
32
  //==============================================================================
31
33
 
data/sooth.gemspec CHANGED
@@ -2,12 +2,12 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: sooth 0.4.0 ruby lib
5
+ # stub: sooth 0.5.0 ruby lib
6
6
  # stub: ext/sooth_native/extconf.rb
7
7
 
8
8
  Gem::Specification.new do |s|
9
9
  s.name = "sooth"
10
- s.version = "0.4.0"
10
+ s.version = "0.5.0"
11
11
 
12
12
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
13
13
  s.require_paths = ["lib"]
@@ -134,4 +134,45 @@ describe Sooth::Predictor do
134
134
  end
135
135
 
136
136
  end
137
+
138
+ describe "#uncertainty" do
139
+
140
+ it "has no uncertainty for a new context" do
141
+ expect(predictor.uncertainty([1, 2])).to be_nil
142
+ expect(predictor.count([1, 2])).to eq(0)
143
+ expect(predictor.uncertainty([1, 2])).to be_nil
144
+ end
145
+
146
+ it "has zero uncertainty for a lone context" do
147
+ predictor.observe([1, 2], 3)
148
+ expect(predictor.uncertainty([1, 2])).to eq(0)
149
+ end
150
+
151
+ it "has maximal uncertainty for a uniform distribution" do
152
+ (1..256).each { |i| predictor.observe([1, 2], i) }
153
+ expect(predictor.uncertainty([1, 2])).to eq(8)
154
+ end
155
+
156
+ end
157
+
158
+ describe "#surprise" do
159
+
160
+ it "has no surprise for a new context or symbol" do
161
+ expect(predictor.surprise([1, 2], 3)).to be_nil
162
+ expect(predictor.count([1, 2])).to eq(0)
163
+ expect(predictor.surprise([1, 2], 3)).to be_nil
164
+ end
165
+
166
+ it "has zero surprise for a lone context" do
167
+ predictor.observe([1, 2], 3)
168
+ expect(predictor.surprise([1, 2], 3)).to eq(0)
169
+ end
170
+
171
+ it "has uniform surprise for a uniform distribution" do
172
+ (1..256).each { |i| predictor.observe([1, 2], i) }
173
+ expect(predictor.surprise([1, 2], 3)).to eq(8)
174
+ end
175
+
176
+ end
177
+
137
178
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sooth
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jason Hutchens