RubyGems - sooth - Versions diffs - 0.4.0 → 0.5.0 - Mend

sooth 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

checksums.yaml +4 -4
data/VERSION +1 -1
data/ext/sooth_native/native.c +81 -0
data/ext/sooth_native/sooth_predictor.c +49 -0
data/ext/sooth_native/sooth_predictor.h +2 -0
data/sooth.gemspec +2 -2
data/spec/predictor_spec.rb +41 -0
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 27347b90bbbfd21db684190405f633f9932cff4a
-  data.tar.gz: 122ebc71eefabd6d1d95ad8ef15712033bcb9b9f
+  metadata.gz: 1bd89c2dd37bdeec58eb72ecca31a1075dd28350
+  data.tar.gz: 8bc31931e77993880f3ff5f516a362933c061f8d
 SHA512:
-  metadata.gz: 4462abb25b5f6c0a719be89b2e20cfe477f990757a54ae69f427cc3280938178f89cdfab01b2e9e605d414b83d17702e6a764c524669fec0b7204a85efad7db0
-  data.tar.gz: fccf92fb019587081ed45645cd2effe4028e81bdd69ee3e38b1528b5e1eca20d1f2d4dc455f2c17b915345c4e3fbaf7f340b627e6fc2f2fe014a58a2c42b7cc5
+  metadata.gz: 3c79c5b70b6ee7df2e90b2b1fcaabaeae98da17dd8bc0c0338292b5b5ca00bcdd25b9640e1ed6599be982fce7704e1d79803adaded651dd417cb03e9227fc521
+  data.tar.gz: 9cb14586481824dc2d8b6662d0e5d31c27c5b00c1a9932fa1c4ccc4cd5ddf5753b65d403a27c604a518fa11c5af4206be0ca7fcfbf5ad823784ead1f97d1496c

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.4.0
1	+ 0.5.0

data/ext/sooth_native/native.c CHANGED Viewed

@@ -35,6 +35,12 @@ void method_sooth_native_deallocate(void * predictor);
  *       def select(bigram, limit)
  *         # (native code)
  *       end
+ *       def uncertainty(bigram)
+ *         # (native code)
+ *       end
+ *       def surprise(bigram, symbol)
+ *         # (native code)
+ *       end
  *     end
  *   end
  *
@@ -106,6 +112,32 @@ VALUE method_sooth_native_count(VALUE self, VALUE bigram);
  */
 VALUE method_sooth_native_select(VALUE self, VALUE bigram, VALUE limit);
+/*
+ * Return a number indicating how uncertain the predictor is about which symbol
+ * is likely to be observed after the given bigram. Note that nil will be
+ * returned if the bigram has never been observed.
+ *
+ * @param [Array] bigram A pair of symbols.
+ * @return [Float, nil] The uncertainty, which is calculated to be the Shannon
+ *                 entropy (in bits) of the probability distribution over the
+ *                 alphabet of symbols in the context of the bigram.
+ */
+VALUE method_sooth_native_uncertainty(VALUE self, VALUE bigram);
+/*
+ * Return a number indicating the surprise received by the predictor when it
+ * observed the given symbol after the given bigram. Note that nil will be
+ * returned if the symbol has never been observed after the bigram.
+ *
+ * @param [Array] bigram A pair of symbols.
+ * @param [Fixnum] symbol The symbol that has been observed.
+ * @return [Float, nil] The surprise in bits, which is calculated to be the
+ *                 Shannon self-information, -log2(p), of the symbol, where p is
+ *                 its observed relative frequency in the context of the
+ *                 bigram.
+ */
+VALUE method_sooth_native_surprise(VALUE self, VALUE bigram, VALUE symbol);
 //------------------------------------------------------------------------------
 void Init_sooth_native()
@@ -123,6 +155,8 @@ void Init_sooth_native()
   rb_define_method(SoothNative, "observe", method_sooth_native_observe, 2);
   rb_define_method(SoothNative, "count", method_sooth_native_count, 1);
   rb_define_method(SoothNative, "select", method_sooth_native_select, 2);
+  rb_define_method(SoothNative, "uncertainty", method_sooth_native_uncertainty, 1);
+  rb_define_method(SoothNative, "surprise", method_sooth_native_surprise, 2);
 }
 //------------------------------------------------------------------------------
@@ -258,4 +292,51 @@ method_sooth_native_select(VALUE self, VALUE bigram, VALUE limit)
   return UINT2NUM(symbol);
 }
+//------------------------------------------------------------------------------
+VALUE // Ruby-facing wrapper registered as "uncertainty": validates the bigram, then delegates to sooth_predictor_uncertainty
+method_sooth_native_uncertainty(VALUE self, VALUE bigram)
+{
+  SoothPredictor * predictor = NULL;
+  Check_Type(bigram, T_ARRAY); // bigram must be a Ruby Array ...
+  if (RARRAY_LEN(bigram) != 2) // ... holding exactly two elements ...
+  {
+    rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
+  }
+  Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM); // ... both of which are Fixnums
+  Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
+  Data_Get_Struct(self, SoothPredictor, predictor); // fetch the SoothPredictor wrapped by self
+  uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
+  double uncertainty = sooth_predictor_uncertainty(predictor, c_bigram);
+  if (uncertainty < 0) // the C layer returns -1 when the bigram has no usable context
+  {
+    return Qnil; // surfaced to Ruby as nil rather than as a negative number
+  }
+  return DBL2NUM(uncertainty);
+}
+//------------------------------------------------------------------------------
+VALUE // Ruby-facing wrapper registered as "surprise": validates its arguments, then delegates to sooth_predictor_surprise
+method_sooth_native_surprise(VALUE self, VALUE bigram, VALUE symbol)
+{
+  SoothPredictor * predictor = NULL;
+  Check_Type(symbol, T_FIXNUM); // symbol must be a Fixnum
+  Check_Type(bigram, T_ARRAY); // bigram must be a Ruby Array ...
+  if (RARRAY_LEN(bigram) != 2) // ... holding exactly two elements ...
+  {
+    rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
+  }
+  Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM); // ... both of which are Fixnums
+  Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
+  Data_Get_Struct(self, SoothPredictor, predictor); // fetch the SoothPredictor wrapped by self
+  uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
+  double surprise = sooth_predictor_surprise(predictor, c_bigram, NUM2UINT(symbol));
+  if (surprise < 0) // the C layer returns -1 when the context or the symbol has no count
+  {
+    return Qnil; // surfaced to Ruby as nil rather than as a negative number
+  }
+  return DBL2NUM(surprise);
+}
 //==============================================================================

data/ext/sooth_native/sooth_predictor.c CHANGED Viewed

@@ -3,6 +3,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <math.h>
 #include "sooth_predictor.h"
@@ -337,4 +338,52 @@ sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint32_t
   return predictor->error_symbol;
 }
+//------------------------------------------------------------------------------
+double // entropy, in bits, of the symbol counts stored for bigram; -1 when there is nothing to measure
+sooth_predictor_uncertainty(SoothPredictor * predictor, uint32_t bigram[2])
+{
+  SoothContext * context = sooth_predictor_find_context(predictor, bigram);
+  if (context == NULL || context->count == 0) // lookup gave no context, or its count is zero
+  {
+    return -1; // sentinel: the sum below is never negative, so callers can test for < 0
+  }
+  double uncertainty = 0.0;
+  for (uint32_t i = 0; i < context->statistics_size; ++i)
+  {
+    if (context->statistics[i].count > 0) // skip zero counts, which would evaluate 0 * log2(0)
+    {
+      double frequency = (double)context->statistics[i].count / (double)context->count; // this entry's share of the context count
+      uncertainty -= frequency * log2(frequency); // accumulate -p * log2(p)
+    }
+  }
+  return uncertainty;
+}
+//------------------------------------------------------------------------------
+double // -log2 of the symbol's share of the context count (in bits); -1 when there is nothing to measure
+sooth_predictor_surprise(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol)
+{
+  SoothContext * context = sooth_predictor_find_context(predictor, bigram);
+  if (context == NULL || context->count == 0) // lookup gave no context, or its count is zero
+  {
+    return -1; // sentinel: -log2(p) is never negative for 0 < p <= 1
+  }
+  SoothStatistic * statistic = sooth_predictor_find_statistic(context, symbol);
+  if (statistic == NULL || statistic->count == 0) // no entry for this symbol, or its count is zero
+  {
+    return -1; // same sentinel as above; the caller cannot tell the two cases apart
+  }
+  double frequency = (double)statistic->count / (double)context->count; // the symbol's share of the context count
+  return -log2(frequency);
+}
 //==============================================================================

data/ext/sooth_native/sooth_predictor.h CHANGED Viewed

@@ -26,6 +26,8 @@ bool sooth_predictor_save(const char * const filename, SoothPredictor * predicto
 uint32_t sooth_predictor_observe(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol);
 uint32_t sooth_predictor_count(SoothPredictor * predictor, uint32_t bigram[2]);
 uint32_t sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint32_t limit);
+double sooth_predictor_uncertainty(SoothPredictor * predictor, uint32_t bigram[2]);
+double sooth_predictor_surprise(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol);
 //==============================================================================

data/sooth.gemspec CHANGED Viewed

@@ -2,12 +2,12 @@
 # DO NOT EDIT THIS FILE DIRECTLY
 # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
 # -*- encoding: utf-8 -*-
-# stub: sooth 0.4.0 ruby lib
+# stub: sooth 0.5.0 ruby lib
 # stub: ext/sooth_native/extconf.rb
 Gem::Specification.new do |s|
   s.name = "sooth"
-  s.version = "0.4.0"
+  s.version = "0.5.0"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.require_paths = ["lib"]

data/spec/predictor_spec.rb CHANGED Viewed

@@ -134,4 +134,45 @@ describe Sooth::Predictor do
     end
   end
+  describe "#uncertainty" do
+    it "has no uncertainty for a new context" do
+      expect(predictor.uncertainty([1, 2])).to be_nil
+      expect(predictor.count([1, 2])).to eq(0) # presumably checks that querying count does not change the result below
+      expect(predictor.uncertainty([1, 2])).to be_nil
+    end
+    it "has zero uncertainty for a lone context" do
+      predictor.observe([1, 2], 3) # a single observed symbol has frequency 1, and 1 * log2(1) is 0
+      expect(predictor.uncertainty([1, 2])).to eq(0)
+    end
+    it "has maximal uncertainty for a uniform distribution" do
+      (1..256).each { |i| predictor.observe([1, 2], i) } # 256 distinct symbols, each observed once
+      expect(predictor.uncertainty([1, 2])).to eq(8) # log2(256) = 8 bits
+    end
+  end
+  describe "#surprise" do
+    it "has no surprise for a new context or symbol" do
+      expect(predictor.surprise([1, 2], 3)).to be_nil
+      expect(predictor.count([1, 2])).to eq(0) # presumably checks that querying count does not change the result below
+      expect(predictor.surprise([1, 2], 3)).to be_nil
+    end
+    it "has zero surprise for a lone context" do
+      predictor.observe([1, 2], 3) # the only observed symbol has frequency 1, and -log2(1) is 0
+      expect(predictor.surprise([1, 2], 3)).to eq(0)
+    end
+    it "has uniform surprise for a uniform distribution" do
+      (1..256).each { |i| predictor.observe([1, 2], i) } # 256 distinct symbols, each observed once
+      expect(predictor.surprise([1, 2], 3)).to eq(8) # -log2(1/256) = 8 bits
+    end
+  end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: sooth
 version: !ruby/object:Gem::Version
-  version: 0.4.0
+  version: 0.5.0
 platform: ruby
 authors:
 - Jason Hutchens