sooth 1.0.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 85592b0c52f53800465928af00ab97da342b71d4
4
- data.tar.gz: dd9c4276380250e4cc0219264e67d138dcc265ee
3
+ metadata.gz: 9bac8ab9338973373ec7328dd58dcae462e86485
4
+ data.tar.gz: ea7f2ecb422e0301e36d5f6826aa8ccdbaefb619
5
5
  SHA512:
6
- metadata.gz: d8969158143b9e4af257c881abccfa0d0624b25e1232ad4fa6f2cde5ba4e5ca154afbbf52fa4f97eca274f6bf3d7d64389d0637e2f05eddb82e32ebcf19bfed0
7
- data.tar.gz: 1f9a33941477759b9bbb349d203b1a40d5bdfa18f627fe75e175beba836111be8d28ca405e96ec018b40c43078f2607b6f7687eb60519945f2cb676cb7ad271d
6
+ metadata.gz: bf39ddf48fba680f4e6f84d42d5f9dc2f59999c83aabafc9f6ba8761f77d24835cfd5b6a6e9f349d61c0e44caec9b7ce2276ba181e4cb13c96980f177481e5ee
7
+ data.tar.gz: 4ab08bdf68a0f38b676c033f54949bf6a6854f2ade41a2f32b2a62b8a3cff49dcd58b3d89b3f6afa60cef5e6c3863fd22d4a6afd6223285844e72bef4b63d24c
data/CHANGELOG.md CHANGED
@@ -1,35 +1,30 @@
1
1
  # Change Log
2
2
 
3
- ## [v1.0.2](https://github.com/jasonhutchens/sooth/tree/v1.0.2) (2015-05-26)
3
+ ## [v1.0.3](https://github.com/jasonhutchens/sooth/tree/v1.0.3) (2015-10-03)
4
+ [Full Changelog](https://github.com/jasonhutchens/sooth/compare/v1.0.2...v1.0.3)
4
5
 
6
+ ## [v1.0.2](https://github.com/jasonhutchens/sooth/tree/v1.0.2) (2015-05-26)
5
7
  [Full Changelog](https://github.com/jasonhutchens/sooth/compare/v1.0.1...v1.0.2)
6
8
 
7
9
  ## [v1.0.1](https://github.com/jasonhutchens/sooth/tree/v1.0.1) (2015-05-26)
8
-
9
10
  [Full Changelog](https://github.com/jasonhutchens/sooth/compare/v1.0.0...v1.0.1)
10
11
 
11
12
  ## [v1.0.0](https://github.com/jasonhutchens/sooth/tree/v1.0.0) (2014-12-17)
12
-
13
13
  [Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.5.0...v1.0.0)
14
14
 
15
15
  ## [v0.5.0](https://github.com/jasonhutchens/sooth/tree/v0.5.0) (2014-12-16)
16
-
17
16
  [Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.4.0...v0.5.0)
18
17
 
19
18
  ## [v0.4.0](https://github.com/jasonhutchens/sooth/tree/v0.4.0) (2014-12-16)
20
-
21
19
  [Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.3.0...v0.4.0)
22
20
 
23
21
  ## [v0.3.0](https://github.com/jasonhutchens/sooth/tree/v0.3.0) (2014-12-11)
24
-
25
22
  [Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.2.0...v0.3.0)
26
23
 
27
24
  ## [v0.2.0](https://github.com/jasonhutchens/sooth/tree/v0.2.0) (2014-12-10)
28
-
29
25
  [Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.1.0...v0.2.0)
30
26
 
31
27
  ## [v0.1.0](https://github.com/jasonhutchens/sooth/tree/v0.1.0) (2014-12-10)
32
28
 
33
29
 
34
-
35
30
  \* *This Change Log was automatically generated by [github_changelog_generator](https://github.com/skywinder/Github-Changelog-Generator)*
data/Gemfile CHANGED
@@ -7,7 +7,7 @@ group :development do
7
7
  gem 'bundler', '~> 1.7'
8
8
  gem 'jeweler', '~> 2.0'
9
9
  gem 'simplecov', '~> 0.9'
10
- gem 'byebug', '~> 6.0'
10
+ gem 'byebug', '~> 8.2'
11
11
  gem 'rake-compiler', '~> 0.9'
12
12
  gem 'github_changelog_generator', '~> 1.4'
13
13
  end
data/Gemfile.lock CHANGED
@@ -1,31 +1,40 @@
1
1
  GEM
2
2
  remote: https://rubygems.org/
3
3
  specs:
4
- addressable (2.3.8)
4
+ addressable (2.4.0)
5
+ ast (2.2.0)
5
6
  builder (3.2.2)
6
- byebug (6.0.2)
7
+ byebug (8.2.2)
8
+ childprocess (0.5.9)
9
+ ffi (~> 1.0, >= 1.0.11)
7
10
  colorize (0.7.7)
8
11
  descendants_tracker (0.0.4)
9
12
  thread_safe (~> 0.3, >= 0.3.1)
10
13
  diff-lcs (1.2.5)
11
14
  docile (1.1.5)
12
- faraday (0.9.1)
15
+ faraday (0.9.2)
13
16
  multipart-post (>= 1.2, < 3)
14
- git (1.2.9.1)
15
- github_api (0.12.4)
16
- addressable (~> 2.3)
17
+ ffi (1.9.10)
18
+ git (1.3.0)
19
+ github_api (0.13.1)
20
+ addressable (~> 2.4.0)
17
21
  descendants_tracker (~> 0.0.4)
18
22
  faraday (~> 0.8, < 0.10)
19
23
  hashie (>= 3.4)
20
24
  multi_json (>= 1.7.5, < 2.0)
21
- nokogiri (~> 1.6.6)
22
25
  oauth2
23
- github_changelog_generator (1.9.0)
26
+ github_changelog_generator (1.12.0)
27
+ bundler (>= 1.7)
24
28
  colorize (~> 0.7)
25
29
  github_api (~> 0.12)
26
- hashie (3.4.2)
27
- highline (1.7.7)
28
- jeweler (2.0.1)
30
+ overcommit (>= 0.31)
31
+ rake (>= 10.0)
32
+ rspec (>= 3.2)
33
+ rubocop (>= 0.31)
34
+ hashie (3.4.3)
35
+ highline (1.7.8)
36
+ iniparse (1.4.2)
37
+ jeweler (2.1.1)
29
38
  builder
30
39
  bundler (>= 1.0)
31
40
  git (>= 1.2.5)
@@ -34,44 +43,62 @@ GEM
34
43
  nokogiri (>= 1.5.10)
35
44
  rake
36
45
  rdoc
46
+ semver
37
47
  json (1.8.3)
38
48
  jwt (1.5.1)
39
- mini_portile (0.6.2)
49
+ mini_portile2 (2.0.0)
40
50
  multi_json (1.11.2)
41
51
  multi_xml (0.5.5)
42
52
  multipart-post (2.0.0)
43
- nokogiri (1.6.6.2)
44
- mini_portile (~> 0.6.0)
45
- oauth2 (1.0.0)
53
+ nokogiri (1.6.7.2)
54
+ mini_portile2 (~> 2.0.0.rc2)
55
+ oauth2 (1.1.0)
46
56
  faraday (>= 0.8, < 0.10)
47
- jwt (~> 1.0)
57
+ jwt (~> 1.0, < 1.5.2)
48
58
  multi_json (~> 1.3)
49
59
  multi_xml (~> 0.5)
50
- rack (~> 1.2)
60
+ rack (>= 1.2, < 3)
61
+ overcommit (0.33.0)
62
+ childprocess (~> 0.5.8)
63
+ iniparse (~> 1.4)
64
+ parser (2.3.0.7)
65
+ ast (~> 2.2)
66
+ powerpack (0.1.1)
51
67
  rack (1.6.4)
52
- rake (10.4.2)
53
- rake-compiler (0.9.5)
68
+ rainbow (2.1.0)
69
+ rake (11.1.2)
70
+ rake-compiler (0.9.7)
54
71
  rake
55
- rdoc (4.2.0)
56
- rspec (3.3.0)
57
- rspec-core (~> 3.3.0)
58
- rspec-expectations (~> 3.3.0)
59
- rspec-mocks (~> 3.3.0)
60
- rspec-core (3.3.2)
61
- rspec-support (~> 3.3.0)
62
- rspec-expectations (3.3.1)
72
+ rdoc (4.2.2)
73
+ json (~> 1.4)
74
+ rspec (3.4.0)
75
+ rspec-core (~> 3.4.0)
76
+ rspec-expectations (~> 3.4.0)
77
+ rspec-mocks (~> 3.4.0)
78
+ rspec-core (3.4.4)
79
+ rspec-support (~> 3.4.0)
80
+ rspec-expectations (3.4.0)
63
81
  diff-lcs (>= 1.2.0, < 2.0)
64
- rspec-support (~> 3.3.0)
65
- rspec-mocks (3.3.2)
82
+ rspec-support (~> 3.4.0)
83
+ rspec-mocks (3.4.1)
66
84
  diff-lcs (>= 1.2.0, < 2.0)
67
- rspec-support (~> 3.3.0)
68
- rspec-support (3.3.0)
69
- simplecov (0.10.0)
85
+ rspec-support (~> 3.4.0)
86
+ rspec-support (3.4.1)
87
+ rubocop (0.39.0)
88
+ parser (>= 2.3.0.7, < 3.0)
89
+ powerpack (~> 0.1)
90
+ rainbow (>= 1.99.1, < 3.0)
91
+ ruby-progressbar (~> 1.7)
92
+ unicode-display_width (~> 1.0, >= 1.0.1)
93
+ ruby-progressbar (1.7.5)
94
+ semver (1.0.1)
95
+ simplecov (0.11.2)
70
96
  docile (~> 1.1.0)
71
97
  json (~> 1.8)
72
98
  simplecov-html (~> 0.10.0)
73
99
  simplecov-html (0.10.0)
74
100
  thread_safe (0.3.5)
101
+ unicode-display_width (1.0.3)
75
102
  yard (0.8.7.6)
76
103
 
77
104
  PLATFORMS
@@ -79,7 +106,7 @@ PLATFORMS
79
106
 
80
107
  DEPENDENCIES
81
108
  bundler (~> 1.7)
82
- byebug (~> 6.0)
109
+ byebug (~> 8.2)
83
110
  github_changelog_generator (~> 1.4)
84
111
  jeweler (~> 2.0)
85
112
  rake-compiler (~> 0.9)
@@ -89,4 +116,4 @@ DEPENDENCIES
89
116
  yard (~> 0.8)
90
117
 
91
118
  BUNDLED WITH
92
- 1.10.5
119
+ 1.11.2
data/README.md CHANGED
@@ -5,15 +5,18 @@
5
5
  Sooth
6
6
  =====
7
7
 
8
- Sooth is a simple stochastic predictive model. It is used by
9
- [MegaHAL](https://github.com/jasonhutchens/megahal), a learning chatterbot.
8
+ Sooth is a minimal stochastic predictive model. It is used by [MegaHAL](https://github.com/jasonhutchens/megahal), a learning chatterbot, and by [Typing Simulator](https://github.com/jasonhutchens/typing_simulator), a program that pretends to type text files like a human being.
10
9
 
11
10
  Getting Started
12
11
  ---------------
13
12
 
14
- Apart from the specs, look at the [API docs](http://www.rubydoc.info/gems/sooth/) to get up to speed.
13
+ The basic philosophy is to keep things simple. Sooth can make _predictions_ about _events_ that occur within some _context_.
14
+
15
+ A _context_ is just a number; it's up to you to make it significant. It might represent a word, or an array of words, or something else entirely. Sooth doesn't care. Likewise, an _event_ is a number, but doesn't need to represent the same kind of thing as a _context_. It's perfectly fine for a _context_ to be an array of words and an _event_ to be a single word, which is what MegaHAL does. Or a _context_ could be an array of characters and an _event_ could be a time in milliseconds, which is what TypingSimulator does. Or they could be something else entirely...
16
+
17
+ Look at the [API docs](http://www.rubydoc.info/gems/sooth/Sooth/Predictor) to get up to speed, and at the specs for example usage.
15
18
 
16
19
  Copyright
17
20
  ---------
18
21
 
19
- Copyright (c) 2015 Jason Hutchens. See UNLICENSE for further details.
22
+ Copyright (c) 2016 Jason Hutchens. See UNLICENSE for further details.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.3
1
+ 2.0.0
@@ -1,5 +1,5 @@
1
1
  require 'mkmf'
2
2
 
3
- $CFLAGS << ' -Wall -O3 -std=gnu99'
3
+ $CFLAGS << ' -Wall -Ofast -std=gnu99'
4
4
 
5
5
  create_makefile('sooth_native')
@@ -14,11 +14,10 @@ void method_sooth_native_deallocate(void * predictor);
14
14
 
15
15
  /* @!parse [ruby]
16
16
  * module Sooth
17
- * # A very simple stochastic predictor. Implemented in C for efficiency.
18
- * # The idea here is to build up more complicated learning algorithms using
19
- * # a trivial Markovian predictor.
17
+ * # A minimal stochastic predictive model, implemented in C for efficiency.
18
+ * # No assumptions about PRNG or real-world significance of context/event.
20
19
  * class Predictor
21
- * def initialize(error_symbol)
20
+ * def initialize(error_event)
22
21
  * end
23
22
  * def clear
24
23
  * end
@@ -26,19 +25,25 @@ void method_sooth_native_deallocate(void * predictor);
26
25
  * end
27
26
  * def save(filename)
28
27
  * end
29
- * def observe(bigram, symbol)
28
+ * def size(context)
30
29
  * # (native code)
31
30
  * end
32
- * def count(bigram)
31
+ * def count(context)
33
32
  * # (native code)
34
33
  * end
35
- * def select(bigram, limit)
34
+ * def observe(context, event)
36
35
  * # (native code)
37
36
  * end
38
- * def uncertainty(bigram)
37
+ * def select(context, limit)
39
38
  * # (native code)
40
39
  * end
41
- * def surprise(bigram, symbol)
40
+ * def distribution(context)
41
+ * # (native code)
42
+ * end
43
+ * def uncertainty(context)
44
+ * # (native code)
45
+ * end
46
+ * def surprise(context, event)
42
47
  * # (native code)
43
48
  * end
44
49
  * end
@@ -46,13 +51,13 @@ void method_sooth_native_deallocate(void * predictor);
46
51
  *
47
52
  * Returns a new Sooth::Predictor instance.
48
53
  *
49
- * @param [Fixnum] error_symbol The symbol to be returned by #select when no
50
- * prediction can be made.
54
+ * @param [Fixnum] error_event The event to be returned by #select when no
55
+ * observations have been made for the context.
51
56
  */
52
- VALUE method_sooth_native_initialize(VALUE self, VALUE error_symbol);
57
+ VALUE method_sooth_native_initialize(VALUE self, VALUE error_event);
53
58
 
54
59
  /*
55
- * Clear the predictor to a fresh slate.
60
+ * Clear the predictor to a blank slate.
56
61
  */
57
62
  VALUE method_sooth_native_clear(VALUE self);
58
63
 
@@ -65,76 +70,92 @@ VALUE method_sooth_native_clear(VALUE self);
65
70
  VALUE method_sooth_native_load(VALUE self, VALUE filename);
66
71
 
67
72
  /*
68
- * Save the predictor to a file that can be loaded or merged later.
73
+ * Save the predictor to a file that can be loaded later.
69
74
  *
70
75
  * @param [String] filename The path of the file to be saved.
71
76
  */
72
77
  VALUE method_sooth_native_save(VALUE self, VALUE filename);
73
78
 
74
79
  /*
75
- * Add an observation of the given symbol in the context of the bigram.
80
+ * Return the number of different events that have been observed within the
81
+ * given context.
76
82
  *
77
- * @param [Array] bigram A pair of symbols that provide context, allowing the
78
- * predictor to maintain observation statistics for
79
- * different contexts.
80
- * @param [Fixnum] symbol The symbol that has been observed.
81
- * @return [Fixnum] A count of the number of times the symbol has been
82
- * observed in the context of the bigram.
83
+ * @param [Fixnum] context A number that provides a context for observations.
84
+ * @return [Fixnum] The number of distinct events that have been observed
85
+ * within the given context. This is guaranteed to be equal
86
+ * to the length of the #distribution for the context.
83
87
  */
84
- VALUE method_sooth_native_observe(VALUE self, VALUE bigram, VALUE symbol);
88
+
89
+ VALUE method_sooth_native_size(VALUE self, VALUE context);
85
90
 
86
91
  /*
87
- * Return a count of the number of times the bigram has been observed.
92
+ * Return the number of times the context has been observed.
88
93
  *
89
- * @param [Array] bigram A pair of symbols.
90
- * @return [Fixnum] A count of the number of times the bigram has been
94
+ * @param [Fixnum] context A number that provides a context for observations.
95
+ * @return [Fixnum] A count of the number of times the context has been
91
96
  * observed. This is guaranteed to be equal to the sum
92
- * of the counts of observations of all symbols in the
93
- * context of the bigram.
97
+ * of the counts of observations of all events observed in
98
+ * the context.
99
+ */
100
+ VALUE method_sooth_native_count(VALUE self, VALUE context);
101
+
102
+ /*
103
+ * Register an observation of the given event within the given context.
104
+ *
105
+ * @param [Fixnum] context A number that provides a context for the event,
106
+ * allowing the predictor to maintain observation
107
+ * statistics for different contexts.
108
+ * @param [Fixnum] event A number representing the observed event.
109
+ * @return [Fixnum] A count of the number of times the event has been
110
+ * observed in the given context.
111
+ */
112
+ VALUE method_sooth_native_observe(VALUE self, VALUE context, VALUE event);
113
+
114
+ /*
115
+ * Return an event that may occur in the given context, based on the limit,
116
+ * which should be between 1 and #count. The event is selected by iterating
117
+ * through all observed events for the context, subtracting the observation
118
+ * count of each event from the limit until it is zero or less.
119
+ *
120
+ * @param [Fixnum] context A number that provides a context for observations.
121
+ * @param [Fixnum] limit The total number of event observations to be
122
+ * analysed before returning an event.
123
+ * @return [Fixnum] An event that has been previously observed in the given
124
+ * context, or the error_event if the #count of the context
125
+ * is zero, or if limit exceeds the #count of the context
94
126
  */
95
- VALUE method_sooth_native_count(VALUE self, VALUE bigram);
127
+ VALUE method_sooth_native_select(VALUE self, VALUE context, VALUE limit);
96
128
 
97
129
  /*
98
- * Return a symbol that may occur in the context of the bigram. The
99
- * limit is used to select a symbol. This is done by iterating through
100
- * all of the symbols that have been observed in the context of the
101
- * bigram, subtracting the observation count of each symbol from the
102
- * supplied limit. For this reason, limit should be between 1 and the
103
- * observation count of the bigram itself, as returned by #count.
130
+ * Return an Array containing each observed event within the context
131
+ * together with its probability.
104
132
  *
105
- * @param [Array] bigram A pair of symbols.
106
- * @param [Fixnum] limit The total numbe of symbol observations to be
107
- * analysed before returning a symbol.
108
- * @return [Fixnum] A symbol that has been observed previously in the
109
- * context of the bigram, or the error_symbol if no
110
- * such symbol exists, or if the supplied limit was
111
- * too large.
133
+ * @param [Fixnum] context A number that provides a context for observations.
134
+ * @return [Array] A list of event-probability pairs.
112
135
  */
113
- VALUE method_sooth_native_select(VALUE self, VALUE bigram, VALUE limit);
136
+ VALUE method_sooth_native_distribution(VALUE self, VALUE context);
114
137
 
115
138
  /*
116
- * Return a number indicating how uncertain the predictor is about which symbol
117
- * is likely to be observed after the given bigram. Note that nil will be
118
- * returned if the bigram has never been observed.
139
+ * Return a number indicating how uncertain the predictor is about which event
140
+ * is likely to be observed after the given context. Note that nil will be
141
+ * returned if the context has never been observed.
119
142
  *
120
- * @param [Array] bigram A pair of symbols.
121
- * @return [Float] The uncertainty, which is calculated to be the shannon entropy
122
- * of the probability distribution over the alphabet of symbols
123
- * in the context of the bigram.
143
+ * @param [Fixnum] context A number that provides a context for observations.
144
+ * @return [Float] The uncertainty, which is calculated to be the Shannon entropy
145
+ * of the #distribution over the context.
124
146
  */
125
- VALUE method_sooth_native_uncertainty(VALUE self, VALUE bigram);
147
+ VALUE method_sooth_native_uncertainty(VALUE self, VALUE context);
126
148
 
127
149
  /*
128
150
  * Return a number indicating the surprise received by the predictor when it
129
- * observed the given symbol after the given bigram. Note that nil will be
130
- * returned if the symbol has never been observed after the bigram.
151
+ * observed the given event within the given context. Note that nil will be
152
+ * returned if the event has never been observed within the context.
131
153
  *
132
- * @param [Array] bigram A pair of symbols.
133
- * @param [Fixnum] symbol The symbol that has been observed.
134
- * @return [Float] The surprise, which is calculated to be the shannon pointwise
135
- * mutual information of the symbol according to the probability
136
- * distribution over the alphabet of symbols in the context of
137
- * the bigram.
154
+ * @param [Fixnum] context A number that provides a context for observations.
155
+ * @param [Fixnum] event A number representing the observed event.
156
+ * @return [Float] The surprise, which is calculated to be the Shannon pointwise
157
+ * mutual information of the event according to the
158
+ * #distribution over the context.
138
159
  */
139
160
  VALUE method_sooth_native_surprise(VALUE self, VALUE bigram, VALUE limit);
140
161
 
@@ -152,9 +173,12 @@ void Init_sooth_native()
152
173
  rb_define_method(SoothNative, "load", method_sooth_native_load, 1);
153
174
  rb_define_method(SoothNative, "save", method_sooth_native_save, 1);
154
175
 
155
- rb_define_method(SoothNative, "observe", method_sooth_native_observe, 2);
176
+ rb_define_method(SoothNative, "size", method_sooth_native_size, 1);
156
177
  rb_define_method(SoothNative, "count", method_sooth_native_count, 1);
178
+ rb_define_method(SoothNative, "observe", method_sooth_native_observe, 2);
157
179
  rb_define_method(SoothNative, "select", method_sooth_native_select, 2);
180
+
181
+ rb_define_method(SoothNative, "distribution", method_sooth_native_distribution, 1);
158
182
  rb_define_method(SoothNative, "uncertainty", method_sooth_native_uncertainty, 1);
159
183
  rb_define_method(SoothNative, "surprise", method_sooth_native_surprise, 2);
160
184
  }
@@ -183,12 +207,12 @@ method_sooth_native_deallocate(void * predictor)
183
207
  //------------------------------------------------------------------------------
184
208
 
185
209
  VALUE
186
- method_sooth_native_initialize(VALUE self, VALUE error_symbol)
210
+ method_sooth_native_initialize(VALUE self, VALUE error_event)
187
211
  {
188
212
  SoothPredictor * predictor = NULL;
189
- Check_Type(error_symbol, T_FIXNUM);
213
+ Check_Type(error_event, T_FIXNUM);
190
214
  Data_Get_Struct(self, SoothPredictor, predictor);
191
- predictor->error_symbol = NUM2UINT(error_symbol);
215
+ predictor->error_event = NUM2UINT(error_event);
192
216
  return self;
193
217
  }
194
218
 
@@ -236,78 +260,90 @@ method_sooth_native_save(VALUE self, VALUE filename)
236
260
  //------------------------------------------------------------------------------
237
261
 
238
262
  VALUE
239
- method_sooth_native_observe(VALUE self, VALUE bigram, VALUE symbol)
263
+ method_sooth_native_size(VALUE self, VALUE context)
240
264
  {
241
265
  SoothPredictor * predictor = NULL;
242
- Check_Type(symbol, T_FIXNUM);
243
- Check_Type(bigram, T_ARRAY);
244
- if (RARRAY_LEN(bigram) != 2)
245
- {
246
- rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
247
- }
248
- Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
249
- Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
266
+ Check_Type(context, T_FIXNUM);
250
267
  Data_Get_Struct(self, SoothPredictor, predictor);
251
- uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
252
- uint32_t count = sooth_predictor_observe(predictor, c_bigram, NUM2UINT(symbol));
268
+ uint32_t size = sooth_predictor_size(predictor, NUM2UINT(context));
269
+ return UINT2NUM(size);
270
+ }
271
+
272
+ //------------------------------------------------------------------------------
273
+
274
+ VALUE
275
+ method_sooth_native_count(VALUE self, VALUE context)
276
+ {
277
+ SoothPredictor * predictor = NULL;
278
+ Check_Type(context, T_FIXNUM);
279
+ Data_Get_Struct(self, SoothPredictor, predictor);
280
+ uint32_t count = sooth_predictor_count(predictor, NUM2UINT(context));
253
281
  return UINT2NUM(count);
254
282
  }
255
283
 
256
284
  //------------------------------------------------------------------------------
257
285
 
258
286
  VALUE
259
- method_sooth_native_count(VALUE self, VALUE bigram)
287
+ method_sooth_native_observe(VALUE self, VALUE context, VALUE event)
260
288
  {
261
289
  SoothPredictor * predictor = NULL;
262
- Check_Type(bigram, T_ARRAY);
263
- if (RARRAY_LEN(bigram) != 2)
264
- {
265
- rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
266
- }
267
- Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
268
- Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
290
+ Check_Type(context, T_FIXNUM);
291
+ Check_Type(event, T_FIXNUM);
269
292
  Data_Get_Struct(self, SoothPredictor, predictor);
270
- uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
271
- uint32_t count = sooth_predictor_count(predictor, c_bigram);
293
+ uint32_t count = sooth_predictor_observe(predictor, NUM2UINT(context), NUM2UINT(event));
272
294
  return UINT2NUM(count);
273
295
  }
274
296
 
275
297
  //------------------------------------------------------------------------------
276
298
 
277
299
  VALUE
278
- method_sooth_native_select(VALUE self, VALUE bigram, VALUE limit)
300
+ method_sooth_native_select(VALUE self, VALUE context, VALUE limit)
279
301
  {
280
302
  SoothPredictor * predictor = NULL;
303
+ Check_Type(context, T_FIXNUM);
281
304
  Check_Type(limit, T_FIXNUM);
282
- Check_Type(bigram, T_ARRAY);
283
- if (RARRAY_LEN(bigram) != 2)
284
- {
285
- rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
286
- }
287
- Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
288
- Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
289
305
  Data_Get_Struct(self, SoothPredictor, predictor);
290
- uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
291
- uint32_t symbol = sooth_predictor_select(predictor, c_bigram, NUM2UINT(limit));
292
- return UINT2NUM(symbol);
306
+ uint32_t event = sooth_predictor_select(predictor, NUM2UINT(context), NUM2UINT(limit));
307
+ return UINT2NUM(event);
293
308
  }
294
309
 
295
310
  //------------------------------------------------------------------------------
296
311
 
297
312
  VALUE
298
- method_sooth_native_uncertainty(VALUE self, VALUE bigram)
313
+ method_sooth_native_distribution(VALUE self, VALUE context)
299
314
  {
300
315
  SoothPredictor * predictor = NULL;
301
- Check_Type(bigram, T_ARRAY);
302
- if (RARRAY_LEN(bigram) != 2)
316
+ Check_Type(context, T_FIXNUM);
317
+ Data_Get_Struct(self, SoothPredictor, predictor);
318
+ uint32_t c_context = NUM2UINT(context);
319
+ SoothStatistic * statistics = sooth_predictor_distribution(predictor, c_context);
320
+ if (statistics == NULL)
303
321
  {
304
- rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
322
+ return Qnil;
305
323
  }
306
- Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
307
- Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
324
+ uint32_t size = sooth_predictor_size(predictor, c_context);
325
+ double count = (double)sooth_predictor_count(predictor, c_context);
326
+ VALUE r_array = rb_ary_new2(size);
327
+ for (uint32_t i = 0; i < size; ++i)
328
+ {
329
+ SoothStatistic statistic = statistics[i];
330
+ VALUE pair = rb_ary_new2(2);
331
+ rb_ary_store(pair, 0, UINT2NUM(statistic.event));
332
+ rb_ary_store(pair, 1, DBL2NUM((double)statistic.count/count));
333
+ rb_ary_store(r_array, i, pair);
334
+ }
335
+ return r_array;
336
+ }
337
+
338
+ //------------------------------------------------------------------------------
339
+
340
+ VALUE
341
+ method_sooth_native_uncertainty(VALUE self, VALUE context)
342
+ {
343
+ SoothPredictor * predictor = NULL;
344
+ Check_Type(context, T_FIXNUM);
308
345
  Data_Get_Struct(self, SoothPredictor, predictor);
309
- uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
310
- double uncertainty = sooth_predictor_uncertainty(predictor, c_bigram);
346
+ double uncertainty = sooth_predictor_uncertainty(predictor, NUM2UINT(context));
311
347
  if (uncertainty < 0)
312
348
  {
313
349
  return Qnil;
@@ -318,20 +354,13 @@ method_sooth_native_uncertainty(VALUE self, VALUE bigram)
318
354
  //------------------------------------------------------------------------------
319
355
 
320
356
  VALUE
321
- method_sooth_native_surprise(VALUE self, VALUE bigram, VALUE symbol)
357
+ method_sooth_native_surprise(VALUE self, VALUE context, VALUE event)
322
358
  {
323
359
  SoothPredictor * predictor = NULL;
324
- Check_Type(symbol, T_FIXNUM);
325
- Check_Type(bigram, T_ARRAY);
326
- if (RARRAY_LEN(bigram) != 2)
327
- {
328
- rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
329
- }
330
- Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
331
- Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
360
+ Check_Type(context, T_FIXNUM);
361
+ Check_Type(event, T_FIXNUM);
332
362
  Data_Get_Struct(self, SoothPredictor, predictor);
333
- uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
334
- double surprise = sooth_predictor_surprise(predictor, c_bigram, NUM2UINT(symbol));
363
+ double surprise = sooth_predictor_surprise(predictor, NUM2UINT(context), NUM2UINT(event));
335
364
  if (surprise < 0)
336
365
  {
337
366
  return Qnil;