sooth 1.0.3 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 85592b0c52f53800465928af00ab97da342b71d4
4
- data.tar.gz: dd9c4276380250e4cc0219264e67d138dcc265ee
3
+ metadata.gz: 9bac8ab9338973373ec7328dd58dcae462e86485
4
+ data.tar.gz: ea7f2ecb422e0301e36d5f6826aa8ccdbaefb619
5
5
  SHA512:
6
- metadata.gz: d8969158143b9e4af257c881abccfa0d0624b25e1232ad4fa6f2cde5ba4e5ca154afbbf52fa4f97eca274f6bf3d7d64389d0637e2f05eddb82e32ebcf19bfed0
7
- data.tar.gz: 1f9a33941477759b9bbb349d203b1a40d5bdfa18f627fe75e175beba836111be8d28ca405e96ec018b40c43078f2607b6f7687eb60519945f2cb676cb7ad271d
6
+ metadata.gz: bf39ddf48fba680f4e6f84d42d5f9dc2f59999c83aabafc9f6ba8761f77d24835cfd5b6a6e9f349d61c0e44caec9b7ce2276ba181e4cb13c96980f177481e5ee
7
+ data.tar.gz: 4ab08bdf68a0f38b676c033f54949bf6a6854f2ade41a2f32b2a62b8a3cff49dcd58b3d89b3f6afa60cef5e6c3863fd22d4a6afd6223285844e72bef4b63d24c
data/CHANGELOG.md CHANGED
@@ -1,35 +1,30 @@
1
1
  # Change Log
2
2
 
3
- ## [v1.0.2](https://github.com/jasonhutchens/sooth/tree/v1.0.2) (2015-05-26)
3
+ ## [v1.0.3](https://github.com/jasonhutchens/sooth/tree/v1.0.3) (2015-10-03)
4
+ [Full Changelog](https://github.com/jasonhutchens/sooth/compare/v1.0.2...v1.0.3)
4
5
 
6
+ ## [v1.0.2](https://github.com/jasonhutchens/sooth/tree/v1.0.2) (2015-05-26)
5
7
  [Full Changelog](https://github.com/jasonhutchens/sooth/compare/v1.0.1...v1.0.2)
6
8
 
7
9
  ## [v1.0.1](https://github.com/jasonhutchens/sooth/tree/v1.0.1) (2015-05-26)
8
-
9
10
  [Full Changelog](https://github.com/jasonhutchens/sooth/compare/v1.0.0...v1.0.1)
10
11
 
11
12
  ## [v1.0.0](https://github.com/jasonhutchens/sooth/tree/v1.0.0) (2014-12-17)
12
-
13
13
  [Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.5.0...v1.0.0)
14
14
 
15
15
  ## [v0.5.0](https://github.com/jasonhutchens/sooth/tree/v0.5.0) (2014-12-16)
16
-
17
16
  [Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.4.0...v0.5.0)
18
17
 
19
18
  ## [v0.4.0](https://github.com/jasonhutchens/sooth/tree/v0.4.0) (2014-12-16)
20
-
21
19
  [Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.3.0...v0.4.0)
22
20
 
23
21
  ## [v0.3.0](https://github.com/jasonhutchens/sooth/tree/v0.3.0) (2014-12-11)
24
-
25
22
  [Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.2.0...v0.3.0)
26
23
 
27
24
  ## [v0.2.0](https://github.com/jasonhutchens/sooth/tree/v0.2.0) (2014-12-10)
28
-
29
25
  [Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.1.0...v0.2.0)
30
26
 
31
27
  ## [v0.1.0](https://github.com/jasonhutchens/sooth/tree/v0.1.0) (2014-12-10)
32
28
 
33
29
 
34
-
35
30
  \* *This Change Log was automatically generated by [github_changelog_generator](https://github.com/skywinder/Github-Changelog-Generator)*
data/Gemfile CHANGED
@@ -7,7 +7,7 @@ group :development do
7
7
  gem 'bundler', '~> 1.7'
8
8
  gem 'jeweler', '~> 2.0'
9
9
  gem 'simplecov', '~> 0.9'
10
- gem 'byebug', '~> 6.0'
10
+ gem 'byebug', '~> 8.2'
11
11
  gem 'rake-compiler', '~> 0.9'
12
12
  gem 'github_changelog_generator', '~> 1.4'
13
13
  end
data/Gemfile.lock CHANGED
@@ -1,31 +1,40 @@
1
1
  GEM
2
2
  remote: https://rubygems.org/
3
3
  specs:
4
- addressable (2.3.8)
4
+ addressable (2.4.0)
5
+ ast (2.2.0)
5
6
  builder (3.2.2)
6
- byebug (6.0.2)
7
+ byebug (8.2.2)
8
+ childprocess (0.5.9)
9
+ ffi (~> 1.0, >= 1.0.11)
7
10
  colorize (0.7.7)
8
11
  descendants_tracker (0.0.4)
9
12
  thread_safe (~> 0.3, >= 0.3.1)
10
13
  diff-lcs (1.2.5)
11
14
  docile (1.1.5)
12
- faraday (0.9.1)
15
+ faraday (0.9.2)
13
16
  multipart-post (>= 1.2, < 3)
14
- git (1.2.9.1)
15
- github_api (0.12.4)
16
- addressable (~> 2.3)
17
+ ffi (1.9.10)
18
+ git (1.3.0)
19
+ github_api (0.13.1)
20
+ addressable (~> 2.4.0)
17
21
  descendants_tracker (~> 0.0.4)
18
22
  faraday (~> 0.8, < 0.10)
19
23
  hashie (>= 3.4)
20
24
  multi_json (>= 1.7.5, < 2.0)
21
- nokogiri (~> 1.6.6)
22
25
  oauth2
23
- github_changelog_generator (1.9.0)
26
+ github_changelog_generator (1.12.0)
27
+ bundler (>= 1.7)
24
28
  colorize (~> 0.7)
25
29
  github_api (~> 0.12)
26
- hashie (3.4.2)
27
- highline (1.7.7)
28
- jeweler (2.0.1)
30
+ overcommit (>= 0.31)
31
+ rake (>= 10.0)
32
+ rspec (>= 3.2)
33
+ rubocop (>= 0.31)
34
+ hashie (3.4.3)
35
+ highline (1.7.8)
36
+ iniparse (1.4.2)
37
+ jeweler (2.1.1)
29
38
  builder
30
39
  bundler (>= 1.0)
31
40
  git (>= 1.2.5)
@@ -34,44 +43,62 @@ GEM
34
43
  nokogiri (>= 1.5.10)
35
44
  rake
36
45
  rdoc
46
+ semver
37
47
  json (1.8.3)
38
48
  jwt (1.5.1)
39
- mini_portile (0.6.2)
49
+ mini_portile2 (2.0.0)
40
50
  multi_json (1.11.2)
41
51
  multi_xml (0.5.5)
42
52
  multipart-post (2.0.0)
43
- nokogiri (1.6.6.2)
44
- mini_portile (~> 0.6.0)
45
- oauth2 (1.0.0)
53
+ nokogiri (1.6.7.2)
54
+ mini_portile2 (~> 2.0.0.rc2)
55
+ oauth2 (1.1.0)
46
56
  faraday (>= 0.8, < 0.10)
47
- jwt (~> 1.0)
57
+ jwt (~> 1.0, < 1.5.2)
48
58
  multi_json (~> 1.3)
49
59
  multi_xml (~> 0.5)
50
- rack (~> 1.2)
60
+ rack (>= 1.2, < 3)
61
+ overcommit (0.33.0)
62
+ childprocess (~> 0.5.8)
63
+ iniparse (~> 1.4)
64
+ parser (2.3.0.7)
65
+ ast (~> 2.2)
66
+ powerpack (0.1.1)
51
67
  rack (1.6.4)
52
- rake (10.4.2)
53
- rake-compiler (0.9.5)
68
+ rainbow (2.1.0)
69
+ rake (11.1.2)
70
+ rake-compiler (0.9.7)
54
71
  rake
55
- rdoc (4.2.0)
56
- rspec (3.3.0)
57
- rspec-core (~> 3.3.0)
58
- rspec-expectations (~> 3.3.0)
59
- rspec-mocks (~> 3.3.0)
60
- rspec-core (3.3.2)
61
- rspec-support (~> 3.3.0)
62
- rspec-expectations (3.3.1)
72
+ rdoc (4.2.2)
73
+ json (~> 1.4)
74
+ rspec (3.4.0)
75
+ rspec-core (~> 3.4.0)
76
+ rspec-expectations (~> 3.4.0)
77
+ rspec-mocks (~> 3.4.0)
78
+ rspec-core (3.4.4)
79
+ rspec-support (~> 3.4.0)
80
+ rspec-expectations (3.4.0)
63
81
  diff-lcs (>= 1.2.0, < 2.0)
64
- rspec-support (~> 3.3.0)
65
- rspec-mocks (3.3.2)
82
+ rspec-support (~> 3.4.0)
83
+ rspec-mocks (3.4.1)
66
84
  diff-lcs (>= 1.2.0, < 2.0)
67
- rspec-support (~> 3.3.0)
68
- rspec-support (3.3.0)
69
- simplecov (0.10.0)
85
+ rspec-support (~> 3.4.0)
86
+ rspec-support (3.4.1)
87
+ rubocop (0.39.0)
88
+ parser (>= 2.3.0.7, < 3.0)
89
+ powerpack (~> 0.1)
90
+ rainbow (>= 1.99.1, < 3.0)
91
+ ruby-progressbar (~> 1.7)
92
+ unicode-display_width (~> 1.0, >= 1.0.1)
93
+ ruby-progressbar (1.7.5)
94
+ semver (1.0.1)
95
+ simplecov (0.11.2)
70
96
  docile (~> 1.1.0)
71
97
  json (~> 1.8)
72
98
  simplecov-html (~> 0.10.0)
73
99
  simplecov-html (0.10.0)
74
100
  thread_safe (0.3.5)
101
+ unicode-display_width (1.0.3)
75
102
  yard (0.8.7.6)
76
103
 
77
104
  PLATFORMS
@@ -79,7 +106,7 @@ PLATFORMS
79
106
 
80
107
  DEPENDENCIES
81
108
  bundler (~> 1.7)
82
- byebug (~> 6.0)
109
+ byebug (~> 8.2)
83
110
  github_changelog_generator (~> 1.4)
84
111
  jeweler (~> 2.0)
85
112
  rake-compiler (~> 0.9)
@@ -89,4 +116,4 @@ DEPENDENCIES
89
116
  yard (~> 0.8)
90
117
 
91
118
  BUNDLED WITH
92
- 1.10.5
119
+ 1.11.2
data/README.md CHANGED
@@ -5,15 +5,18 @@
5
5
  Sooth
6
6
  =====
7
7
 
8
- Sooth is a simple stochastic predictive model. It is used by
9
- [MegaHAL](https://github.com/jasonhutchens/megahal), a learning chatterbot.
8
+ Sooth is a minimal stochastic predictive model. It is used by [MegaHAL](https://github.com/jasonhutchens/megahal), a learning chatterbot, and by [Typing Simulator](https://github.com/jasonhutchens/typing_simulator), a program that pretends to type text files like a human being.
10
9
 
11
10
  Getting Started
12
11
  ---------------
13
12
 
14
- Apart from the specs, look at the [API docs](http://www.rubydoc.info/gems/sooth/) to get up to speed.
13
+ The basic philosophy is to keep things simple. Sooth can make _predictions_ about _events_ that occur within some _context_.
14
+
15
+ A _context_ is just a number; it's up to you to make it significant. It might represent a word, or an array of words, or something else entirely. Sooth doesn't care. Likewise, an _event_ is a number, but doesn't need to represent the same kind of thing as a _context_. It's perfectly fine for a _context_ to be an array of words and an _event_ to be a single word, which is what MegaHAL does. Or a _context_ could be an array of characters and an _event_ could be a time in milliseconds, which is what TypingSimulator does. Or they could be something else entirely...
16
+
17
+ Look at the [API docs](http://www.rubydoc.info/gems/sooth/Sooth/Predictor) to get up to speed, and at the specs for example usage.
15
18
 
16
19
  Copyright
17
20
  ---------
18
21
 
19
- Copyright (c) 2015 Jason Hutchens. See UNLICENSE for further details.
22
+ Copyright (c) 2016 Jason Hutchens. See UNLICENSE for further details.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.3
1
+ 2.0.0
@@ -1,5 +1,5 @@
1
1
  require 'mkmf'
2
2
 
3
- $CFLAGS << ' -Wall -O3 -std=gnu99'
3
+ $CFLAGS << ' -Wall -Ofast -std=gnu99'
4
4
 
5
5
  create_makefile('sooth_native')
@@ -14,11 +14,10 @@ void method_sooth_native_deallocate(void * predictor);
14
14
 
15
15
  /* @!parse [ruby]
16
16
  * module Sooth
17
- * # A very simple stochastic predictor. Implemented in C for efficiency.
18
- * # The idea here is to build up more complicated learning algorithms using
19
- * # a trivial Markovian predictor.
17
+ * # A minimal stochastic predictive model, implemented in C for efficiency.
18
+ * # No assumptions about PRNG or real-world significance of context/event.
20
19
  * class Predictor
21
- * def initialize(error_symbol)
20
+ * def initialize(error_event)
22
21
  * end
23
22
  * def clear
24
23
  * end
@@ -26,19 +25,25 @@ void method_sooth_native_deallocate(void * predictor);
26
25
  * end
27
26
  * def save(filename)
28
27
  * end
29
- * def observe(bigram, symbol)
28
+ * def size(context)
30
29
  * # (native code)
31
30
  * end
32
- * def count(bigram)
31
+ * def count(context)
33
32
  * # (native code)
34
33
  * end
35
- * def select(bigram, limit)
34
+ * def observe(context, event)
36
35
  * # (native code)
37
36
  * end
38
- * def uncertainty(bigram)
37
+ * def select(context, limit)
39
38
  * # (native code)
40
39
  * end
41
- * def surprise(bigram, symbol)
40
+ * def distribution(context)
41
+ * # (native code)
42
+ * end
43
+ * def uncertainty(context)
44
+ * # (native code)
45
+ * end
46
+ * def surprise(context, event)
42
47
  * # (native code)
43
48
  * end
44
49
  * end
@@ -46,13 +51,13 @@ void method_sooth_native_deallocate(void * predictor);
46
51
  *
47
52
  * Returns a new Sooth::Predictor instance.
48
53
  *
49
- * @param [Fixnum] error_symbol The symbol to be returned by #select when no
50
- * prediction can be made.
54
+ * @param [Fixnum] error_event The event to be returned by #select when no
55
+ * observations have been made for the context.
51
56
  */
52
- VALUE method_sooth_native_initialize(VALUE self, VALUE error_symbol);
57
+ VALUE method_sooth_native_initialize(VALUE self, VALUE error_event);
53
58
 
54
59
  /*
55
- * Clear the predictor to a fresh slate.
60
+ * Clear the predictor to a blank slate.
56
61
  */
57
62
  VALUE method_sooth_native_clear(VALUE self);
58
63
 
@@ -65,76 +70,92 @@ VALUE method_sooth_native_clear(VALUE self);
65
70
  VALUE method_sooth_native_load(VALUE self, VALUE filename);
66
71
 
67
72
  /*
68
- * Save the predictor to a file that can be loaded or merged later.
73
+ * Save the predictor to a file that can be loaded later.
69
74
  *
70
75
  * @param [String] filename The path of the file to be saved.
71
76
  */
72
77
  VALUE method_sooth_native_save(VALUE self, VALUE filename);
73
78
 
74
79
  /*
75
- * Add an observation of the given symbol in the context of the bigram.
80
+ * Return the number of different events that have been observed within the
81
+ * given context.
76
82
  *
77
- * @param [Array] bigram A pair of symbols that provide context, allowing the
78
- * predictor to maintain observation statistics for
79
- * different contexts.
80
- * @param [Fixnum] symbol The symbol that has been observed.
81
- * @return [Fixnum] A count of the number of times the symbol has been
82
- * observed in the context of the bigram.
83
+ * @param [Fixnum] context A number that provides a context for observations.
84
+ * @return [Fixnum] The number of distinct events that have been observed
85
+ * within the given context. This is guaranteed to be equal
86
+ * to the length of the #distribution for the context.
83
87
  */
84
- VALUE method_sooth_native_observe(VALUE self, VALUE bigram, VALUE symbol);
88
+
89
+ VALUE method_sooth_native_size(VALUE self, VALUE context);
85
90
 
86
91
  /*
87
- * Return a count of the number of times the bigram has been observed.
92
+ * Return the number of times the context has been observed.
88
93
  *
89
- * @param [Array] bigram A pair of symbols.
90
- * @return [Fixnum] A count of the number of times the bigram has been
94
+ * @param [Fixnum] context A number that provides a context for observations.
95
+ * @return [Fixnum] A count of the number of times the context has been
91
96
  * observed. This is guaranteed to be equal to the sum
92
- * of the counts of observations of all symbols in the
93
- * context of the bigram.
97
+ * of the counts of observations of all events observed in
98
+ * the context.
99
+ */
100
+ VALUE method_sooth_native_count(VALUE self, VALUE context);
101
+
102
+ /*
103
+ * Register an observation of the given event within the given context.
104
+ *
105
+ * @param [Fixnum] context A number that provides a context for the event,
106
+ * allowing the predictor to maintain observation
107
+ * statistics for different contexts.
108
+ * @param [Fixnum] event A number representing the observed event.
109
+ * @return [Fixnum] A count of the number of times the event has been
110
+ * observed in the given context.
111
+ */
112
+ VALUE method_sooth_native_observe(VALUE self, VALUE context, VALUE event);
113
+
114
+ /*
115
+ * Return an event that may occur in the given context, based on the limit,
116
+ * which should be between 1 and #count. The event is selected by iterating
117
+ * through all observed events for the context, subtracting the observation
118
+ * count of each event from the limit until it is zero or less.
119
+ *
120
+ * @param [Fixnum] context A number that provides a context for observations.
121
+ * @param [Fixnum] limit The total number of event observations to be
122
+ * analysed before returning an event.
123
+ * @return [Fixnum] An event that has been previously observed in the given
124
+ * context, or the error_event if the #count of the context
125
+ * is zero, or if limit exceeds the #count of the context
94
126
  */
95
- VALUE method_sooth_native_count(VALUE self, VALUE bigram);
127
+ VALUE method_sooth_native_select(VALUE self, VALUE context, VALUE limit);
96
128
 
97
129
  /*
98
- * Return a symbol that may occur in the context of the bigram. The
99
- * limit is used to select a symbol. This is done by iterating through
100
- * all of the symbols that have been observed in the context of the
101
- * bigram, subtracting the observation count of each symbol from the
102
- * supplied limit. For this reason, limit should be between 1 and the
103
- * observation count of the bigram itself, as returned by #count.
130
+ * Return an Array containing each observed event within the context
131
+ * together with its probability.
104
132
  *
105
- * @param [Array] bigram A pair of symbols.
106
- * @param [Fixnum] limit The total numbe of symbol observations to be
107
- * analysed before returning a symbol.
108
- * @return [Fixnum] A symbol that has been observed previously in the
109
- * context of the bigram, or the error_symbol if no
110
- * such symbol exists, or if the supplied limit was
111
- * too large.
133
+ * @param [Fixnum] context A number that provides a context for observations.
134
+ * @return [Array] A list of event-probability pairs.
112
135
  */
113
- VALUE method_sooth_native_select(VALUE self, VALUE bigram, VALUE limit);
136
+ VALUE method_sooth_native_distribution(VALUE self, VALUE context);
114
137
 
115
138
  /*
116
- * Return a number indicating how uncertain the predictor is about which symbol
117
- * is likely to be observed after the given bigram. Note that nil will be
118
- * returned if the bigram has never been observed.
139
+ * Return a number indicating how uncertain the predictor is about which event
140
+ * is likely to be observed after the given context. Note that nil will be
141
+ * returned if the context has never been observed.
119
142
  *
120
- * @param [Array] bigram A pair of symbols.
121
- * @return [Float] The uncertainty, which is calculated to be the shannon entropy
122
- * of the probability distribution over the alphabet of symbols
123
- * in the context of the bigram.
143
+ * @param [Fixnum] context A number that provides a context for observations.
144
+ * @return [Float] The uncertainty, which is calculated to be the Shannon entropy
145
+ * of the #distribution over the context.
124
146
  */
125
- VALUE method_sooth_native_uncertainty(VALUE self, VALUE bigram);
147
+ VALUE method_sooth_native_uncertainty(VALUE self, VALUE context);
126
148
 
127
149
  /*
128
150
  * Return a number indicating the surprise received by the predictor when it
129
- * observed the given symbol after the given bigram. Note that nil will be
130
- * returned if the symbol has never been observed after the bigram.
151
+ * observed the given event within the given context. Note that nil will be
152
+ * returned if the event has never been observed within the context.
131
153
  *
132
- * @param [Array] bigram A pair of symbols.
133
- * @param [Fixnum] symbol The symbol that has been observed.
134
- * @return [Float] The surprise, which is calculated to be the shannon pointwise
135
- * mutual information of the symbol according to the probability
136
- * distribution over the alphabet of symbols in the context of
137
- * the bigram.
154
+ * @param [Fixnum] context A number that provides a context for observations.
155
+ * @param [Fixnum] event A number representing the observed event.
156
+ * @return [Float] The surprise, which is calculated to be the Shannon pointwise
157
+ * mutual information of the event according to the
158
+ * #distribution over the context.
138
159
  */
139
160
  VALUE method_sooth_native_surprise(VALUE self, VALUE bigram, VALUE limit);
140
161
 
@@ -152,9 +173,12 @@ void Init_sooth_native()
152
173
  rb_define_method(SoothNative, "load", method_sooth_native_load, 1);
153
174
  rb_define_method(SoothNative, "save", method_sooth_native_save, 1);
154
175
 
155
- rb_define_method(SoothNative, "observe", method_sooth_native_observe, 2);
176
+ rb_define_method(SoothNative, "size", method_sooth_native_size, 1);
156
177
  rb_define_method(SoothNative, "count", method_sooth_native_count, 1);
178
+ rb_define_method(SoothNative, "observe", method_sooth_native_observe, 2);
157
179
  rb_define_method(SoothNative, "select", method_sooth_native_select, 2);
180
+
181
+ rb_define_method(SoothNative, "distribution", method_sooth_native_distribution, 1);
158
182
  rb_define_method(SoothNative, "uncertainty", method_sooth_native_uncertainty, 1);
159
183
  rb_define_method(SoothNative, "surprise", method_sooth_native_surprise, 2);
160
184
  }
@@ -183,12 +207,12 @@ method_sooth_native_deallocate(void * predictor)
183
207
  //------------------------------------------------------------------------------
184
208
 
185
209
  VALUE
186
- method_sooth_native_initialize(VALUE self, VALUE error_symbol)
210
+ method_sooth_native_initialize(VALUE self, VALUE error_event)
187
211
  {
188
212
  SoothPredictor * predictor = NULL;
189
- Check_Type(error_symbol, T_FIXNUM);
213
+ Check_Type(error_event, T_FIXNUM);
190
214
  Data_Get_Struct(self, SoothPredictor, predictor);
191
- predictor->error_symbol = NUM2UINT(error_symbol);
215
+ predictor->error_event = NUM2UINT(error_event);
192
216
  return self;
193
217
  }
194
218
 
@@ -236,78 +260,90 @@ method_sooth_native_save(VALUE self, VALUE filename)
236
260
  //------------------------------------------------------------------------------
237
261
 
238
262
  VALUE
239
- method_sooth_native_observe(VALUE self, VALUE bigram, VALUE symbol)
263
+ method_sooth_native_size(VALUE self, VALUE context)
240
264
  {
241
265
  SoothPredictor * predictor = NULL;
242
- Check_Type(symbol, T_FIXNUM);
243
- Check_Type(bigram, T_ARRAY);
244
- if (RARRAY_LEN(bigram) != 2)
245
- {
246
- rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
247
- }
248
- Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
249
- Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
266
+ Check_Type(context, T_FIXNUM);
250
267
  Data_Get_Struct(self, SoothPredictor, predictor);
251
- uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
252
- uint32_t count = sooth_predictor_observe(predictor, c_bigram, NUM2UINT(symbol));
268
+ uint32_t size = sooth_predictor_size(predictor, NUM2UINT(context));
269
+ return UINT2NUM(size);
270
+ }
271
+
272
+ //------------------------------------------------------------------------------
273
+
274
+ VALUE
275
+ method_sooth_native_count(VALUE self, VALUE context)
276
+ {
277
+ SoothPredictor * predictor = NULL;
278
+ Check_Type(context, T_FIXNUM);
279
+ Data_Get_Struct(self, SoothPredictor, predictor);
280
+ uint32_t count = sooth_predictor_count(predictor, NUM2UINT(context));
253
281
  return UINT2NUM(count);
254
282
  }
255
283
 
256
284
  //------------------------------------------------------------------------------
257
285
 
258
286
  VALUE
259
- method_sooth_native_count(VALUE self, VALUE bigram)
287
+ method_sooth_native_observe(VALUE self, VALUE context, VALUE event)
260
288
  {
261
289
  SoothPredictor * predictor = NULL;
262
- Check_Type(bigram, T_ARRAY);
263
- if (RARRAY_LEN(bigram) != 2)
264
- {
265
- rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
266
- }
267
- Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
268
- Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
290
+ Check_Type(context, T_FIXNUM);
291
+ Check_Type(event, T_FIXNUM);
269
292
  Data_Get_Struct(self, SoothPredictor, predictor);
270
- uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
271
- uint32_t count = sooth_predictor_count(predictor, c_bigram);
293
+ uint32_t count = sooth_predictor_observe(predictor, NUM2UINT(context), NUM2UINT(event));
272
294
  return UINT2NUM(count);
273
295
  }
274
296
 
275
297
  //------------------------------------------------------------------------------
276
298
 
277
299
  VALUE
278
- method_sooth_native_select(VALUE self, VALUE bigram, VALUE limit)
300
+ method_sooth_native_select(VALUE self, VALUE context, VALUE limit)
279
301
  {
280
302
  SoothPredictor * predictor = NULL;
303
+ Check_Type(context, T_FIXNUM);
281
304
  Check_Type(limit, T_FIXNUM);
282
- Check_Type(bigram, T_ARRAY);
283
- if (RARRAY_LEN(bigram) != 2)
284
- {
285
- rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
286
- }
287
- Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
288
- Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
289
305
  Data_Get_Struct(self, SoothPredictor, predictor);
290
- uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
291
- uint32_t symbol = sooth_predictor_select(predictor, c_bigram, NUM2UINT(limit));
292
- return UINT2NUM(symbol);
306
+ uint32_t event = sooth_predictor_select(predictor, NUM2UINT(context), NUM2UINT(limit));
307
+ return UINT2NUM(event);
293
308
  }
294
309
 
295
310
  //------------------------------------------------------------------------------
296
311
 
297
312
  VALUE
298
- method_sooth_native_uncertainty(VALUE self, VALUE bigram)
313
+ method_sooth_native_distribution(VALUE self, VALUE context)
299
314
  {
300
315
  SoothPredictor * predictor = NULL;
301
- Check_Type(bigram, T_ARRAY);
302
- if (RARRAY_LEN(bigram) != 2)
316
+ Check_Type(context, T_FIXNUM);
317
+ Data_Get_Struct(self, SoothPredictor, predictor);
318
+ uint32_t c_context = NUM2UINT(context);
319
+ SoothStatistic * statistics = sooth_predictor_distribution(predictor, c_context);
320
+ if (statistics == NULL)
303
321
  {
304
- rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
322
+ return Qnil;
305
323
  }
306
- Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
307
- Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
324
+ uint32_t size = sooth_predictor_size(predictor, c_context);
325
+ double count = (double)sooth_predictor_count(predictor, c_context);
326
+ VALUE r_array = rb_ary_new2(size);
327
+ for (uint32_t i = 0; i < size; ++i)
328
+ {
329
+ SoothStatistic statistic = statistics[i];
330
+ VALUE pair = rb_ary_new2(2);
331
+ rb_ary_store(pair, 0, UINT2NUM(statistic.event));
332
+ rb_ary_store(pair, 1, DBL2NUM((double)statistic.count/count));
333
+ rb_ary_store(r_array, i, pair);
334
+ }
335
+ return r_array;
336
+ }
337
+
338
+ //------------------------------------------------------------------------------
339
+
340
+ VALUE
341
+ method_sooth_native_uncertainty(VALUE self, VALUE context)
342
+ {
343
+ SoothPredictor * predictor = NULL;
344
+ Check_Type(context, T_FIXNUM);
308
345
  Data_Get_Struct(self, SoothPredictor, predictor);
309
- uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
310
- double uncertainty = sooth_predictor_uncertainty(predictor, c_bigram);
346
+ double uncertainty = sooth_predictor_uncertainty(predictor, NUM2UINT(context));
311
347
  if (uncertainty < 0)
312
348
  {
313
349
  return Qnil;
@@ -318,20 +354,13 @@ method_sooth_native_uncertainty(VALUE self, VALUE bigram)
318
354
  //------------------------------------------------------------------------------
319
355
 
320
356
  VALUE
321
- method_sooth_native_surprise(VALUE self, VALUE bigram, VALUE symbol)
357
+ method_sooth_native_surprise(VALUE self, VALUE context, VALUE event)
322
358
  {
323
359
  SoothPredictor * predictor = NULL;
324
- Check_Type(symbol, T_FIXNUM);
325
- Check_Type(bigram, T_ARRAY);
326
- if (RARRAY_LEN(bigram) != 2)
327
- {
328
- rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
329
- }
330
- Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
331
- Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
360
+ Check_Type(context, T_FIXNUM);
361
+ Check_Type(event, T_FIXNUM);
332
362
  Data_Get_Struct(self, SoothPredictor, predictor);
333
- uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
334
- double surprise = sooth_predictor_surprise(predictor, c_bigram, NUM2UINT(symbol));
363
+ double surprise = sooth_predictor_surprise(predictor, NUM2UINT(context), NUM2UINT(event));
335
364
  if (surprise < 0)
336
365
  {
337
366
  return Qnil;