sooth 1.0.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -8
- data/Gemfile +1 -1
- data/Gemfile.lock +61 -34
- data/README.md +7 -4
- data/VERSION +1 -1
- data/ext/sooth_native/extconf.rb +1 -1
- data/ext/sooth_native/native.c +142 -113
- data/ext/sooth_native/sooth_context.h +1 -3
- data/ext/sooth_native/sooth_predictor.c +82 -62
- data/ext/sooth_native/sooth_predictor.h +8 -6
- data/ext/sooth_native/sooth_statistic.h +1 -3
- data/sooth.gemspec +7 -8
- data/spec/memory_spec.rb +24 -24
- data/spec/predictor_spec.rb +153 -115
- metadata +5 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9bac8ab9338973373ec7328dd58dcae462e86485
|
4
|
+
data.tar.gz: ea7f2ecb422e0301e36d5f6826aa8ccdbaefb619
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bf39ddf48fba680f4e6f84d42d5f9dc2f59999c83aabafc9f6ba8761f77d24835cfd5b6a6e9f349d61c0e44caec9b7ce2276ba181e4cb13c96980f177481e5ee
|
7
|
+
data.tar.gz: 4ab08bdf68a0f38b676c033f54949bf6a6854f2ade41a2f32b2a62b8a3cff49dcd58b3d89b3f6afa60cef5e6c3863fd22d4a6afd6223285844e72bef4b63d24c
|
data/CHANGELOG.md
CHANGED
@@ -1,35 +1,30 @@
|
|
1
1
|
# Change Log
|
2
2
|
|
3
|
-
## [v1.0.
|
3
|
+
## [v1.0.3](https://github.com/jasonhutchens/sooth/tree/v1.0.3) (2015-10-03)
|
4
|
+
[Full Changelog](https://github.com/jasonhutchens/sooth/compare/v1.0.2...v1.0.3)
|
4
5
|
|
6
|
+
## [v1.0.2](https://github.com/jasonhutchens/sooth/tree/v1.0.2) (2015-05-26)
|
5
7
|
[Full Changelog](https://github.com/jasonhutchens/sooth/compare/v1.0.1...v1.0.2)
|
6
8
|
|
7
9
|
## [v1.0.1](https://github.com/jasonhutchens/sooth/tree/v1.0.1) (2015-05-26)
|
8
|
-
|
9
10
|
[Full Changelog](https://github.com/jasonhutchens/sooth/compare/v1.0.0...v1.0.1)
|
10
11
|
|
11
12
|
## [v1.0.0](https://github.com/jasonhutchens/sooth/tree/v1.0.0) (2014-12-17)
|
12
|
-
|
13
13
|
[Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.5.0...v1.0.0)
|
14
14
|
|
15
15
|
## [v0.5.0](https://github.com/jasonhutchens/sooth/tree/v0.5.0) (2014-12-16)
|
16
|
-
|
17
16
|
[Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.4.0...v0.5.0)
|
18
17
|
|
19
18
|
## [v0.4.0](https://github.com/jasonhutchens/sooth/tree/v0.4.0) (2014-12-16)
|
20
|
-
|
21
19
|
[Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.3.0...v0.4.0)
|
22
20
|
|
23
21
|
## [v0.3.0](https://github.com/jasonhutchens/sooth/tree/v0.3.0) (2014-12-11)
|
24
|
-
|
25
22
|
[Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.2.0...v0.3.0)
|
26
23
|
|
27
24
|
## [v0.2.0](https://github.com/jasonhutchens/sooth/tree/v0.2.0) (2014-12-10)
|
28
|
-
|
29
25
|
[Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.1.0...v0.2.0)
|
30
26
|
|
31
27
|
## [v0.1.0](https://github.com/jasonhutchens/sooth/tree/v0.1.0) (2014-12-10)
|
32
28
|
|
33
29
|
|
34
|
-
|
35
30
|
\* *This Change Log was automatically generated by [github_changelog_generator](https://github.com/skywinder/Github-Changelog-Generator)*
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,31 +1,40 @@
|
|
1
1
|
GEM
|
2
2
|
remote: https://rubygems.org/
|
3
3
|
specs:
|
4
|
-
addressable (2.
|
4
|
+
addressable (2.4.0)
|
5
|
+
ast (2.2.0)
|
5
6
|
builder (3.2.2)
|
6
|
-
byebug (
|
7
|
+
byebug (8.2.2)
|
8
|
+
childprocess (0.5.9)
|
9
|
+
ffi (~> 1.0, >= 1.0.11)
|
7
10
|
colorize (0.7.7)
|
8
11
|
descendants_tracker (0.0.4)
|
9
12
|
thread_safe (~> 0.3, >= 0.3.1)
|
10
13
|
diff-lcs (1.2.5)
|
11
14
|
docile (1.1.5)
|
12
|
-
faraday (0.9.
|
15
|
+
faraday (0.9.2)
|
13
16
|
multipart-post (>= 1.2, < 3)
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
+
ffi (1.9.10)
|
18
|
+
git (1.3.0)
|
19
|
+
github_api (0.13.1)
|
20
|
+
addressable (~> 2.4.0)
|
17
21
|
descendants_tracker (~> 0.0.4)
|
18
22
|
faraday (~> 0.8, < 0.10)
|
19
23
|
hashie (>= 3.4)
|
20
24
|
multi_json (>= 1.7.5, < 2.0)
|
21
|
-
nokogiri (~> 1.6.6)
|
22
25
|
oauth2
|
23
|
-
github_changelog_generator (1.
|
26
|
+
github_changelog_generator (1.12.0)
|
27
|
+
bundler (>= 1.7)
|
24
28
|
colorize (~> 0.7)
|
25
29
|
github_api (~> 0.12)
|
26
|
-
|
27
|
-
|
28
|
-
|
30
|
+
overcommit (>= 0.31)
|
31
|
+
rake (>= 10.0)
|
32
|
+
rspec (>= 3.2)
|
33
|
+
rubocop (>= 0.31)
|
34
|
+
hashie (3.4.3)
|
35
|
+
highline (1.7.8)
|
36
|
+
iniparse (1.4.2)
|
37
|
+
jeweler (2.1.1)
|
29
38
|
builder
|
30
39
|
bundler (>= 1.0)
|
31
40
|
git (>= 1.2.5)
|
@@ -34,44 +43,62 @@ GEM
|
|
34
43
|
nokogiri (>= 1.5.10)
|
35
44
|
rake
|
36
45
|
rdoc
|
46
|
+
semver
|
37
47
|
json (1.8.3)
|
38
48
|
jwt (1.5.1)
|
39
|
-
|
49
|
+
mini_portile2 (2.0.0)
|
40
50
|
multi_json (1.11.2)
|
41
51
|
multi_xml (0.5.5)
|
42
52
|
multipart-post (2.0.0)
|
43
|
-
nokogiri (1.6.
|
44
|
-
|
45
|
-
oauth2 (1.
|
53
|
+
nokogiri (1.6.7.2)
|
54
|
+
mini_portile2 (~> 2.0.0.rc2)
|
55
|
+
oauth2 (1.1.0)
|
46
56
|
faraday (>= 0.8, < 0.10)
|
47
|
-
jwt (~> 1.0)
|
57
|
+
jwt (~> 1.0, < 1.5.2)
|
48
58
|
multi_json (~> 1.3)
|
49
59
|
multi_xml (~> 0.5)
|
50
|
-
rack (
|
60
|
+
rack (>= 1.2, < 3)
|
61
|
+
overcommit (0.33.0)
|
62
|
+
childprocess (~> 0.5.8)
|
63
|
+
iniparse (~> 1.4)
|
64
|
+
parser (2.3.0.7)
|
65
|
+
ast (~> 2.2)
|
66
|
+
powerpack (0.1.1)
|
51
67
|
rack (1.6.4)
|
52
|
-
|
53
|
-
rake
|
68
|
+
rainbow (2.1.0)
|
69
|
+
rake (11.1.2)
|
70
|
+
rake-compiler (0.9.7)
|
54
71
|
rake
|
55
|
-
rdoc (4.2.
|
56
|
-
|
57
|
-
|
58
|
-
rspec-
|
59
|
-
rspec-
|
60
|
-
|
61
|
-
|
62
|
-
|
72
|
+
rdoc (4.2.2)
|
73
|
+
json (~> 1.4)
|
74
|
+
rspec (3.4.0)
|
75
|
+
rspec-core (~> 3.4.0)
|
76
|
+
rspec-expectations (~> 3.4.0)
|
77
|
+
rspec-mocks (~> 3.4.0)
|
78
|
+
rspec-core (3.4.4)
|
79
|
+
rspec-support (~> 3.4.0)
|
80
|
+
rspec-expectations (3.4.0)
|
63
81
|
diff-lcs (>= 1.2.0, < 2.0)
|
64
|
-
rspec-support (~> 3.
|
65
|
-
rspec-mocks (3.
|
82
|
+
rspec-support (~> 3.4.0)
|
83
|
+
rspec-mocks (3.4.1)
|
66
84
|
diff-lcs (>= 1.2.0, < 2.0)
|
67
|
-
rspec-support (~> 3.
|
68
|
-
rspec-support (3.
|
69
|
-
|
85
|
+
rspec-support (~> 3.4.0)
|
86
|
+
rspec-support (3.4.1)
|
87
|
+
rubocop (0.39.0)
|
88
|
+
parser (>= 2.3.0.7, < 3.0)
|
89
|
+
powerpack (~> 0.1)
|
90
|
+
rainbow (>= 1.99.1, < 3.0)
|
91
|
+
ruby-progressbar (~> 1.7)
|
92
|
+
unicode-display_width (~> 1.0, >= 1.0.1)
|
93
|
+
ruby-progressbar (1.7.5)
|
94
|
+
semver (1.0.1)
|
95
|
+
simplecov (0.11.2)
|
70
96
|
docile (~> 1.1.0)
|
71
97
|
json (~> 1.8)
|
72
98
|
simplecov-html (~> 0.10.0)
|
73
99
|
simplecov-html (0.10.0)
|
74
100
|
thread_safe (0.3.5)
|
101
|
+
unicode-display_width (1.0.3)
|
75
102
|
yard (0.8.7.6)
|
76
103
|
|
77
104
|
PLATFORMS
|
@@ -79,7 +106,7 @@ PLATFORMS
|
|
79
106
|
|
80
107
|
DEPENDENCIES
|
81
108
|
bundler (~> 1.7)
|
82
|
-
byebug (~>
|
109
|
+
byebug (~> 8.2)
|
83
110
|
github_changelog_generator (~> 1.4)
|
84
111
|
jeweler (~> 2.0)
|
85
112
|
rake-compiler (~> 0.9)
|
@@ -89,4 +116,4 @@ DEPENDENCIES
|
|
89
116
|
yard (~> 0.8)
|
90
117
|
|
91
118
|
BUNDLED WITH
|
92
|
-
1.
|
119
|
+
1.11.2
|
data/README.md
CHANGED
@@ -5,15 +5,18 @@
|
|
5
5
|
Sooth
|
6
6
|
=====
|
7
7
|
|
8
|
-
Sooth is a
|
9
|
-
[MegaHAL](https://github.com/jasonhutchens/megahal), a learning chatterbot.
|
8
|
+
Sooth is a minimal stochastic predictive model. It is used by [MegaHAL](https://github.com/jasonhutchens/megahal), a learning chatterbot, and by [Typing Simulator](https://github.com/jasonhutchens/typing_simulator), a program that pretends to type text files like a human being.
|
10
9
|
|
11
10
|
Getting Started
|
12
11
|
---------------
|
13
12
|
|
14
|
-
|
13
|
+
The basic philosophy is to keep things simple. Sooth can make _predictions_ about _events_ that occur within some _context_.
|
14
|
+
|
15
|
+
A _context_ is just a number; it's up to you to make it significant. It might represent a word, or an array of words, or something else entirely. Sooth doesn't care. Likewise, an _event_ is a number, but doesn't need to represent the same kind of thing as a _context_. It's perfectly fine for a _context_ to be an array of words and an _event_ to be a single word, which is what MegaHAL does. Or a _context_ could be an array of characters and an _event_ could be a time in milliseconds, which is what TypingSimulator does. Or they could be something else entirely...
|
16
|
+
|
17
|
+
Look at the [API docs](http://www.rubydoc.info/gems/sooth/Sooth/Predictor) to get up to speed, and at the specs for example usage.
|
15
18
|
|
16
19
|
Copyright
|
17
20
|
---------
|
18
21
|
|
19
|
-
Copyright (c)
|
22
|
+
Copyright (c) 2016 Jason Hutchens. See UNLICENSE for further details.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
2.0.0
|
data/ext/sooth_native/extconf.rb
CHANGED
data/ext/sooth_native/native.c
CHANGED
@@ -14,11 +14,10 @@ void method_sooth_native_deallocate(void * predictor);
|
|
14
14
|
|
15
15
|
/* @!parse [ruby]
|
16
16
|
* module Sooth
|
17
|
-
* # A
|
18
|
-
* #
|
19
|
-
* # a trivial Markovian predictor.
|
17
|
+
* # A minimal stochastic predictive model, implemented in C for efficiency.
|
18
|
+
* # No assumptions about PRNG or real-world significance of context/event.
|
20
19
|
* class Predictor
|
21
|
-
* def initialize(
|
20
|
+
* def initialize(error_event)
|
22
21
|
* end
|
23
22
|
* def clear
|
24
23
|
* end
|
@@ -26,19 +25,25 @@ void method_sooth_native_deallocate(void * predictor);
|
|
26
25
|
* end
|
27
26
|
* def save(filename)
|
28
27
|
* end
|
29
|
-
* def
|
28
|
+
* def size(context)
|
30
29
|
* # (native code)
|
31
30
|
* end
|
32
|
-
* def count(
|
31
|
+
* def count(context)
|
33
32
|
* # (native code)
|
34
33
|
* end
|
35
|
-
* def
|
34
|
+
* def observe(context, event)
|
36
35
|
* # (native code)
|
37
36
|
* end
|
38
|
-
* def
|
37
|
+
* def select(context, limit)
|
39
38
|
* # (native code)
|
40
39
|
* end
|
41
|
-
* def
|
40
|
+
* def distribution(context)
|
41
|
+
* # (native code)
|
42
|
+
* end
|
43
|
+
* def uncertainty(context)
|
44
|
+
* # (native code)
|
45
|
+
* end
|
46
|
+
* def surprise(context, event)
|
42
47
|
* # (native code)
|
43
48
|
* end
|
44
49
|
* end
|
@@ -46,13 +51,13 @@ void method_sooth_native_deallocate(void * predictor);
|
|
46
51
|
*
|
47
52
|
* Returns a new Sooth::Predictor instance.
|
48
53
|
*
|
49
|
-
* @param [Fixnum]
|
50
|
-
*
|
54
|
+
* @param [Fixnum] error_event The event to be returned by #select when no
|
55
|
+
* observations have been made for the context.
|
51
56
|
*/
|
52
|
-
VALUE method_sooth_native_initialize(VALUE self, VALUE
|
57
|
+
VALUE method_sooth_native_initialize(VALUE self, VALUE error_event);
|
53
58
|
|
54
59
|
/*
|
55
|
-
* Clear the predictor to a
|
60
|
+
* Clear the predictor to a blank slate.
|
56
61
|
*/
|
57
62
|
VALUE method_sooth_native_clear(VALUE self);
|
58
63
|
|
@@ -65,76 +70,92 @@ VALUE method_sooth_native_clear(VALUE self);
|
|
65
70
|
VALUE method_sooth_native_load(VALUE self, VALUE filename);
|
66
71
|
|
67
72
|
/*
|
68
|
-
* Save the predictor to a file that can be loaded
|
73
|
+
* Save the predictor to a file that can be loaded later.
|
69
74
|
*
|
70
75
|
* @param [String] filename The path of the file to be saved.
|
71
76
|
*/
|
72
77
|
VALUE method_sooth_native_save(VALUE self, VALUE filename);
|
73
78
|
|
74
79
|
/*
|
75
|
-
*
|
80
|
+
* Return the number of different events that have been observed within the
|
81
|
+
* given context.
|
76
82
|
*
|
77
|
-
* @param [
|
78
|
-
*
|
79
|
-
*
|
80
|
-
*
|
81
|
-
* @return [Fixnum] A count of the number of times the symbol has been
|
82
|
-
* observed in the context of the bigram.
|
83
|
+
* @param [Fixnum] context A number that provides a context for observations.
|
84
|
+
* @return [Fixnum] The number of distinct events that have been observed
|
85
|
+
* within the given context. This is guaranteed to be equal
|
86
|
+
* to the length of the #distribution for the context.
|
83
87
|
*/
|
84
|
-
|
88
|
+
|
89
|
+
VALUE method_sooth_native_size(VALUE self, VALUE context);
|
85
90
|
|
86
91
|
/*
|
87
|
-
* Return
|
92
|
+
* Return the number of times the context has been observed.
|
88
93
|
*
|
89
|
-
* @param [
|
90
|
-
* @return [Fixnum] A count of the number of times the
|
94
|
+
* @param [Fixnum] context A number that provides a context for observations.
|
95
|
+
* @return [Fixnum] A count of the number of times the context has been
|
91
96
|
* observed. This is guaranteed to be equal to the sum
|
92
|
-
* of the counts of observations of all
|
93
|
-
*
|
97
|
+
* of the counts of observations of all events observed in
|
98
|
+
* the context.
|
99
|
+
*/
|
100
|
+
VALUE method_sooth_native_count(VALUE self, VALUE context);
|
101
|
+
|
102
|
+
/*
|
103
|
+
* Register an observation of the given event within the given context.
|
104
|
+
*
|
105
|
+
* @param [Fixnum] context A number that provides a context for the event,
|
106
|
+
* allowing the predictor to maintain observation
|
107
|
+
* statistics for different contexts.
|
108
|
+
* @param [Fixnum] event A number representing the observed event.
|
109
|
+
* @return [Fixnum] A count of the number of times the event has been
|
110
|
+
* observed in the given context.
|
111
|
+
*/
|
112
|
+
VALUE method_sooth_native_observe(VALUE self, VALUE context, VALUE event);
|
113
|
+
|
114
|
+
/*
|
115
|
+
* Return an event that may occur in the given context, based on the limit,
|
116
|
+
* which should be between 1 and #count. The event is selected by iterating
|
117
|
+
* through all observed events for the context, subtracting the observation
|
118
|
+
* count of each event from the limit until it is zero or less.
|
119
|
+
*
|
120
|
+
* @param [Fixnum] context A number that provides a context for observations.
|
121
|
+
* @param [Fixnum] limit The total number of event observations to be
|
122
|
+
* analysed before returning an event.
|
123
|
+
* @return [Fixnum] An event that has been previously observed in the given
|
124
|
+
* context, or the error_event if the #count of the context
|
125
|
+
* is zero, or if limit exceeds the #count of the context.
|
94
126
|
*/
|
95
|
-
VALUE
|
127
|
+
VALUE method_sooth_native_select(VALUE self, VALUE context, VALUE limit);
|
96
128
|
|
97
129
|
/*
|
98
|
-
* Return
|
99
|
-
*
|
100
|
-
* all of the symbols that have been observed in the context of the
|
101
|
-
* bigram, subtracting the observation count of each symbol from the
|
102
|
-
* supplied limit. For this reason, limit should be between 1 and the
|
103
|
-
* observation count of the bigram itself, as returned by #count.
|
130
|
+
* Return an Array that pairs each observed event within the context
|
131
|
+
* together with its probability.
|
104
132
|
*
|
105
|
-
* @param [
|
106
|
-
* @
|
107
|
-
* analysed before returning a symbol.
|
108
|
-
* @return [Fixnum] A symbol that has been observed previously in the
|
109
|
-
* context of the bigram, or the error_symbol if no
|
110
|
-
* such symbol exists, or if the supplied limit was
|
111
|
-
* too large.
|
133
|
+
* @param [Fixnum] context A number that provides a context for observations.
|
134
|
+
* @return [Array, nil] A list of event-probability pairs, or nil if the predictor has no statistics for the context.
|
112
135
|
*/
|
113
|
-
VALUE
|
136
|
+
VALUE method_sooth_native_distribution(VALUE self, VALUE context);
|
114
137
|
|
115
138
|
/*
|
116
|
-
* Return a number indicating how uncertain the predictor is about which
|
117
|
-
* is likely to be observed after the given
|
118
|
-
* returned if the
|
139
|
+
* Return a number indicating how uncertain the predictor is about which event
|
140
|
+
* is likely to be observed after the given context. Note that nil will be
|
141
|
+
* returned if the context has never been observed.
|
119
142
|
*
|
120
|
-
* @param [
|
121
|
-
* @return [Float] The uncertainty, which is calculated to be the
|
122
|
-
* of the
|
123
|
-
* in the context of the bigram.
|
143
|
+
* @param [Fixnum] context A number that provides a context for observations.
|
144
|
+
* @return [Float] The uncertainty, which is calculated to be the Shannon entropy
|
145
|
+
* of the #distribution over the context.
|
124
146
|
*/
|
125
|
-
VALUE method_sooth_native_uncertainty(VALUE self, VALUE
|
147
|
+
VALUE method_sooth_native_uncertainty(VALUE self, VALUE context);
|
126
148
|
|
127
149
|
/*
|
128
150
|
* Return a number indicating the surprise received by the predictor when it
|
129
|
-
* observed the given
|
130
|
-
* returned if the
|
151
|
+
* observed the given event within the given context. Note that nil will be
|
152
|
+
* returned if the event has never been observed within the context.
|
131
153
|
*
|
132
|
-
* @param [
|
133
|
-
* @param [Fixnum]
|
134
|
-
* @return [Float] The surprise, which is calculated to be the
|
135
|
-
* mutual information of the
|
136
|
-
* distribution over the
|
137
|
-
* the bigram.
|
154
|
+
* @param [Fixnum] context A number that provides a context for observations.
|
155
|
+
* @param [Fixnum] event A number representing the observed event.
|
156
|
+
* @return [Float] The surprise, which is calculated to be the Shannon pointwise
|
157
|
+
* mutual information of the event according to the
|
158
|
+
* #distribution over the context.
|
138
159
|
*/
|
139
160
|
VALUE method_sooth_native_surprise(VALUE self, VALUE bigram, VALUE limit);
|
140
161
|
|
@@ -152,9 +173,12 @@ void Init_sooth_native()
|
|
152
173
|
rb_define_method(SoothNative, "load", method_sooth_native_load, 1);
|
153
174
|
rb_define_method(SoothNative, "save", method_sooth_native_save, 1);
|
154
175
|
|
155
|
-
rb_define_method(SoothNative, "
|
176
|
+
rb_define_method(SoothNative, "size", method_sooth_native_size, 1);
|
156
177
|
rb_define_method(SoothNative, "count", method_sooth_native_count, 1);
|
178
|
+
rb_define_method(SoothNative, "observe", method_sooth_native_observe, 2);
|
157
179
|
rb_define_method(SoothNative, "select", method_sooth_native_select, 2);
|
180
|
+
|
181
|
+
rb_define_method(SoothNative, "distribution", method_sooth_native_distribution, 1);
|
158
182
|
rb_define_method(SoothNative, "uncertainty", method_sooth_native_uncertainty, 1);
|
159
183
|
rb_define_method(SoothNative, "surprise", method_sooth_native_surprise, 2);
|
160
184
|
}
|
@@ -183,12 +207,12 @@ method_sooth_native_deallocate(void * predictor)
|
|
183
207
|
//------------------------------------------------------------------------------
|
184
208
|
|
185
209
|
VALUE
|
186
|
-
method_sooth_native_initialize(VALUE self, VALUE
|
210
|
+
method_sooth_native_initialize(VALUE self, VALUE error_event)
|
187
211
|
{
|
188
212
|
SoothPredictor * predictor = NULL;
|
189
|
-
Check_Type(
|
213
|
+
Check_Type(error_event, T_FIXNUM);
|
190
214
|
Data_Get_Struct(self, SoothPredictor, predictor);
|
191
|
-
predictor->
|
215
|
+
predictor->error_event = NUM2UINT(error_event);
|
192
216
|
return self;
|
193
217
|
}
|
194
218
|
|
@@ -236,78 +260,90 @@ method_sooth_native_save(VALUE self, VALUE filename)
|
|
236
260
|
//------------------------------------------------------------------------------
|
237
261
|
|
238
262
|
VALUE
|
239
|
-
|
263
|
+
method_sooth_native_size(VALUE self, VALUE context)
|
240
264
|
{
|
241
265
|
SoothPredictor * predictor = NULL;
|
242
|
-
Check_Type(
|
243
|
-
Check_Type(bigram, T_ARRAY);
|
244
|
-
if (RARRAY_LEN(bigram) != 2)
|
245
|
-
{
|
246
|
-
rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
|
247
|
-
}
|
248
|
-
Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
|
249
|
-
Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
|
266
|
+
Check_Type(context, T_FIXNUM);
|
250
267
|
Data_Get_Struct(self, SoothPredictor, predictor);
|
251
|
-
uint32_t
|
252
|
-
|
268
|
+
uint32_t size = sooth_predictor_size(predictor, NUM2UINT(context));
|
269
|
+
return UINT2NUM(size);
|
270
|
+
}
|
271
|
+
|
272
|
+
//------------------------------------------------------------------------------
|
273
|
+
|
274
|
+
VALUE
|
275
|
+
method_sooth_native_count(VALUE self, VALUE context)
|
276
|
+
{
|
277
|
+
SoothPredictor * predictor = NULL;
|
278
|
+
Check_Type(context, T_FIXNUM);
|
279
|
+
Data_Get_Struct(self, SoothPredictor, predictor);
|
280
|
+
uint32_t count = sooth_predictor_count(predictor, NUM2UINT(context));
|
253
281
|
return UINT2NUM(count);
|
254
282
|
}
|
255
283
|
|
256
284
|
//------------------------------------------------------------------------------
|
257
285
|
|
258
286
|
VALUE
|
259
|
-
|
287
|
+
method_sooth_native_observe(VALUE self, VALUE context, VALUE event)
|
260
288
|
{
|
261
289
|
SoothPredictor * predictor = NULL;
|
262
|
-
Check_Type(
|
263
|
-
|
264
|
-
{
|
265
|
-
rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
|
266
|
-
}
|
267
|
-
Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
|
268
|
-
Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
|
290
|
+
Check_Type(context, T_FIXNUM);
|
291
|
+
Check_Type(event, T_FIXNUM);
|
269
292
|
Data_Get_Struct(self, SoothPredictor, predictor);
|
270
|
-
uint32_t
|
271
|
-
uint32_t count = sooth_predictor_count(predictor, c_bigram);
|
293
|
+
uint32_t count = sooth_predictor_observe(predictor, NUM2UINT(context), NUM2UINT(event));
|
272
294
|
return UINT2NUM(count);
|
273
295
|
}
|
274
296
|
|
275
297
|
//------------------------------------------------------------------------------
|
276
298
|
|
277
299
|
VALUE
|
278
|
-
method_sooth_native_select(VALUE self, VALUE
|
300
|
+
method_sooth_native_select(VALUE self, VALUE context, VALUE limit)
|
279
301
|
{
|
280
302
|
SoothPredictor * predictor = NULL;
|
303
|
+
Check_Type(context, T_FIXNUM);
|
281
304
|
Check_Type(limit, T_FIXNUM);
|
282
|
-
Check_Type(bigram, T_ARRAY);
|
283
|
-
if (RARRAY_LEN(bigram) != 2)
|
284
|
-
{
|
285
|
-
rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
|
286
|
-
}
|
287
|
-
Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
|
288
|
-
Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
|
289
305
|
Data_Get_Struct(self, SoothPredictor, predictor);
|
290
|
-
uint32_t
|
291
|
-
|
292
|
-
return UINT2NUM(symbol);
|
306
|
+
uint32_t event = sooth_predictor_select(predictor, NUM2UINT(context), NUM2UINT(limit));
|
307
|
+
return UINT2NUM(event);
|
293
308
|
}
|
294
309
|
|
295
310
|
//------------------------------------------------------------------------------
|
296
311
|
|
297
312
|
VALUE
|
298
|
-
|
313
|
+
method_sooth_native_distribution(VALUE self, VALUE context)
|
299
314
|
{
|
300
315
|
SoothPredictor * predictor = NULL;
|
301
|
-
Check_Type(
|
302
|
-
|
316
|
+
Check_Type(context, T_FIXNUM);
|
317
|
+
Data_Get_Struct(self, SoothPredictor, predictor);
|
318
|
+
uint32_t c_context = NUM2UINT(context);
|
319
|
+
SoothStatistic * statistics = sooth_predictor_distribution(predictor, c_context);
|
320
|
+
if (statistics == NULL)
|
303
321
|
{
|
304
|
-
|
322
|
+
return Qnil;
|
305
323
|
}
|
306
|
-
|
307
|
-
|
324
|
+
uint32_t size = sooth_predictor_size(predictor, c_context);
|
325
|
+
double count = (double)sooth_predictor_count(predictor, c_context);
|
326
|
+
VALUE r_array = rb_ary_new2(size);
|
327
|
+
for (uint32_t i = 0; i < size; ++i)
|
328
|
+
{
|
329
|
+
SoothStatistic statistic = statistics[i];
|
330
|
+
VALUE pair = rb_ary_new2(2);
|
331
|
+
rb_ary_store(pair, 0, UINT2NUM(statistic.event));
|
332
|
+
rb_ary_store(pair, 1, DBL2NUM((double)statistic.count/count));
|
333
|
+
rb_ary_store(r_array, i, pair);
|
334
|
+
}
|
335
|
+
return r_array;
|
336
|
+
}
|
337
|
+
|
338
|
+
//------------------------------------------------------------------------------
|
339
|
+
|
340
|
+
VALUE
|
341
|
+
method_sooth_native_uncertainty(VALUE self, VALUE context)
|
342
|
+
{
|
343
|
+
SoothPredictor * predictor = NULL;
|
344
|
+
Check_Type(context, T_FIXNUM);
|
308
345
|
Data_Get_Struct(self, SoothPredictor, predictor);
|
309
|
-
|
310
|
-
double uncertainty = sooth_predictor_uncertainty(predictor, c_bigram);
|
346
|
+
double uncertainty = sooth_predictor_uncertainty(predictor, NUM2UINT(context));
|
311
347
|
if (uncertainty < 0)
|
312
348
|
{
|
313
349
|
return Qnil;
|
@@ -318,20 +354,13 @@ method_sooth_native_uncertainty(VALUE self, VALUE bigram)
|
|
318
354
|
//------------------------------------------------------------------------------
|
319
355
|
|
320
356
|
VALUE
|
321
|
-
method_sooth_native_surprise(VALUE self, VALUE
|
357
|
+
method_sooth_native_surprise(VALUE self, VALUE context, VALUE event)
|
322
358
|
{
|
323
359
|
SoothPredictor * predictor = NULL;
|
324
|
-
Check_Type(
|
325
|
-
Check_Type(
|
326
|
-
if (RARRAY_LEN(bigram) != 2)
|
327
|
-
{
|
328
|
-
rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
|
329
|
-
}
|
330
|
-
Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
|
331
|
-
Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
|
360
|
+
Check_Type(context, T_FIXNUM);
|
361
|
+
Check_Type(event, T_FIXNUM);
|
332
362
|
Data_Get_Struct(self, SoothPredictor, predictor);
|
333
|
-
|
334
|
-
double surprise = sooth_predictor_surprise(predictor, c_bigram, NUM2UINT(symbol));
|
363
|
+
double surprise = sooth_predictor_surprise(predictor, NUM2UINT(context), NUM2UINT(event));
|
335
364
|
if (surprise < 0)
|
336
365
|
{
|
337
366
|
return Qnil;
|