sooth 1.0.3 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -8
- data/Gemfile +1 -1
- data/Gemfile.lock +61 -34
- data/README.md +7 -4
- data/VERSION +1 -1
- data/ext/sooth_native/extconf.rb +1 -1
- data/ext/sooth_native/native.c +142 -113
- data/ext/sooth_native/sooth_context.h +1 -3
- data/ext/sooth_native/sooth_predictor.c +82 -62
- data/ext/sooth_native/sooth_predictor.h +8 -6
- data/ext/sooth_native/sooth_statistic.h +1 -3
- data/sooth.gemspec +7 -8
- data/spec/memory_spec.rb +24 -24
- data/spec/predictor_spec.rb +153 -115
- metadata +5 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9bac8ab9338973373ec7328dd58dcae462e86485
|
4
|
+
data.tar.gz: ea7f2ecb422e0301e36d5f6826aa8ccdbaefb619
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bf39ddf48fba680f4e6f84d42d5f9dc2f59999c83aabafc9f6ba8761f77d24835cfd5b6a6e9f349d61c0e44caec9b7ce2276ba181e4cb13c96980f177481e5ee
|
7
|
+
data.tar.gz: 4ab08bdf68a0f38b676c033f54949bf6a6854f2ade41a2f32b2a62b8a3cff49dcd58b3d89b3f6afa60cef5e6c3863fd22d4a6afd6223285844e72bef4b63d24c
|
data/CHANGELOG.md
CHANGED
@@ -1,35 +1,30 @@
|
|
1
1
|
# Change Log
|
2
2
|
|
3
|
-
## [v1.0.
|
3
|
+
## [v1.0.3](https://github.com/jasonhutchens/sooth/tree/v1.0.3) (2015-10-03)
|
4
|
+
[Full Changelog](https://github.com/jasonhutchens/sooth/compare/v1.0.2...v1.0.3)
|
4
5
|
|
6
|
+
## [v1.0.2](https://github.com/jasonhutchens/sooth/tree/v1.0.2) (2015-05-26)
|
5
7
|
[Full Changelog](https://github.com/jasonhutchens/sooth/compare/v1.0.1...v1.0.2)
|
6
8
|
|
7
9
|
## [v1.0.1](https://github.com/jasonhutchens/sooth/tree/v1.0.1) (2015-05-26)
|
8
|
-
|
9
10
|
[Full Changelog](https://github.com/jasonhutchens/sooth/compare/v1.0.0...v1.0.1)
|
10
11
|
|
11
12
|
## [v1.0.0](https://github.com/jasonhutchens/sooth/tree/v1.0.0) (2014-12-17)
|
12
|
-
|
13
13
|
[Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.5.0...v1.0.0)
|
14
14
|
|
15
15
|
## [v0.5.0](https://github.com/jasonhutchens/sooth/tree/v0.5.0) (2014-12-16)
|
16
|
-
|
17
16
|
[Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.4.0...v0.5.0)
|
18
17
|
|
19
18
|
## [v0.4.0](https://github.com/jasonhutchens/sooth/tree/v0.4.0) (2014-12-16)
|
20
|
-
|
21
19
|
[Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.3.0...v0.4.0)
|
22
20
|
|
23
21
|
## [v0.3.0](https://github.com/jasonhutchens/sooth/tree/v0.3.0) (2014-12-11)
|
24
|
-
|
25
22
|
[Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.2.0...v0.3.0)
|
26
23
|
|
27
24
|
## [v0.2.0](https://github.com/jasonhutchens/sooth/tree/v0.2.0) (2014-12-10)
|
28
|
-
|
29
25
|
[Full Changelog](https://github.com/jasonhutchens/sooth/compare/v0.1.0...v0.2.0)
|
30
26
|
|
31
27
|
## [v0.1.0](https://github.com/jasonhutchens/sooth/tree/v0.1.0) (2014-12-10)
|
32
28
|
|
33
29
|
|
34
|
-
|
35
30
|
\* *This Change Log was automatically generated by [github_changelog_generator](https://github.com/skywinder/Github-Changelog-Generator)*
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,31 +1,40 @@
|
|
1
1
|
GEM
|
2
2
|
remote: https://rubygems.org/
|
3
3
|
specs:
|
4
|
-
addressable (2.
|
4
|
+
addressable (2.4.0)
|
5
|
+
ast (2.2.0)
|
5
6
|
builder (3.2.2)
|
6
|
-
byebug (
|
7
|
+
byebug (8.2.2)
|
8
|
+
childprocess (0.5.9)
|
9
|
+
ffi (~> 1.0, >= 1.0.11)
|
7
10
|
colorize (0.7.7)
|
8
11
|
descendants_tracker (0.0.4)
|
9
12
|
thread_safe (~> 0.3, >= 0.3.1)
|
10
13
|
diff-lcs (1.2.5)
|
11
14
|
docile (1.1.5)
|
12
|
-
faraday (0.9.
|
15
|
+
faraday (0.9.2)
|
13
16
|
multipart-post (>= 1.2, < 3)
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
+
ffi (1.9.10)
|
18
|
+
git (1.3.0)
|
19
|
+
github_api (0.13.1)
|
20
|
+
addressable (~> 2.4.0)
|
17
21
|
descendants_tracker (~> 0.0.4)
|
18
22
|
faraday (~> 0.8, < 0.10)
|
19
23
|
hashie (>= 3.4)
|
20
24
|
multi_json (>= 1.7.5, < 2.0)
|
21
|
-
nokogiri (~> 1.6.6)
|
22
25
|
oauth2
|
23
|
-
github_changelog_generator (1.
|
26
|
+
github_changelog_generator (1.12.0)
|
27
|
+
bundler (>= 1.7)
|
24
28
|
colorize (~> 0.7)
|
25
29
|
github_api (~> 0.12)
|
26
|
-
|
27
|
-
|
28
|
-
|
30
|
+
overcommit (>= 0.31)
|
31
|
+
rake (>= 10.0)
|
32
|
+
rspec (>= 3.2)
|
33
|
+
rubocop (>= 0.31)
|
34
|
+
hashie (3.4.3)
|
35
|
+
highline (1.7.8)
|
36
|
+
iniparse (1.4.2)
|
37
|
+
jeweler (2.1.1)
|
29
38
|
builder
|
30
39
|
bundler (>= 1.0)
|
31
40
|
git (>= 1.2.5)
|
@@ -34,44 +43,62 @@ GEM
|
|
34
43
|
nokogiri (>= 1.5.10)
|
35
44
|
rake
|
36
45
|
rdoc
|
46
|
+
semver
|
37
47
|
json (1.8.3)
|
38
48
|
jwt (1.5.1)
|
39
|
-
|
49
|
+
mini_portile2 (2.0.0)
|
40
50
|
multi_json (1.11.2)
|
41
51
|
multi_xml (0.5.5)
|
42
52
|
multipart-post (2.0.0)
|
43
|
-
nokogiri (1.6.
|
44
|
-
|
45
|
-
oauth2 (1.
|
53
|
+
nokogiri (1.6.7.2)
|
54
|
+
mini_portile2 (~> 2.0.0.rc2)
|
55
|
+
oauth2 (1.1.0)
|
46
56
|
faraday (>= 0.8, < 0.10)
|
47
|
-
jwt (~> 1.0)
|
57
|
+
jwt (~> 1.0, < 1.5.2)
|
48
58
|
multi_json (~> 1.3)
|
49
59
|
multi_xml (~> 0.5)
|
50
|
-
rack (
|
60
|
+
rack (>= 1.2, < 3)
|
61
|
+
overcommit (0.33.0)
|
62
|
+
childprocess (~> 0.5.8)
|
63
|
+
iniparse (~> 1.4)
|
64
|
+
parser (2.3.0.7)
|
65
|
+
ast (~> 2.2)
|
66
|
+
powerpack (0.1.1)
|
51
67
|
rack (1.6.4)
|
52
|
-
|
53
|
-
rake
|
68
|
+
rainbow (2.1.0)
|
69
|
+
rake (11.1.2)
|
70
|
+
rake-compiler (0.9.7)
|
54
71
|
rake
|
55
|
-
rdoc (4.2.
|
56
|
-
|
57
|
-
|
58
|
-
rspec-
|
59
|
-
rspec-
|
60
|
-
|
61
|
-
|
62
|
-
|
72
|
+
rdoc (4.2.2)
|
73
|
+
json (~> 1.4)
|
74
|
+
rspec (3.4.0)
|
75
|
+
rspec-core (~> 3.4.0)
|
76
|
+
rspec-expectations (~> 3.4.0)
|
77
|
+
rspec-mocks (~> 3.4.0)
|
78
|
+
rspec-core (3.4.4)
|
79
|
+
rspec-support (~> 3.4.0)
|
80
|
+
rspec-expectations (3.4.0)
|
63
81
|
diff-lcs (>= 1.2.0, < 2.0)
|
64
|
-
rspec-support (~> 3.
|
65
|
-
rspec-mocks (3.
|
82
|
+
rspec-support (~> 3.4.0)
|
83
|
+
rspec-mocks (3.4.1)
|
66
84
|
diff-lcs (>= 1.2.0, < 2.0)
|
67
|
-
rspec-support (~> 3.
|
68
|
-
rspec-support (3.
|
69
|
-
|
85
|
+
rspec-support (~> 3.4.0)
|
86
|
+
rspec-support (3.4.1)
|
87
|
+
rubocop (0.39.0)
|
88
|
+
parser (>= 2.3.0.7, < 3.0)
|
89
|
+
powerpack (~> 0.1)
|
90
|
+
rainbow (>= 1.99.1, < 3.0)
|
91
|
+
ruby-progressbar (~> 1.7)
|
92
|
+
unicode-display_width (~> 1.0, >= 1.0.1)
|
93
|
+
ruby-progressbar (1.7.5)
|
94
|
+
semver (1.0.1)
|
95
|
+
simplecov (0.11.2)
|
70
96
|
docile (~> 1.1.0)
|
71
97
|
json (~> 1.8)
|
72
98
|
simplecov-html (~> 0.10.0)
|
73
99
|
simplecov-html (0.10.0)
|
74
100
|
thread_safe (0.3.5)
|
101
|
+
unicode-display_width (1.0.3)
|
75
102
|
yard (0.8.7.6)
|
76
103
|
|
77
104
|
PLATFORMS
|
@@ -79,7 +106,7 @@ PLATFORMS
|
|
79
106
|
|
80
107
|
DEPENDENCIES
|
81
108
|
bundler (~> 1.7)
|
82
|
-
byebug (~>
|
109
|
+
byebug (~> 8.2)
|
83
110
|
github_changelog_generator (~> 1.4)
|
84
111
|
jeweler (~> 2.0)
|
85
112
|
rake-compiler (~> 0.9)
|
@@ -89,4 +116,4 @@ DEPENDENCIES
|
|
89
116
|
yard (~> 0.8)
|
90
117
|
|
91
118
|
BUNDLED WITH
|
92
|
-
1.
|
119
|
+
1.11.2
|
data/README.md
CHANGED
@@ -5,15 +5,18 @@
|
|
5
5
|
Sooth
|
6
6
|
=====
|
7
7
|
|
8
|
-
Sooth is a
|
9
|
-
[MegaHAL](https://github.com/jasonhutchens/megahal), a learning chatterbot.
|
8
|
+
Sooth is a minimal stochastic predictive model. It is used by [MegaHAL](https://github.com/jasonhutchens/megahal), a learning chatterbot, and by [Typing Simulator](https://github.com/jasonhutchens/typing_simulator), a program that pretends to type text files like a human being.
|
10
9
|
|
11
10
|
Getting Started
|
12
11
|
---------------
|
13
12
|
|
14
|
-
|
13
|
+
The basic philosophy is to keep things simple. Sooth can make _predictions_ about _events_ that occur within some _context_.
|
14
|
+
|
15
|
+
A _context_ is just a number; it's up to you to make it significant. It might represent a word, or an array of words, or something else entirely. Sooth doesn't care. Likewise, an _event_ is a number, but doesn't need to represent the same kind of thing as a _context_. It's perfectly fine for a _context_ to be an array of words and an _event_ to be a single word, which is what MegaHAL does. Or a _context_ could be an array of characters and an _event_ could be a time in milliseconds, which is what TypingSimulator does. Or they could be something else entirely...
|
16
|
+
|
17
|
+
Look at the [API docs](http://www.rubydoc.info/gems/sooth/Sooth/Predictor) to get up to speed, and at the specs for example usage.
|
15
18
|
|
16
19
|
Copyright
|
17
20
|
---------
|
18
21
|
|
19
|
-
Copyright (c)
|
22
|
+
Copyright (c) 2016 Jason Hutchens. See UNLICENSE for further details.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
2.0.0
|
data/ext/sooth_native/extconf.rb
CHANGED
data/ext/sooth_native/native.c
CHANGED
@@ -14,11 +14,10 @@ void method_sooth_native_deallocate(void * predictor);
|
|
14
14
|
|
15
15
|
/* @!parse [ruby]
|
16
16
|
* module Sooth
|
17
|
-
* # A
|
18
|
-
* #
|
19
|
-
* # a trivial Markovian predictor.
|
17
|
+
* # A minimal stochastic predictive model, implemented in C for efficiency.
|
18
|
+
* # No assumptions about PRNG or real-world significance of context/event.
|
20
19
|
* class Predictor
|
21
|
-
* def initialize(
|
20
|
+
* def initialize(error_event)
|
22
21
|
* end
|
23
22
|
* def clear
|
24
23
|
* end
|
@@ -26,19 +25,25 @@ void method_sooth_native_deallocate(void * predictor);
|
|
26
25
|
* end
|
27
26
|
* def save(filename)
|
28
27
|
* end
|
29
|
-
* def
|
28
|
+
* def size(context)
|
30
29
|
* # (native code)
|
31
30
|
* end
|
32
|
-
* def count(
|
31
|
+
* def count(context)
|
33
32
|
* # (native code)
|
34
33
|
* end
|
35
|
-
* def
|
34
|
+
* def observe(context, event)
|
36
35
|
* # (native code)
|
37
36
|
* end
|
38
|
-
* def
|
37
|
+
* def select(context, limit)
|
39
38
|
* # (native code)
|
40
39
|
* end
|
41
|
-
* def
|
40
|
+
* def distribution(context)
|
41
|
+
* # (native code)
|
42
|
+
* end
|
43
|
+
* def uncertainty(context)
|
44
|
+
* # (native code)
|
45
|
+
* end
|
46
|
+
* def surprise(context, event)
|
42
47
|
* # (native code)
|
43
48
|
* end
|
44
49
|
* end
|
@@ -46,13 +51,13 @@ void method_sooth_native_deallocate(void * predictor);
|
|
46
51
|
*
|
47
52
|
* Returns a new Sooth::Predictor instance.
|
48
53
|
*
|
49
|
-
* @param [Fixnum]
|
50
|
-
*
|
54
|
+
* @param [Fixnum] error_event The event to be returned by #select when no
|
55
|
+
* observations have been made for the context.
|
51
56
|
*/
|
52
|
-
VALUE method_sooth_native_initialize(VALUE self, VALUE
|
57
|
+
VALUE method_sooth_native_initialize(VALUE self, VALUE error_event);
|
53
58
|
|
54
59
|
/*
|
55
|
-
* Clear the predictor to a
|
60
|
+
* Clear the predictor to a blank slate.
|
56
61
|
*/
|
57
62
|
VALUE method_sooth_native_clear(VALUE self);
|
58
63
|
|
@@ -65,76 +70,92 @@ VALUE method_sooth_native_clear(VALUE self);
|
|
65
70
|
VALUE method_sooth_native_load(VALUE self, VALUE filename);
|
66
71
|
|
67
72
|
/*
|
68
|
-
* Save the predictor to a file that can be loaded
|
73
|
+
* Save the predictor to a file that can be loaded later.
|
69
74
|
*
|
70
75
|
* @param [String] filename The path of the file to be saved.
|
71
76
|
*/
|
72
77
|
VALUE method_sooth_native_save(VALUE self, VALUE filename);
|
73
78
|
|
74
79
|
/*
|
75
|
-
*
|
80
|
+
* Return the number of different events that have been observed within the
|
81
|
+
* given context.
|
76
82
|
*
|
77
|
-
* @param [
|
78
|
-
*
|
79
|
-
*
|
80
|
-
*
|
81
|
-
* @return [Fixnum] A count of the number of times the symbol has been
|
82
|
-
* observed in the context of the bigram.
|
83
|
+
* @param [Fixnum] context A number that provides a context for observations.
|
84
|
+
* @return [Fixnum] The number of distinct events that have been observed
|
85
|
+
* within the given context. This is guaranteed to be equal
|
86
|
+
* to the length of the #distribution for the context.
|
83
87
|
*/
|
84
|
-
|
88
|
+
|
89
|
+
VALUE method_sooth_native_size(VALUE self, VALUE context);
|
85
90
|
|
86
91
|
/*
|
87
|
-
* Return
|
92
|
+
* Return the number of times the context has been observed.
|
88
93
|
*
|
89
|
-
* @param [
|
90
|
-
* @return [Fixnum] A count of the number of times the
|
94
|
+
* @param [Fixnum] context A number that provides a context for observations.
|
95
|
+
* @return [Fixnum] A count of the number of times the context has been
|
91
96
|
* observed. This is guaranteed to be equal to the sum
|
92
|
-
* of the counts of observations of all
|
93
|
-
*
|
97
|
+
* of the counts of observations of all events observed in
|
98
|
+
* the context.
|
99
|
+
*/
|
100
|
+
VALUE method_sooth_native_count(VALUE self, VALUE context);
|
101
|
+
|
102
|
+
/*
|
103
|
+
* Register an observation of the given event within the given context.
|
104
|
+
*
|
105
|
+
* @param [Fixnum] context A number that provides a context for the event,
|
106
|
+
* allowing the predictor to maintain observation
|
107
|
+
* statistics for different contexts.
|
108
|
+
* @param [Fixnum] event A number representing the observed event.
|
109
|
+
* @return [Fixnum] A count of the number of times the event has been
|
110
|
+
* observed in the given context.
|
111
|
+
*/
|
112
|
+
VALUE method_sooth_native_observe(VALUE self, VALUE context, VALUE event);
|
113
|
+
|
114
|
+
/*
|
115
|
+
* Return an event that may occur in the given context, based on the limit,
|
116
|
+
* which should be between 1 and #count. The event is selected by iterating
|
117
|
+
* through all observed events for the context, subtracting the observation
|
118
|
+
* count of each event from the limit until it is zero or less.
|
119
|
+
*
|
120
|
+
* @param [Fixnum] context A number that provides a context for observations.
|
121
|
+
* @param [Fixnum] limit The total number of event observations to be
|
122
|
+
* analysed before returning an event.
|
123
|
+
* @return [Fixnum] An event that has been previously observed in the given
|
124
|
+
* context, or the error_event if the #count of the context
|
125
|
+
* is zero, or if limit exceeds the #count of the context
|
94
126
|
*/
|
95
|
-
VALUE
|
127
|
+
VALUE method_sooth_native_select(VALUE self, VALUE context, VALUE limit);
|
96
128
|
|
97
129
|
/*
|
98
|
-
* Return
|
99
|
-
*
|
100
|
-
* all of the symbols that have been observed in the context of the
|
101
|
-
* bigram, subtracting the observation count of each symbol from the
|
102
|
-
* supplied limit. For this reason, limit should be between 1 and the
|
103
|
-
* observation count of the bigram itself, as returned by #count.
|
130
|
+
* Return an Array pairing each observed event within the context
|
131
|
+
* together with its probability.
|
104
132
|
*
|
105
|
-
* @param [
|
106
|
-
* @
|
107
|
-
* analysed before returning a symbol.
|
108
|
-
* @return [Fixnum] A symbol that has been observed previously in the
|
109
|
-
* context of the bigram, or the error_symbol if no
|
110
|
-
* such symbol exists, or if the supplied limit was
|
111
|
-
* too large.
|
133
|
+
* @param [Fixnum] context A number that provides a context for observations.
|
134
|
+
* @return [Array] A list of event-probability pairs.
|
112
135
|
*/
|
113
|
-
VALUE
|
136
|
+
VALUE method_sooth_native_distribution(VALUE self, VALUE context);
|
114
137
|
|
115
138
|
/*
|
116
|
-
* Return a number indicating how uncertain the predictor is about which
|
117
|
-
* is likely to be observed after the given
|
118
|
-
* returned if the
|
139
|
+
* Return a number indicating how uncertain the predictor is about which event
|
140
|
+
* is likely to be observed after the given context. Note that nil will be
|
141
|
+
* returned if the context has never been observed.
|
119
142
|
*
|
120
|
-
* @param [
|
121
|
-
* @return [Float] The uncertainty, which is calculated to be the
|
122
|
-
* of the
|
123
|
-
* in the context of the bigram.
|
143
|
+
* @param [Fixnum] context A number that provides a context for observations.
|
144
|
+
* @return [Float] The uncertainty, which is calculated to be the Shannon entropy
|
145
|
+
* of the #distribution over the context.
|
124
146
|
*/
|
125
|
-
VALUE method_sooth_native_uncertainty(VALUE self, VALUE
|
147
|
+
VALUE method_sooth_native_uncertainty(VALUE self, VALUE context);
|
126
148
|
|
127
149
|
/*
|
128
150
|
* Return a number indicating the surprise received by the predictor when it
|
129
|
-
* observed the given
|
130
|
-
* returned if the
|
151
|
+
* observed the given event within the given context. Note that nil will be
|
152
|
+
* returned if the event has never been observed within the context.
|
131
153
|
*
|
132
|
-
* @param [
|
133
|
-
* @param [Fixnum]
|
134
|
-
* @return [Float] The surprise, which is calculated to be the
|
135
|
-
* mutual information of the
|
136
|
-
* distribution over the
|
137
|
-
* the bigram.
|
154
|
+
* @param [Fixnum] context A number that provides a context for observations.
|
155
|
+
* @param [Fixnum] event A number representing the observed event.
|
156
|
+
* @return [Float] The surprise, which is calculated to be the Shannon pointwise
|
157
|
+
* mutual information of the event according to the
|
158
|
+
* #distribution over the context.
|
138
159
|
*/
|
139
160
|
VALUE method_sooth_native_surprise(VALUE self, VALUE bigram, VALUE limit);
|
140
161
|
|
@@ -152,9 +173,12 @@ void Init_sooth_native()
|
|
152
173
|
rb_define_method(SoothNative, "load", method_sooth_native_load, 1);
|
153
174
|
rb_define_method(SoothNative, "save", method_sooth_native_save, 1);
|
154
175
|
|
155
|
-
rb_define_method(SoothNative, "
|
176
|
+
rb_define_method(SoothNative, "size", method_sooth_native_size, 1);
|
156
177
|
rb_define_method(SoothNative, "count", method_sooth_native_count, 1);
|
178
|
+
rb_define_method(SoothNative, "observe", method_sooth_native_observe, 2);
|
157
179
|
rb_define_method(SoothNative, "select", method_sooth_native_select, 2);
|
180
|
+
|
181
|
+
rb_define_method(SoothNative, "distribution", method_sooth_native_distribution, 1);
|
158
182
|
rb_define_method(SoothNative, "uncertainty", method_sooth_native_uncertainty, 1);
|
159
183
|
rb_define_method(SoothNative, "surprise", method_sooth_native_surprise, 2);
|
160
184
|
}
|
@@ -183,12 +207,12 @@ method_sooth_native_deallocate(void * predictor)
|
|
183
207
|
//------------------------------------------------------------------------------
|
184
208
|
|
185
209
|
VALUE
|
186
|
-
method_sooth_native_initialize(VALUE self, VALUE
|
210
|
+
method_sooth_native_initialize(VALUE self, VALUE error_event)
|
187
211
|
{
|
188
212
|
SoothPredictor * predictor = NULL;
|
189
|
-
Check_Type(
|
213
|
+
Check_Type(error_event, T_FIXNUM);
|
190
214
|
Data_Get_Struct(self, SoothPredictor, predictor);
|
191
|
-
predictor->
|
215
|
+
predictor->error_event = NUM2UINT(error_event);
|
192
216
|
return self;
|
193
217
|
}
|
194
218
|
|
@@ -236,78 +260,90 @@ method_sooth_native_save(VALUE self, VALUE filename)
|
|
236
260
|
//------------------------------------------------------------------------------
|
237
261
|
|
238
262
|
VALUE
|
239
|
-
|
263
|
+
method_sooth_native_size(VALUE self, VALUE context)
|
240
264
|
{
|
241
265
|
SoothPredictor * predictor = NULL;
|
242
|
-
Check_Type(
|
243
|
-
Check_Type(bigram, T_ARRAY);
|
244
|
-
if (RARRAY_LEN(bigram) != 2)
|
245
|
-
{
|
246
|
-
rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
|
247
|
-
}
|
248
|
-
Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
|
249
|
-
Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
|
266
|
+
Check_Type(context, T_FIXNUM);
|
250
267
|
Data_Get_Struct(self, SoothPredictor, predictor);
|
251
|
-
uint32_t
|
252
|
-
|
268
|
+
uint32_t size = sooth_predictor_size(predictor, NUM2UINT(context));
|
269
|
+
return UINT2NUM(size);
|
270
|
+
}
|
271
|
+
|
272
|
+
//------------------------------------------------------------------------------
|
273
|
+
|
274
|
+
VALUE
|
275
|
+
method_sooth_native_count(VALUE self, VALUE context)
|
276
|
+
{
|
277
|
+
SoothPredictor * predictor = NULL;
|
278
|
+
Check_Type(context, T_FIXNUM);
|
279
|
+
Data_Get_Struct(self, SoothPredictor, predictor);
|
280
|
+
uint32_t count = sooth_predictor_count(predictor, NUM2UINT(context));
|
253
281
|
return UINT2NUM(count);
|
254
282
|
}
|
255
283
|
|
256
284
|
//------------------------------------------------------------------------------
|
257
285
|
|
258
286
|
VALUE
|
259
|
-
|
287
|
+
method_sooth_native_observe(VALUE self, VALUE context, VALUE event)
|
260
288
|
{
|
261
289
|
SoothPredictor * predictor = NULL;
|
262
|
-
Check_Type(
|
263
|
-
|
264
|
-
{
|
265
|
-
rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
|
266
|
-
}
|
267
|
-
Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
|
268
|
-
Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
|
290
|
+
Check_Type(context, T_FIXNUM);
|
291
|
+
Check_Type(event, T_FIXNUM);
|
269
292
|
Data_Get_Struct(self, SoothPredictor, predictor);
|
270
|
-
uint32_t
|
271
|
-
uint32_t count = sooth_predictor_count(predictor, c_bigram);
|
293
|
+
uint32_t count = sooth_predictor_observe(predictor, NUM2UINT(context), NUM2UINT(event));
|
272
294
|
return UINT2NUM(count);
|
273
295
|
}
|
274
296
|
|
275
297
|
//------------------------------------------------------------------------------
|
276
298
|
|
277
299
|
VALUE
|
278
|
-
method_sooth_native_select(VALUE self, VALUE
|
300
|
+
method_sooth_native_select(VALUE self, VALUE context, VALUE limit)
|
279
301
|
{
|
280
302
|
SoothPredictor * predictor = NULL;
|
303
|
+
Check_Type(context, T_FIXNUM);
|
281
304
|
Check_Type(limit, T_FIXNUM);
|
282
|
-
Check_Type(bigram, T_ARRAY);
|
283
|
-
if (RARRAY_LEN(bigram) != 2)
|
284
|
-
{
|
285
|
-
rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
|
286
|
-
}
|
287
|
-
Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
|
288
|
-
Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
|
289
305
|
Data_Get_Struct(self, SoothPredictor, predictor);
|
290
|
-
uint32_t
|
291
|
-
|
292
|
-
return UINT2NUM(symbol);
|
306
|
+
uint32_t event = sooth_predictor_select(predictor, NUM2UINT(context), NUM2UINT(limit));
|
307
|
+
return UINT2NUM(event);
|
293
308
|
}
|
294
309
|
|
295
310
|
//------------------------------------------------------------------------------
|
296
311
|
|
297
312
|
VALUE
|
298
|
-
|
313
|
+
method_sooth_native_distribution(VALUE self, VALUE context)
|
299
314
|
{
|
300
315
|
SoothPredictor * predictor = NULL;
|
301
|
-
Check_Type(
|
302
|
-
|
316
|
+
Check_Type(context, T_FIXNUM);
|
317
|
+
Data_Get_Struct(self, SoothPredictor, predictor);
|
318
|
+
uint32_t c_context = NUM2UINT(context);
|
319
|
+
SoothStatistic * statistics = sooth_predictor_distribution(predictor, c_context);
|
320
|
+
if (statistics == NULL)
|
303
321
|
{
|
304
|
-
|
322
|
+
return Qnil;
|
305
323
|
}
|
306
|
-
|
307
|
-
|
324
|
+
uint32_t size = sooth_predictor_size(predictor, c_context);
|
325
|
+
double count = (double)sooth_predictor_count(predictor, c_context);
|
326
|
+
VALUE r_array = rb_ary_new2(size);
|
327
|
+
for (uint32_t i = 0; i < size; ++i)
|
328
|
+
{
|
329
|
+
SoothStatistic statistic = statistics[i];
|
330
|
+
VALUE pair = rb_ary_new2(2);
|
331
|
+
rb_ary_store(pair, 0, UINT2NUM(statistic.event));
|
332
|
+
rb_ary_store(pair, 1, DBL2NUM((double)statistic.count/count));
|
333
|
+
rb_ary_store(r_array, i, pair);
|
334
|
+
}
|
335
|
+
return r_array;
|
336
|
+
}
|
337
|
+
|
338
|
+
//------------------------------------------------------------------------------
|
339
|
+
|
340
|
+
VALUE
|
341
|
+
method_sooth_native_uncertainty(VALUE self, VALUE context)
|
342
|
+
{
|
343
|
+
SoothPredictor * predictor = NULL;
|
344
|
+
Check_Type(context, T_FIXNUM);
|
308
345
|
Data_Get_Struct(self, SoothPredictor, predictor);
|
309
|
-
|
310
|
-
double uncertainty = sooth_predictor_uncertainty(predictor, c_bigram);
|
346
|
+
double uncertainty = sooth_predictor_uncertainty(predictor, NUM2UINT(context));
|
311
347
|
if (uncertainty < 0)
|
312
348
|
{
|
313
349
|
return Qnil;
|
@@ -318,20 +354,13 @@ method_sooth_native_uncertainty(VALUE self, VALUE bigram)
|
|
318
354
|
//------------------------------------------------------------------------------
|
319
355
|
|
320
356
|
VALUE
|
321
|
-
method_sooth_native_surprise(VALUE self, VALUE
|
357
|
+
method_sooth_native_surprise(VALUE self, VALUE context, VALUE event)
|
322
358
|
{
|
323
359
|
SoothPredictor * predictor = NULL;
|
324
|
-
Check_Type(
|
325
|
-
Check_Type(
|
326
|
-
if (RARRAY_LEN(bigram) != 2)
|
327
|
-
{
|
328
|
-
rb_raise(rb_eTypeError, "bigram must be an array of exactly two symbols");
|
329
|
-
}
|
330
|
-
Check_Type(RARRAY_PTR(bigram)[0], T_FIXNUM);
|
331
|
-
Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
|
360
|
+
Check_Type(context, T_FIXNUM);
|
361
|
+
Check_Type(event, T_FIXNUM);
|
332
362
|
Data_Get_Struct(self, SoothPredictor, predictor);
|
333
|
-
|
334
|
-
double surprise = sooth_predictor_surprise(predictor, c_bigram, NUM2UINT(symbol));
|
363
|
+
double surprise = sooth_predictor_surprise(predictor, NUM2UINT(context), NUM2UINT(event));
|
335
364
|
if (surprise < 0)
|
336
365
|
{
|
337
366
|
return Qnil;
|