sooth 1.0.3 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -9,15 +9,13 @@
9
9
 
10
10
  typedef struct
11
11
  {
12
- uint32_t bigram[2];
12
+ uint32_t id;
13
13
  uint32_t count;
14
14
  uint32_t statistics_size;
15
15
  SoothStatistic * statistics;
16
16
  }
17
17
  SoothContext;
18
18
 
19
- //------------------------------------------------------------------------------
20
-
21
19
  //==============================================================================
22
20
 
23
21
  #endif
@@ -9,24 +9,6 @@
9
9
 
10
10
  //------------------------------------------------------------------------------
11
11
 
12
- void
13
- sooth_show_predictor(SoothPredictor * predictor)
14
- {
15
- printf("Error Symbol: %u\n", predictor->error_symbol);
16
- for (uint32_t i = 0; i < predictor->contexts_size; ++i)
17
- {
18
- SoothContext context = predictor->contexts[i];
19
- printf(" Context %u-%u (%u)\n", context.bigram[0], context.bigram[1], context.count);
20
- for (uint32_t j = 0; j < context.statistics_size; ++j)
21
- {
22
- SoothStatistic statistic = context.statistics[j];
23
- printf(" Symbol %u (%u)\n", statistic.symbol, statistic.count);
24
- }
25
- }
26
- }
27
-
28
- //------------------------------------------------------------------------------
29
-
30
12
  SoothPredictor *
31
13
  sooth_predictor_init()
32
14
  {
@@ -37,7 +19,7 @@ sooth_predictor_init()
37
19
  return NULL;
38
20
  }
39
21
 
40
- predictor->error_symbol = 0;
22
+ predictor->error_event = 0;
41
23
  predictor->contexts = NULL;
42
24
  predictor->contexts_size = 0;
43
25
 
@@ -53,8 +35,6 @@ sooth_predictor_clear(SoothPredictor * predictor)
53
35
  {
54
36
  SoothContext * context = &(predictor->contexts[i]);
55
37
  free(context->statistics);
56
- context->statistics = NULL;
57
- context->statistics_size = 0;
58
38
  }
59
39
 
60
40
  free(predictor->contexts);
@@ -83,13 +63,13 @@ sooth_predictor_save(const char * const filename, SoothPredictor * predictor)
83
63
  return false;
84
64
  }
85
65
 
86
- fwrite("MH10", 1, 4, file);
87
- fwrite(&predictor->error_symbol, 4, 1, file);
66
+ fwrite("MH11", 1, 4, file);
67
+ fwrite(&predictor->error_event, 4, 1, file);
88
68
  fwrite(&predictor->contexts_size, 4, 1, file);
89
69
  for (uint32_t i = 0; i < predictor->contexts_size; ++i)
90
70
  {
91
71
  SoothContext context = predictor->contexts[i];
92
- fwrite(context.bigram, 4, 2, file);
72
+ fwrite(&context.id, 4, 1, file);
93
73
  fwrite(&context.count, 4, 1, file);
94
74
  fwrite(&context.statistics_size, 4, 1, file);
95
75
  fwrite(context.statistics, sizeof(SoothStatistic), context.statistics_size, file);
@@ -113,14 +93,14 @@ bool sooth_predictor_load(const char * const filename, SoothPredictor * predicto
113
93
 
114
94
  char code[4];
115
95
  fread(code, 1, 4, file);
116
- if (strncmp(code, "MH10", 4) != 0)
96
+ if (strncmp(code, "MH11", 4) != 0)
117
97
  {
118
98
  return false;
119
99
  }
120
100
 
121
101
  sooth_predictor_clear(predictor);
122
102
 
123
- fread(&predictor->error_symbol, 4, 1, file);
103
+ fread(&predictor->error_event, 4, 1, file);
124
104
  fread(&predictor->contexts_size, 4, 1, file);
125
105
  if (predictor->contexts_size == 0)
126
106
  {
@@ -130,19 +110,19 @@ bool sooth_predictor_load(const char * const filename, SoothPredictor * predicto
130
110
  if (predictor->contexts == NULL)
131
111
  {
132
112
  sooth_predictor_clear(predictor);
133
- return NULL;
113
+ return false;
134
114
  }
135
115
  for (uint32_t i = 0; i < predictor->contexts_size; ++i)
136
116
  {
137
117
  SoothContext * context = &(predictor->contexts[i]);
138
- fread(context->bigram, 4, 2, file);
118
+ fread(&context->id, 4, 1, file);
139
119
  fread(&context->count, 4, 1, file);
140
120
  fread(&context->statistics_size, 4, 1, file);
141
121
  context->statistics = malloc(sizeof(SoothStatistic) * context->statistics_size);
142
122
  if (context->statistics == NULL)
143
123
  {
144
124
  sooth_predictor_clear(predictor);
145
- return NULL;
125
+ return false;
146
126
  }
147
127
  fread(context->statistics, sizeof(SoothStatistic), context->statistics_size, file);
148
128
  }
@@ -152,7 +132,7 @@ bool sooth_predictor_load(const char * const filename, SoothPredictor * predicto
152
132
 
153
133
  //------------------------------------------------------------------------------
154
134
  SoothContext *
155
- sooth_predictor_find_context(SoothPredictor * predictor, uint32_t bigram[2])
135
+ sooth_predictor_find_context(SoothPredictor * predictor, uint32_t id)
156
136
  {
157
137
  SoothContext * context = NULL;
158
138
  uint32_t mid = 0;
@@ -166,11 +146,11 @@ sooth_predictor_find_context(SoothPredictor * predictor, uint32_t bigram[2])
166
146
  {
167
147
  mid = low + (high - low) / 2;
168
148
  context = &(predictor->contexts[mid]);
169
- if (context->bigram[0] < bigram[0] || (context->bigram[0] == bigram[0] && context->bigram[1] < bigram[1]))
149
+ if (context->id < id)
170
150
  {
171
151
  low = mid + 1;
172
152
  }
173
- else if (context->bigram[0] > bigram[0] || (context->bigram[0] == bigram[0] && context->bigram[1] > bigram[1]))
153
+ else if (context->id > id)
174
154
  {
175
155
  if (mid == 0)
176
156
  {
@@ -203,8 +183,7 @@ sooth_predictor_find_context(SoothPredictor * predictor, uint32_t bigram[2])
203
183
  }
204
184
 
205
185
  context = &(predictor->contexts[mid]);
206
- context->bigram[0] = bigram[0];
207
- context->bigram[1] = bigram[1];
186
+ context->id = id;
208
187
  context->count = 0;
209
188
  context->statistics_size = 0;
210
189
  context->statistics = NULL;
@@ -215,7 +194,7 @@ sooth_predictor_find_context(SoothPredictor * predictor, uint32_t bigram[2])
215
194
  //------------------------------------------------------------------------------
216
195
 
217
196
  SoothStatistic *
218
- sooth_predictor_find_statistic(SoothContext * context, uint32_t symbol)
197
+ sooth_predictor_find_statistic(SoothContext * context, uint32_t event)
219
198
  {
220
199
  SoothStatistic * statistic = NULL;
221
200
  uint32_t mid = 0;
@@ -229,11 +208,11 @@ sooth_predictor_find_statistic(SoothContext * context, uint32_t symbol)
229
208
  {
230
209
  mid = low + (high - low) / 2;
231
210
  statistic = &(context->statistics[mid]);
232
- if (statistic->symbol < symbol)
211
+ if (statistic->event < event)
233
212
  {
234
213
  low = mid + 1;
235
214
  }
236
- else if (statistic->symbol > symbol)
215
+ else if (statistic->event > event)
237
216
  {
238
217
  if (mid == 0)
239
218
  {
@@ -266,7 +245,7 @@ sooth_predictor_find_statistic(SoothContext * context, uint32_t symbol)
266
245
  }
267
246
 
268
247
  statistic = &(context->statistics[mid]);
269
- statistic->symbol = symbol;
248
+ statistic->event = event;
270
249
  statistic->count = 0;
271
250
 
272
251
  return statistic;
@@ -275,53 +254,79 @@ sooth_predictor_find_statistic(SoothContext * context, uint32_t symbol)
275
254
  //------------------------------------------------------------------------------
276
255
 
277
256
  uint32_t
278
- sooth_predictor_observe(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol)
257
+ sooth_predictor_size(SoothPredictor * predictor, uint32_t id)
279
258
  {
280
- SoothContext * context = sooth_predictor_find_context(predictor, bigram);
281
-
259
+ SoothContext * context = sooth_predictor_find_context(predictor, id);
260
+
282
261
  if (context == NULL)
283
262
  {
284
263
  return 0;
285
264
  }
286
265
 
287
- SoothStatistic * statistic = sooth_predictor_find_statistic(context, symbol);
266
+ return context->statistics_size;
267
+ }
288
268
 
289
- if (statistic == NULL)
269
+ //------------------------------------------------------------------------------
270
+
271
+ uint32_t
272
+ sooth_predictor_count(SoothPredictor * predictor, uint32_t id)
273
+ {
274
+ SoothContext * context = sooth_predictor_find_context(predictor, id);
275
+
276
+ if (context == NULL)
290
277
  {
291
278
  return 0;
292
279
  }
293
280
 
294
- statistic->count += 1;
295
- context->count += 1;
296
-
297
- return statistic->count;
281
+ return context->count;
298
282
  }
299
283
 
300
284
  //------------------------------------------------------------------------------
301
285
 
302
286
  uint32_t
303
- sooth_predictor_count(SoothPredictor * predictor, uint32_t bigram[2])
287
+ sooth_predictor_observe(SoothPredictor * predictor, uint32_t id, uint32_t event)
304
288
  {
305
- SoothContext * context = sooth_predictor_find_context(predictor, bigram);
306
-
289
+ SoothContext * context = sooth_predictor_find_context(predictor, id);
290
+
307
291
  if (context == NULL)
308
292
  {
309
293
  return 0;
310
294
  }
311
295
 
312
- return context->count;
296
+ if (context->count == UINT32_MAX)
297
+ {
298
+ context->count = 0;
299
+ for (uint32_t i = 0; i < context->statistics_size; ++i)
300
+ {
301
+ SoothStatistic statistic = context->statistics[i];
302
+ statistic.count /= 2;
303
+ context->count += statistic.count;
304
+ }
305
+ }
306
+
307
+ SoothStatistic * statistic = sooth_predictor_find_statistic(context, event);
308
+
309
+ if (statistic == NULL)
310
+ {
311
+ return 0;
312
+ }
313
+
314
+ statistic->count += 1;
315
+ context->count += 1;
316
+
317
+ return statistic->count;
313
318
  }
314
319
 
315
320
  //------------------------------------------------------------------------------
316
321
 
317
322
  uint32_t
318
- sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint32_t limit)
323
+ sooth_predictor_select(SoothPredictor * predictor, uint32_t id, uint32_t limit)
319
324
  {
320
- SoothContext * context = sooth_predictor_find_context(predictor, bigram);
325
+ SoothContext * context = sooth_predictor_find_context(predictor, id);
321
326
 
322
- if (context == NULL || limit == 0)
327
+ if (context == NULL || limit == 0 || limit > context->count)
323
328
  {
324
- return predictor->error_symbol;
329
+ return predictor->error_event;
325
330
  }
326
331
 
327
332
  for (uint32_t i = 0; i < context->statistics_size; ++i)
@@ -332,18 +337,33 @@ sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint32_t
332
337
  limit -= statistic.count;
333
338
  continue;
334
339
  }
335
- return statistic.symbol;
340
+ return statistic.event;
341
+ }
342
+
343
+ return predictor->error_event;
344
+ }
345
+
346
+ //------------------------------------------------------------------------------
347
+
348
+ SoothStatistic *
349
+ sooth_predictor_distribution(SoothPredictor * predictor, uint32_t id)
350
+ {
351
+ SoothContext * context = sooth_predictor_find_context(predictor, id);
352
+
353
+ if (context == NULL)
354
+ {
355
+ return NULL;
336
356
  }
337
357
 
338
- return predictor->error_symbol;
358
+ return context->statistics;
339
359
  }
340
360
 
341
361
  //------------------------------------------------------------------------------
342
362
 
343
363
  double
344
- sooth_predictor_uncertainty(SoothPredictor * predictor, uint32_t bigram[2])
364
+ sooth_predictor_uncertainty(SoothPredictor * predictor, uint32_t id)
345
365
  {
346
- SoothContext * context = sooth_predictor_find_context(predictor, bigram);
366
+ SoothContext * context = sooth_predictor_find_context(predictor, id);
347
367
 
348
368
  if (context == NULL || context->count == 0)
349
369
  {
@@ -366,16 +386,16 @@ sooth_predictor_uncertainty(SoothPredictor * predictor, uint32_t bigram[2])
366
386
  //------------------------------------------------------------------------------
367
387
 
368
388
  double
369
- sooth_predictor_surprise(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol)
389
+ sooth_predictor_surprise(SoothPredictor * predictor, uint32_t id, uint32_t event)
370
390
  {
371
- SoothContext * context = sooth_predictor_find_context(predictor, bigram);
391
+ SoothContext * context = sooth_predictor_find_context(predictor, id);
372
392
 
373
393
  if (context == NULL || context->count == 0)
374
394
  {
375
395
  return -1;
376
396
  }
377
397
 
378
- SoothStatistic * statistic = sooth_predictor_find_statistic(context, symbol);
398
+ SoothStatistic * statistic = sooth_predictor_find_statistic(context, event);
379
399
 
380
400
  if (statistic == NULL || statistic->count == 0)
381
401
  {
@@ -10,7 +10,7 @@
10
10
 
11
11
  typedef struct
12
12
  {
13
- uint32_t error_symbol;
13
+ uint32_t error_event;
14
14
  uint32_t contexts_size;
15
15
  SoothContext * contexts;
16
16
  }
@@ -23,11 +23,13 @@ void sooth_predictor_clear(SoothPredictor * predictor);
23
23
  void sooth_predictor_free(SoothPredictor * predictor);
24
24
  bool sooth_predictor_load(const char * const filename, SoothPredictor * predictor);
25
25
  bool sooth_predictor_save(const char * const filename, SoothPredictor * predictor);
26
- uint32_t sooth_predictor_observe(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol);
27
- uint32_t sooth_predictor_count(SoothPredictor * predictor, uint32_t bigram[2]);
28
- uint32_t sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint32_t limit);
29
- double sooth_predictor_uncertainty(SoothPredictor * predictor, uint32_t bigram[2]);
30
- double sooth_predictor_surprise(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol);
26
+ uint32_t sooth_predictor_size(SoothPredictor * predictor, uint32_t id);
27
+ uint32_t sooth_predictor_count(SoothPredictor * predictor, uint32_t id);
28
+ uint32_t sooth_predictor_observe(SoothPredictor * predictor, uint32_t id, uint32_t symbol);
29
+ uint32_t sooth_predictor_select(SoothPredictor * predictor, uint32_t id, uint32_t limit);
30
+ SoothStatistic * sooth_predictor_distribution(SoothPredictor * predictor, uint32_t id);
31
+ double sooth_predictor_uncertainty(SoothPredictor * predictor, uint32_t id);
32
+ double sooth_predictor_surprise(SoothPredictor * predictor, uint32_t id, uint32_t symbol);
31
33
 
32
34
  //==============================================================================
33
35
 
@@ -7,13 +7,11 @@
7
7
 
8
8
  typedef struct
9
9
  {
10
- uint32_t symbol;
10
+ uint32_t event;
11
11
  uint32_t count;
12
12
  }
13
13
  SoothStatistic;
14
14
 
15
- //------------------------------------------------------------------------------
16
-
17
15
  //==============================================================================
18
16
 
19
17
  #endif
data/sooth.gemspec CHANGED
@@ -2,22 +2,21 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: sooth 1.0.3 ruby lib
5
+ # stub: sooth 2.0.0 ruby lib
6
6
  # stub: ext/sooth_native/extconf.rb
7
7
 
8
8
  Gem::Specification.new do |s|
9
9
  s.name = "sooth"
10
- s.version = "1.0.3"
10
+ s.version = "2.0.0"
11
11
 
12
12
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
13
13
  s.require_paths = ["lib"]
14
14
  s.authors = ["Jason Hutchens"]
15
- s.date = "2015-10-03"
15
+ s.date = "2016-04-06"
16
16
  s.description = "Sooth is a simple stochastic predictive model."
17
17
  s.email = "jasonhutchens@gmail.com"
18
18
  s.extensions = ["ext/sooth_native/extconf.rb"]
19
19
  s.extra_rdoc_files = [
20
- "CHANGELOG.md",
21
20
  "README.md"
22
21
  ]
23
22
  s.files = [
@@ -43,7 +42,7 @@ Gem::Specification.new do |s|
43
42
  s.homepage = "http://github.com/jasonhutchens/sooth"
44
43
  s.licenses = ["UNLICENSE"]
45
44
  s.required_ruby_version = Gem::Requirement.new("~> 2.1")
46
- s.rubygems_version = "2.4.8"
45
+ s.rubygems_version = "2.5.1"
47
46
  s.summary = "Sooth is a simple stochastic predictive model."
48
47
 
49
48
  if s.respond_to? :specification_version then
@@ -56,7 +55,7 @@ Gem::Specification.new do |s|
56
55
  s.add_development_dependency(%q<bundler>, ["~> 1.7"])
57
56
  s.add_development_dependency(%q<jeweler>, ["~> 2.0"])
58
57
  s.add_development_dependency(%q<simplecov>, ["~> 0.9"])
59
- s.add_development_dependency(%q<byebug>, ["~> 6.0"])
58
+ s.add_development_dependency(%q<byebug>, ["~> 8.2"])
60
59
  s.add_development_dependency(%q<rake-compiler>, ["~> 0.9"])
61
60
  s.add_development_dependency(%q<github_changelog_generator>, ["~> 1.4"])
62
61
  else
@@ -66,7 +65,7 @@ Gem::Specification.new do |s|
66
65
  s.add_dependency(%q<bundler>, ["~> 1.7"])
67
66
  s.add_dependency(%q<jeweler>, ["~> 2.0"])
68
67
  s.add_dependency(%q<simplecov>, ["~> 0.9"])
69
- s.add_dependency(%q<byebug>, ["~> 6.0"])
68
+ s.add_dependency(%q<byebug>, ["~> 8.2"])
70
69
  s.add_dependency(%q<rake-compiler>, ["~> 0.9"])
71
70
  s.add_dependency(%q<github_changelog_generator>, ["~> 1.4"])
72
71
  end
@@ -77,7 +76,7 @@ Gem::Specification.new do |s|
77
76
  s.add_dependency(%q<bundler>, ["~> 1.7"])
78
77
  s.add_dependency(%q<jeweler>, ["~> 2.0"])
79
78
  s.add_dependency(%q<simplecov>, ["~> 0.9"])
80
- s.add_dependency(%q<byebug>, ["~> 6.0"])
79
+ s.add_dependency(%q<byebug>, ["~> 8.2"])
81
80
  s.add_dependency(%q<rake-compiler>, ["~> 0.9"])
82
81
  s.add_dependency(%q<github_changelog_generator>, ["~> 1.4"])
83
82
  end
data/spec/memory_spec.rb CHANGED
@@ -4,32 +4,32 @@ describe Sooth::Predictor do
4
4
  let(:predictor) { Sooth::Predictor.new(42) }
5
5
 
6
6
  describe "#observe" do
7
- it "does not segfault when sorting bigrams" do
8
- expect(predictor.observe([3,3], 1)).to eq(1)
9
- expect(predictor.observe([5,5], 1)).to eq(1)
10
- expect(predictor.observe([4,4], 1)).to eq(1)
11
- expect(predictor.observe([5,5], 1)).to eq(2)
12
- expect(predictor.observe([3,3], 1)).to eq(2)
13
- expect(predictor.observe([4,4], 1)).to eq(2)
14
- expect(predictor.observe([2,2], 1)).to eq(1)
15
- expect(predictor.observe([4,4], 1)).to eq(3)
16
- expect(predictor.observe([2,2], 1)).to eq(2)
17
- expect(predictor.observe([3,3], 1)).to eq(3)
18
- expect(predictor.observe([5,5], 1)).to eq(3)
7
+ it "does not segfault when sorting contexts" do
8
+ expect(predictor.observe(3, 1)).to eq(1)
9
+ expect(predictor.observe(5, 1)).to eq(1)
10
+ expect(predictor.observe(4, 1)).to eq(1)
11
+ expect(predictor.observe(5, 1)).to eq(2)
12
+ expect(predictor.observe(3, 1)).to eq(2)
13
+ expect(predictor.observe(4, 1)).to eq(2)
14
+ expect(predictor.observe(2, 1)).to eq(1)
15
+ expect(predictor.observe(4, 1)).to eq(3)
16
+ expect(predictor.observe(2, 1)).to eq(2)
17
+ expect(predictor.observe(3, 1)).to eq(3)
18
+ expect(predictor.observe(3, 1)).to eq(4)
19
19
  end
20
20
 
21
- it "does not segfault when sorting symbols" do
22
- expect(predictor.observe([1,2], 3)).to eq(1)
23
- expect(predictor.observe([1,2], 5)).to eq(1)
24
- expect(predictor.observe([1,2], 4)).to eq(1)
25
- expect(predictor.observe([1,2], 5)).to eq(2)
26
- expect(predictor.observe([1,2], 3)).to eq(2)
27
- expect(predictor.observe([1,2], 4)).to eq(2)
28
- expect(predictor.observe([1,2], 2)).to eq(1)
29
- expect(predictor.observe([1,2], 4)).to eq(3)
30
- expect(predictor.observe([1,2], 2)).to eq(2)
31
- expect(predictor.observe([1,2], 3)).to eq(3)
32
- expect(predictor.observe([1,2], 5)).to eq(3)
21
+ it "does not segfault when sorting events" do
22
+ expect(predictor.observe(1, 3)).to eq(1)
23
+ expect(predictor.observe(1, 5)).to eq(1)
24
+ expect(predictor.observe(1, 4)).to eq(1)
25
+ expect(predictor.observe(1, 5)).to eq(2)
26
+ expect(predictor.observe(1, 3)).to eq(2)
27
+ expect(predictor.observe(1, 4)).to eq(2)
28
+ expect(predictor.observe(1, 2)).to eq(1)
29
+ expect(predictor.observe(1, 4)).to eq(3)
30
+ expect(predictor.observe(1, 2)).to eq(2)
31
+ expect(predictor.observe(1, 3)).to eq(3)
32
+ expect(predictor.observe(1, 5)).to eq(3)
33
33
  end
34
34
  end
35
35
  end