sooth 1.0.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,15 +9,13 @@
9
9
 
10
10
  typedef struct
11
11
  {
12
- uint32_t bigram[2];
12
+ uint32_t id;
13
13
  uint32_t count;
14
14
  uint32_t statistics_size;
15
15
  SoothStatistic * statistics;
16
16
  }
17
17
  SoothContext;
18
18
 
19
- //------------------------------------------------------------------------------
20
-
21
19
  //==============================================================================
22
20
 
23
21
  #endif
@@ -9,24 +9,6 @@
9
9
 
10
10
  //------------------------------------------------------------------------------
11
11
 
12
- void
13
- sooth_show_predictor(SoothPredictor * predictor)
14
- {
15
- printf("Error Symbol: %u\n", predictor->error_symbol);
16
- for (uint32_t i = 0; i < predictor->contexts_size; ++i)
17
- {
18
- SoothContext context = predictor->contexts[i];
19
- printf(" Context %u-%u (%u)\n", context.bigram[0], context.bigram[1], context.count);
20
- for (uint32_t j = 0; j < context.statistics_size; ++j)
21
- {
22
- SoothStatistic statistic = context.statistics[j];
23
- printf(" Symbol %u (%u)\n", statistic.symbol, statistic.count);
24
- }
25
- }
26
- }
27
-
28
- //------------------------------------------------------------------------------
29
-
30
12
  SoothPredictor *
31
13
  sooth_predictor_init()
32
14
  {
@@ -37,7 +19,7 @@ sooth_predictor_init()
37
19
  return NULL;
38
20
  }
39
21
 
40
- predictor->error_symbol = 0;
22
+ predictor->error_event = 0;
41
23
  predictor->contexts = NULL;
42
24
  predictor->contexts_size = 0;
43
25
 
@@ -53,8 +35,6 @@ sooth_predictor_clear(SoothPredictor * predictor)
53
35
  {
54
36
  SoothContext * context = &(predictor->contexts[i]);
55
37
  free(context->statistics);
56
- context->statistics = NULL;
57
- context->statistics_size = 0;
58
38
  }
59
39
 
60
40
  free(predictor->contexts);
@@ -83,13 +63,13 @@ sooth_predictor_save(const char * const filename, SoothPredictor * predictor)
83
63
  return false;
84
64
  }
85
65
 
86
- fwrite("MH10", 1, 4, file);
87
- fwrite(&predictor->error_symbol, 4, 1, file);
66
+ fwrite("MH11", 1, 4, file);
67
+ fwrite(&predictor->error_event, 4, 1, file);
88
68
  fwrite(&predictor->contexts_size, 4, 1, file);
89
69
  for (uint32_t i = 0; i < predictor->contexts_size; ++i)
90
70
  {
91
71
  SoothContext context = predictor->contexts[i];
92
- fwrite(context.bigram, 4, 2, file);
72
+ fwrite(&context.id, 4, 1, file);
93
73
  fwrite(&context.count, 4, 1, file);
94
74
  fwrite(&context.statistics_size, 4, 1, file);
95
75
  fwrite(context.statistics, sizeof(SoothStatistic), context.statistics_size, file);
@@ -113,14 +93,14 @@ bool sooth_predictor_load(const char * const filename, SoothPredictor * predicto
113
93
 
114
94
  char code[4];
115
95
  fread(code, 1, 4, file);
116
- if (strncmp(code, "MH10", 4) != 0)
96
+ if (strncmp(code, "MH11", 4) != 0)
117
97
  {
118
98
  return false;
119
99
  }
120
100
 
121
101
  sooth_predictor_clear(predictor);
122
102
 
123
- fread(&predictor->error_symbol, 4, 1, file);
103
+ fread(&predictor->error_event, 4, 1, file);
124
104
  fread(&predictor->contexts_size, 4, 1, file);
125
105
  if (predictor->contexts_size == 0)
126
106
  {
@@ -130,19 +110,19 @@ bool sooth_predictor_load(const char * const filename, SoothPredictor * predicto
130
110
  if (predictor->contexts == NULL)
131
111
  {
132
112
  sooth_predictor_clear(predictor);
133
- return NULL;
113
+ return false;
134
114
  }
135
115
  for (uint32_t i = 0; i < predictor->contexts_size; ++i)
136
116
  {
137
117
  SoothContext * context = &(predictor->contexts[i]);
138
- fread(context->bigram, 4, 2, file);
118
+ fread(&context->id, 4, 1, file);
139
119
  fread(&context->count, 4, 1, file);
140
120
  fread(&context->statistics_size, 4, 1, file);
141
121
  context->statistics = malloc(sizeof(SoothStatistic) * context->statistics_size);
142
122
  if (context->statistics == NULL)
143
123
  {
144
124
  sooth_predictor_clear(predictor);
145
- return NULL;
125
+ return false;
146
126
  }
147
127
  fread(context->statistics, sizeof(SoothStatistic), context->statistics_size, file);
148
128
  }
@@ -152,7 +132,7 @@ bool sooth_predictor_load(const char * const filename, SoothPredictor * predicto
152
132
 
153
133
  //------------------------------------------------------------------------------
154
134
  SoothContext *
155
- sooth_predictor_find_context(SoothPredictor * predictor, uint32_t bigram[2])
135
+ sooth_predictor_find_context(SoothPredictor * predictor, uint32_t id)
156
136
  {
157
137
  SoothContext * context = NULL;
158
138
  uint32_t mid = 0;
@@ -166,11 +146,11 @@ sooth_predictor_find_context(SoothPredictor * predictor, uint32_t bigram[2])
166
146
  {
167
147
  mid = low + (high - low) / 2;
168
148
  context = &(predictor->contexts[mid]);
169
- if (context->bigram[0] < bigram[0] || (context->bigram[0] == bigram[0] && context->bigram[1] < bigram[1]))
149
+ if (context->id < id)
170
150
  {
171
151
  low = mid + 1;
172
152
  }
173
- else if (context->bigram[0] > bigram[0] || (context->bigram[0] == bigram[0] && context->bigram[1] > bigram[1]))
153
+ else if (context->id > id)
174
154
  {
175
155
  if (mid == 0)
176
156
  {
@@ -203,8 +183,7 @@ sooth_predictor_find_context(SoothPredictor * predictor, uint32_t bigram[2])
203
183
  }
204
184
 
205
185
  context = &(predictor->contexts[mid]);
206
- context->bigram[0] = bigram[0];
207
- context->bigram[1] = bigram[1];
186
+ context->id = id;
208
187
  context->count = 0;
209
188
  context->statistics_size = 0;
210
189
  context->statistics = NULL;
@@ -215,7 +194,7 @@ sooth_predictor_find_context(SoothPredictor * predictor, uint32_t bigram[2])
215
194
  //------------------------------------------------------------------------------
216
195
 
217
196
  SoothStatistic *
218
- sooth_predictor_find_statistic(SoothContext * context, uint32_t symbol)
197
+ sooth_predictor_find_statistic(SoothContext * context, uint32_t event)
219
198
  {
220
199
  SoothStatistic * statistic = NULL;
221
200
  uint32_t mid = 0;
@@ -229,11 +208,11 @@ sooth_predictor_find_statistic(SoothContext * context, uint32_t symbol)
229
208
  {
230
209
  mid = low + (high - low) / 2;
231
210
  statistic = &(context->statistics[mid]);
232
- if (statistic->symbol < symbol)
211
+ if (statistic->event < event)
233
212
  {
234
213
  low = mid + 1;
235
214
  }
236
- else if (statistic->symbol > symbol)
215
+ else if (statistic->event > event)
237
216
  {
238
217
  if (mid == 0)
239
218
  {
@@ -266,7 +245,7 @@ sooth_predictor_find_statistic(SoothContext * context, uint32_t symbol)
266
245
  }
267
246
 
268
247
  statistic = &(context->statistics[mid]);
269
- statistic->symbol = symbol;
248
+ statistic->event = event;
270
249
  statistic->count = 0;
271
250
 
272
251
  return statistic;
@@ -275,53 +254,79 @@ sooth_predictor_find_statistic(SoothContext * context, uint32_t symbol)
275
254
  //------------------------------------------------------------------------------
276
255
 
277
256
  uint32_t
278
- sooth_predictor_observe(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol)
257
+ sooth_predictor_size(SoothPredictor * predictor, uint32_t id)
279
258
  {
280
- SoothContext * context = sooth_predictor_find_context(predictor, bigram);
281
-
259
+ SoothContext * context = sooth_predictor_find_context(predictor, id);
260
+
282
261
  if (context == NULL)
283
262
  {
284
263
  return 0;
285
264
  }
286
265
 
287
- SoothStatistic * statistic = sooth_predictor_find_statistic(context, symbol);
266
+ return context->statistics_size;
267
+ }
288
268
 
289
- if (statistic == NULL)
269
+ //------------------------------------------------------------------------------
270
+
271
+ uint32_t
272
+ sooth_predictor_count(SoothPredictor * predictor, uint32_t id)
273
+ {
274
+ SoothContext * context = sooth_predictor_find_context(predictor, id);
275
+
276
+ if (context == NULL)
290
277
  {
291
278
  return 0;
292
279
  }
293
280
 
294
- statistic->count += 1;
295
- context->count += 1;
296
-
297
- return statistic->count;
281
+ return context->count;
298
282
  }
299
283
 
300
284
  //------------------------------------------------------------------------------
301
285
 
302
286
  uint32_t
303
- sooth_predictor_count(SoothPredictor * predictor, uint32_t bigram[2])
287
+ sooth_predictor_observe(SoothPredictor * predictor, uint32_t id, uint32_t event)
304
288
  {
305
- SoothContext * context = sooth_predictor_find_context(predictor, bigram);
306
-
289
+ SoothContext * context = sooth_predictor_find_context(predictor, id);
290
+
307
291
  if (context == NULL)
308
292
  {
309
293
  return 0;
310
294
  }
311
295
 
312
- return context->count;
296
+ if (context->count == UINT32_MAX)
297
+ {
298
+ context->count = 0;
299
+ for (uint32_t i = 0; i < context->statistics_size; ++i)
300
+ {
301
+ SoothStatistic statistic = context->statistics[i];
302
+ statistic.count /= 2;
303
+ context->count += statistic.count;
304
+ }
305
+ }
306
+
307
+ SoothStatistic * statistic = sooth_predictor_find_statistic(context, event);
308
+
309
+ if (statistic == NULL)
310
+ {
311
+ return 0;
312
+ }
313
+
314
+ statistic->count += 1;
315
+ context->count += 1;
316
+
317
+ return statistic->count;
313
318
  }
314
319
 
315
320
  //------------------------------------------------------------------------------
316
321
 
317
322
  uint32_t
318
- sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint32_t limit)
323
+ sooth_predictor_select(SoothPredictor * predictor, uint32_t id, uint32_t limit)
319
324
  {
320
- SoothContext * context = sooth_predictor_find_context(predictor, bigram);
325
+ SoothContext * context = sooth_predictor_find_context(predictor, id);
321
326
 
322
- if (context == NULL || limit == 0)
327
+ if (context == NULL || limit == 0 || limit > context->count)
323
328
  {
324
- return predictor->error_symbol;
329
+ return predictor->error_event;
325
330
  }
326
331
 
327
332
  for (uint32_t i = 0; i < context->statistics_size; ++i)
@@ -332,18 +337,33 @@ sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint32_t
332
337
  limit -= statistic.count;
333
338
  continue;
334
339
  }
335
- return statistic.symbol;
340
+ return statistic.event;
341
+ }
342
+
343
+ return predictor->error_event;
344
+ }
345
+
346
+ //------------------------------------------------------------------------------
347
+
348
+ SoothStatistic *
349
+ sooth_predictor_distribution(SoothPredictor * predictor, uint32_t id)
350
+ {
351
+ SoothContext * context = sooth_predictor_find_context(predictor, id);
352
+
353
+ if (context == NULL)
354
+ {
355
+ return NULL;
336
356
  }
337
357
 
338
- return predictor->error_symbol;
358
+ return context->statistics;
339
359
  }
340
360
 
341
361
  //------------------------------------------------------------------------------
342
362
 
343
363
  double
344
- sooth_predictor_uncertainty(SoothPredictor * predictor, uint32_t bigram[2])
364
+ sooth_predictor_uncertainty(SoothPredictor * predictor, uint32_t id)
345
365
  {
346
- SoothContext * context = sooth_predictor_find_context(predictor, bigram);
366
+ SoothContext * context = sooth_predictor_find_context(predictor, id);
347
367
 
348
368
  if (context == NULL || context->count == 0)
349
369
  {
@@ -366,16 +386,16 @@ sooth_predictor_uncertainty(SoothPredictor * predictor, uint32_t bigram[2])
366
386
  //------------------------------------------------------------------------------
367
387
 
368
388
  double
369
- sooth_predictor_surprise(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol)
389
+ sooth_predictor_surprise(SoothPredictor * predictor, uint32_t id, uint32_t event)
370
390
  {
371
- SoothContext * context = sooth_predictor_find_context(predictor, bigram);
391
+ SoothContext * context = sooth_predictor_find_context(predictor, id);
372
392
 
373
393
  if (context == NULL || context->count == 0)
374
394
  {
375
395
  return -1;
376
396
  }
377
397
 
378
- SoothStatistic * statistic = sooth_predictor_find_statistic(context, symbol);
398
+ SoothStatistic * statistic = sooth_predictor_find_statistic(context, event);
379
399
 
380
400
  if (statistic == NULL || statistic->count == 0)
381
401
  {
@@ -10,7 +10,7 @@
10
10
 
11
11
  typedef struct
12
12
  {
13
- uint32_t error_symbol;
13
+ uint32_t error_event;
14
14
  uint32_t contexts_size;
15
15
  SoothContext * contexts;
16
16
  }
@@ -23,11 +23,13 @@ void sooth_predictor_clear(SoothPredictor * predictor);
23
23
  void sooth_predictor_free(SoothPredictor * predictor);
24
24
  bool sooth_predictor_load(const char * const filename, SoothPredictor * predictor);
25
25
  bool sooth_predictor_save(const char * const filename, SoothPredictor * predictor);
26
- uint32_t sooth_predictor_observe(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol);
27
- uint32_t sooth_predictor_count(SoothPredictor * predictor, uint32_t bigram[2]);
28
- uint32_t sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint32_t limit);
29
- double sooth_predictor_uncertainty(SoothPredictor * predictor, uint32_t bigram[2]);
30
- double sooth_predictor_surprise(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol);
26
+ uint32_t sooth_predictor_size(SoothPredictor * predictor, uint32_t id);
27
+ uint32_t sooth_predictor_count(SoothPredictor * predictor, uint32_t id);
28
+ uint32_t sooth_predictor_observe(SoothPredictor * predictor, uint32_t id, uint32_t symbol);
29
+ uint32_t sooth_predictor_select(SoothPredictor * predictor, uint32_t id, uint32_t limit);
30
+ SoothStatistic * sooth_predictor_distribution(SoothPredictor * predictor, uint32_t id);
31
+ double sooth_predictor_uncertainty(SoothPredictor * predictor, uint32_t id);
32
+ double sooth_predictor_surprise(SoothPredictor * predictor, uint32_t id, uint32_t symbol);
31
33
 
32
34
  //==============================================================================
33
35
 
@@ -7,13 +7,11 @@
7
7
 
8
8
  typedef struct
9
9
  {
10
- uint32_t symbol;
10
+ uint32_t event;
11
11
  uint32_t count;
12
12
  }
13
13
  SoothStatistic;
14
14
 
15
- //------------------------------------------------------------------------------
16
-
17
15
  //==============================================================================
18
16
 
19
17
  #endif
data/sooth.gemspec CHANGED
@@ -2,22 +2,21 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: sooth 1.0.3 ruby lib
5
+ # stub: sooth 2.0.0 ruby lib
6
6
  # stub: ext/sooth_native/extconf.rb
7
7
 
8
8
  Gem::Specification.new do |s|
9
9
  s.name = "sooth"
10
- s.version = "1.0.3"
10
+ s.version = "2.0.0"
11
11
 
12
12
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
13
13
  s.require_paths = ["lib"]
14
14
  s.authors = ["Jason Hutchens"]
15
- s.date = "2015-10-03"
15
+ s.date = "2016-04-06"
16
16
  s.description = "Sooth is a simple stochastic predictive model."
17
17
  s.email = "jasonhutchens@gmail.com"
18
18
  s.extensions = ["ext/sooth_native/extconf.rb"]
19
19
  s.extra_rdoc_files = [
20
- "CHANGELOG.md",
21
20
  "README.md"
22
21
  ]
23
22
  s.files = [
@@ -43,7 +42,7 @@ Gem::Specification.new do |s|
43
42
  s.homepage = "http://github.com/jasonhutchens/sooth"
44
43
  s.licenses = ["UNLICENSE"]
45
44
  s.required_ruby_version = Gem::Requirement.new("~> 2.1")
46
- s.rubygems_version = "2.4.8"
45
+ s.rubygems_version = "2.5.1"
47
46
  s.summary = "Sooth is a simple stochastic predictive model."
48
47
 
49
48
  if s.respond_to? :specification_version then
@@ -56,7 +55,7 @@ Gem::Specification.new do |s|
56
55
  s.add_development_dependency(%q<bundler>, ["~> 1.7"])
57
56
  s.add_development_dependency(%q<jeweler>, ["~> 2.0"])
58
57
  s.add_development_dependency(%q<simplecov>, ["~> 0.9"])
59
- s.add_development_dependency(%q<byebug>, ["~> 6.0"])
58
+ s.add_development_dependency(%q<byebug>, ["~> 8.2"])
60
59
  s.add_development_dependency(%q<rake-compiler>, ["~> 0.9"])
61
60
  s.add_development_dependency(%q<github_changelog_generator>, ["~> 1.4"])
62
61
  else
@@ -66,7 +65,7 @@ Gem::Specification.new do |s|
66
65
  s.add_dependency(%q<bundler>, ["~> 1.7"])
67
66
  s.add_dependency(%q<jeweler>, ["~> 2.0"])
68
67
  s.add_dependency(%q<simplecov>, ["~> 0.9"])
69
- s.add_dependency(%q<byebug>, ["~> 6.0"])
68
+ s.add_dependency(%q<byebug>, ["~> 8.2"])
70
69
  s.add_dependency(%q<rake-compiler>, ["~> 0.9"])
71
70
  s.add_dependency(%q<github_changelog_generator>, ["~> 1.4"])
72
71
  end
@@ -77,7 +76,7 @@ Gem::Specification.new do |s|
77
76
  s.add_dependency(%q<bundler>, ["~> 1.7"])
78
77
  s.add_dependency(%q<jeweler>, ["~> 2.0"])
79
78
  s.add_dependency(%q<simplecov>, ["~> 0.9"])
80
- s.add_dependency(%q<byebug>, ["~> 6.0"])
79
+ s.add_dependency(%q<byebug>, ["~> 8.2"])
81
80
  s.add_dependency(%q<rake-compiler>, ["~> 0.9"])
82
81
  s.add_dependency(%q<github_changelog_generator>, ["~> 1.4"])
83
82
  end
data/spec/memory_spec.rb CHANGED
@@ -4,32 +4,32 @@ describe Sooth::Predictor do
4
4
  let(:predictor) { Sooth::Predictor.new(42) }
5
5
 
6
6
  describe "#observe" do
7
- it "does not segfault when sorting bigrams" do
8
- expect(predictor.observe([3,3], 1)).to eq(1)
9
- expect(predictor.observe([5,5], 1)).to eq(1)
10
- expect(predictor.observe([4,4], 1)).to eq(1)
11
- expect(predictor.observe([5,5], 1)).to eq(2)
12
- expect(predictor.observe([3,3], 1)).to eq(2)
13
- expect(predictor.observe([4,4], 1)).to eq(2)
14
- expect(predictor.observe([2,2], 1)).to eq(1)
15
- expect(predictor.observe([4,4], 1)).to eq(3)
16
- expect(predictor.observe([2,2], 1)).to eq(2)
17
- expect(predictor.observe([3,3], 1)).to eq(3)
18
- expect(predictor.observe([5,5], 1)).to eq(3)
7
+ it "does not segfault when sorting contexts" do
8
+ expect(predictor.observe(3, 1)).to eq(1)
9
+ expect(predictor.observe(5, 1)).to eq(1)
10
+ expect(predictor.observe(4, 1)).to eq(1)
11
+ expect(predictor.observe(5, 1)).to eq(2)
12
+ expect(predictor.observe(3, 1)).to eq(2)
13
+ expect(predictor.observe(4, 1)).to eq(2)
14
+ expect(predictor.observe(2, 1)).to eq(1)
15
+ expect(predictor.observe(4, 1)).to eq(3)
16
+ expect(predictor.observe(2, 1)).to eq(2)
17
+ expect(predictor.observe(3, 1)).to eq(3)
18
+ expect(predictor.observe(3, 1)).to eq(4)
19
19
  end
20
20
 
21
- it "does not segfault when sorting symbols" do
22
- expect(predictor.observe([1,2], 3)).to eq(1)
23
- expect(predictor.observe([1,2], 5)).to eq(1)
24
- expect(predictor.observe([1,2], 4)).to eq(1)
25
- expect(predictor.observe([1,2], 5)).to eq(2)
26
- expect(predictor.observe([1,2], 3)).to eq(2)
27
- expect(predictor.observe([1,2], 4)).to eq(2)
28
- expect(predictor.observe([1,2], 2)).to eq(1)
29
- expect(predictor.observe([1,2], 4)).to eq(3)
30
- expect(predictor.observe([1,2], 2)).to eq(2)
31
- expect(predictor.observe([1,2], 3)).to eq(3)
32
- expect(predictor.observe([1,2], 5)).to eq(3)
21
+ it "does not segfault when sorting events" do
22
+ expect(predictor.observe(1, 3)).to eq(1)
23
+ expect(predictor.observe(1, 5)).to eq(1)
24
+ expect(predictor.observe(1, 4)).to eq(1)
25
+ expect(predictor.observe(1, 5)).to eq(2)
26
+ expect(predictor.observe(1, 3)).to eq(2)
27
+ expect(predictor.observe(1, 4)).to eq(2)
28
+ expect(predictor.observe(1, 2)).to eq(1)
29
+ expect(predictor.observe(1, 4)).to eq(3)
30
+ expect(predictor.observe(1, 2)).to eq(2)
31
+ expect(predictor.observe(1, 3)).to eq(3)
32
+ expect(predictor.observe(1, 5)).to eq(3)
33
33
  end
34
34
  end
35
35
  end