sooth 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/ext/sooth_native/native.c +15 -1
- data/ext/sooth_native/sooth_context.h +1 -1
- data/ext/sooth_native/sooth_predictor.c +95 -17
- data/ext/sooth_native/sooth_predictor.h +6 -6
- data/sooth.gemspec +3 -3
- data/spec/predictor_spec.rb +32 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 27347b90bbbfd21db684190405f633f9932cff4a
|
4
|
+
data.tar.gz: 122ebc71eefabd6d1d95ad8ef15712033bcb9b9f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4462abb25b5f6c0a719be89b2e20cfe477f990757a54ae69f427cc3280938178f89cdfab01b2e9e605d414b83d17702e6a764c524669fec0b7204a85efad7db0
|
7
|
+
data.tar.gz: fccf92fb019587081ed45645cd2effe4028e81bdd69ee3e38b1528b5e1eca20d1f2d4dc455f2c17b915345c4e3fbaf7f340b627e6fc2f2fe014a58a2c42b7cc5
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.0
|
1
|
+
0.4.0
|
data/ext/sooth_native/native.c
CHANGED
@@ -174,6 +174,13 @@ method_sooth_native_clear(VALUE self)
|
|
174
174
|
VALUE
|
175
175
|
method_sooth_native_load(VALUE self, VALUE filename)
|
176
176
|
{
|
177
|
+
SoothPredictor * predictor = NULL;
|
178
|
+
Check_Type(filename, T_STRING);
|
179
|
+
Data_Get_Struct(self, SoothPredictor, predictor);
|
180
|
+
if (!sooth_predictor_load(RSTRING_PTR(filename), predictor))
|
181
|
+
{
|
182
|
+
rb_raise(rb_eIOError, "problem loading predictor");
|
183
|
+
}
|
177
184
|
return Qnil;
|
178
185
|
}
|
179
186
|
|
@@ -182,6 +189,13 @@ method_sooth_native_load(VALUE self, VALUE filename)
|
|
182
189
|
VALUE
|
183
190
|
method_sooth_native_save(VALUE self, VALUE filename)
|
184
191
|
{
|
192
|
+
SoothPredictor * predictor = NULL;
|
193
|
+
Check_Type(filename, T_STRING);
|
194
|
+
Data_Get_Struct(self, SoothPredictor, predictor);
|
195
|
+
if (!sooth_predictor_save(RSTRING_PTR(filename), predictor))
|
196
|
+
{
|
197
|
+
rb_raise(rb_eIOError, "problem saving predictor");
|
198
|
+
}
|
185
199
|
return Qnil;
|
186
200
|
}
|
187
201
|
|
@@ -220,7 +234,7 @@ method_sooth_native_count(VALUE self, VALUE bigram)
|
|
220
234
|
Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
|
221
235
|
Data_Get_Struct(self, SoothPredictor, predictor);
|
222
236
|
uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
|
223
|
-
|
237
|
+
uint32_t count = sooth_predictor_count(predictor, c_bigram);
|
224
238
|
return UINT2NUM(count);
|
225
239
|
}
|
226
240
|
|
data/ext/sooth_native/sooth_predictor.c
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
//==============================================================================
|
2
2
|
|
3
|
+
#include <stdio.h>
|
3
4
|
#include <stdlib.h>
|
4
5
|
#include <string.h>
|
5
6
|
|
@@ -7,6 +8,24 @@
|
|
7
8
|
|
8
9
|
//------------------------------------------------------------------------------
|
9
10
|
|
11
|
+
void
|
12
|
+
sooth_show_predictor(SoothPredictor * predictor)
|
13
|
+
{
|
14
|
+
printf("Error Symbol: %u\n", predictor->error_symbol);
|
15
|
+
for (uint32_t i = 0; i < predictor->contexts_size; ++i)
|
16
|
+
{
|
17
|
+
SoothContext context = predictor->contexts[i];
|
18
|
+
printf(" Context %u-%u (%u)\n", context.bigram[0], context.bigram[1], context.count);
|
19
|
+
for (uint32_t j = 0; j < context.statistics_size; ++j)
|
20
|
+
{
|
21
|
+
SoothStatistic statistic = context.statistics[j];
|
22
|
+
printf(" Symbol %u (%u)\n", statistic.symbol, statistic.count);
|
23
|
+
}
|
24
|
+
}
|
25
|
+
}
|
26
|
+
|
27
|
+
//------------------------------------------------------------------------------
|
28
|
+
|
10
29
|
SoothPredictor *
|
11
30
|
sooth_predictor_init()
|
12
31
|
{
|
@@ -29,7 +48,7 @@ sooth_predictor_init()
|
|
29
48
|
void
|
30
49
|
sooth_predictor_clear(SoothPredictor * predictor)
|
31
50
|
{
|
32
|
-
for (
|
51
|
+
for (uint32_t i = 0; i < predictor->contexts_size; ++i)
|
33
52
|
{
|
34
53
|
SoothContext * context = &(predictor->contexts[i]);
|
35
54
|
free(context->statistics);
|
@@ -53,22 +72,81 @@ sooth_predictor_free(SoothPredictor * predictor)
|
|
53
72
|
|
54
73
|
//------------------------------------------------------------------------------
|
55
74
|
|
56
|
-
|
75
|
+
bool
|
57
76
|
sooth_predictor_save(const char * const filename, SoothPredictor * predictor)
|
58
77
|
{
|
59
|
-
|
78
|
+
FILE *file = fopen(filename, "wb");
|
60
79
|
|
61
|
-
|
80
|
+
if (file == NULL)
|
81
|
+
{
|
82
|
+
return false;
|
83
|
+
}
|
62
84
|
|
63
|
-
|
64
|
-
|
85
|
+
fwrite("MH10", 1, 4, file);
|
86
|
+
fwrite(&predictor->error_symbol, 4, 1, file);
|
87
|
+
fwrite(&predictor->contexts_size, 4, 1, file);
|
88
|
+
for (uint32_t i = 0; i < predictor->contexts_size; ++i)
|
89
|
+
{
|
90
|
+
SoothContext context = predictor->contexts[i];
|
91
|
+
fwrite(context.bigram, 4, 2, file);
|
92
|
+
fwrite(&context.count, 4, 1, file);
|
93
|
+
fwrite(&context.statistics_size, 4, 1, file);
|
94
|
+
fwrite(context.statistics, sizeof(SoothStatistic), context.statistics_size, file);
|
95
|
+
}
|
96
|
+
|
97
|
+
fclose(file);
|
98
|
+
|
99
|
+
return true;
|
65
100
|
}
|
101
|
+
|
66
102
|
//------------------------------------------------------------------------------
|
67
103
|
|
68
|
-
|
104
|
+
bool sooth_predictor_load(const char * const filename, SoothPredictor * predictor)
|
69
105
|
{
|
106
|
+
FILE *file = fopen(filename, "rb");
|
107
|
+
|
108
|
+
if (file == NULL)
|
109
|
+
{
|
110
|
+
return false;
|
111
|
+
}
|
112
|
+
|
113
|
+
char code[4];
|
114
|
+
fread(code, 1, 4, file);
|
115
|
+
if (strncmp(code, "MH10", 4) != 0)
|
116
|
+
{
|
117
|
+
return false;
|
118
|
+
}
|
119
|
+
|
70
120
|
sooth_predictor_clear(predictor);
|
71
|
-
|
121
|
+
|
122
|
+
fread(&predictor->error_symbol, 4, 1, file);
|
123
|
+
fread(&predictor->contexts_size, 4, 1, file);
|
124
|
+
if (predictor->contexts_size == 0)
|
125
|
+
{
|
126
|
+
return true;
|
127
|
+
}
|
128
|
+
predictor->contexts = malloc(sizeof(SoothContext) * predictor->contexts_size);
|
129
|
+
if (predictor->contexts == NULL)
|
130
|
+
{
|
131
|
+
sooth_predictor_clear(predictor);
|
132
|
+
return NULL;
|
133
|
+
}
|
134
|
+
for (uint32_t i = 0; i < predictor->contexts_size; ++i)
|
135
|
+
{
|
136
|
+
SoothContext * context = &(predictor->contexts[i]);
|
137
|
+
fread(context->bigram, 4, 2, file);
|
138
|
+
fread(&context->count, 4, 1, file);
|
139
|
+
fread(&context->statistics_size, 4, 1, file);
|
140
|
+
context->statistics = malloc(sizeof(SoothStatistic) * context->statistics_size);
|
141
|
+
if (context->statistics == NULL)
|
142
|
+
{
|
143
|
+
sooth_predictor_clear(predictor);
|
144
|
+
return NULL;
|
145
|
+
}
|
146
|
+
fread(context->statistics, sizeof(SoothStatistic), context->statistics_size, file);
|
147
|
+
}
|
148
|
+
|
149
|
+
return true;
|
72
150
|
}
|
73
151
|
|
74
152
|
//------------------------------------------------------------------------------
|
@@ -76,12 +154,12 @@ SoothContext *
|
|
76
154
|
sooth_predictor_find_context(SoothPredictor * predictor, uint32_t bigram[2])
|
77
155
|
{
|
78
156
|
SoothContext * context = NULL;
|
79
|
-
|
157
|
+
uint32_t mid = 0;
|
80
158
|
|
81
159
|
if (predictor->contexts_size > 0)
|
82
160
|
{
|
83
|
-
|
84
|
-
|
161
|
+
uint32_t low = 0;
|
162
|
+
uint32_t high = predictor->contexts_size - 1;
|
85
163
|
|
86
164
|
while (low <= high)
|
87
165
|
{
|
@@ -139,12 +217,12 @@ SoothStatistic *
|
|
139
217
|
sooth_predictor_find_statistic(SoothContext * context, uint32_t symbol)
|
140
218
|
{
|
141
219
|
SoothStatistic * statistic = NULL;
|
142
|
-
|
220
|
+
uint32_t mid = 0;
|
143
221
|
|
144
222
|
if (context->statistics_size > 0)
|
145
223
|
{
|
146
|
-
|
147
|
-
|
224
|
+
uint32_t low = 0;
|
225
|
+
uint32_t high = context->statistics_size - 1;
|
148
226
|
|
149
227
|
while (low <= high)
|
150
228
|
{
|
@@ -220,7 +298,7 @@ sooth_predictor_observe(SoothPredictor * predictor, uint32_t bigram[2], uint32_t
|
|
220
298
|
|
221
299
|
//------------------------------------------------------------------------------
|
222
300
|
|
223
|
-
|
301
|
+
uint32_t
|
224
302
|
sooth_predictor_count(SoothPredictor * predictor, uint32_t bigram[2])
|
225
303
|
{
|
226
304
|
SoothContext * context = sooth_predictor_find_context(predictor, bigram);
|
@@ -236,7 +314,7 @@ sooth_predictor_count(SoothPredictor * predictor, uint32_t bigram[2])
|
|
236
314
|
//------------------------------------------------------------------------------
|
237
315
|
|
238
316
|
uint32_t
|
239
|
-
sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2],
|
317
|
+
sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint32_t limit)
|
240
318
|
{
|
241
319
|
SoothContext * context = sooth_predictor_find_context(predictor, bigram);
|
242
320
|
|
@@ -245,7 +323,7 @@ sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint64_t
|
|
245
323
|
return predictor->error_symbol;
|
246
324
|
}
|
247
325
|
|
248
|
-
for (
|
326
|
+
for (uint32_t i = 0; i < context->statistics_size; ++i)
|
249
327
|
{
|
250
328
|
SoothStatistic statistic = context->statistics[i];
|
251
329
|
if (limit > statistic.count)
|
data/ext/sooth_native/sooth_predictor.h
CHANGED
@@ -4,13 +4,14 @@
|
|
4
4
|
//==============================================================================
|
5
5
|
|
6
6
|
#include <stdint.h>
|
7
|
+
#include <stdbool.h>
|
7
8
|
|
8
9
|
#include "sooth_context.h"
|
9
10
|
|
10
11
|
typedef struct
|
11
12
|
{
|
12
13
|
uint32_t error_symbol;
|
13
|
-
|
14
|
+
uint32_t contexts_size;
|
14
15
|
SoothContext * contexts;
|
15
16
|
}
|
16
17
|
SoothPredictor;
|
@@ -20,12 +21,11 @@ SoothPredictor;
|
|
20
21
|
SoothPredictor * sooth_predictor_init();
|
21
22
|
void sooth_predictor_clear(SoothPredictor * predictor);
|
22
23
|
void sooth_predictor_free(SoothPredictor * predictor);
|
23
|
-
|
24
|
-
|
25
|
-
void sooth_predictor_save(const char * const filename, SoothPredictor * predictor);
|
24
|
+
bool sooth_predictor_load(const char * const filename, SoothPredictor * predictor);
|
25
|
+
bool sooth_predictor_save(const char * const filename, SoothPredictor * predictor);
|
26
26
|
uint32_t sooth_predictor_observe(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol);
|
27
|
-
|
28
|
-
uint32_t sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2],
|
27
|
+
uint32_t sooth_predictor_count(SoothPredictor * predictor, uint32_t bigram[2]);
|
28
|
+
uint32_t sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint32_t limit);
|
29
29
|
|
30
30
|
//==============================================================================
|
31
31
|
|
data/sooth.gemspec
CHANGED
@@ -2,17 +2,17 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: sooth 0.3.0 ruby lib
|
5
|
+
# stub: sooth 0.4.0 ruby lib
|
6
6
|
# stub: ext/sooth_native/extconf.rb
|
7
7
|
|
8
8
|
Gem::Specification.new do |s|
|
9
9
|
s.name = "sooth"
|
10
|
-
s.version = "0.
|
10
|
+
s.version = "0.4.0"
|
11
11
|
|
12
12
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
13
13
|
s.require_paths = ["lib"]
|
14
14
|
s.authors = ["Jason Hutchens"]
|
15
|
-
s.date = "2014-12-
|
15
|
+
s.date = "2014-12-16"
|
16
16
|
s.description = "Sooth is a simple stochastic predictive model."
|
17
17
|
s.email = "jasonhutchens@gmail.com"
|
18
18
|
s.extensions = ["ext/sooth_native/extconf.rb"]
|
data/spec/predictor_spec.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
|
1
3
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
4
|
|
3
5
|
describe Sooth::Predictor do
|
@@ -102,4 +104,34 @@ describe Sooth::Predictor do
|
|
102
104
|
end
|
103
105
|
|
104
106
|
end
|
107
|
+
|
108
|
+
describe "#save" do
|
109
|
+
|
110
|
+
it "can save a file and load it back again" do
|
111
|
+
begin
|
112
|
+
file = Tempfile.new('sooth_spec')
|
113
|
+
expect(predictor.observe([1,2], 3)).to eq(1)
|
114
|
+
expect(predictor.observe([2,1], 3)).to eq(1)
|
115
|
+
expect(predictor.observe([1,2], 3)).to eq(2)
|
116
|
+
expect(predictor.observe([1,2], 3)).to eq(3)
|
117
|
+
expect { predictor.save(file.path) } .to_not raise_error
|
118
|
+
expect(predictor.count([1,2])).to eq(3)
|
119
|
+
expect(predictor.count([2,1])).to eq(1)
|
120
|
+
predictor.clear
|
121
|
+
expect(predictor.count([1,2])).to eq(0)
|
122
|
+
expect(predictor.count([2,1])).to eq(0)
|
123
|
+
expect { predictor.load(file.path) }.to_not raise_error
|
124
|
+
expect(predictor.count([1,2])).to eq(3)
|
125
|
+
expect(predictor.count([2,1])).to eq(1)
|
126
|
+
expect(predictor.observe([1,2], 3)).to eq(4)
|
127
|
+
expect(predictor.observe([1,2], 1)).to eq(1)
|
128
|
+
expect(predictor.observe([2,1], 3)).to eq(2)
|
129
|
+
expect(predictor.observe([2,1], 1)).to eq(1)
|
130
|
+
ensure
|
131
|
+
file.close
|
132
|
+
file.unlink
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
end
|
105
137
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sooth
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jason Hutchens
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-12-
|
11
|
+
date: 2014-12-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|