sooth 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/ext/sooth_native/native.c +15 -1
- data/ext/sooth_native/sooth_context.h +1 -1
- data/ext/sooth_native/sooth_predictor.c +95 -17
- data/ext/sooth_native/sooth_predictor.h +6 -6
- data/sooth.gemspec +3 -3
- data/spec/predictor_spec.rb +32 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 27347b90bbbfd21db684190405f633f9932cff4a
|
4
|
+
data.tar.gz: 122ebc71eefabd6d1d95ad8ef15712033bcb9b9f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4462abb25b5f6c0a719be89b2e20cfe477f990757a54ae69f427cc3280938178f89cdfab01b2e9e605d414b83d17702e6a764c524669fec0b7204a85efad7db0
|
7
|
+
data.tar.gz: fccf92fb019587081ed45645cd2effe4028e81bdd69ee3e38b1528b5e1eca20d1f2d4dc455f2c17b915345c4e3fbaf7f340b627e6fc2f2fe014a58a2c42b7cc5
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.0
|
1
|
+
0.4.0
|
data/ext/sooth_native/native.c
CHANGED
@@ -174,6 +174,13 @@ method_sooth_native_clear(VALUE self)
|
|
174
174
|
VALUE
|
175
175
|
method_sooth_native_load(VALUE self, VALUE filename)
|
176
176
|
{
|
177
|
+
SoothPredictor * predictor = NULL;
|
178
|
+
Check_Type(filename, T_STRING);
|
179
|
+
Data_Get_Struct(self, SoothPredictor, predictor);
|
180
|
+
if (!sooth_predictor_load(RSTRING_PTR(filename), predictor))
|
181
|
+
{
|
182
|
+
rb_raise(rb_eIOError, "problem loading predictor");
|
183
|
+
}
|
177
184
|
return Qnil;
|
178
185
|
}
|
179
186
|
|
@@ -182,6 +189,13 @@ method_sooth_native_load(VALUE self, VALUE filename)
|
|
182
189
|
VALUE
|
183
190
|
method_sooth_native_save(VALUE self, VALUE filename)
|
184
191
|
{
|
192
|
+
SoothPredictor * predictor = NULL;
|
193
|
+
Check_Type(filename, T_STRING);
|
194
|
+
Data_Get_Struct(self, SoothPredictor, predictor);
|
195
|
+
if (!sooth_predictor_save(RSTRING_PTR(filename), predictor))
|
196
|
+
{
|
197
|
+
rb_raise(rb_eIOError, "problem saving predictor");
|
198
|
+
}
|
185
199
|
return Qnil;
|
186
200
|
}
|
187
201
|
|
@@ -220,7 +234,7 @@ method_sooth_native_count(VALUE self, VALUE bigram)
|
|
220
234
|
Check_Type(RARRAY_PTR(bigram)[1], T_FIXNUM);
|
221
235
|
Data_Get_Struct(self, SoothPredictor, predictor);
|
222
236
|
uint32_t c_bigram[2] = {NUM2UINT(RARRAY_PTR(bigram)[0]), NUM2UINT(RARRAY_PTR(bigram)[1])};
|
223
|
-
|
237
|
+
uint32_t count = sooth_predictor_count(predictor, c_bigram);
|
224
238
|
return UINT2NUM(count);
|
225
239
|
}
|
226
240
|
|
@@ -1,5 +1,6 @@
|
|
1
1
|
//==============================================================================
|
2
2
|
|
3
|
+
#include <stdio.h>
|
3
4
|
#include <stdlib.h>
|
4
5
|
#include <string.h>
|
5
6
|
|
@@ -7,6 +8,24 @@
|
|
7
8
|
|
8
9
|
//------------------------------------------------------------------------------
|
9
10
|
|
11
|
+
void
|
12
|
+
sooth_show_predictor(SoothPredictor * predictor)
|
13
|
+
{
|
14
|
+
printf("Error Symbol: %u\n", predictor->error_symbol);
|
15
|
+
for (uint32_t i = 0; i < predictor->contexts_size; ++i)
|
16
|
+
{
|
17
|
+
SoothContext context = predictor->contexts[i];
|
18
|
+
printf(" Context %u-%u (%u)\n", context.bigram[0], context.bigram[1], context.count);
|
19
|
+
for (uint32_t j = 0; j < context.statistics_size; ++j)
|
20
|
+
{
|
21
|
+
SoothStatistic statistic = context.statistics[j];
|
22
|
+
printf(" Symbol %u (%u)\n", statistic.symbol, statistic.count);
|
23
|
+
}
|
24
|
+
}
|
25
|
+
}
|
26
|
+
|
27
|
+
//------------------------------------------------------------------------------
|
28
|
+
|
10
29
|
SoothPredictor *
|
11
30
|
sooth_predictor_init()
|
12
31
|
{
|
@@ -29,7 +48,7 @@ sooth_predictor_init()
|
|
29
48
|
void
|
30
49
|
sooth_predictor_clear(SoothPredictor * predictor)
|
31
50
|
{
|
32
|
-
for (
|
51
|
+
for (uint32_t i = 0; i < predictor->contexts_size; ++i)
|
33
52
|
{
|
34
53
|
SoothContext * context = &(predictor->contexts[i]);
|
35
54
|
free(context->statistics);
|
@@ -53,22 +72,81 @@ sooth_predictor_free(SoothPredictor * predictor)
|
|
53
72
|
|
54
73
|
//------------------------------------------------------------------------------
|
55
74
|
|
56
|
-
|
75
|
+
bool
|
57
76
|
sooth_predictor_save(const char * const filename, SoothPredictor * predictor)
|
58
77
|
{
|
59
|
-
|
78
|
+
FILE *file = fopen(filename, "wb");
|
60
79
|
|
61
|
-
|
80
|
+
if (file == NULL)
|
81
|
+
{
|
82
|
+
return false;
|
83
|
+
}
|
62
84
|
|
63
|
-
|
64
|
-
|
85
|
+
fwrite("MH10", 1, 4, file);
|
86
|
+
fwrite(&predictor->error_symbol, 4, 1, file);
|
87
|
+
fwrite(&predictor->contexts_size, 4, 1, file);
|
88
|
+
for (uint32_t i = 0; i < predictor->contexts_size; ++i)
|
89
|
+
{
|
90
|
+
SoothContext context = predictor->contexts[i];
|
91
|
+
fwrite(context.bigram, 4, 2, file);
|
92
|
+
fwrite(&context.count, 4, 1, file);
|
93
|
+
fwrite(&context.statistics_size, 4, 1, file);
|
94
|
+
fwrite(context.statistics, sizeof(SoothStatistic), context.statistics_size, file);
|
95
|
+
}
|
96
|
+
|
97
|
+
fclose(file);
|
98
|
+
|
99
|
+
return true;
|
65
100
|
}
|
101
|
+
|
66
102
|
//------------------------------------------------------------------------------
|
67
103
|
|
68
|
-
|
104
|
+
// Reads everything that follows the "MH10" magic from `file` into
// `predictor`, which the caller has already cleared. Returns false on a short
// read or allocation failure; the predictor may then be partially populated
// and the caller is expected to clear it.
static bool
sooth_predictor_read_contents(FILE * file, SoothPredictor * predictor)
{
  uint32_t error_symbol = 0;
  uint32_t contexts_size = 0;

  if (fread(&error_symbol, 4, 1, file) != 1
      || fread(&contexts_size, 4, 1, file) != 1)
  {
    return false;
  }

  predictor->error_symbol = error_symbol;

  if (contexts_size == 0)
  {
    predictor->contexts_size = 0;
    return true;
  }

  // calloc zeroes every context, so contexts not yet read have a NULL
  // statistics pointer and can be freed safely if loading fails part-way.
  predictor->contexts = calloc(contexts_size, sizeof(SoothContext));
  if (predictor->contexts == NULL)
  {
    return false;
  }
  // Publish the size only once the array it describes actually exists.
  predictor->contexts_size = contexts_size;

  for (uint32_t i = 0; i < contexts_size; ++i)
  {
    SoothContext * context = &(predictor->contexts[i]);
    uint32_t statistics_size = 0;

    if (fread(context->bigram, 4, 2, file) != 2
        || fread(&context->count, 4, 1, file) != 1
        || fread(&statistics_size, 4, 1, file) != 1)
    {
      return false;
    }

    if (statistics_size == 0)
    {
      // Empty context: leave statistics NULL rather than relying on the
      // implementation-defined result of a zero-byte allocation.
      continue;
    }

    context->statistics = calloc(statistics_size, sizeof(SoothStatistic));
    if (context->statistics == NULL)
    {
      return false;
    }
    context->statistics_size = statistics_size;

    if (fread(context->statistics, sizeof(SoothStatistic), statistics_size, file) != statistics_size)
    {
      return false;
    }
  }

  return true;
}

// Loads a predictor previously written by sooth_predictor_save.
//
// filename  - path of the file to read.
// predictor - predictor whose state is replaced by the file's contents.
//
// Returns true on success. Returns false if the file cannot be opened, does
// not start with the "MH10" magic (the predictor is left untouched in both
// of these cases), or is truncated / cannot be held in memory (the predictor
// is then cleared). The file is closed on every path.
//
// NOTE(review): the error path assumes sooth_predictor_clear frees each
// context's statistics and the contexts array and resets contexts_size; only
// the start of that function is visible here - confirm.
bool sooth_predictor_load(const char * const filename, SoothPredictor * predictor)
{
  FILE *file = fopen(filename, "rb");

  if (file == NULL)
  {
    return false;
  }

  char code[4];
  bool ok = fread(code, 1, 4, file) == 4 && memcmp(code, "MH10", 4) == 0;

  if (ok)
  {
    // Existing state is discarded only once the file is known to be ours.
    sooth_predictor_clear(predictor);

    ok = sooth_predictor_read_contents(file, predictor);
    if (!ok)
    {
      sooth_predictor_clear(predictor);
    }
  }

  fclose(file);

  return ok;
}
|
73
151
|
|
74
152
|
//------------------------------------------------------------------------------
|
@@ -76,12 +154,12 @@ SoothContext *
|
|
76
154
|
sooth_predictor_find_context(SoothPredictor * predictor, uint32_t bigram[2])
|
77
155
|
{
|
78
156
|
SoothContext * context = NULL;
|
79
|
-
|
157
|
+
uint32_t mid = 0;
|
80
158
|
|
81
159
|
if (predictor->contexts_size > 0)
|
82
160
|
{
|
83
|
-
|
84
|
-
|
161
|
+
uint32_t low = 0;
|
162
|
+
uint32_t high = predictor->contexts_size - 1;
|
85
163
|
|
86
164
|
while (low <= high)
|
87
165
|
{
|
@@ -139,12 +217,12 @@ SoothStatistic *
|
|
139
217
|
sooth_predictor_find_statistic(SoothContext * context, uint32_t symbol)
|
140
218
|
{
|
141
219
|
SoothStatistic * statistic = NULL;
|
142
|
-
|
220
|
+
uint32_t mid = 0;
|
143
221
|
|
144
222
|
if (context->statistics_size > 0)
|
145
223
|
{
|
146
|
-
|
147
|
-
|
224
|
+
uint32_t low = 0;
|
225
|
+
uint32_t high = context->statistics_size - 1;
|
148
226
|
|
149
227
|
while (low <= high)
|
150
228
|
{
|
@@ -220,7 +298,7 @@ sooth_predictor_observe(SoothPredictor * predictor, uint32_t bigram[2], uint32_t
|
|
220
298
|
|
221
299
|
//------------------------------------------------------------------------------
|
222
300
|
|
223
|
-
|
301
|
+
uint32_t
|
224
302
|
sooth_predictor_count(SoothPredictor * predictor, uint32_t bigram[2])
|
225
303
|
{
|
226
304
|
SoothContext * context = sooth_predictor_find_context(predictor, bigram);
|
@@ -236,7 +314,7 @@ sooth_predictor_count(SoothPredictor * predictor, uint32_t bigram[2])
|
|
236
314
|
//------------------------------------------------------------------------------
|
237
315
|
|
238
316
|
uint32_t
|
239
|
-
sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2],
|
317
|
+
sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint32_t limit)
|
240
318
|
{
|
241
319
|
SoothContext * context = sooth_predictor_find_context(predictor, bigram);
|
242
320
|
|
@@ -245,7 +323,7 @@ sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint64_t
|
|
245
323
|
return predictor->error_symbol;
|
246
324
|
}
|
247
325
|
|
248
|
-
for (
|
326
|
+
for (uint32_t i = 0; i < context->statistics_size; ++i)
|
249
327
|
{
|
250
328
|
SoothStatistic statistic = context->statistics[i];
|
251
329
|
if (limit > statistic.count)
|
@@ -4,13 +4,14 @@
|
|
4
4
|
//==============================================================================
|
5
5
|
|
6
6
|
#include <stdint.h>
|
7
|
+
#include <stdbool.h>
|
7
8
|
|
8
9
|
#include "sooth_context.h"
|
9
10
|
|
10
11
|
// State of a predictor: an error symbol plus an array of contexts.
typedef struct
{
  // Symbol that sooth_predictor_select returns on its early-exit path.
  uint32_t error_symbol;
  // Number of elements in `contexts`.
  uint32_t contexts_size;
  // Array of `contexts_size` contexts.
  // NOTE(review): lookup code appears to binary-search this array, so it is
  // presumably kept ordered by bigram - confirm.
  SoothContext * contexts;
}
SoothPredictor;
|
@@ -20,12 +21,11 @@ SoothPredictor;
|
|
20
21
|
SoothPredictor * sooth_predictor_init();
|
21
22
|
void sooth_predictor_clear(SoothPredictor * predictor);
|
22
23
|
void sooth_predictor_free(SoothPredictor * predictor);
|
23
|
-
|
24
|
-
|
25
|
-
void sooth_predictor_save(const char * const filename, SoothPredictor * predictor);
|
24
|
+
bool sooth_predictor_load(const char * const filename, SoothPredictor * predictor);
|
25
|
+
bool sooth_predictor_save(const char * const filename, SoothPredictor * predictor);
|
26
26
|
uint32_t sooth_predictor_observe(SoothPredictor * predictor, uint32_t bigram[2], uint32_t symbol);
|
27
|
-
|
28
|
-
uint32_t sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2],
|
27
|
+
uint32_t sooth_predictor_count(SoothPredictor * predictor, uint32_t bigram[2]);
|
28
|
+
uint32_t sooth_predictor_select(SoothPredictor * predictor, uint32_t bigram[2], uint32_t limit);
|
29
29
|
|
30
30
|
//==============================================================================
|
31
31
|
|
data/sooth.gemspec
CHANGED
@@ -2,17 +2,17 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: sooth 0.3.0 ruby lib
|
5
|
+
# stub: sooth 0.4.0 ruby lib
|
6
6
|
# stub: ext/sooth_native/extconf.rb
|
7
7
|
|
8
8
|
Gem::Specification.new do |s|
|
9
9
|
s.name = "sooth"
|
10
|
-
s.version = "0.
|
10
|
+
s.version = "0.4.0"
|
11
11
|
|
12
12
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
13
13
|
s.require_paths = ["lib"]
|
14
14
|
s.authors = ["Jason Hutchens"]
|
15
|
-
s.date = "2014-12-
|
15
|
+
s.date = "2014-12-16"
|
16
16
|
s.description = "Sooth is a simple stochastic predictive model."
|
17
17
|
s.email = "jasonhutchens@gmail.com"
|
18
18
|
s.extensions = ["ext/sooth_native/extconf.rb"]
|
data/spec/predictor_spec.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
|
1
3
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
4
|
|
3
5
|
describe Sooth::Predictor do
|
@@ -102,4 +104,34 @@ describe Sooth::Predictor do
|
|
102
104
|
end
|
103
105
|
|
104
106
|
end
|
107
|
+
|
108
|
+
describe "#save" do
|
109
|
+
|
110
|
+
it "can save a file and load it back again" do
|
111
|
+
begin
|
112
|
+
file = Tempfile.new('sooth_spec')
|
113
|
+
expect(predictor.observe([1,2], 3)).to eq(1)
|
114
|
+
expect(predictor.observe([2,1], 3)).to eq(1)
|
115
|
+
expect(predictor.observe([1,2], 3)).to eq(2)
|
116
|
+
expect(predictor.observe([1,2], 3)).to eq(3)
|
117
|
+
expect { predictor.save(file.path) } .to_not raise_error
|
118
|
+
expect(predictor.count([1,2])).to eq(3)
|
119
|
+
expect(predictor.count([2,1])).to eq(1)
|
120
|
+
predictor.clear
|
121
|
+
expect(predictor.count([1,2])).to eq(0)
|
122
|
+
expect(predictor.count([2,1])).to eq(0)
|
123
|
+
expect { predictor.load(file.path) }.to_not raise_error
|
124
|
+
expect(predictor.count([1,2])).to eq(3)
|
125
|
+
expect(predictor.count([2,1])).to eq(1)
|
126
|
+
expect(predictor.observe([1,2], 3)).to eq(4)
|
127
|
+
expect(predictor.observe([1,2], 1)).to eq(1)
|
128
|
+
expect(predictor.observe([2,1], 3)).to eq(2)
|
129
|
+
expect(predictor.observe([2,1], 1)).to eq(1)
|
130
|
+
ensure
|
131
|
+
file.close
|
132
|
+
file.unlink
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
end
|
105
137
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sooth
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jason Hutchens
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-12-
|
11
|
+
date: 2014-12-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|