wapiti 0.0.5 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.simplecov +3 -0
- data/Gemfile +25 -2
- data/HISTORY.md +5 -1
- data/LICENSE +14 -13
- data/README.md +9 -16
- data/Rakefile +38 -8
- data/ext/wapiti/bcd.c +126 -124
- data/ext/wapiti/decoder.c +203 -124
- data/ext/wapiti/decoder.h +6 -4
- data/ext/wapiti/extconf.rb +2 -2
- data/ext/wapiti/gradient.c +491 -320
- data/ext/wapiti/gradient.h +52 -34
- data/ext/wapiti/lbfgs.c +74 -33
- data/ext/wapiti/model.c +47 -37
- data/ext/wapiti/model.h +22 -20
- data/ext/wapiti/native.c +850 -839
- data/ext/wapiti/native.h +1 -1
- data/ext/wapiti/options.c +52 -20
- data/ext/wapiti/options.h +37 -30
- data/ext/wapiti/pattern.c +35 -33
- data/ext/wapiti/pattern.h +12 -11
- data/ext/wapiti/progress.c +14 -13
- data/ext/wapiti/progress.h +3 -2
- data/ext/wapiti/quark.c +14 -16
- data/ext/wapiti/quark.h +6 -5
- data/ext/wapiti/reader.c +83 -69
- data/ext/wapiti/reader.h +11 -9
- data/ext/wapiti/rprop.c +84 -43
- data/ext/wapiti/sequence.h +18 -16
- data/ext/wapiti/sgdl1.c +45 -43
- data/ext/wapiti/thread.c +19 -17
- data/ext/wapiti/thread.h +5 -4
- data/ext/wapiti/tools.c +7 -7
- data/ext/wapiti/tools.h +3 -4
- data/ext/wapiti/trainers.h +1 -1
- data/ext/wapiti/vmath.c +40 -38
- data/ext/wapiti/vmath.h +12 -11
- data/ext/wapiti/wapiti.c +159 -37
- data/ext/wapiti/wapiti.h +18 -4
- data/lib/wapiti.rb +15 -15
- data/lib/wapiti/errors.rb +15 -15
- data/lib/wapiti/model.rb +92 -84
- data/lib/wapiti/options.rb +123 -124
- data/lib/wapiti/utility.rb +14 -14
- data/lib/wapiti/version.rb +2 -2
- data/spec/spec_helper.rb +29 -9
- data/spec/wapiti/model_spec.rb +230 -194
- data/spec/wapiti/native_spec.rb +7 -8
- data/spec/wapiti/options_spec.rb +184 -174
- data/wapiti.gemspec +22 -8
- metadata +38 -42
- data/.gitignore +0 -5
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b556f6c375171e82550f04adffc293154d90e370
|
4
|
+
data.tar.gz: 1a40a18e7e4f6afbbfc8a51d1aaef6a469375574
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f904c8d7ce3b0c48a74f4f2e678b120626eb89ae9d22a3cc4b75d3b8c081d8dc0cffbe030a9a2b3132927a80c696e5df7eb6673a66420600ebcf7e09eb497282
|
7
|
+
data.tar.gz: 58be17b5b87d9ee0ee7fa6037492e1bb0b6070ae1611c640799ab318fcf8abfc074ef89bcb944297b428996f8b2b13fc9dfed6081d6fbf02c0d4bfd8e54e1ea3
|
data/.simplecov
ADDED
data/Gemfile
CHANGED
@@ -1,6 +1,29 @@
|
|
1
|
-
source
|
1
|
+
source 'https://rubygems.org'
|
2
2
|
gemspec
|
3
3
|
|
4
|
-
group :
|
4
|
+
group :debug do
|
5
|
+
gem 'debugger', '~>1.6', :require => false, :platform => :mri
|
6
|
+
gem 'rubinius-compiler', '~>2.0', :require => false, :platform => :rbx
|
7
|
+
gem 'rubinius-debugger', '~>2.0', :require => false, :platform => :rbx
|
8
|
+
end
|
9
|
+
|
10
|
+
group :development do
|
11
|
+
gem 'simplecov', '~>0.8', :require => false
|
12
|
+
gem 'rubinius-coverage', :platform => :rbx
|
13
|
+
gem 'coveralls', :require => false
|
14
|
+
end
|
15
|
+
|
16
|
+
group :extra do
|
17
|
+
gem 'ZenTest'
|
18
|
+
gem 'pry'
|
19
|
+
end
|
20
|
+
|
21
|
+
group :osx do
|
5
22
|
gem 'autotest-fsevent', :require => false
|
6
23
|
end
|
24
|
+
|
25
|
+
platform :rbx do
|
26
|
+
gem 'rubysl', '~>2.0'
|
27
|
+
gem 'racc'
|
28
|
+
gem 'json'
|
29
|
+
end
|
data/HISTORY.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
0.1.0 / 2014-02-25
|
2
|
+
==================
|
3
|
+
* Updated to wapiti 1.5.0
|
4
|
+
|
1
5
|
0.0.5 / 2011-09-06
|
2
6
|
==================
|
3
7
|
* Added UTF-8 encoding to native result strings for Ruby 1.9
|
@@ -10,4 +14,4 @@
|
|
10
14
|
|
11
15
|
0.0.2 / 2011-09-01
|
12
16
|
==================
|
13
|
-
* Initial release
|
17
|
+
* Initial release
|
data/LICENSE
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
Wapiti-Ruby
|
2
|
-
Copyright 2011 Sylvester Keil. All rights reserved.
|
2
|
+
Copyright 2011-2014 Sylvester Keil. All rights reserved.
|
3
3
|
|
4
4
|
Wapiti - A linear-chain CRF tool
|
5
|
-
Copyright 2009-
|
5
|
+
Copyright 2009-2013 CNRS. All rights reserved.
|
6
6
|
|
7
7
|
Redistribution and use in source and binary forms, with or without
|
8
8
|
modification, are permitted provided that the following conditions are met:
|
@@ -14,17 +14,18 @@ modification, are permitted provided that the following conditions are met:
|
|
14
14
|
this list of conditions and the following disclaimer in the documentation
|
15
15
|
and/or other materials provided with the distribution.
|
16
16
|
|
17
|
-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
|
18
|
-
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
19
|
-
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
20
|
-
EVENT SHALL THE COPYRIGHT
|
21
|
-
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
22
|
-
BUT NOT LIMITED TO, PROCUREMENT OF
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
17
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
18
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
19
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
20
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
21
|
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
22
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
23
|
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
24
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
25
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
26
|
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
27
|
+
POSSIBILITY OF SUCH DAMAGE.
|
27
28
|
|
28
29
|
The views and conclusions contained in the software and documentation are
|
29
30
|
those of the authors and should not be interpreted as representing official
|
30
|
-
policies, either expressed or implied, of the copyright
|
31
|
+
policies, either expressed or implied, of the copyright holders.
|
data/README.md
CHANGED
@@ -1,24 +1,20 @@
|
|
1
1
|
Wapiti-Ruby
|
2
2
|
===========
|
3
|
-
|
4
3
|
The Wapiti-Ruby gem provides a wicked fast linear-chain CRF
|
5
4
|
([Conditional Random Fields](http://en.wikipedia.org/wiki/Conditional_random_field))
|
6
|
-
API for sequence segmentation and labelling; it is based on the
|
7
|
-
|
5
|
+
API for sequence segmentation and labelling; it is based on the
|
6
|
+
codebase of [wapiti](http://wapiti.limsi.fr/).
|
8
7
|
|
8
|
+
[![Build Status](https://secure.travis-ci.org/inukshuk/wapiti-ruby.png)](http://travis-ci.org/inukshuk/wapiti-ruby)
|
9
|
+
[![Coverage Status](https://coveralls.io/repos/inukshuk/wapiti-ruby/badge.png?branch=master)](https://coveralls.io/r/inukshuk/wapiti-ruby?branch=master)
|
9
10
|
|
10
11
|
Requirements
|
11
12
|
------------
|
12
|
-
|
13
13
|
Wapiti is written in C and Ruby and requires a compiler with C99
|
14
14
|
support (e.g., gcc); on GNU/Linux systems it will be fairly easy to install
|
15
|
-
all necessary packages through your distribution
|
16
|
-
install Xcode or
|
17
|
-
[osx-gcc-installer](https://github.com/kennethreitz/osx-gcc-installer);
|
18
|
-
on Windows you may want to install RubyInstaller's
|
19
|
-
[DevKit](https://github.com/oneclick/rubyinstaller/wiki/development-kit).
|
15
|
+
all necessary packages through your distribution.
|
20
16
|
|
21
|
-
The Wapiti Ruby gem has been confirmed to work with MRI 1.9, 1.8.7,
|
17
|
+
The Wapiti Ruby gem has been confirmed to work with MRI 2.x, 1.9.x, 1.8.7,
|
22
18
|
and Rubinius.
|
23
19
|
|
24
20
|
|
@@ -80,7 +76,7 @@ Before saving your model you can use `compact` to reduce the model's size:
|
|
80
76
|
|
81
77
|
### Loading existing Models
|
82
78
|
|
83
|
-
model = Wapiti
|
79
|
+
model = Wapiti.load('m1.mod')
|
84
80
|
|
85
81
|
### Labelling
|
86
82
|
|
@@ -150,7 +146,6 @@ are also available through the associated attribute readers).
|
|
150
146
|
|
151
147
|
Citing
|
152
148
|
------
|
153
|
-
|
154
149
|
If you're using Wapiti-Ruby for research purposes, please use the following
|
155
150
|
citation of the original wapiti package:
|
156
151
|
|
@@ -174,7 +169,6 @@ welcome to also refer back to the
|
|
174
169
|
|
175
170
|
Contributing
|
176
171
|
------------
|
177
|
-
|
178
172
|
The Wapiti-Ruby source code is
|
179
173
|
[hosted on GitHub](http://github.com/inukshuk/wapiti-ruby/).
|
180
174
|
You can check out a copy of the latest code using Git:
|
@@ -189,9 +183,8 @@ example, fix the bug and submit a pull request.
|
|
189
183
|
|
190
184
|
License
|
191
185
|
-------
|
186
|
+
Copyright 2011-2014 Sylvester Keil. All rights reserved.
|
192
187
|
|
193
|
-
Copyright
|
194
|
-
|
195
|
-
Copyright 2009-2011 CNRS. All rights reserved.
|
188
|
+
Copyright 2009-2013 CNRS. All rights reserved.
|
196
189
|
|
197
190
|
Wapiti-Ruby is distributed under a BSD-style license. See LICENSE for details.
|
data/Rakefile
CHANGED
@@ -1,25 +1,55 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require 'bundler'
|
2
|
+
begin
|
3
|
+
Bundler.setup
|
4
|
+
rescue Bundler::BundlerError => e
|
5
|
+
$stderr.puts e.message
|
6
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
7
|
+
exit e.status_code
|
8
|
+
end
|
3
9
|
|
4
10
|
require 'rake/clean'
|
5
|
-
require 'rake/testtask'
|
6
11
|
require 'rake/extensiontask'
|
7
12
|
|
13
|
+
$:.unshift(File.join(File.dirname(__FILE__), './lib'))
|
8
14
|
require 'wapiti/version'
|
9
15
|
|
10
|
-
task :default => [:test]
|
11
16
|
|
12
17
|
Rake::ExtensionTask.new do |ext|
|
13
18
|
ext.name = 'native'
|
14
|
-
|
19
|
+
|
15
20
|
ext.ext_dir = 'ext/wapiti'
|
16
21
|
ext.lib_dir = 'lib/wapiti'
|
17
|
-
|
22
|
+
|
18
23
|
CLEAN.include("#{ext.lib_dir}/native.*")
|
19
|
-
CLEAN.include("#{ext.tmp_dir}")
|
20
|
-
|
24
|
+
CLEAN.include("#{ext.tmp_dir}")
|
25
|
+
end
|
26
|
+
|
27
|
+
require 'rspec/core'
|
28
|
+
require 'rspec/core/rake_task'
|
29
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
30
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
31
|
+
end
|
32
|
+
|
33
|
+
desc 'Run an IRB session with Wapiti loaded'
|
34
|
+
task :console do
|
35
|
+
require 'pry'
|
36
|
+
require 'wapiti'
|
37
|
+
|
38
|
+
Pry.start
|
39
|
+
end
|
40
|
+
|
41
|
+
task :check_warnings do
|
42
|
+
$VERBOSE = true
|
43
|
+
require 'wapiti'
|
44
|
+
puts Wapiti::VERSION
|
21
45
|
end
|
22
46
|
|
47
|
+
require 'coveralls/rake/task'
|
48
|
+
Coveralls::RakeTask.new
|
49
|
+
task :test_with_coveralls => [:compile, :spec, 'coveralls:push']
|
50
|
+
|
51
|
+
task :default => [:compile, :spec]
|
52
|
+
|
23
53
|
task :build => [:clean] do
|
24
54
|
system 'gem build wapiti.gemspec'
|
25
55
|
end
|
data/ext/wapiti/bcd.c
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -29,6 +29,7 @@
|
|
29
29
|
#include <math.h>
|
30
30
|
#include <stdbool.h>
|
31
31
|
#include <stddef.h>
|
32
|
+
#include <stdint.h>
|
32
33
|
#include <stdlib.h>
|
33
34
|
#include <string.h>
|
34
35
|
|
@@ -58,13 +59,13 @@
|
|
58
59
|
******************************************************************************/
|
59
60
|
typedef struct bcd_s bcd_t;
|
60
61
|
struct bcd_s {
|
61
|
-
double
|
62
|
-
double
|
63
|
-
double
|
64
|
-
double
|
65
|
-
|
66
|
-
|
67
|
-
|
62
|
+
double *ugrd; // [Y]
|
63
|
+
double *uhes; // [Y]
|
64
|
+
double *bgrd; // [Y][Y]
|
65
|
+
double *bhes; // [Y][Y]
|
66
|
+
uint32_t *actpos; // [T]
|
67
|
+
uint32_t actcnt;
|
68
|
+
grd_st_t *grd_st;
|
68
69
|
};
|
69
70
|
|
70
71
|
/* bcd_soft:
|
@@ -80,19 +81,19 @@ static double bcd_soft(double z, double r) {
|
|
80
81
|
* List the positions where the given block is active in the sequence and set up the
|
81
82
|
* limits for the fwd/bwd.
|
82
83
|
*/
|
83
|
-
static void bcd_actpos(mdl_t *mdl, bcd_t *bcd, const seq_t *seq,
|
84
|
-
const
|
85
|
-
|
86
|
-
|
87
|
-
for (
|
84
|
+
static void bcd_actpos(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, uint64_t o) {
|
85
|
+
const uint32_t T = seq->len;
|
86
|
+
uint32_t *actpos = bcd->actpos;
|
87
|
+
uint32_t actcnt = 0;
|
88
|
+
for (uint32_t t = 0; t < T; t++) {
|
88
89
|
const pos_t *pos = &(seq->pos[t]);
|
89
90
|
bool ok = false;
|
90
91
|
if (mdl->kind[o] & 1)
|
91
|
-
for (
|
92
|
+
for (uint32_t n = 0; !ok && n < pos->ucnt; n++)
|
92
93
|
if (pos->uobs[n] == o)
|
93
94
|
ok = true;
|
94
95
|
if (mdl->kind[o] & 2)
|
95
|
-
for (
|
96
|
+
for (uint32_t n = 0; !ok && n < pos->bcnt; n++)
|
96
97
|
if (pos->bobs[n] == o)
|
97
98
|
ok = true;
|
98
99
|
if (!ok)
|
@@ -101,8 +102,8 @@ static void bcd_actpos(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
|
|
101
102
|
}
|
102
103
|
assert(actcnt != 0);
|
103
104
|
bcd->actcnt = actcnt;
|
104
|
-
bcd->
|
105
|
-
bcd->
|
105
|
+
bcd->grd_st->first = actpos[0];
|
106
|
+
bcd->grd_st->last = actpos[actcnt - 1];
|
106
107
|
}
|
107
108
|
|
108
109
|
/* bcd_flgradhes:
|
@@ -110,17 +111,17 @@ static void bcd_actpos(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
|
|
110
111
|
* very similar to the trn_spupgrad function but does the computation only
|
111
112
|
* at active pos and approximate also the hessian.
|
112
113
|
*/
|
113
|
-
static void bcd_flgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq,
|
114
|
-
const
|
115
|
-
const
|
116
|
-
const
|
117
|
-
const double
|
118
|
-
const double
|
119
|
-
const double
|
120
|
-
const double
|
121
|
-
const double
|
122
|
-
const
|
123
|
-
const
|
114
|
+
static void bcd_flgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, uint64_t o) {
|
115
|
+
const grd_st_t *grd_st = bcd->grd_st;
|
116
|
+
const uint32_t Y = mdl->nlbl;
|
117
|
+
const uint32_t T = seq->len;
|
118
|
+
const double (*psi )[T][Y][Y] = (void *)grd_st->psi;
|
119
|
+
const double (*alpha)[T][Y] = (void *)grd_st->alpha;
|
120
|
+
const double (*beta )[T][Y] = (void *)grd_st->beta;
|
121
|
+
const double *unorm = grd_st->unorm;
|
122
|
+
const double *bnorm = grd_st->bnorm;
|
123
|
+
const uint32_t *actpos = bcd->actpos;
|
124
|
+
const uint32_t actcnt = bcd->actcnt;
|
124
125
|
double *ugrd = bcd->ugrd;
|
125
126
|
double *uhes = bcd->uhes;
|
126
127
|
double *bgrd = bcd->bgrd;
|
@@ -128,35 +129,35 @@ static void bcd_flgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
|
|
128
129
|
// Update the gradient and the hessian but here we sum only on the
|
129
130
|
// positions where the block is active for unigrams features
|
130
131
|
if (mdl->kind[o] & 1) {
|
131
|
-
for (
|
132
|
-
const
|
133
|
-
for (
|
132
|
+
for (uint32_t n = 0; n < actcnt; n++) {
|
133
|
+
const uint32_t t = actpos[n];
|
134
|
+
for (uint32_t y = 0; y < Y; y++) {
|
134
135
|
const double e = (*alpha)[t][y] * (*beta)[t][y]
|
135
136
|
* unorm[t];
|
136
137
|
ugrd[y] += e;
|
137
138
|
uhes[y] += e * (1.0 - e);
|
138
139
|
}
|
139
|
-
const
|
140
|
+
const uint32_t y = seq->pos[t].lbl;
|
140
141
|
ugrd[y] -= 1.0;
|
141
142
|
}
|
142
143
|
}
|
143
144
|
if ((mdl->kind[o] & 2) == 0)
|
144
145
|
return;
|
145
146
|
// for bigrams features
|
146
|
-
for (
|
147
|
-
const
|
147
|
+
for (uint32_t n = 0; n < actcnt; n++) {
|
148
|
+
const uint32_t t = actpos[n];
|
148
149
|
if (t == 0)
|
149
150
|
continue;
|
150
|
-
for (
|
151
|
-
for (
|
151
|
+
for (uint32_t yp = 0, d = 0; yp < Y; yp++) {
|
152
|
+
for (uint32_t y = 0; y < Y; y++, d++) {
|
152
153
|
double e = (*alpha)[t - 1][yp] * (*beta)[t][y]
|
153
154
|
* (*psi)[t][yp][y] * bnorm[t];
|
154
155
|
bgrd[d] += e;
|
155
156
|
bhes[d] += e * (1.0 - e);
|
156
157
|
}
|
157
158
|
}
|
158
|
-
const
|
159
|
-
const
|
159
|
+
const uint32_t yp = seq->pos[t - 1].lbl;
|
160
|
+
const uint32_t y = seq->pos[t ].lbl;
|
160
161
|
bgrd[yp * Y + y] -= 1.0;
|
161
162
|
}
|
162
163
|
}
|
@@ -166,21 +167,21 @@ static void bcd_flgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
|
|
166
167
|
* very similar to the trn_spupgrad function but does the computation only
|
167
168
|
* at active pos and approximate also the hessian.
|
168
169
|
*/
|
169
|
-
static void bcd_spgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq,
|
170
|
-
const
|
171
|
-
const
|
172
|
-
const
|
173
|
-
const double
|
174
|
-
const double
|
175
|
-
const
|
176
|
-
const
|
177
|
-
const
|
178
|
-
const double
|
179
|
-
const double
|
180
|
-
const double
|
181
|
-
const double
|
182
|
-
const
|
183
|
-
const
|
170
|
+
static void bcd_spgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, uint64_t o) {
|
171
|
+
const grd_st_t *grd_st = bcd->grd_st;
|
172
|
+
const uint32_t Y = mdl->nlbl;
|
173
|
+
const uint32_t T = seq->len;
|
174
|
+
const double (*psiuni)[T][Y] = (void *)grd_st->psiuni;
|
175
|
+
const double *psival = grd_st->psi;
|
176
|
+
const uint32_t *psiyp = grd_st->psiyp;
|
177
|
+
const uint32_t (*psiidx)[T][Y] = (void *)grd_st->psiidx;
|
178
|
+
const uint32_t *psioff = grd_st->psioff;
|
179
|
+
const double (*alpha)[T][Y] = (void *)grd_st->alpha;
|
180
|
+
const double (*beta )[T][Y] = (void *)grd_st->beta;
|
181
|
+
const double *unorm = grd_st->unorm;
|
182
|
+
const double *bnorm = grd_st->bnorm;
|
183
|
+
const uint32_t *actpos = bcd->actpos;
|
184
|
+
const uint32_t actcnt = bcd->actcnt;
|
184
185
|
double *ugrd = bcd->ugrd;
|
185
186
|
double *uhes = bcd->uhes;
|
186
187
|
double *bgrd = bcd->bgrd;
|
@@ -188,51 +189,51 @@ static void bcd_spgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
|
|
188
189
|
// Update the gradient and the hessian but here we sum only on the
|
189
190
|
// positions where the block is active for unigrams features
|
190
191
|
if (mdl->kind[o] & 1) {
|
191
|
-
for (
|
192
|
-
const
|
193
|
-
for (
|
192
|
+
for (uint32_t n = 0; n < actcnt; n++) {
|
193
|
+
const uint32_t t = actpos[n];
|
194
|
+
for (uint32_t y = 0; y < Y; y++) {
|
194
195
|
const double e = (*alpha)[t][y] * (*beta)[t][y]
|
195
196
|
* unorm[t];
|
196
197
|
ugrd[y] += e;
|
197
198
|
uhes[y] += e * (1.0 - e);
|
198
199
|
}
|
199
|
-
const
|
200
|
+
const uint32_t y = seq->pos[t].lbl;
|
200
201
|
ugrd[y] -= 1.0;
|
201
202
|
}
|
202
203
|
}
|
203
204
|
if ((mdl->kind[o] & 2) == 0)
|
204
205
|
return;
|
205
206
|
// for bigrams features
|
206
|
-
for (
|
207
|
-
const
|
207
|
+
for (uint32_t n = 0; n < actcnt; n++) {
|
208
|
+
const uint32_t t = actpos[n];
|
208
209
|
if (t == 0)
|
209
210
|
continue;
|
210
211
|
// We build the expectation matrix
|
211
212
|
double e[Y][Y];
|
212
|
-
for (
|
213
|
-
for (
|
213
|
+
for (uint32_t yp = 0; yp < Y; yp++)
|
214
|
+
for (uint32_t y = 0; y < Y; y++)
|
214
215
|
e[yp][y] = (*alpha)[t - 1][yp] * (*beta)[t][y]
|
215
216
|
* (*psiuni)[t][y] * bnorm[t];
|
216
|
-
const
|
217
|
-
for (
|
217
|
+
const uint32_t off = psioff[t];
|
218
|
+
for (uint32_t n = 0, y = 0; n < (*psiidx)[t][Y - 1]; ) {
|
218
219
|
while (n >= (*psiidx)[t][y])
|
219
220
|
y++;
|
220
221
|
while (n < (*psiidx)[t][y]) {
|
221
|
-
const
|
222
|
-
const double
|
222
|
+
const uint32_t yp = psiyp [off + n];
|
223
|
+
const double v = psival[off + n];
|
223
224
|
e[yp][y] += e[yp][y] * v;
|
224
225
|
n++;
|
225
226
|
}
|
226
227
|
}
|
227
228
|
// And use it
|
228
|
-
for (
|
229
|
-
for (
|
229
|
+
for (uint32_t yp = 0, d = 0; yp < Y; yp++) {
|
230
|
+
for (uint32_t y = 0; y < Y; y++, d++) {
|
230
231
|
bgrd[d] += e[yp][y];
|
231
232
|
bhes[d] += e[yp][y] * (1.0 - e[yp][y]);
|
232
233
|
}
|
233
234
|
}
|
234
|
-
const
|
235
|
-
const
|
235
|
+
const uint32_t yp = seq->pos[t - 1].lbl;
|
236
|
+
const uint32_t y = seq->pos[t ].lbl;
|
236
237
|
bgrd[yp * Y + y] -= 1.0;
|
237
238
|
}
|
238
239
|
}
|
@@ -240,24 +241,24 @@ static void bcd_spgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
|
|
240
241
|
/* bcd_update:
|
241
242
|
* Update the model with the computed gradient and hessian.
|
242
243
|
*/
|
243
|
-
static void bcd_update(mdl_t *mdl, bcd_t *bcd,
|
244
|
-
const double
|
245
|
-
const double
|
246
|
-
const double
|
247
|
-
const
|
248
|
-
const double
|
249
|
-
const double
|
250
|
-
double
|
251
|
-
double
|
244
|
+
static void bcd_update(mdl_t *mdl, bcd_t *bcd, uint64_t o) {
|
245
|
+
const double rho1 = mdl->opt->rho1;
|
246
|
+
const double rho2 = mdl->opt->rho2;
|
247
|
+
const double kappa = mdl->opt->bcd.kappa;
|
248
|
+
const uint32_t Y = mdl->nlbl;
|
249
|
+
const double *ugrd = bcd->ugrd;
|
250
|
+
const double *bgrd = bcd->bgrd;
|
251
|
+
double *uhes = bcd->uhes;
|
252
|
+
double *bhes = bcd->bhes;
|
252
253
|
if (mdl->kind[o] & 1) {
|
253
254
|
// Adjust the hessian
|
254
255
|
double a = 1.0;
|
255
|
-
for (
|
256
|
+
for (uint32_t y = 0; y < Y; y++)
|
256
257
|
a = max(a, fabs(ugrd[y] / uhes[y]));
|
257
258
|
xvm_scale(uhes, uhes, a * kappa, Y);
|
258
259
|
// Update the model
|
259
260
|
double *w = mdl->theta + mdl->uoff[o];
|
260
|
-
for (
|
261
|
+
for (uint32_t y = 0; y < Y; y++) {
|
261
262
|
double z = uhes[y] * w[y] - ugrd[y];
|
262
263
|
double d = uhes[y] + rho2;
|
263
264
|
w[y] = bcd_soft(z, rho1) / d;
|
@@ -266,12 +267,12 @@ static void bcd_update(mdl_t *mdl, bcd_t *bcd, size_t o) {
|
|
266
267
|
if (mdl->kind[o] & 2) {
|
267
268
|
// Adjust the hessian
|
268
269
|
double a = 1.0;
|
269
|
-
for (
|
270
|
+
for (uint32_t i = 0; i < Y * Y; i++)
|
270
271
|
a = max(a, fabs(bgrd[i] / bhes[i]));
|
271
272
|
xvm_scale(bhes, bhes, a * kappa, Y * Y);
|
272
273
|
// Update the model
|
273
274
|
double *bw = mdl->theta + mdl->boff[o];
|
274
|
-
for (
|
275
|
+
for (uint32_t i = 0; i < Y * Y; i++) {
|
275
276
|
double z = bhes[i] * bw[i] - bgrd[i];
|
276
277
|
double d = bhes[i] + rho2;
|
277
278
|
bw[i] = bcd_soft(z, rho1) / d;
|
@@ -283,108 +284,109 @@ static void bcd_update(mdl_t *mdl, bcd_t *bcd, size_t o) {
|
|
283
284
|
* Train the model using the blockwise coordinate descent method.
|
284
285
|
*/
|
285
286
|
void trn_bcd(mdl_t *mdl) {
|
286
|
-
const
|
287
|
-
const
|
288
|
-
const
|
289
|
-
const
|
290
|
-
const
|
287
|
+
const uint32_t Y = mdl->nlbl;
|
288
|
+
const uint64_t O = mdl->nobs;
|
289
|
+
const uint32_t S = mdl->train->nseq;
|
290
|
+
const uint32_t T = mdl->train->mlen;
|
291
|
+
const uint32_t K = mdl->opt->maxiter;
|
291
292
|
// Build the index:
|
292
293
|
// Count active sequences per blocks
|
293
294
|
info(" - Build the index\n");
|
294
295
|
info(" 1/2 -- scan the sequences\n");
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
296
|
+
uint64_t tot = 0;
|
297
|
+
uint32_t cnt[O], lcl[O];
|
298
|
+
for (uint64_t o = 0; o < O; o++)
|
299
|
+
cnt[o] = 0, lcl[o] = (uint32_t)-1;
|
300
|
+
for (uint32_t s = 0; s < S; s++) {
|
299
301
|
// List active blocks
|
300
302
|
const seq_t *seq = mdl->train->seq[s];
|
301
|
-
for (
|
302
|
-
for (
|
303
|
+
for (uint32_t t = 0; t < seq->len; t++) {
|
304
|
+
for (uint32_t b = 0; b < seq->pos[t].ucnt; b++)
|
303
305
|
lcl[seq->pos[t].uobs[b]] = s;
|
304
|
-
for (
|
306
|
+
for (uint32_t b = 0; b < seq->pos[t].bcnt; b++)
|
305
307
|
lcl[seq->pos[t].bobs[b]] = s;
|
306
308
|
}
|
307
309
|
// Updates blocks count
|
308
|
-
for (
|
310
|
+
for (uint64_t o = 0; o < O; o++)
|
309
311
|
cnt[o] += (lcl[o] == s);
|
310
312
|
}
|
311
|
-
for (
|
313
|
+
for (uint64_t o = 0; o < O; o++)
|
312
314
|
tot += cnt[o];
|
313
315
|
// Allocate memory
|
314
|
-
|
315
|
-
|
316
|
-
for (
|
316
|
+
uint32_t *idx_cnt = wapiti_xmalloc(sizeof(uint32_t ) * O);
|
317
|
+
uint32_t **idx_lst = wapiti_xmalloc(sizeof(uint32_t *) * O);
|
318
|
+
for (uint64_t o = 0; o < O; o++) {
|
317
319
|
idx_cnt[o] = cnt[o];
|
318
|
-
idx_lst[o] =
|
320
|
+
idx_lst[o] = wapiti_xmalloc(sizeof(uint32_t) * cnt[o]);
|
319
321
|
}
|
320
322
|
// Populate the index
|
321
323
|
info(" 2/2 -- Populate the index\n");
|
322
|
-
for (
|
323
|
-
cnt[o] = 0, lcl[o] =
|
324
|
-
for (
|
324
|
+
for (uint64_t o = 0; o < O; o++)
|
325
|
+
cnt[o] = 0, lcl[o] = (uint32_t)-1;
|
326
|
+
for (uint32_t s = 0; s < S; s++) {
|
325
327
|
// List active blocks
|
326
328
|
const seq_t *seq = mdl->train->seq[s];
|
327
|
-
for (
|
328
|
-
for (
|
329
|
+
for (uint32_t t = 0; t < seq->len; t++) {
|
330
|
+
for (uint32_t b = 0; b < seq->pos[t].ucnt; b++)
|
329
331
|
lcl[seq->pos[t].uobs[b]] = s;
|
330
|
-
for (
|
332
|
+
for (uint32_t b = 0; b < seq->pos[t].bcnt; b++)
|
331
333
|
lcl[seq->pos[t].bobs[b]] = s;
|
332
334
|
}
|
333
335
|
// Build index
|
334
|
-
for (
|
336
|
+
for (uint64_t o = 0; o < O; o++)
|
335
337
|
if (lcl[o] == s)
|
336
338
|
idx_lst[o][cnt[o]++] = s;
|
337
339
|
}
|
338
340
|
info(" Done\n");
|
339
341
|
// Allocate the specific trainer of BCD
|
340
|
-
bcd_t *bcd =
|
342
|
+
bcd_t *bcd = wapiti_xmalloc(sizeof(bcd_t));
|
341
343
|
bcd->ugrd = xvm_new(Y);
|
342
344
|
bcd->uhes = xvm_new(Y);
|
343
345
|
bcd->bgrd = xvm_new(Y * Y);
|
344
346
|
bcd->bhes = xvm_new(Y * Y);
|
345
|
-
bcd->actpos =
|
346
|
-
bcd->
|
347
|
+
bcd->actpos = wapiti_xmalloc(sizeof(int) * T);
|
348
|
+
bcd->grd_st = grd_stnew(mdl, NULL);
|
347
349
|
// And train the model
|
348
|
-
for (
|
349
|
-
for (
|
350
|
+
for (uint32_t i = 1; i <= K; i++) {
|
351
|
+
for (uint64_t o = 0; o < O; o++) {
|
350
352
|
// Clear the gradient and the hessian
|
351
|
-
for (
|
353
|
+
for (uint32_t y = 0, d = 0; y < Y; y++) {
|
352
354
|
bcd->ugrd[y] = 0.0;
|
353
355
|
bcd->uhes[y] = 0.0;
|
354
|
-
for (
|
356
|
+
for (uint32_t yp = 0; yp < Y; yp++, d++) {
|
355
357
|
bcd->bgrd[d] = 0.0;
|
356
358
|
bcd->bhes[d] = 0.0;
|
357
359
|
}
|
358
360
|
}
|
359
361
|
// Process active sequences
|
360
|
-
for (
|
361
|
-
const
|
362
|
+
for (uint32_t s = 0; s < idx_cnt[o]; s++) {
|
363
|
+
const uint32_t id = idx_lst[o][s];
|
362
364
|
const seq_t *seq = mdl->train->seq[id];
|
363
365
|
bcd_actpos(mdl, bcd, seq, o);
|
364
|
-
|
366
|
+
grd_stcheck(bcd->grd_st, seq->len);
|
365
367
|
if (mdl->opt->sparse) {
|
366
|
-
grd_spdopsi(bcd->
|
367
|
-
grd_spfwdbwd(bcd->
|
368
|
+
grd_spdopsi(bcd->grd_st, seq);
|
369
|
+
grd_spfwdbwd(bcd->grd_st, seq);
|
368
370
|
bcd_spgradhes(mdl, bcd, seq, o);
|
369
371
|
} else {
|
370
|
-
grd_fldopsi(bcd->
|
371
|
-
grd_flfwdbwd(bcd->
|
372
|
+
grd_fldopsi(bcd->grd_st, seq);
|
373
|
+
grd_flfwdbwd(bcd->grd_st, seq);
|
372
374
|
bcd_flgradhes(mdl, bcd, seq, o);
|
373
375
|
}
|
374
376
|
}
|
375
377
|
// And update the model
|
376
378
|
bcd_update(mdl, bcd, o);
|
377
379
|
}
|
378
|
-
if (!uit_progress(mdl, i
|
380
|
+
if (!uit_progress(mdl, i, -1.0))
|
379
381
|
break;
|
380
382
|
}
|
381
383
|
// Cleanup memory
|
382
|
-
|
384
|
+
grd_stfree(bcd->grd_st);
|
383
385
|
xvm_free(bcd->ugrd); xvm_free(bcd->uhes);
|
384
386
|
xvm_free(bcd->bgrd); xvm_free(bcd->bhes);
|
385
387
|
free(bcd->actpos);
|
386
388
|
free(bcd);
|
387
|
-
for (
|
389
|
+
for (uint64_t o = 0; o < O; o++)
|
388
390
|
free(idx_lst[o]);
|
389
391
|
free(idx_lst);
|
390
392
|
free(idx_cnt);
|