wapiti 0.0.5 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.simplecov +3 -0
- data/Gemfile +25 -2
- data/HISTORY.md +5 -1
- data/LICENSE +14 -13
- data/README.md +9 -16
- data/Rakefile +38 -8
- data/ext/wapiti/bcd.c +126 -124
- data/ext/wapiti/decoder.c +203 -124
- data/ext/wapiti/decoder.h +6 -4
- data/ext/wapiti/extconf.rb +2 -2
- data/ext/wapiti/gradient.c +491 -320
- data/ext/wapiti/gradient.h +52 -34
- data/ext/wapiti/lbfgs.c +74 -33
- data/ext/wapiti/model.c +47 -37
- data/ext/wapiti/model.h +22 -20
- data/ext/wapiti/native.c +850 -839
- data/ext/wapiti/native.h +1 -1
- data/ext/wapiti/options.c +52 -20
- data/ext/wapiti/options.h +37 -30
- data/ext/wapiti/pattern.c +35 -33
- data/ext/wapiti/pattern.h +12 -11
- data/ext/wapiti/progress.c +14 -13
- data/ext/wapiti/progress.h +3 -2
- data/ext/wapiti/quark.c +14 -16
- data/ext/wapiti/quark.h +6 -5
- data/ext/wapiti/reader.c +83 -69
- data/ext/wapiti/reader.h +11 -9
- data/ext/wapiti/rprop.c +84 -43
- data/ext/wapiti/sequence.h +18 -16
- data/ext/wapiti/sgdl1.c +45 -43
- data/ext/wapiti/thread.c +19 -17
- data/ext/wapiti/thread.h +5 -4
- data/ext/wapiti/tools.c +7 -7
- data/ext/wapiti/tools.h +3 -4
- data/ext/wapiti/trainers.h +1 -1
- data/ext/wapiti/vmath.c +40 -38
- data/ext/wapiti/vmath.h +12 -11
- data/ext/wapiti/wapiti.c +159 -37
- data/ext/wapiti/wapiti.h +18 -4
- data/lib/wapiti.rb +15 -15
- data/lib/wapiti/errors.rb +15 -15
- data/lib/wapiti/model.rb +92 -84
- data/lib/wapiti/options.rb +123 -124
- data/lib/wapiti/utility.rb +14 -14
- data/lib/wapiti/version.rb +2 -2
- data/spec/spec_helper.rb +29 -9
- data/spec/wapiti/model_spec.rb +230 -194
- data/spec/wapiti/native_spec.rb +7 -8
- data/spec/wapiti/options_spec.rb +184 -174
- data/wapiti.gemspec +22 -8
- metadata +38 -42
- data/.gitignore +0 -5
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b556f6c375171e82550f04adffc293154d90e370
|
4
|
+
data.tar.gz: 1a40a18e7e4f6afbbfc8a51d1aaef6a469375574
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f904c8d7ce3b0c48a74f4f2e678b120626eb89ae9d22a3cc4b75d3b8c081d8dc0cffbe030a9a2b3132927a80c696e5df7eb6673a66420600ebcf7e09eb497282
|
7
|
+
data.tar.gz: 58be17b5b87d9ee0ee7fa6037492e1bb0b6070ae1611c640799ab318fcf8abfc074ef89bcb944297b428996f8b2b13fc9dfed6081d6fbf02c0d4bfd8e54e1ea3
|
data/.simplecov
ADDED
data/Gemfile
CHANGED
@@ -1,6 +1,29 @@
|
|
1
|
-
source
|
1
|
+
source 'https://rubygems.org'
|
2
2
|
gemspec
|
3
3
|
|
4
|
-
group :
|
4
|
+
group :debug do
|
5
|
+
gem 'debugger', '~>1.6', :require => false, :platform => :mri
|
6
|
+
gem 'rubinius-compiler', '~>2.0', :require => false, :platform => :rbx
|
7
|
+
gem 'rubinius-debugger', '~>2.0', :require => false, :platform => :rbx
|
8
|
+
end
|
9
|
+
|
10
|
+
group :development do
|
11
|
+
gem 'simplecov', '~>0.8', :require => false
|
12
|
+
gem 'rubinius-coverage', :platform => :rbx
|
13
|
+
gem 'coveralls', :require => false
|
14
|
+
end
|
15
|
+
|
16
|
+
group :extra do
|
17
|
+
gem 'ZenTest'
|
18
|
+
gem 'pry'
|
19
|
+
end
|
20
|
+
|
21
|
+
group :osx do
|
5
22
|
gem 'autotest-fsevent', :require => false
|
6
23
|
end
|
24
|
+
|
25
|
+
platform :rbx do
|
26
|
+
gem 'rubysl', '~>2.0'
|
27
|
+
gem 'racc'
|
28
|
+
gem 'json'
|
29
|
+
end
|
data/HISTORY.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
0.1.0 / 2014-02-25
|
2
|
+
==================
|
3
|
+
* Updated to wapiti 1.5.0
|
4
|
+
|
1
5
|
0.0.5 / 2011-09-06
|
2
6
|
==================
|
3
7
|
* Added UTF-8 encoding to native result strings for Ruby 1.9
|
@@ -10,4 +14,4 @@
|
|
10
14
|
|
11
15
|
0.0.2 / 2011-09-01
|
12
16
|
==================
|
13
|
-
* Initial release
|
17
|
+
* Initial release
|
data/LICENSE
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
Wapiti-Ruby
|
2
|
-
Copyright 2011 Sylvester Keil. All rights reserved.
|
2
|
+
Copyright 2011-2014 Sylvester Keil. All rights reserved.
|
3
3
|
|
4
4
|
Wapiti - A linear-chain CRF tool
|
5
|
-
Copyright 2009-
|
5
|
+
Copyright 2009-2013 CNRS. All rights reserved.
|
6
6
|
|
7
7
|
Redistribution and use in source and binary forms, with or without
|
8
8
|
modification, are permitted provided that the following conditions are met:
|
@@ -14,17 +14,18 @@ modification, are permitted provided that the following conditions are met:
|
|
14
14
|
this list of conditions and the following disclaimer in the documentation
|
15
15
|
and/or other materials provided with the distribution.
|
16
16
|
|
17
|
-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
|
18
|
-
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
19
|
-
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
20
|
-
EVENT SHALL THE COPYRIGHT
|
21
|
-
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
22
|
-
BUT NOT LIMITED TO, PROCUREMENT OF
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
17
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
18
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
19
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
20
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
21
|
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
22
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
23
|
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
24
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
25
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
26
|
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
27
|
+
POSSIBILITY OF SUCH DAMAGE.
|
27
28
|
|
28
29
|
The views and conclusions contained in the software and documentation are
|
29
30
|
those of the authors and should not be interpreted as representing official
|
30
|
-
policies, either expressed or implied, of the copyright
|
31
|
+
policies, either expressed or implied, of the copyright holders.
|
data/README.md
CHANGED
@@ -1,24 +1,20 @@
|
|
1
1
|
Wapiti-Ruby
|
2
2
|
===========
|
3
|
-
|
4
3
|
The Wapiti-Ruby gem provides a wicked fast linear-chain CRF
|
5
4
|
([Conditional Random Fields](http://en.wikipedia.org/wiki/Conditional_random_field))
|
6
|
-
API for sequence segmentation and labelling; it is based on the
|
7
|
-
|
5
|
+
API for sequence segmentation and labelling; it is based on the
|
6
|
+
codebase of [wapiti](http://wapiti.limsi.fr/).
|
8
7
|
|
8
|
+
[Build Status](http://travis-ci.org/inukshuk/wapiti-ruby)
|
9
|
+
[Coverage Status](https://coveralls.io/r/inukshuk/wapiti-ruby?branch=master)
|
9
10
|
|
10
11
|
Requirements
|
11
12
|
------------
|
12
|
-
|
13
13
|
Wapiti is written in C and Ruby and requires a compiler with C99
|
14
14
|
support (e.g., gcc); on GNU/Linux systems it will be fairly easy to install
|
15
|
-
all necessary packages through your distribution
|
16
|
-
install Xcode or
|
17
|
-
[osx-gcc-installer](https://github.com/kennethreitz/osx-gcc-installer);
|
18
|
-
on Windows you may want to install RubyInstaller's
|
19
|
-
[DevKit](https://github.com/oneclick/rubyinstaller/wiki/development-kit).
|
15
|
+
all necessary packages through your distribution.
|
20
16
|
|
21
|
-
The Wapiti Ruby gem has been confirmed to work with MRI 1.9, 1.8.7,
|
17
|
+
The Wapiti Ruby gem has been confirmed to work with MRI 2.x, 1.9.x, 1.8.7,
|
22
18
|
and Rubinius.
|
23
19
|
|
24
20
|
|
@@ -80,7 +76,7 @@ Before saving your model you can use `compact` to reduce the model's size:
|
|
80
76
|
|
81
77
|
### Loading existing Models
|
82
78
|
|
83
|
-
model = Wapiti
|
79
|
+
model = Wapiti.load('m1.mod')
|
84
80
|
|
85
81
|
### Labelling
|
86
82
|
|
@@ -150,7 +146,6 @@ are also available through the associated attribute readers).
|
|
150
146
|
|
151
147
|
Citing
|
152
148
|
------
|
153
|
-
|
154
149
|
If you're using Wapiti-Ruby for research purposes, please use the following
|
155
150
|
citation of the original wapiti package:
|
156
151
|
|
@@ -174,7 +169,6 @@ welcome to also refer back to the
|
|
174
169
|
|
175
170
|
Contributing
|
176
171
|
------------
|
177
|
-
|
178
172
|
The Wapiti-Ruby source code is
|
179
173
|
[hosted on GitHub](http://github.com/inukshuk/wapiti-ruby/).
|
180
174
|
You can check out a copy of the latest code using Git:
|
@@ -189,9 +183,8 @@ example, fix the bug and submit a pull request.
|
|
189
183
|
|
190
184
|
License
|
191
185
|
-------
|
186
|
+
Copyright 2011-2014 Sylvester Keil. All rights reserved.
|
192
187
|
|
193
|
-
Copyright
|
194
|
-
|
195
|
-
Copyright 2009-2011 CNRS. All rights reserved.
|
188
|
+
Copyright 2009-2013 CNRS. All rights reserved.
|
196
189
|
|
197
190
|
Wapiti-Ruby is distributed under a BSD-style license. See LICENSE for details.
|
data/Rakefile
CHANGED
@@ -1,25 +1,55 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require 'bundler'
|
2
|
+
begin
|
3
|
+
Bundler.setup
|
4
|
+
rescue Bundler::BundlerError => e
|
5
|
+
$stderr.puts e.message
|
6
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
7
|
+
exit e.status_code
|
8
|
+
end
|
3
9
|
|
4
10
|
require 'rake/clean'
|
5
|
-
require 'rake/testtask'
|
6
11
|
require 'rake/extensiontask'
|
7
12
|
|
13
|
+
$:.unshift(File.join(File.dirname(__FILE__), './lib'))
|
8
14
|
require 'wapiti/version'
|
9
15
|
|
10
|
-
task :default => [:test]
|
11
16
|
|
12
17
|
Rake::ExtensionTask.new do |ext|
|
13
18
|
ext.name = 'native'
|
14
|
-
|
19
|
+
|
15
20
|
ext.ext_dir = 'ext/wapiti'
|
16
21
|
ext.lib_dir = 'lib/wapiti'
|
17
|
-
|
22
|
+
|
18
23
|
CLEAN.include("#{ext.lib_dir}/native.*")
|
19
|
-
CLEAN.include("#{ext.tmp_dir}")
|
20
|
-
|
24
|
+
CLEAN.include("#{ext.tmp_dir}")
|
25
|
+
end
|
26
|
+
|
27
|
+
require 'rspec/core'
|
28
|
+
require 'rspec/core/rake_task'
|
29
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
30
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
31
|
+
end
|
32
|
+
|
33
|
+
desc 'Run an IRB session with Wapiti loaded'
|
34
|
+
task :console do
|
35
|
+
require 'pry'
|
36
|
+
require 'wapiti'
|
37
|
+
|
38
|
+
Pry.start
|
39
|
+
end
|
40
|
+
|
41
|
+
task :check_warnings do
|
42
|
+
$VERBOSE = true
|
43
|
+
require 'wapiti'
|
44
|
+
puts Wapiti::VERSION
|
21
45
|
end
|
22
46
|
|
47
|
+
require 'coveralls/rake/task'
|
48
|
+
Coveralls::RakeTask.new
|
49
|
+
task :test_with_coveralls => [:compile, :spec, 'coveralls:push']
|
50
|
+
|
51
|
+
task :default => [:compile, :spec]
|
52
|
+
|
23
53
|
task :build => [:clean] do
|
24
54
|
system 'gem build wapiti.gemspec'
|
25
55
|
end
|
data/ext/wapiti/bcd.c
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -29,6 +29,7 @@
|
|
29
29
|
#include <math.h>
|
30
30
|
#include <stdbool.h>
|
31
31
|
#include <stddef.h>
|
32
|
+
#include <stdint.h>
|
32
33
|
#include <stdlib.h>
|
33
34
|
#include <string.h>
|
34
35
|
|
@@ -58,13 +59,13 @@
|
|
58
59
|
******************************************************************************/
|
59
60
|
typedef struct bcd_s bcd_t;
|
60
61
|
struct bcd_s {
|
61
|
-
double
|
62
|
-
double
|
63
|
-
double
|
64
|
-
double
|
65
|
-
|
66
|
-
|
67
|
-
|
62
|
+
double *ugrd; // [Y]
|
63
|
+
double *uhes; // [Y]
|
64
|
+
double *bgrd; // [Y][Y]
|
65
|
+
double *bhes; // [Y][Y]
|
66
|
+
uint32_t *actpos; // [T]
|
67
|
+
uint32_t actcnt;
|
68
|
+
grd_st_t *grd_st;
|
68
69
|
};
|
69
70
|
|
70
71
|
/* bcd_soft:
|
@@ -80,19 +81,19 @@ static double bcd_soft(double z, double r) {
|
|
80
81
|
* List the positions where the given block is active in the sequence and set up the
|
81
82
|
* limits for the fwd/bwd.
|
82
83
|
*/
|
83
|
-
static void bcd_actpos(mdl_t *mdl, bcd_t *bcd, const seq_t *seq,
|
84
|
-
const
|
85
|
-
|
86
|
-
|
87
|
-
for (
|
84
|
+
static void bcd_actpos(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, uint64_t o) {
|
85
|
+
const uint32_t T = seq->len;
|
86
|
+
uint32_t *actpos = bcd->actpos;
|
87
|
+
uint32_t actcnt = 0;
|
88
|
+
for (uint32_t t = 0; t < T; t++) {
|
88
89
|
const pos_t *pos = &(seq->pos[t]);
|
89
90
|
bool ok = false;
|
90
91
|
if (mdl->kind[o] & 1)
|
91
|
-
for (
|
92
|
+
for (uint32_t n = 0; !ok && n < pos->ucnt; n++)
|
92
93
|
if (pos->uobs[n] == o)
|
93
94
|
ok = true;
|
94
95
|
if (mdl->kind[o] & 2)
|
95
|
-
for (
|
96
|
+
for (uint32_t n = 0; !ok && n < pos->bcnt; n++)
|
96
97
|
if (pos->bobs[n] == o)
|
97
98
|
ok = true;
|
98
99
|
if (!ok)
|
@@ -101,8 +102,8 @@ static void bcd_actpos(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
|
|
101
102
|
}
|
102
103
|
assert(actcnt != 0);
|
103
104
|
bcd->actcnt = actcnt;
|
104
|
-
bcd->
|
105
|
-
bcd->
|
105
|
+
bcd->grd_st->first = actpos[0];
|
106
|
+
bcd->grd_st->last = actpos[actcnt - 1];
|
106
107
|
}
|
107
108
|
|
108
109
|
/* bcd_flgradhes:
|
@@ -110,17 +111,17 @@ static void bcd_actpos(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
|
|
110
111
|
* very similar to the trn_spupgrad function but does the computation only
|
111
112
|
* at active positions and also approximates the hessian.
|
112
113
|
*/
|
113
|
-
static void bcd_flgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq,
|
114
|
-
const
|
115
|
-
const
|
116
|
-
const
|
117
|
-
const double
|
118
|
-
const double
|
119
|
-
const double
|
120
|
-
const double
|
121
|
-
const double
|
122
|
-
const
|
123
|
-
const
|
114
|
+
static void bcd_flgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, uint64_t o) {
|
115
|
+
const grd_st_t *grd_st = bcd->grd_st;
|
116
|
+
const uint32_t Y = mdl->nlbl;
|
117
|
+
const uint32_t T = seq->len;
|
118
|
+
const double (*psi )[T][Y][Y] = (void *)grd_st->psi;
|
119
|
+
const double (*alpha)[T][Y] = (void *)grd_st->alpha;
|
120
|
+
const double (*beta )[T][Y] = (void *)grd_st->beta;
|
121
|
+
const double *unorm = grd_st->unorm;
|
122
|
+
const double *bnorm = grd_st->bnorm;
|
123
|
+
const uint32_t *actpos = bcd->actpos;
|
124
|
+
const uint32_t actcnt = bcd->actcnt;
|
124
125
|
double *ugrd = bcd->ugrd;
|
125
126
|
double *uhes = bcd->uhes;
|
126
127
|
double *bgrd = bcd->bgrd;
|
@@ -128,35 +129,35 @@ static void bcd_flgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
|
|
128
129
|
// Update the gradient and the hessian but here we sum only on the
|
129
130
|
// positions where the block is active for unigrams features
|
130
131
|
if (mdl->kind[o] & 1) {
|
131
|
-
for (
|
132
|
-
const
|
133
|
-
for (
|
132
|
+
for (uint32_t n = 0; n < actcnt; n++) {
|
133
|
+
const uint32_t t = actpos[n];
|
134
|
+
for (uint32_t y = 0; y < Y; y++) {
|
134
135
|
const double e = (*alpha)[t][y] * (*beta)[t][y]
|
135
136
|
* unorm[t];
|
136
137
|
ugrd[y] += e;
|
137
138
|
uhes[y] += e * (1.0 - e);
|
138
139
|
}
|
139
|
-
const
|
140
|
+
const uint32_t y = seq->pos[t].lbl;
|
140
141
|
ugrd[y] -= 1.0;
|
141
142
|
}
|
142
143
|
}
|
143
144
|
if ((mdl->kind[o] & 2) == 0)
|
144
145
|
return;
|
145
146
|
// for bigrams features
|
146
|
-
for (
|
147
|
-
const
|
147
|
+
for (uint32_t n = 0; n < actcnt; n++) {
|
148
|
+
const uint32_t t = actpos[n];
|
148
149
|
if (t == 0)
|
149
150
|
continue;
|
150
|
-
for (
|
151
|
-
for (
|
151
|
+
for (uint32_t yp = 0, d = 0; yp < Y; yp++) {
|
152
|
+
for (uint32_t y = 0; y < Y; y++, d++) {
|
152
153
|
double e = (*alpha)[t - 1][yp] * (*beta)[t][y]
|
153
154
|
* (*psi)[t][yp][y] * bnorm[t];
|
154
155
|
bgrd[d] += e;
|
155
156
|
bhes[d] += e * (1.0 - e);
|
156
157
|
}
|
157
158
|
}
|
158
|
-
const
|
159
|
-
const
|
159
|
+
const uint32_t yp = seq->pos[t - 1].lbl;
|
160
|
+
const uint32_t y = seq->pos[t ].lbl;
|
160
161
|
bgrd[yp * Y + y] -= 1.0;
|
161
162
|
}
|
162
163
|
}
|
@@ -166,21 +167,21 @@ static void bcd_flgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
|
|
166
167
|
* very similar to the trn_spupgrad function but does the computation only
|
167
168
|
* at active positions and also approximates the hessian.
|
168
169
|
*/
|
169
|
-
static void bcd_spgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq,
|
170
|
-
const
|
171
|
-
const
|
172
|
-
const
|
173
|
-
const double
|
174
|
-
const double
|
175
|
-
const
|
176
|
-
const
|
177
|
-
const
|
178
|
-
const double
|
179
|
-
const double
|
180
|
-
const double
|
181
|
-
const double
|
182
|
-
const
|
183
|
-
const
|
170
|
+
static void bcd_spgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, uint64_t o) {
|
171
|
+
const grd_st_t *grd_st = bcd->grd_st;
|
172
|
+
const uint32_t Y = mdl->nlbl;
|
173
|
+
const uint32_t T = seq->len;
|
174
|
+
const double (*psiuni)[T][Y] = (void *)grd_st->psiuni;
|
175
|
+
const double *psival = grd_st->psi;
|
176
|
+
const uint32_t *psiyp = grd_st->psiyp;
|
177
|
+
const uint32_t (*psiidx)[T][Y] = (void *)grd_st->psiidx;
|
178
|
+
const uint32_t *psioff = grd_st->psioff;
|
179
|
+
const double (*alpha)[T][Y] = (void *)grd_st->alpha;
|
180
|
+
const double (*beta )[T][Y] = (void *)grd_st->beta;
|
181
|
+
const double *unorm = grd_st->unorm;
|
182
|
+
const double *bnorm = grd_st->bnorm;
|
183
|
+
const uint32_t *actpos = bcd->actpos;
|
184
|
+
const uint32_t actcnt = bcd->actcnt;
|
184
185
|
double *ugrd = bcd->ugrd;
|
185
186
|
double *uhes = bcd->uhes;
|
186
187
|
double *bgrd = bcd->bgrd;
|
@@ -188,51 +189,51 @@ static void bcd_spgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
|
|
188
189
|
// Update the gradient and the hessian but here we sum only on the
|
189
190
|
// positions where the block is active for unigrams features
|
190
191
|
if (mdl->kind[o] & 1) {
|
191
|
-
for (
|
192
|
-
const
|
193
|
-
for (
|
192
|
+
for (uint32_t n = 0; n < actcnt; n++) {
|
193
|
+
const uint32_t t = actpos[n];
|
194
|
+
for (uint32_t y = 0; y < Y; y++) {
|
194
195
|
const double e = (*alpha)[t][y] * (*beta)[t][y]
|
195
196
|
* unorm[t];
|
196
197
|
ugrd[y] += e;
|
197
198
|
uhes[y] += e * (1.0 - e);
|
198
199
|
}
|
199
|
-
const
|
200
|
+
const uint32_t y = seq->pos[t].lbl;
|
200
201
|
ugrd[y] -= 1.0;
|
201
202
|
}
|
202
203
|
}
|
203
204
|
if ((mdl->kind[o] & 2) == 0)
|
204
205
|
return;
|
205
206
|
// for bigrams features
|
206
|
-
for (
|
207
|
-
const
|
207
|
+
for (uint32_t n = 0; n < actcnt; n++) {
|
208
|
+
const uint32_t t = actpos[n];
|
208
209
|
if (t == 0)
|
209
210
|
continue;
|
210
211
|
// We build the expectation matrix
|
211
212
|
double e[Y][Y];
|
212
|
-
for (
|
213
|
-
for (
|
213
|
+
for (uint32_t yp = 0; yp < Y; yp++)
|
214
|
+
for (uint32_t y = 0; y < Y; y++)
|
214
215
|
e[yp][y] = (*alpha)[t - 1][yp] * (*beta)[t][y]
|
215
216
|
* (*psiuni)[t][y] * bnorm[t];
|
216
|
-
const
|
217
|
-
for (
|
217
|
+
const uint32_t off = psioff[t];
|
218
|
+
for (uint32_t n = 0, y = 0; n < (*psiidx)[t][Y - 1]; ) {
|
218
219
|
while (n >= (*psiidx)[t][y])
|
219
220
|
y++;
|
220
221
|
while (n < (*psiidx)[t][y]) {
|
221
|
-
const
|
222
|
-
const double
|
222
|
+
const uint32_t yp = psiyp [off + n];
|
223
|
+
const double v = psival[off + n];
|
223
224
|
e[yp][y] += e[yp][y] * v;
|
224
225
|
n++;
|
225
226
|
}
|
226
227
|
}
|
227
228
|
// And use it
|
228
|
-
for (
|
229
|
-
for (
|
229
|
+
for (uint32_t yp = 0, d = 0; yp < Y; yp++) {
|
230
|
+
for (uint32_t y = 0; y < Y; y++, d++) {
|
230
231
|
bgrd[d] += e[yp][y];
|
231
232
|
bhes[d] += e[yp][y] * (1.0 - e[yp][y]);
|
232
233
|
}
|
233
234
|
}
|
234
|
-
const
|
235
|
-
const
|
235
|
+
const uint32_t yp = seq->pos[t - 1].lbl;
|
236
|
+
const uint32_t y = seq->pos[t ].lbl;
|
236
237
|
bgrd[yp * Y + y] -= 1.0;
|
237
238
|
}
|
238
239
|
}
|
@@ -240,24 +241,24 @@ static void bcd_spgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
|
|
240
241
|
/* bcd_update:
|
241
242
|
* Update the model with the computed gradient and hessian.
|
242
243
|
*/
|
243
|
-
static void bcd_update(mdl_t *mdl, bcd_t *bcd,
|
244
|
-
const double
|
245
|
-
const double
|
246
|
-
const double
|
247
|
-
const
|
248
|
-
const double
|
249
|
-
const double
|
250
|
-
double
|
251
|
-
double
|
244
|
+
static void bcd_update(mdl_t *mdl, bcd_t *bcd, uint64_t o) {
|
245
|
+
const double rho1 = mdl->opt->rho1;
|
246
|
+
const double rho2 = mdl->opt->rho2;
|
247
|
+
const double kappa = mdl->opt->bcd.kappa;
|
248
|
+
const uint32_t Y = mdl->nlbl;
|
249
|
+
const double *ugrd = bcd->ugrd;
|
250
|
+
const double *bgrd = bcd->bgrd;
|
251
|
+
double *uhes = bcd->uhes;
|
252
|
+
double *bhes = bcd->bhes;
|
252
253
|
if (mdl->kind[o] & 1) {
|
253
254
|
// Adjust the hessian
|
254
255
|
double a = 1.0;
|
255
|
-
for (
|
256
|
+
for (uint32_t y = 0; y < Y; y++)
|
256
257
|
a = max(a, fabs(ugrd[y] / uhes[y]));
|
257
258
|
xvm_scale(uhes, uhes, a * kappa, Y);
|
258
259
|
// Update the model
|
259
260
|
double *w = mdl->theta + mdl->uoff[o];
|
260
|
-
for (
|
261
|
+
for (uint32_t y = 0; y < Y; y++) {
|
261
262
|
double z = uhes[y] * w[y] - ugrd[y];
|
262
263
|
double d = uhes[y] + rho2;
|
263
264
|
w[y] = bcd_soft(z, rho1) / d;
|
@@ -266,12 +267,12 @@ static void bcd_update(mdl_t *mdl, bcd_t *bcd, size_t o) {
|
|
266
267
|
if (mdl->kind[o] & 2) {
|
267
268
|
// Adjust the hessian
|
268
269
|
double a = 1.0;
|
269
|
-
for (
|
270
|
+
for (uint32_t i = 0; i < Y * Y; i++)
|
270
271
|
a = max(a, fabs(bgrd[i] / bhes[i]));
|
271
272
|
xvm_scale(bhes, bhes, a * kappa, Y * Y);
|
272
273
|
// Update the model
|
273
274
|
double *bw = mdl->theta + mdl->boff[o];
|
274
|
-
for (
|
275
|
+
for (uint32_t i = 0; i < Y * Y; i++) {
|
275
276
|
double z = bhes[i] * bw[i] - bgrd[i];
|
276
277
|
double d = bhes[i] + rho2;
|
277
278
|
bw[i] = bcd_soft(z, rho1) / d;
|
@@ -283,108 +284,109 @@ static void bcd_update(mdl_t *mdl, bcd_t *bcd, size_t o) {
|
|
283
284
|
* Train the model using the blockwise coordinate descent method.
|
284
285
|
*/
|
285
286
|
void trn_bcd(mdl_t *mdl) {
|
286
|
-
const
|
287
|
-
const
|
288
|
-
const
|
289
|
-
const
|
290
|
-
const
|
287
|
+
const uint32_t Y = mdl->nlbl;
|
288
|
+
const uint64_t O = mdl->nobs;
|
289
|
+
const uint32_t S = mdl->train->nseq;
|
290
|
+
const uint32_t T = mdl->train->mlen;
|
291
|
+
const uint32_t K = mdl->opt->maxiter;
|
291
292
|
// Build the index:
|
292
293
|
// Count active sequences per blocks
|
293
294
|
info(" - Build the index\n");
|
294
295
|
info(" 1/2 -- scan the sequences\n");
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
296
|
+
uint64_t tot = 0;
|
297
|
+
uint32_t cnt[O], lcl[O];
|
298
|
+
for (uint64_t o = 0; o < O; o++)
|
299
|
+
cnt[o] = 0, lcl[o] = (uint32_t)-1;
|
300
|
+
for (uint32_t s = 0; s < S; s++) {
|
299
301
|
// List actives blocks
|
300
302
|
const seq_t *seq = mdl->train->seq[s];
|
301
|
-
for (
|
302
|
-
for (
|
303
|
+
for (uint32_t t = 0; t < seq->len; t++) {
|
304
|
+
for (uint32_t b = 0; b < seq->pos[t].ucnt; b++)
|
303
305
|
lcl[seq->pos[t].uobs[b]] = s;
|
304
|
-
for (
|
306
|
+
for (uint32_t b = 0; b < seq->pos[t].bcnt; b++)
|
305
307
|
lcl[seq->pos[t].bobs[b]] = s;
|
306
308
|
}
|
307
309
|
// Updates blocks count
|
308
|
-
for (
|
310
|
+
for (uint64_t o = 0; o < O; o++)
|
309
311
|
cnt[o] += (lcl[o] == s);
|
310
312
|
}
|
311
|
-
for (
|
313
|
+
for (uint64_t o = 0; o < O; o++)
|
312
314
|
tot += cnt[o];
|
313
315
|
// Allocate memory
|
314
|
-
|
315
|
-
|
316
|
-
for (
|
316
|
+
uint32_t *idx_cnt = wapiti_xmalloc(sizeof(uint32_t ) * O);
|
317
|
+
uint32_t **idx_lst = wapiti_xmalloc(sizeof(uint32_t *) * O);
|
318
|
+
for (uint64_t o = 0; o < O; o++) {
|
317
319
|
idx_cnt[o] = cnt[o];
|
318
|
-
idx_lst[o] =
|
320
|
+
idx_lst[o] = wapiti_xmalloc(sizeof(uint32_t) * cnt[o]);
|
319
321
|
}
|
320
322
|
// Populate the index
|
321
323
|
info(" 2/2 -- Populate the index\n");
|
322
|
-
for (
|
323
|
-
cnt[o] = 0, lcl[o] =
|
324
|
-
for (
|
324
|
+
for (uint64_t o = 0; o < O; o++)
|
325
|
+
cnt[o] = 0, lcl[o] = (uint32_t)-1;
|
326
|
+
for (uint32_t s = 0; s < S; s++) {
|
325
327
|
// List actives blocks
|
326
328
|
const seq_t *seq = mdl->train->seq[s];
|
327
|
-
for (
|
328
|
-
for (
|
329
|
+
for (uint32_t t = 0; t < seq->len; t++) {
|
330
|
+
for (uint32_t b = 0; b < seq->pos[t].ucnt; b++)
|
329
331
|
lcl[seq->pos[t].uobs[b]] = s;
|
330
|
-
for (
|
332
|
+
for (uint32_t b = 0; b < seq->pos[t].bcnt; b++)
|
331
333
|
lcl[seq->pos[t].bobs[b]] = s;
|
332
334
|
}
|
333
335
|
// Build index
|
334
|
-
for (
|
336
|
+
for (uint64_t o = 0; o < O; o++)
|
335
337
|
if (lcl[o] == s)
|
336
338
|
idx_lst[o][cnt[o]++] = s;
|
337
339
|
}
|
338
340
|
info(" Done\n");
|
339
341
|
// Allocate the specific trainer of BCD
|
340
|
-
bcd_t *bcd =
|
342
|
+
bcd_t *bcd = wapiti_xmalloc(sizeof(bcd_t));
|
341
343
|
bcd->ugrd = xvm_new(Y);
|
342
344
|
bcd->uhes = xvm_new(Y);
|
343
345
|
bcd->bgrd = xvm_new(Y * Y);
|
344
346
|
bcd->bhes = xvm_new(Y * Y);
|
345
|
-
bcd->actpos =
|
346
|
-
bcd->
|
347
|
+
bcd->actpos = wapiti_xmalloc(sizeof(int) * T);
|
348
|
+
bcd->grd_st = grd_stnew(mdl, NULL);
|
347
349
|
// And train the model
|
348
|
-
for (
|
349
|
-
for (
|
350
|
+
for (uint32_t i = 1; i <= K; i++) {
|
351
|
+
for (uint64_t o = 0; o < O; o++) {
|
350
352
|
// Clear the gradient and the hessian
|
351
|
-
for (
|
353
|
+
for (uint32_t y = 0, d = 0; y < Y; y++) {
|
352
354
|
bcd->ugrd[y] = 0.0;
|
353
355
|
bcd->uhes[y] = 0.0;
|
354
|
-
for (
|
356
|
+
for (uint32_t yp = 0; yp < Y; yp++, d++) {
|
355
357
|
bcd->bgrd[d] = 0.0;
|
356
358
|
bcd->bhes[d] = 0.0;
|
357
359
|
}
|
358
360
|
}
|
359
361
|
// Process active sequences
|
360
|
-
for (
|
361
|
-
const
|
362
|
+
for (uint32_t s = 0; s < idx_cnt[o]; s++) {
|
363
|
+
const uint32_t id = idx_lst[o][s];
|
362
364
|
const seq_t *seq = mdl->train->seq[id];
|
363
365
|
bcd_actpos(mdl, bcd, seq, o);
|
364
|
-
|
366
|
+
grd_stcheck(bcd->grd_st, seq->len);
|
365
367
|
if (mdl->opt->sparse) {
|
366
|
-
grd_spdopsi(bcd->
|
367
|
-
grd_spfwdbwd(bcd->
|
368
|
+
grd_spdopsi(bcd->grd_st, seq);
|
369
|
+
grd_spfwdbwd(bcd->grd_st, seq);
|
368
370
|
bcd_spgradhes(mdl, bcd, seq, o);
|
369
371
|
} else {
|
370
|
-
grd_fldopsi(bcd->
|
371
|
-
grd_flfwdbwd(bcd->
|
372
|
+
grd_fldopsi(bcd->grd_st, seq);
|
373
|
+
grd_flfwdbwd(bcd->grd_st, seq);
|
372
374
|
bcd_flgradhes(mdl, bcd, seq, o);
|
373
375
|
}
|
374
376
|
}
|
375
377
|
// And update the model
|
376
378
|
bcd_update(mdl, bcd, o);
|
377
379
|
}
|
378
|
-
if (!uit_progress(mdl, i
|
380
|
+
if (!uit_progress(mdl, i, -1.0))
|
379
381
|
break;
|
380
382
|
}
|
381
383
|
// Cleanup memory
|
382
|
-
|
384
|
+
grd_stfree(bcd->grd_st);
|
383
385
|
xvm_free(bcd->ugrd); xvm_free(bcd->uhes);
|
384
386
|
xvm_free(bcd->bgrd); xvm_free(bcd->bhes);
|
385
387
|
free(bcd->actpos);
|
386
388
|
free(bcd);
|
387
|
-
for (
|
389
|
+
for (uint64_t o = 0; o < O; o++)
|
388
390
|
free(idx_lst[o]);
|
389
391
|
free(idx_lst);
|
390
392
|
free(idx_cnt);
|