wapiti 0.0.1

Files changed (63)
  1. data/.autotest +13 -0
  2. data/.gitignore +5 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +6 -0
  5. data/LICENSE +30 -0
  6. data/README.md +153 -0
  7. data/Rakefile +33 -0
  8. data/ext/wapiti/bcd.c +392 -0
  9. data/ext/wapiti/decoder.c +535 -0
  10. data/ext/wapiti/decoder.h +46 -0
  11. data/ext/wapiti/extconf.rb +8 -0
  12. data/ext/wapiti/gradient.c +818 -0
  13. data/ext/wapiti/gradient.h +81 -0
  14. data/ext/wapiti/lbfgs.c +294 -0
  15. data/ext/wapiti/model.c +296 -0
  16. data/ext/wapiti/model.h +100 -0
  17. data/ext/wapiti/native.c +1238 -0
  18. data/ext/wapiti/native.h +15 -0
  19. data/ext/wapiti/options.c +278 -0
  20. data/ext/wapiti/options.h +91 -0
  21. data/ext/wapiti/pattern.c +395 -0
  22. data/ext/wapiti/pattern.h +56 -0
  23. data/ext/wapiti/progress.c +167 -0
  24. data/ext/wapiti/progress.h +43 -0
  25. data/ext/wapiti/quark.c +272 -0
  26. data/ext/wapiti/quark.h +46 -0
  27. data/ext/wapiti/reader.c +553 -0
  28. data/ext/wapiti/reader.h +73 -0
  29. data/ext/wapiti/rprop.c +191 -0
  30. data/ext/wapiti/sequence.h +148 -0
  31. data/ext/wapiti/sgdl1.c +218 -0
  32. data/ext/wapiti/thread.c +171 -0
  33. data/ext/wapiti/thread.h +42 -0
  34. data/ext/wapiti/tools.c +202 -0
  35. data/ext/wapiti/tools.h +54 -0
  36. data/ext/wapiti/trainers.h +39 -0
  37. data/ext/wapiti/vmath.c +372 -0
  38. data/ext/wapiti/vmath.h +51 -0
  39. data/ext/wapiti/wapiti.c +288 -0
  40. data/ext/wapiti/wapiti.h +45 -0
  41. data/lib/wapiti.rb +30 -0
  42. data/lib/wapiti/errors.rb +17 -0
  43. data/lib/wapiti/model.rb +49 -0
  44. data/lib/wapiti/options.rb +113 -0
  45. data/lib/wapiti/utility.rb +15 -0
  46. data/lib/wapiti/version.rb +3 -0
  47. data/spec/fixtures/ch.mod +18550 -0
  48. data/spec/fixtures/chpattern.txt +52 -0
  49. data/spec/fixtures/chtest.txt +1973 -0
  50. data/spec/fixtures/chtrain.txt +19995 -0
  51. data/spec/fixtures/nppattern.txt +52 -0
  52. data/spec/fixtures/nptest.txt +1973 -0
  53. data/spec/fixtures/nptrain.txt +19995 -0
  54. data/spec/fixtures/pattern.txt +14 -0
  55. data/spec/fixtures/test.txt +60000 -0
  56. data/spec/fixtures/train.txt +1200 -0
  57. data/spec/spec_helper.rb +21 -0
  58. data/spec/wapiti/model_spec.rb +173 -0
  59. data/spec/wapiti/native_spec.rb +12 -0
  60. data/spec/wapiti/options_spec.rb +175 -0
  61. data/spec/wapiti/utility_spec.rb +22 -0
  62. data/wapiti.gemspec +35 -0
  63. metadata +178 -0
data/.autotest ADDED
@@ -0,0 +1,13 @@
+ require 'autotest/fsevent' if RUBY_PLATFORM =~ /darwin/
+
+ Autotest.add_hook :initialize do |at|
+   at.add_mapping(/.*\.[ch]$/) do |f, _|
+     at.files_matching(/native_spec\.rb$/)
+     at.files_matching(/options_spec\.rb$/)
+     at.files_matching(/model_spec\.rb$/)
+   end
+ end
+
+ Autotest.add_hook :run_command do |at|
+   system 'bundle exec rake compile'
+ end
data/.gitignore ADDED
@@ -0,0 +1,5 @@
+ tmp
+ Gemfile.lock
+ *.bundle
+ *.so
+ *.rbc
data/.rspec ADDED
@@ -0,0 +1,3 @@
+ --color
+ --require ./spec/spec_helper.rb
+ --format progress
data/Gemfile ADDED
@@ -0,0 +1,6 @@
+ source :rubygems
+ gemspec
+
+ group :osx_test do
+   gem 'autotest-fsevent', :require => false
+ end
data/LICENSE ADDED
@@ -0,0 +1,30 @@
+ Wapiti-Ruby
+ Copyright 2011 Sylvester Keil. All rights reserved.
+
+ Wapiti - A linear-chain CRF tool
+ Copyright 2009-2011 CNRS. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ The views and conclusions contained in the software and documentation are
+ those of the authors and should not be interpreted as representing official
+ policies, either expressed or implied, of the copyright holder.
data/README.md ADDED
@@ -0,0 +1,153 @@
+ Wapiti-Ruby
+ ===========
+
+ The Wapiti-Ruby gem provides a wicked fast linear-chain CRF
+ ([Conditional Random Fields](http://en.wikipedia.org/wiki/Conditional_random_field))
+ API for sequence segmentation and labelling; it is based on the codebase of
+ Thomas Lavergne's awesome [wapiti](http://wapiti.limsi.fr/).
+
+
+ Requirements
+ ------------
+
+ Wapiti-Ruby is written in C and Ruby and requires a compiler with C99
+ support (e.g., gcc); the gem has been confirmed to work with MRI 1.9, 1.8.7,
+ and Rubinius.
+
+
+ Quickstart
+ ----------
+
+ ### Installation
+
+     $ [sudo] gem install wapiti
+
+ ### Creating a Model
+
+ Using a pattern and training data stored in a file:
+
+     model = Wapiti.train('train.txt', :pattern => 'pattern.txt')
+     => #<Wapiti::Model:0x0000010188f868>
+     model.labels
+     => ["B-ADJP", "B-ADVP", "B-CONJP" ...]
+     model.save('ch.mod')
+     => # saves the model as 'ch.mod'
+
+ Alternatively, you can pass in the training data as an array; the array
+ should contain one array for each sequence of training data.
+
+     data = []
+     data << ['Confidence NN B-NP', 'in IN B-PP', 'the DT B-NP', 'pound NN I-NP', '. . O']
+     ...
+     model = Wapiti.train(data, options)
+
+ You can consult the `Wapiti::Options` class for a list of supported
+ configuration options and algorithms:
+
+     Wapiti::Options.attribute_names
+     => [:algorithm, :check, :compact, :convergence_window, :development_data,
+        :jobsize, :label, :max_iterations, :maxent, :pattern, :posterior, :rho1,
+        :rho2, :score, :sparse, :stop_epsilon, :stop_window, :threads]
+     Wapiti::Options.algorithms
+     => ["l-bfgs", "sgd-l1", "bcd", "rprop", "rprop+", "rprop-", "auto"]
+
+ Use `#valid?` or `#validate` (which returns error messages) to make sure
+ your configuration is supported by Wapiti.
+
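+ For example, a quick sanity check might look like this (a minimal sketch;
+ the exact messages returned by `#validate` depend on your configuration):
+
+     options = Wapiti::Options.new
+     options.algorithm = 'l-bfgs'
+     options.valid?
+     => true
+     options.algorithm = 'not-an-algorithm'
+     options.validate
+     => # a list of error messages
+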
+ You can pass options either as an options hash or by adding a block to the
+ method invocation:
+
+     model = Wapiti::Model.train(data) do |config|
+       config.pattern = 'pattern.txt'
+       config.threads = 4
+     end
+
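+ For comparison, here is the same configuration passed as an options hash
+ (this should be equivalent to the block form above):
+
+     model = Wapiti::Model.train(data, :pattern => 'pattern.txt', :threads => 4)
+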
+ Before saving your model you can use `compact` to reduce the model's size:
+
+     model.save 'm1.mod'
+     => # m1.mod file size 1.8M
+     model.compact
+     model.save 'm2.mod'
+     => # m2.mod file size 471K
+
+ ### Loading existing Models
+
+     model = Wapiti::Model.load('m1.mod')
+
+ ### Labelling
+
+ By calling `#label` on a Model instance you can add labels to your sequence
+ data:
+
+     model = Wapiti.load('m2.mod')
+     model.label('test.txt')
+     => [[["Confidence NN B-NP", "B-NP"], ["in IN B-PP", "B-PP"] ... ]]
+
+ The result is an array of sequence arrays; each sequence array consists of
+ the original token and feature string (when using test data, the final
+ feature is usually the expected label) and the label calculated by Wapiti.
+
+ As with training data, you can pass in data either by filename or as
+ a Ruby Array:
+
+     model.label [['Confidence NN', 'in IN', 'the DT', 'pound NN', '. .']]
+     => [[["Confidence NN", "B-NP"], ["in IN", "B-PP"], ["the DT", "B-NP"],
+        ["pound NN", "I-NP"], [". .", "O"]]]
+
+ If you pass a block to `#label` Wapiti will yield each token and the
+ corresponding label:
+
+     model.label [['Confidence NN', 'in IN', 'the DT', 'pound NN', '. .']] do |token, label|
+       [token.downcase, label.downcase]
+     end
+     => [[["confidence nn", "b-np"], ["in in", "b-pp"], ["the dt", "b-np"],
+        ["pound nn", "i-np"], [". .", "o"]]]
+
+
+ Citing
+ ------
+
+ If you're using Wapiti-Ruby for research purposes, please use the following
+ citation of the original wapiti package:
+
+     @inproceedings{lavergne2010practical,
+       author    = {Lavergne, Thomas and Capp\'{e}, Olivier and Yvon, Fran\c{c}ois},
+       title     = {Practical Very Large Scale {CRFs}},
+       booktitle = {Proceedings of the 48th Annual Meeting of the Association for
+                    Computational Linguistics (ACL)},
+       month     = {July},
+       year      = {2010},
+       location  = {Uppsala, Sweden},
+       publisher = {Association for Computational Linguistics},
+       pages     = {504--513},
+       url       = {http://www.aclweb.org/anthology/P10-1052}
+     }
+
+ If you're benefiting from any of the Wapiti-Ruby-specific features you are
+ welcome to also refer back to the
+ [Wapiti-Ruby homepage](http://github.com/inukshuk/wapiti-ruby/).
+
+
+ Contributing
+ ------------
+
+ The Wapiti-Ruby source code is
+ [hosted on GitHub](http://github.com/inukshuk/wapiti-ruby/).
+ You can check out a copy of the latest code using Git:
+
+     $ git clone https://github.com/inukshuk/wapiti-ruby.git
+
+ If you've found a bug or have a question, please open an issue on the
+ [Wapiti-Ruby issue tracker](http://github.com/inukshuk/wapiti-ruby/issues).
+ Or, for extra credit, clone the Wapiti-Ruby repository, write a failing
+ example, fix the bug and submit a pull request.
+
+
+ License
+ -------
+
+ Copyright 2011 Sylvester Keil. All rights reserved.
+
+ Copyright 2009-2011 CNRS. All rights reserved.
+
+ Wapiti-Ruby is distributed under a BSD-style license. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,33 @@
+ lib = File.expand_path('../lib/', __FILE__)
+ $:.unshift lib unless $:.include?(lib)
+
+ require 'rake/clean'
+ require 'rake/testtask'
+ require 'rake/extensiontask'
+
+ require 'wapiti/version'
+
+ task :default => [:test]
+
+ Rake::ExtensionTask.new do |ext|
+   ext.name = 'native'
+
+   ext.ext_dir = 'ext/wapiti'
+   ext.lib_dir = 'lib/wapiti'
+
+   CLEAN.include("#{ext.lib_dir}/native.*")
+   CLEAN.include("#{ext.tmp_dir}")
+
+ end
+
+ task :build => [:clean] do
+   system 'gem build wapiti.gemspec'
+ end
+
+ task :release => [:build] do
+   system "git tag #{Wapiti::VERSION}"
+   system "gem push wapiti-#{Wapiti::VERSION}.gem"
+ end
+
+ CLEAN.include('*.gem')
+ CLEAN.include('*.rbc')
data/ext/wapiti/bcd.c ADDED
@@ -0,0 +1,392 @@
+ /*
+  * Wapiti - A linear-chain CRF tool
+  *
+  * Copyright (c) 2009-2011 CNRS
+  * All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions are met:
+  *   * Redistributions of source code must retain the above copyright
+  *     notice, this list of conditions and the following disclaimer.
+  *   * Redistributions in binary form must reproduce the above copyright
+  *     notice, this list of conditions and the following disclaimer in the
+  *     documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  * POSSIBILITY OF SUCH DAMAGE.
+  */
+
+ #include <assert.h>
+ #include <math.h>
+ #include <stdbool.h>
+ #include <stddef.h>
+ #include <stdlib.h>
+ #include <string.h>
+
+ #include "wapiti.h"
+ #include "gradient.h"
+ #include "model.h"
+ #include "options.h"
+ #include "progress.h"
+ #include "sequence.h"
+ #include "tools.h"
+ #include "vmath.h"
+
+ /******************************************************************************
+  * Blockwise Coordinate Descent trainer
+  *   The gradient and hessian computations used for BCD are very similar to
+  *   the generic ones defined below, but there are some important differences:
+  *     - The forward and backward recursions don't have to be performed fully,
+  *       but just in the range of activity of the considered block. So if the
+  *       block is active only at position t, the alpha recursion is done from 1
+  *       to t and the beta one from T to t, dividing the amount of computation
+  *       by 2.
+  *     - Similarly, the updates of the gradient and hessian have to be done only
+  *       at positions where the block is active, so in the common case where the
+  *       block is active only once in the sequence, the improvement can be huge.
+  *     - And finally, there is no need to compute the logloss, which can take a
+  *       long time due to the computation of the log()s.
+  ******************************************************************************/
+ typedef struct bcd_s bcd_t;
+ struct bcd_s {
+     double *ugrd;    //  [Y]
+     double *uhes;    //  [Y]
+     double *bgrd;    //  [Y][Y]
+     double *bhes;    //  [Y][Y]
+     size_t *actpos;  //  [T]
+     size_t  actcnt;
+     grd_t  *grd;
+ };
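+
+ // The per-block update performed in bcd_update below can be read, per
+ // component, as a soft-thresholded Newton-like step (a sketch in the
+ // notation of bcd_update; g = gradient, H = approximate hessian):
+ //
+ //     w = soft(H * w - g, rho1) / (H + rho2)
+ //
+ // with rho1 acting as an L1 penalty (via bcd_soft) and rho2 as an L2
+ // damping term; g and H are accumulated only where the block is active.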
+
+ /* bcd_soft:
+  *   The soft-thresholding function used by the L1-regularized update.
+  */
+ static double bcd_soft(double z, double r) {
+     if (z >  r) return z - r;
+     if (z < -r) return z + r;
+     return 0.0;
+ }
+
+ /* bcd_actpos:
+  *   List the positions where the given block is active in the sequence and
+  *   set up the limits for the fwd/bwd recursions.
+  */
+ static void bcd_actpos(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
+     const int T = seq->len;
+     size_t *actpos = bcd->actpos;
+     size_t  actcnt = 0;
+     for (int t = 0; t < T; t++) {
+         const pos_t *pos = &(seq->pos[t]);
+         bool ok = false;
+         if (mdl->kind[o] & 1)
+             for (size_t n = 0; !ok && n < pos->ucnt; n++)
+                 if (pos->uobs[n] == o)
+                     ok = true;
+         if (mdl->kind[o] & 2)
+             for (size_t n = 0; !ok && n < pos->bcnt; n++)
+                 if (pos->bobs[n] == o)
+                     ok = true;
+         if (!ok)
+             continue;
+         actpos[actcnt++] = t;
+     }
+     assert(actcnt != 0);
+     bcd->actcnt = actcnt;
+     bcd->grd->first = actpos[0];
+     bcd->grd->last  = actpos[actcnt - 1];
+ }
+
+ /* bcd_flgradhes:
+  *   Update the gradient and hessian for <blk> on sequence <seq>. This one is
+  *   very similar to the trn_spupgrad function but does the computation only
+  *   at active positions and also approximates the hessian.
+  */
+ static void bcd_flgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
+     const grd_t *grd = bcd->grd;
+     const size_t Y = mdl->nlbl;
+     const size_t T = seq->len;
+     const double (*psi  )[T][Y][Y] = (void *)grd->psi;
+     const double (*alpha)[T][Y]    = (void *)grd->alpha;
+     const double (*beta )[T][Y]    = (void *)grd->beta;
+     const double *unorm  = grd->unorm;
+     const double *bnorm  = grd->bnorm;
+     const size_t *actpos = bcd->actpos;
+     const size_t  actcnt = bcd->actcnt;
+     double *ugrd = bcd->ugrd;
+     double *uhes = bcd->uhes;
+     double *bgrd = bcd->bgrd;
+     double *bhes = bcd->bhes;
+     // Update the gradient and the hessian, but here we sum only over the
+     // positions where the block is active; first for unigram features
+     if (mdl->kind[o] & 1) {
+         for (size_t n = 0; n < actcnt; n++) {
+             const size_t t = actpos[n];
+             for (size_t y = 0; y < Y; y++) {
+                 const double e = (*alpha)[t][y] * (*beta)[t][y]
+                                * unorm[t];
+                 ugrd[y] += e;
+                 uhes[y] += e * (1.0 - e);
+             }
+             const size_t y = seq->pos[t].lbl;
+             ugrd[y] -= 1.0;
+         }
+     }
+     if ((mdl->kind[o] & 2) == 0)
+         return;
+     // ... and next for bigram features
+     for (size_t n = 0; n < actcnt; n++) {
+         const size_t t = actpos[n];
+         if (t == 0)
+             continue;
+         for (size_t yp = 0, d = 0; yp < Y; yp++) {
+             for (size_t y = 0; y < Y; y++, d++) {
+                 double e = (*alpha)[t - 1][yp] * (*beta)[t][y]
+                          * (*psi)[t][yp][y] * bnorm[t];
+                 bgrd[d] += e;
+                 bhes[d] += e * (1.0 - e);
+             }
+         }
+         const size_t yp = seq->pos[t - 1].lbl;
+         const size_t y  = seq->pos[t    ].lbl;
+         bgrd[yp * Y + y] -= 1.0;
+     }
+ }
+
+ /* bcd_spgradhes:
+  *   Update the gradient and hessian for <blk> on sequence <seq>. This one is
+  *   very similar to the trn_spupgrad function but does the computation only
+  *   at active positions and also approximates the hessian.
+  */
+ static void bcd_spgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
+     const grd_t *grd = bcd->grd;
+     const size_t Y = mdl->nlbl;
+     const size_t T = seq->len;
+     const double (*psiuni)[T][Y] = (void *)grd->psiuni;
+     const double *psival         = grd->psi;
+     const size_t *psiyp          = grd->psiyp;
+     const size_t (*psiidx)[T][Y] = (void *)grd->psiidx;
+     const size_t *psioff         = grd->psioff;
+     const double (*alpha)[T][Y]  = (void *)grd->alpha;
+     const double (*beta )[T][Y]  = (void *)grd->beta;
+     const double *unorm  = grd->unorm;
+     const double *bnorm  = grd->bnorm;
+     const size_t *actpos = bcd->actpos;
+     const size_t  actcnt = bcd->actcnt;
+     double *ugrd = bcd->ugrd;
+     double *uhes = bcd->uhes;
+     double *bgrd = bcd->bgrd;
+     double *bhes = bcd->bhes;
+     // Update the gradient and the hessian, but here we sum only over the
+     // positions where the block is active; first for unigram features
+     if (mdl->kind[o] & 1) {
+         for (size_t n = 0; n < actcnt; n++) {
+             const size_t t = actpos[n];
+             for (size_t y = 0; y < Y; y++) {
+                 const double e = (*alpha)[t][y] * (*beta)[t][y]
+                                * unorm[t];
+                 ugrd[y] += e;
+                 uhes[y] += e * (1.0 - e);
+             }
+             const size_t y = seq->pos[t].lbl;
+             ugrd[y] -= 1.0;
+         }
+     }
+     if ((mdl->kind[o] & 2) == 0)
+         return;
+     // ... and next for bigram features
+     for (size_t n = 0; n < actcnt; n++) {
+         const size_t t = actpos[n];
+         if (t == 0)
+             continue;
+         // We build the expectation matrix
+         double e[Y][Y];
+         for (size_t yp = 0; yp < Y; yp++)
+             for (size_t y = 0; y < Y; y++)
+                 e[yp][y] = (*alpha)[t - 1][yp] * (*beta)[t][y]
+                          * (*psiuni)[t][y] * bnorm[t];
+         const size_t off = psioff[t];
+         for (size_t n = 0, y = 0; n < (*psiidx)[t][Y - 1]; ) {
+             while (n >= (*psiidx)[t][y])
+                 y++;
+             while (n < (*psiidx)[t][y]) {
+                 const size_t yp = psiyp [off + n];
+                 const double v  = psival[off + n];
+                 e[yp][y] += e[yp][y] * v;
+                 n++;
+             }
+         }
+         // And use it
+         for (size_t yp = 0, d = 0; yp < Y; yp++) {
+             for (size_t y = 0; y < Y; y++, d++) {
+                 bgrd[d] += e[yp][y];
+                 bhes[d] += e[yp][y] * (1.0 - e[yp][y]);
+             }
+         }
+         const size_t yp = seq->pos[t - 1].lbl;
+         const size_t y  = seq->pos[t    ].lbl;
+         bgrd[yp * Y + y] -= 1.0;
+     }
+ }
+
+ /* bcd_update:
+  *   Update the model with the computed gradient and hessian.
+  */
+ static void bcd_update(mdl_t *mdl, bcd_t *bcd, size_t o) {
+     const double rho1  = mdl->opt->rho1;
+     const double rho2  = mdl->opt->rho2;
+     const double kappa = mdl->opt->bcd.kappa;
+     const size_t Y = mdl->nlbl;
+     const double *ugrd = bcd->ugrd;
+     const double *bgrd = bcd->bgrd;
+     double *uhes = bcd->uhes;
+     double *bhes = bcd->bhes;
+     if (mdl->kind[o] & 1) {
+         // Adjust the hessian
+         double a = 1.0;
+         for (size_t y = 0; y < Y; y++)
+             a = max(a, fabs(ugrd[y] / uhes[y]));
+         xvm_scale(uhes, uhes, a * kappa, Y);
+         // Update the model
+         double *w = mdl->theta + mdl->uoff[o];
+         for (size_t y = 0; y < Y; y++) {
+             double z = uhes[y] * w[y] - ugrd[y];
+             double d = uhes[y] + rho2;
+             w[y] = bcd_soft(z, rho1) / d;
+         }
+     }
+     if (mdl->kind[o] & 2) {
+         // Adjust the hessian
+         double a = 1.0;
+         for (size_t i = 0; i < Y * Y; i++)
+             a = max(a, fabs(bgrd[i] / bhes[i]));
+         xvm_scale(bhes, bhes, a * kappa, Y * Y);
+         // Update the model
+         double *bw = mdl->theta + mdl->boff[o];
+         for (size_t i = 0; i < Y * Y; i++) {
+             double z = bhes[i] * bw[i] - bgrd[i];
+             double d = bhes[i] + rho2;
+             bw[i] = bcd_soft(z, rho1) / d;
+         }
+     }
+ }
+
+ /* trn_bcd:
+  *   Train the model using the blockwise coordinate descent method.
+  */
+ void trn_bcd(mdl_t *mdl) {
+     const size_t Y = mdl->nlbl;
+     const size_t O = mdl->nobs;
+     const size_t T = mdl->train->mlen;
+     const size_t S = mdl->train->nseq;
+     const int    K = mdl->opt->maxiter;
+     // Build the index:
+     //   Count active sequences per block
+     info(" - Build the index\n");
+     info(" 1/2 -- scan the sequences\n");
+     size_t tot = 0, cnt[O], lcl[O];
+     for (size_t o = 0; o < O; o++)
+         cnt[o] = 0, lcl[o] = none;
+     for (size_t s = 0; s < S; s++) {
+         // List active blocks
+         const seq_t *seq = mdl->train->seq[s];
+         for (int t = 0; t < seq->len; t++) {
+             for (size_t b = 0; b < seq->pos[t].ucnt; b++)
+                 lcl[seq->pos[t].uobs[b]] = s;
+             for (size_t b = 0; b < seq->pos[t].bcnt; b++)
+                 lcl[seq->pos[t].bobs[b]] = s;
+         }
+         // Update block counts
+         for (size_t o = 0; o < O; o++)
+             cnt[o] += (lcl[o] == s);
+     }
+     for (size_t o = 0; o < O; o++)
+         tot += cnt[o];
+     // Allocate memory
+     size_t  *idx_cnt = xmalloc(sizeof(size_t  ) * O);
+     size_t **idx_lst = xmalloc(sizeof(size_t *) * O);
+     for (size_t o = 0; o < O; o++) {
+         idx_cnt[o] = cnt[o];
+         idx_lst[o] = xmalloc(sizeof(size_t) * cnt[o]);
+     }
+     // Populate the index
+     info(" 2/2 -- Populate the index\n");
+     for (size_t o = 0; o < O; o++)
+         cnt[o] = 0, lcl[o] = none;
+     for (size_t s = 0; s < S; s++) {
+         // List active blocks
+         const seq_t *seq = mdl->train->seq[s];
+         for (int t = 0; t < seq->len; t++) {
+             for (size_t b = 0; b < seq->pos[t].ucnt; b++)
+                 lcl[seq->pos[t].uobs[b]] = s;
+             for (size_t b = 0; b < seq->pos[t].bcnt; b++)
+                 lcl[seq->pos[t].bobs[b]] = s;
+         }
+         // Build the index
+         for (size_t o = 0; o < O; o++)
+             if (lcl[o] == s)
+                 idx_lst[o][cnt[o]++] = s;
+     }
+     info(" Done\n");
+     // Allocate the BCD-specific trainer state
+     bcd_t *bcd = xmalloc(sizeof(bcd_t));
+     bcd->ugrd   = xvm_new(Y);
+     bcd->uhes   = xvm_new(Y);
+     bcd->bgrd   = xvm_new(Y * Y);
+     bcd->bhes   = xvm_new(Y * Y);
+     bcd->actpos = xmalloc(sizeof(size_t) * T);
+     bcd->grd    = grd_new(mdl, NULL);
+     // And train the model
+     for (int i = 0; i < K; i++) {
+         for (size_t o = 0; o < O; o++) {
+             // Clear the gradient and the hessian
+             for (size_t y = 0, d = 0; y < Y; y++) {
+                 bcd->ugrd[y] = 0.0;
+                 bcd->uhes[y] = 0.0;
+                 for (size_t yp = 0; yp < Y; yp++, d++) {
+                     bcd->bgrd[d] = 0.0;
+                     bcd->bhes[d] = 0.0;
+                 }
+             }
+             // Process active sequences
+             for (size_t s = 0; s < idx_cnt[o]; s++) {
+                 const size_t id = idx_lst[o][s];
+                 const seq_t *seq = mdl->train->seq[id];
+                 bcd_actpos(mdl, bcd, seq, o);
+                 grd_check(bcd->grd, seq->len);
+                 if (mdl->opt->sparse) {
+                     grd_spdopsi(bcd->grd, seq);
+                     grd_spfwdbwd(bcd->grd, seq);
+                     bcd_spgradhes(mdl, bcd, seq, o);
+                 } else {
+                     grd_fldopsi(bcd->grd, seq);
+                     grd_flfwdbwd(bcd->grd, seq);
+                     bcd_flgradhes(mdl, bcd, seq, o);
+                 }
+             }
+             // And update the model
+             bcd_update(mdl, bcd, o);
+         }
+         if (!uit_progress(mdl, i + 1, -1.0))
+             break;
+     }
+     // Cleanup memory
+     grd_free(bcd->grd);
+     xvm_free(bcd->ugrd); xvm_free(bcd->uhes);
+     xvm_free(bcd->bgrd); xvm_free(bcd->bhes);
+     free(bcd->actpos);
+     free(bcd);
+     for (size_t o = 0; o < O; o++)
+         free(idx_lst[o]);
+     free(idx_lst);
+     free(idx_cnt);
+ }
+