wapiti 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. data/.autotest +13 -0
  2. data/.gitignore +5 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +6 -0
  5. data/LICENSE +30 -0
  6. data/README.md +153 -0
  7. data/Rakefile +33 -0
  8. data/ext/wapiti/bcd.c +392 -0
  9. data/ext/wapiti/decoder.c +535 -0
  10. data/ext/wapiti/decoder.h +46 -0
  11. data/ext/wapiti/extconf.rb +8 -0
  12. data/ext/wapiti/gradient.c +818 -0
  13. data/ext/wapiti/gradient.h +81 -0
  14. data/ext/wapiti/lbfgs.c +294 -0
  15. data/ext/wapiti/model.c +296 -0
  16. data/ext/wapiti/model.h +100 -0
  17. data/ext/wapiti/native.c +1238 -0
  18. data/ext/wapiti/native.h +15 -0
  19. data/ext/wapiti/options.c +278 -0
  20. data/ext/wapiti/options.h +91 -0
  21. data/ext/wapiti/pattern.c +395 -0
  22. data/ext/wapiti/pattern.h +56 -0
  23. data/ext/wapiti/progress.c +167 -0
  24. data/ext/wapiti/progress.h +43 -0
  25. data/ext/wapiti/quark.c +272 -0
  26. data/ext/wapiti/quark.h +46 -0
  27. data/ext/wapiti/reader.c +553 -0
  28. data/ext/wapiti/reader.h +73 -0
  29. data/ext/wapiti/rprop.c +191 -0
  30. data/ext/wapiti/sequence.h +148 -0
  31. data/ext/wapiti/sgdl1.c +218 -0
  32. data/ext/wapiti/thread.c +171 -0
  33. data/ext/wapiti/thread.h +42 -0
  34. data/ext/wapiti/tools.c +202 -0
  35. data/ext/wapiti/tools.h +54 -0
  36. data/ext/wapiti/trainers.h +39 -0
  37. data/ext/wapiti/vmath.c +372 -0
  38. data/ext/wapiti/vmath.h +51 -0
  39. data/ext/wapiti/wapiti.c +288 -0
  40. data/ext/wapiti/wapiti.h +45 -0
  41. data/lib/wapiti.rb +30 -0
  42. data/lib/wapiti/errors.rb +17 -0
  43. data/lib/wapiti/model.rb +49 -0
  44. data/lib/wapiti/options.rb +113 -0
  45. data/lib/wapiti/utility.rb +15 -0
  46. data/lib/wapiti/version.rb +3 -0
  47. data/spec/fixtures/ch.mod +18550 -0
  48. data/spec/fixtures/chpattern.txt +52 -0
  49. data/spec/fixtures/chtest.txt +1973 -0
  50. data/spec/fixtures/chtrain.txt +19995 -0
  51. data/spec/fixtures/nppattern.txt +52 -0
  52. data/spec/fixtures/nptest.txt +1973 -0
  53. data/spec/fixtures/nptrain.txt +19995 -0
  54. data/spec/fixtures/pattern.txt +14 -0
  55. data/spec/fixtures/test.txt +60000 -0
  56. data/spec/fixtures/train.txt +1200 -0
  57. data/spec/spec_helper.rb +21 -0
  58. data/spec/wapiti/model_spec.rb +173 -0
  59. data/spec/wapiti/native_spec.rb +12 -0
  60. data/spec/wapiti/options_spec.rb +175 -0
  61. data/spec/wapiti/utility_spec.rb +22 -0
  62. data/wapiti.gemspec +35 -0
  63. metadata +178 -0
data/.autotest ADDED
@@ -0,0 +1,13 @@
+ require 'autotest/fsevent' if RUBY_PLATFORM =~ /darwin/
+
+ Autotest.add_hook :initialize do |at|
+   at.add_mapping(/.*\.[ch]$/) do |f, _|
+     at.files_matching(/native_spec\.rb$/)
+     at.files_matching(/options_spec\.rb$/)
+     at.files_matching(/model_spec\.rb$/)
+   end
+ end
+
+ Autotest.add_hook :run_command do |at|
+   system 'bundle exec rake compile'
+ end
data/.gitignore ADDED
@@ -0,0 +1,5 @@
+ tmp
+ Gemfile.lock
+ *.bundle
+ *.so
+ *.rbc
data/.rspec ADDED
@@ -0,0 +1,3 @@
+ --color
+ --require ./spec/spec_helper.rb
+ --format progress
data/Gemfile ADDED
@@ -0,0 +1,6 @@
+ source :rubygems
+ gemspec
+
+ group :osx_test do
+   gem 'autotest-fsevent', :require => false
+ end
data/LICENSE ADDED
@@ -0,0 +1,30 @@
+ Wapiti-Ruby
+ Copyright 2011 Sylvester Keil. All rights reserved.
+
+ Wapiti - A linear-chain CRF tool
+ Copyright 2009-2011 CNRS. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ The views and conclusions contained in the software and documentation are
+ those of the authors and should not be interpreted as representing official
+ policies, either expressed or implied, of the copyright holder.
data/README.md ADDED
@@ -0,0 +1,153 @@
+ Wapiti-Ruby
+ ===========
+
+ The Wapiti-Ruby gem provides a wicked fast linear-chain CRF
+ ([Conditional Random Fields](http://en.wikipedia.org/wiki/Conditional_random_field))
+ API for sequence segmentation and labelling; it is based on the codebase of
+ Thomas Lavergne's awesome [wapiti](http://wapiti.limsi.fr/).
+
+
+ Requirements
+ ------------
+
+ Wapiti-Ruby is written in C and Ruby and requires a compiler with C99
+ support (e.g., gcc); the gem has been confirmed to work with MRI 1.9, 1.8.7,
+ and Rubinius.
+
+
+ Quickstart
+ ----------
+
+ ### Installation
+
+     $ [sudo] gem install wapiti
+
+ ### Creating a Model
+
+ Using a pattern and training data stored in a file:
+
+     model = Wapiti.train('train.txt', :pattern => 'pattern.txt')
+     => #<Wapiti::Model:0x0000010188f868>
+     model.labels
+     => ["B-ADJP", "B-ADVP", "B-CONJP" ...]
+     model.save('ch.mod')
+     => # saves the model as 'ch.mod'
+
+ Alternatively, you can pass in the training data as an array; the array
+ should contain one array for each sequence of training data.
+
+     data = []
+     data << ['Confidence NN B-NP', 'in IN B-PP', 'the DT B-NP', 'pound NN I-NP', '. . O']
+     ...
+     model = Wapiti.train(data, options)
+
+ You can consult the `Wapiti::Options` class for a list of supported
+ configuration options and algorithms:
+
+     Wapiti::Options.attribute_names
+     => [:algorithm, :check, :compact, :convergence_window, :development_data,
+        :jobsize, :label, :max_iterations, :maxent, :pattern, :posterior, :rho1,
+        :rho2, :score, :sparse, :stop_epsilon, :stop_window, :threads]
+     Wapiti::Options.algorithms
+     => ["l-bfgs", "sgd-l1", "bcd", "rprop", "rprop+", "rprop-", "auto"]
+
+ Use `#valid?` or `#validate` (which returns error messages) to make sure
+ your configuration is supported by Wapiti.
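+
+ For instance (a minimal sketch; `Wapiti::Options.new` and the exact return
+ values shown here are assumptions, not documented API):
+
+     options = Wapiti::Options.new(:algorithm => 'l-bfgs', :threads => 4)
+     options.valid?    # => true if the configuration is supported
+     options.validate  # => an array of error messages, empty when valid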
+
+ You can pass options either as an options hash or by adding a block to the
+ method invocation:
+
+     model = Wapiti::Model.train(data) do |config|
+       config.pattern = 'pattern.txt'
+       config.threads = 4
+     end
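+
+ The equivalent options-hash form, mirroring the file-based example above
+ (the supported keys are the ones listed by `Wapiti::Options.attribute_names`):
+
+     model = Wapiti.train(data, :pattern => 'pattern.txt', :threads => 4)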
+
+ Before saving your model you can use `compact` to reduce the model's size:
+
+     model.save 'm1.mod'
+     => # m1.mod file size 1.8M
+     model.compact
+     model.save 'm2.mod'
+     => # m2.mod file size 471K
+
+ ### Loading existing Models
+
+     model = Wapiti::Model.load('m1.mod')
+
+ ### Labelling
+
+ By calling `#label` on a Model instance you can add labels to your sequence
+ data:
+
+     model = Wapiti.load('m2.mod')
+     model.label('test.txt')
+     => [[["Confidence NN B-NP", "B-NP"], ["in IN B-PP", "B-PP"] ... ]
+
+ The result is an array of sequence arrays; each sequence array consists of
+ the original token and feature string (when using test data, the final
+ feature is usually the expected label) and the label calculated by Wapiti.
+
+ As with training data, you can pass in data either by filename or as
+ a Ruby Array:
+
+     model.label [['Confidence NN', 'in IN', 'the DT', 'pound NN', '. .']]
+     => [[["Confidence NN", "B-NP"], ["in IN", "B-PP"], ["the DT", "B-NP"],
+        ["pound NN", "I-NP"], [". .", "O"]]]
+
+ If you pass a block to `#label` Wapiti will yield each token and the
+ corresponding label:
+
+     model.label [['Confidence NN', 'in IN', 'the DT', 'pound NN', '. .']] do |token, label|
+       [token.downcase, label.downcase]
+     end
+     => [[["confidence nn", "b-np"], ["in in", "b-pp"], ["the dt", "b-np"],
+        ["pound nn", "i-np"], [". .", "o"]]]
+
+
+ Citing
+ ------
+
+ If you're using Wapiti-Ruby for research purposes, please use the following
+ citation of the original wapiti package:
+
+     @inproceedings{lavergne2010practical,
+       author    = {Lavergne, Thomas and Capp\'{e}, Olivier and Yvon, Fran\c{c}ois},
+       title     = {Practical Very Large Scale {CRFs}},
+       booktitle = {Proceedings of the 48th Annual Meeting of the Association for
+                    Computational Linguistics (ACL)},
+       month     = {July},
+       year      = {2010},
+       location  = {Uppsala, Sweden},
+       publisher = {Association for Computational Linguistics},
+       pages     = {504--513},
+       url       = {http://www.aclweb.org/anthology/P10-1052}
+     }
+
+ If you're profiting from any of the Wapiti-Ruby-specific features, you're
+ welcome to also refer to the
+ [Wapiti-Ruby homepage](http://github.com/inukshuk/wapiti-ruby/).
+
+
+ Contributing
+ ------------
+
+ The Wapiti-Ruby source code is
+ [hosted on GitHub](http://github.com/inukshuk/wapiti-ruby/).
+ You can check out a copy of the latest code using Git:
+
+     $ git clone https://github.com/inukshuk/wapiti-ruby.git
+
+ If you've found a bug or have a question, please open an issue on the
+ [Wapiti-Ruby issue tracker](http://github.com/inukshuk/wapiti-ruby/issues).
+ Or, for extra credit, clone the Wapiti-Ruby repository, write a failing
+ example, fix the bug and submit a pull request.
+
+
+ License
+ -------
+
+ Copyright 2011 Sylvester Keil. All rights reserved.
+
+ Copyright 2009-2011 CNRS. All rights reserved.
+
+ Wapiti-Ruby is distributed under a BSD-style license. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,33 @@
+ lib = File.expand_path('../lib/', __FILE__)
+ $:.unshift lib unless $:.include?(lib)
+
+ require 'rake/clean'
+ require 'rake/testtask'
+ require 'rake/extensiontask'
+
+ require 'wapiti/version'
+
+ task :default => [:test]
+
+ Rake::ExtensionTask.new do |ext|
+   ext.name = 'native'
+
+   ext.ext_dir = 'ext/wapiti'
+   ext.lib_dir = 'lib/wapiti'
+
+   CLEAN.include("#{ext.lib_dir}/native.*")
+   CLEAN.include("#{ext.tmp_dir}")
+
+ end
+
+ task :build => [:clean] do
+   system 'gem build wapiti.gemspec'
+ end
+
+ task :release => [:build] do
+   system "git tag #{Wapiti::VERSION}"
+   system "gem push wapiti-#{Wapiti::VERSION}.gem"
+ end
+
+ CLEAN.include('*.gem')
+ CLEAN.include('*.rbc')
data/ext/wapiti/bcd.c ADDED
@@ -0,0 +1,392 @@
+ /*
+  * Wapiti - A linear-chain CRF tool
+  *
+  * Copyright (c) 2009-2011 CNRS
+  * All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions are met:
+  *   * Redistributions of source code must retain the above copyright
+  *     notice, this list of conditions and the following disclaimer.
+  *   * Redistributions in binary form must reproduce the above copyright
+  *     notice, this list of conditions and the following disclaimer in the
+  *     documentation and/or other materials provided with the distribution.
+  *
+  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  * POSSIBILITY OF SUCH DAMAGE.
+  */
+
+ #include <assert.h>
+ #include <math.h>
+ #include <stdbool.h>
+ #include <stddef.h>
+ #include <stdlib.h>
+ #include <string.h>
+
+ #include "wapiti.h"
+ #include "gradient.h"
+ #include "model.h"
+ #include "options.h"
+ #include "progress.h"
+ #include "sequence.h"
+ #include "tools.h"
+ #include "vmath.h"
+
+ /******************************************************************************
+  * Blockwise coordinate descent trainer
+  *   The gradient and hessian computation used for the BCD is very similar
+  *   to the generic one defined in gradient.c, but there are some important
+  *   differences:
+  *     - The forward and backward recursions don't have to be performed
+  *       fully, but just over the range of activity of the considered block.
+  *       So if the block is active only at position t, the alpha recursion
+  *       is done from 1 to t and the beta one from T to t, dividing the
+  *       amount of computation by 2.
+  *     - Similarly, the update of the gradient and hessian has to be done
+  *       only at positions where the block is active, so in the common case
+  *       where the block is active only once in the sequence, the
+  *       improvement can be huge.
+  *     - Finally, there is no need to compute the logloss, which can take a
+  *       long time due to the computation of the log()s.
+  ******************************************************************************/
+ typedef struct bcd_s bcd_t;
+ struct bcd_s {
+     double *ugrd;    //  [Y]
+     double *uhes;    //  [Y]
+     double *bgrd;    //  [Y][Y]
+     double *bhes;    //  [Y][Y]
+     size_t *actpos;  //  [T]
+     size_t  actcnt;
+     grd_t  *grd;
+ };
+
+ /* bcd_soft:
+  *   The soft-thresholding function: shrink <z> towards zero by <r>.
+  */
+ static double bcd_soft(double z, double r) {
+     if (z > r) return z - r;
+     if (z < -r) return z + r;
+     return 0.0;
+ }
+
+ /* bcd_actpos:
+  *   List the positions where the given block is active in the sequence and
+  *   set up the limits for the forward/backward recursions.
+  */
+ static void bcd_actpos(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
+     const int T = seq->len;
+     size_t *actpos = bcd->actpos;
+     size_t  actcnt = 0;
+     for (int t = 0; t < T; t++) {
+         const pos_t *pos = &(seq->pos[t]);
+         bool ok = false;
+         if (mdl->kind[o] & 1)
+             for (size_t n = 0; !ok && n < pos->ucnt; n++)
+                 if (pos->uobs[n] == o)
+                     ok = true;
+         if (mdl->kind[o] & 2)
+             for (size_t n = 0; !ok && n < pos->bcnt; n++)
+                 if (pos->bobs[n] == o)
+                     ok = true;
+         if (!ok)
+             continue;
+         actpos[actcnt++] = t;
+     }
+     assert(actcnt != 0);
+     bcd->actcnt = actcnt;
+     bcd->grd->first = actpos[0];
+     bcd->grd->last  = actpos[actcnt - 1];
+ }
+
+ /* bcd_flgradhes:
+  *   Update the gradient and hessian for <blk> on sequence <seq>. This one is
+  *   very similar to the trn_spupgrad function but does the computation only
+  *   at active positions and also approximates the hessian.
+  */
+ static void bcd_flgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
+     const grd_t *grd = bcd->grd;
+     const size_t Y = mdl->nlbl;
+     const size_t T = seq->len;
+     const double (*psi  )[T][Y][Y] = (void *)grd->psi;
+     const double (*alpha)[T][Y]    = (void *)grd->alpha;
+     const double (*beta )[T][Y]    = (void *)grd->beta;
+     const double *unorm  = grd->unorm;
+     const double *bnorm  = grd->bnorm;
+     const size_t *actpos = bcd->actpos;
+     const size_t  actcnt = bcd->actcnt;
+     double *ugrd = bcd->ugrd;
+     double *uhes = bcd->uhes;
+     double *bgrd = bcd->bgrd;
+     double *bhes = bcd->bhes;
+     // Update the gradient and the hessian, but here we sum only over the
+     // positions where the block is active; for unigram features
+     if (mdl->kind[o] & 1) {
+         for (size_t n = 0; n < actcnt; n++) {
+             const size_t t = actpos[n];
+             for (size_t y = 0; y < Y; y++) {
+                 const double e = (*alpha)[t][y] * (*beta)[t][y]
+                                * unorm[t];
+                 ugrd[y] += e;
+                 uhes[y] += e * (1.0 - e);
+             }
+             const size_t y = seq->pos[t].lbl;
+             ugrd[y] -= 1.0;
+         }
+     }
+     if ((mdl->kind[o] & 2) == 0)
+         return;
+     // and for bigram features
+     for (size_t n = 0; n < actcnt; n++) {
+         const size_t t = actpos[n];
+         if (t == 0)
+             continue;
+         for (size_t yp = 0, d = 0; yp < Y; yp++) {
+             for (size_t y = 0; y < Y; y++, d++) {
+                 double e = (*alpha)[t - 1][yp] * (*beta)[t][y]
+                          * (*psi)[t][yp][y] * bnorm[t];
+                 bgrd[d] += e;
+                 bhes[d] += e * (1.0 - e);
+             }
+         }
+         const size_t yp = seq->pos[t - 1].lbl;
+         const size_t y  = seq->pos[t    ].lbl;
+         bgrd[yp * Y + y] -= 1.0;
+     }
+ }
+
+ /* bcd_spgradhes:
+  *   Update the gradient and hessian for <blk> on sequence <seq>. This one is
+  *   very similar to the trn_spupgrad function but does the computation only
+  *   at active positions and also approximates the hessian.
+  */
+ static void bcd_spgradhes(mdl_t *mdl, bcd_t *bcd, const seq_t *seq, size_t o) {
+     const grd_t *grd = bcd->grd;
+     const size_t Y = mdl->nlbl;
+     const size_t T = seq->len;
+     const double (*psiuni)[T][Y] = (void *)grd->psiuni;
+     const double *psival  = grd->psi;
+     const size_t *psiyp   = grd->psiyp;
+     const size_t (*psiidx)[T][Y] = (void *)grd->psiidx;
+     const size_t *psioff  = grd->psioff;
+     const double (*alpha)[T][Y] = (void *)grd->alpha;
+     const double (*beta )[T][Y] = (void *)grd->beta;
+     const double *unorm  = grd->unorm;
+     const double *bnorm  = grd->bnorm;
+     const size_t *actpos = bcd->actpos;
+     const size_t  actcnt = bcd->actcnt;
+     double *ugrd = bcd->ugrd;
+     double *uhes = bcd->uhes;
+     double *bgrd = bcd->bgrd;
+     double *bhes = bcd->bhes;
+     // Update the gradient and the hessian, but here we sum only over the
+     // positions where the block is active; for unigram features
+     if (mdl->kind[o] & 1) {
+         for (size_t n = 0; n < actcnt; n++) {
+             const size_t t = actpos[n];
+             for (size_t y = 0; y < Y; y++) {
+                 const double e = (*alpha)[t][y] * (*beta)[t][y]
+                                * unorm[t];
+                 ugrd[y] += e;
+                 uhes[y] += e * (1.0 - e);
+             }
+             const size_t y = seq->pos[t].lbl;
+             ugrd[y] -= 1.0;
+         }
+     }
+     if ((mdl->kind[o] & 2) == 0)
+         return;
+     // and for bigram features
+     for (size_t n = 0; n < actcnt; n++) {
+         const size_t t = actpos[n];
+         if (t == 0)
+             continue;
+         // We build the expectation matrix
+         double e[Y][Y];
+         for (size_t yp = 0; yp < Y; yp++)
+             for (size_t y = 0; y < Y; y++)
+                 e[yp][y] = (*alpha)[t - 1][yp] * (*beta)[t][y]
+                          * (*psiuni)[t][y] * bnorm[t];
+         const size_t off = psioff[t];
+         for (size_t n = 0, y = 0; n < (*psiidx)[t][Y - 1]; ) {
+             while (n >= (*psiidx)[t][y])
+                 y++;
+             while (n < (*psiidx)[t][y]) {
+                 const size_t yp = psiyp [off + n];
+                 const double v  = psival[off + n];
+                 e[yp][y] += e[yp][y] * v;
+                 n++;
+             }
+         }
+         // And use it
+         for (size_t yp = 0, d = 0; yp < Y; yp++) {
+             for (size_t y = 0; y < Y; y++, d++) {
+                 bgrd[d] += e[yp][y];
+                 bhes[d] += e[yp][y] * (1.0 - e[yp][y]);
+             }
+         }
+         const size_t yp = seq->pos[t - 1].lbl;
+         const size_t y  = seq->pos[t    ].lbl;
+         bgrd[yp * Y + y] -= 1.0;
+     }
+ }
+
+ /* bcd_update:
+  *   Update the model with the computed gradient and hessian.
+  */
+ static void bcd_update(mdl_t *mdl, bcd_t *bcd, size_t o) {
+     const double rho1  = mdl->opt->rho1;
+     const double rho2  = mdl->opt->rho2;
+     const double kappa = mdl->opt->bcd.kappa;
+     const size_t Y = mdl->nlbl;
+     const double *ugrd = bcd->ugrd;
+     const double *bgrd = bcd->bgrd;
+     double *uhes = bcd->uhes;
+     double *bhes = bcd->bhes;
+     if (mdl->kind[o] & 1) {
+         // Adjust the hessian
+         double a = 1.0;
+         for (size_t y = 0; y < Y; y++)
+             a = max(a, fabs(ugrd[y] / uhes[y]));
+         xvm_scale(uhes, uhes, a * kappa, Y);
+         // Update the model
+         double *w = mdl->theta + mdl->uoff[o];
+         for (size_t y = 0; y < Y; y++) {
+             double z = uhes[y] * w[y] - ugrd[y];
+             double d = uhes[y] + rho2;
+             w[y] = bcd_soft(z, rho1) / d;
+         }
+     }
+     if (mdl->kind[o] & 2) {
+         // Adjust the hessian
+         double a = 1.0;
+         for (size_t i = 0; i < Y * Y; i++)
+             a = max(a, fabs(bgrd[i] / bhes[i]));
+         xvm_scale(bhes, bhes, a * kappa, Y * Y);
+         // Update the model
+         double *bw = mdl->theta + mdl->boff[o];
+         for (size_t i = 0; i < Y * Y; i++) {
+             double z = bhes[i] * bw[i] - bgrd[i];
+             double d = bhes[i] + rho2;
+             bw[i] = bcd_soft(z, rho1) / d;
+         }
+     }
+ }
+
+ /* trn_bcd:
+  *   Train the model using the blockwise coordinate descent method.
+  */
+ void trn_bcd(mdl_t *mdl) {
+     const size_t Y = mdl->nlbl;
+     const size_t O = mdl->nobs;
+     const size_t T = mdl->train->mlen;
+     const size_t S = mdl->train->nseq;
+     const int    K = mdl->opt->maxiter;
+     // Build the index:
+     //   Count active sequences per block
+     info(" - Build the index\n");
+     info("   1/2 -- scan the sequences\n");
+     size_t tot = 0, cnt[O], lcl[O];
+     for (size_t o = 0; o < O; o++)
+         cnt[o] = 0, lcl[o] = none;
+     for (size_t s = 0; s < S; s++) {
+         // List active blocks
+         const seq_t *seq = mdl->train->seq[s];
+         for (int t = 0; t < seq->len; t++) {
+             for (size_t b = 0; b < seq->pos[t].ucnt; b++)
+                 lcl[seq->pos[t].uobs[b]] = s;
+             for (size_t b = 0; b < seq->pos[t].bcnt; b++)
+                 lcl[seq->pos[t].bobs[b]] = s;
+         }
+         // Update block counts
+         for (size_t o = 0; o < O; o++)
+             cnt[o] += (lcl[o] == s);
+     }
+     for (size_t o = 0; o < O; o++)
+         tot += cnt[o];
+     // Allocate memory
+     size_t  *idx_cnt = xmalloc(sizeof(size_t  ) * O);
+     size_t **idx_lst = xmalloc(sizeof(size_t *) * O);
+     for (size_t o = 0; o < O; o++) {
+         idx_cnt[o] = cnt[o];
+         idx_lst[o] = xmalloc(sizeof(size_t) * cnt[o]);
+     }
+     // Populate the index
+     info("   2/2 -- Populate the index\n");
+     for (size_t o = 0; o < O; o++)
+         cnt[o] = 0, lcl[o] = none;
+     for (size_t s = 0; s < S; s++) {
+         // List active blocks
+         const seq_t *seq = mdl->train->seq[s];
+         for (int t = 0; t < seq->len; t++) {
+             for (size_t b = 0; b < seq->pos[t].ucnt; b++)
+                 lcl[seq->pos[t].uobs[b]] = s;
+             for (size_t b = 0; b < seq->pos[t].bcnt; b++)
+                 lcl[seq->pos[t].bobs[b]] = s;
+         }
+         // Build index
+         for (size_t o = 0; o < O; o++)
+             if (lcl[o] == s)
+                 idx_lst[o][cnt[o]++] = s;
+     }
+     info("   Done\n");
+     // Allocate the specific trainer of BCD
+     bcd_t *bcd = xmalloc(sizeof(bcd_t));
+     bcd->ugrd   = xvm_new(Y);
+     bcd->uhes   = xvm_new(Y);
+     bcd->bgrd   = xvm_new(Y * Y);
+     bcd->bhes   = xvm_new(Y * Y);
+     bcd->actpos = xmalloc(sizeof(size_t) * T);
+     bcd->grd    = grd_new(mdl, NULL);
+     // And train the model
+     for (int i = 0; i < K; i++) {
+         for (size_t o = 0; o < O; o++) {
+             // Clear the gradient and the hessian
+             for (size_t y = 0, d = 0; y < Y; y++) {
+                 bcd->ugrd[y] = 0.0;
+                 bcd->uhes[y] = 0.0;
+                 for (size_t yp = 0; yp < Y; yp++, d++) {
+                     bcd->bgrd[d] = 0.0;
+                     bcd->bhes[d] = 0.0;
+                 }
+             }
+             // Process active sequences
+             for (size_t s = 0; s < idx_cnt[o]; s++) {
+                 const size_t id = idx_lst[o][s];
+                 const seq_t *seq = mdl->train->seq[id];
+                 bcd_actpos(mdl, bcd, seq, o);
+                 grd_check(bcd->grd, seq->len);
+                 if (mdl->opt->sparse) {
+                     grd_spdopsi(bcd->grd, seq);
+                     grd_spfwdbwd(bcd->grd, seq);
+                     bcd_spgradhes(mdl, bcd, seq, o);
+                 } else {
+                     grd_fldopsi(bcd->grd, seq);
+                     grd_flfwdbwd(bcd->grd, seq);
+                     bcd_flgradhes(mdl, bcd, seq, o);
+                 }
+             }
+             // And update the model
+             bcd_update(mdl, bcd, o);
+         }
+         if (!uit_progress(mdl, i + 1, -1.0))
+             break;
+     }
+     // Cleanup memory
+     grd_free(bcd->grd);
+     xvm_free(bcd->ugrd); xvm_free(bcd->uhes);
+     xvm_free(bcd->bgrd); xvm_free(bcd->bhes);
+     free(bcd->actpos);
+     free(bcd);
+     for (size_t o = 0; o < O; o++)
+         free(idx_lst[o]);
+     free(idx_lst);
+     free(idx_cnt);
+ }
+