opener-opinion-detector-basic 2.0.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (148)
  1. checksums.yaml +4 -4
  2. data/README.md +2 -0
  3. data/ext/hack/Rakefile +0 -2
  4. data/lib/opener/opinion_detector_basic/version.rb +1 -1
  5. data/opener-opinion-detector-basic.gemspec +0 -1
  6. data/task/compile.rake +1 -1
  7. data/task/requirements.rake +0 -1
  8. metadata +2 -142
  9. data/core/vendor/src/crfsuite/AUTHORS +0 -1
  10. data/core/vendor/src/crfsuite/COPYING +0 -27
  11. data/core/vendor/src/crfsuite/ChangeLog +0 -103
  12. data/core/vendor/src/crfsuite/INSTALL +0 -236
  13. data/core/vendor/src/crfsuite/Makefile.am +0 -19
  14. data/core/vendor/src/crfsuite/Makefile.in +0 -783
  15. data/core/vendor/src/crfsuite/README +0 -183
  16. data/core/vendor/src/crfsuite/aclocal.m4 +0 -9018
  17. data/core/vendor/src/crfsuite/autogen.sh +0 -38
  18. data/core/vendor/src/crfsuite/compile +0 -143
  19. data/core/vendor/src/crfsuite/config.guess +0 -1502
  20. data/core/vendor/src/crfsuite/config.h.in +0 -198
  21. data/core/vendor/src/crfsuite/config.sub +0 -1714
  22. data/core/vendor/src/crfsuite/configure +0 -14273
  23. data/core/vendor/src/crfsuite/configure.in +0 -149
  24. data/core/vendor/src/crfsuite/crfsuite.sln +0 -42
  25. data/core/vendor/src/crfsuite/depcomp +0 -630
  26. data/core/vendor/src/crfsuite/example/chunking.py +0 -49
  27. data/core/vendor/src/crfsuite/example/crfutils.py +0 -179
  28. data/core/vendor/src/crfsuite/example/ner.py +0 -270
  29. data/core/vendor/src/crfsuite/example/pos.py +0 -78
  30. data/core/vendor/src/crfsuite/example/template.py +0 -88
  31. data/core/vendor/src/crfsuite/frontend/Makefile.am +0 -29
  32. data/core/vendor/src/crfsuite/frontend/Makefile.in +0 -640
  33. data/core/vendor/src/crfsuite/frontend/dump.c +0 -116
  34. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +0 -129
  35. data/core/vendor/src/crfsuite/frontend/iwa.c +0 -273
  36. data/core/vendor/src/crfsuite/frontend/iwa.h +0 -65
  37. data/core/vendor/src/crfsuite/frontend/learn.c +0 -439
  38. data/core/vendor/src/crfsuite/frontend/main.c +0 -137
  39. data/core/vendor/src/crfsuite/frontend/option.c +0 -93
  40. data/core/vendor/src/crfsuite/frontend/option.h +0 -86
  41. data/core/vendor/src/crfsuite/frontend/readdata.h +0 -38
  42. data/core/vendor/src/crfsuite/frontend/reader.c +0 -136
  43. data/core/vendor/src/crfsuite/frontend/tag.c +0 -427
  44. data/core/vendor/src/crfsuite/genbinary.sh.in +0 -15
  45. data/core/vendor/src/crfsuite/include/Makefile.am +0 -11
  46. data/core/vendor/src/crfsuite/include/Makefile.in +0 -461
  47. data/core/vendor/src/crfsuite/include/crfsuite.h +0 -1063
  48. data/core/vendor/src/crfsuite/include/crfsuite.hpp +0 -555
  49. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +0 -400
  50. data/core/vendor/src/crfsuite/include/os.h +0 -61
  51. data/core/vendor/src/crfsuite/install-sh +0 -520
  52. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +0 -28
  53. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +0 -21
  54. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +0 -549
  55. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +0 -86
  56. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +0 -524
  57. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +0 -587
  58. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +0 -976
  59. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +0 -46
  60. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +0 -721
  61. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +0 -216
  62. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +0 -353
  63. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +0 -705
  64. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +0 -943
  65. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +0 -352
  66. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +0 -994
  67. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +0 -550
  68. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +0 -492
  69. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +0 -236
  70. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +0 -272
  71. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +0 -106
  72. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +0 -118
  73. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +0 -80
  74. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +0 -91
  75. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +0 -48
  76. data/core/vendor/src/crfsuite/lib/crf/src/params.c +0 -335
  77. data/core/vendor/src/crfsuite/lib/crf/src/params.h +0 -80
  78. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +0 -172
  79. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +0 -46
  80. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +0 -1107
  81. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +0 -160
  82. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +0 -408
  83. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +0 -242
  84. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +0 -507
  85. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +0 -338
  86. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +0 -435
  87. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +0 -341
  88. data/core/vendor/src/crfsuite/ltmain.sh +0 -8413
  89. data/core/vendor/src/crfsuite/missing +0 -376
  90. data/core/vendor/src/crfsuite/swig/Makefile.am +0 -13
  91. data/core/vendor/src/crfsuite/swig/Makefile.in +0 -365
  92. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +0 -2
  93. data/core/vendor/src/crfsuite/swig/export.i +0 -32
  94. data/core/vendor/src/crfsuite/swig/python/README +0 -92
  95. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +0 -329
  96. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +0 -14355
  97. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +0 -63
  98. data/core/vendor/src/crfsuite/swig/python/prepare.sh +0 -9
  99. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +0 -52
  100. data/core/vendor/src/crfsuite/swig/python/sample_train.py +0 -68
  101. data/core/vendor/src/crfsuite/swig/python/setup.py +0 -44
  102. data/core/vendor/src/crfsuite/win32/stdint.h +0 -679
  103. data/core/vendor/src/liblbfgs/AUTHORS +0 -1
  104. data/core/vendor/src/liblbfgs/COPYING +0 -22
  105. data/core/vendor/src/liblbfgs/ChangeLog +0 -120
  106. data/core/vendor/src/liblbfgs/INSTALL +0 -231
  107. data/core/vendor/src/liblbfgs/Makefile.am +0 -10
  108. data/core/vendor/src/liblbfgs/Makefile.in +0 -638
  109. data/core/vendor/src/liblbfgs/NEWS +0 -0
  110. data/core/vendor/src/liblbfgs/README +0 -71
  111. data/core/vendor/src/liblbfgs/aclocal.m4 +0 -6985
  112. data/core/vendor/src/liblbfgs/autogen.sh +0 -38
  113. data/core/vendor/src/liblbfgs/config.guess +0 -1411
  114. data/core/vendor/src/liblbfgs/config.h.in +0 -64
  115. data/core/vendor/src/liblbfgs/config.sub +0 -1500
  116. data/core/vendor/src/liblbfgs/configure +0 -21146
  117. data/core/vendor/src/liblbfgs/configure.in +0 -107
  118. data/core/vendor/src/liblbfgs/depcomp +0 -522
  119. data/core/vendor/src/liblbfgs/include/lbfgs.h +0 -745
  120. data/core/vendor/src/liblbfgs/install-sh +0 -322
  121. data/core/vendor/src/liblbfgs/lbfgs.sln +0 -26
  122. data/core/vendor/src/liblbfgs/lib/Makefile.am +0 -24
  123. data/core/vendor/src/liblbfgs/lib/Makefile.in +0 -499
  124. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +0 -133
  125. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +0 -294
  126. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +0 -298
  127. data/core/vendor/src/liblbfgs/lib/lbfgs.c +0 -1371
  128. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +0 -95
  129. data/core/vendor/src/liblbfgs/ltmain.sh +0 -6426
  130. data/core/vendor/src/liblbfgs/missing +0 -353
  131. data/core/vendor/src/liblbfgs/sample/Makefile.am +0 -15
  132. data/core/vendor/src/liblbfgs/sample/Makefile.in +0 -433
  133. data/core/vendor/src/liblbfgs/sample/sample.c +0 -81
  134. data/core/vendor/src/liblbfgs/sample/sample.cpp +0 -126
  135. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +0 -105
  136. data/core/vendor/src/svm_light/LICENSE.txt +0 -59
  137. data/core/vendor/src/svm_light/Makefile +0 -105
  138. data/core/vendor/src/svm_light/kernel.h +0 -40
  139. data/core/vendor/src/svm_light/svm_classify.c +0 -197
  140. data/core/vendor/src/svm_light/svm_common.c +0 -985
  141. data/core/vendor/src/svm_light/svm_common.h +0 -301
  142. data/core/vendor/src/svm_light/svm_hideo.c +0 -1062
  143. data/core/vendor/src/svm_light/svm_learn.c +0 -4147
  144. data/core/vendor/src/svm_light/svm_learn.h +0 -169
  145. data/core/vendor/src/svm_light/svm_learn_main.c +0 -397
  146. data/core/vendor/src/svm_light/svm_loqo.c +0 -211
  147. data/task/c.rake +0 -36
  148. data/task/submodules.rake +0 -5
@@ -1,435 +0,0 @@
1
- /*
2
- * Online training with Passive Aggressive.
3
- *
4
- * Copyright (c) 2007-2010, Naoaki Okazaki
5
- * All rights reserved.
6
- *
7
- * Redistribution and use in source and binary forms, with or without
8
- * modification, are permitted provided that the following conditions are met:
9
- * * Redistributions of source code must retain the above copyright
10
- * notice, this list of conditions and the following disclaimer.
11
- * * Redistributions in binary form must reproduce the above copyright
12
- * notice, this list of conditions and the following disclaimer in the
13
- * documentation and/or other materials provided with the distribution.
14
- * * Neither the names of the authors nor the names of its contributors
15
- * may be used to endorse or promote products derived from this
16
- * software without specific prior written permission.
17
- *
18
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22
- * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
- */
30
-
31
- /* $Id$ */
32
-
33
- #ifdef HAVE_CONFIG_H
34
- #include <config.h>
35
- #endif/*HAVE_CONFIG_H*/
36
-
37
- #include <os.h>
38
-
39
- #include <stdio.h>
40
- #include <stdlib.h>
41
- #include <time.h>
42
-
43
- #include <crfsuite.h>
44
- #include "crfsuite_internal.h"
45
- #include "logging.h"
46
- #include "params.h"
47
- #include "vecmath.h"
48
-
49
- #define MIN(a, b) ((a) < (b) ? (a) : (b))
50
-
51
- /**
52
- * Training parameters (configurable with crfsuite_params_t interface).
53
- */
54
- typedef struct {
55
- int type;
56
- floatval_t c;
57
- int error_sensitive;
58
- int averaging;
59
- int max_iterations;
60
- floatval_t epsilon;
61
- } training_option_t;
62
-
63
- /**
64
- * Internal data structure for computing the sparse vector F(x, y) - F(x, y').
65
- */
66
- typedef struct {
67
- /* An array of feature indices relevant to the instance. */
68
- int *actives;
69
- int num_actives;
70
- int cap_actives;
71
- char *used;
72
-
73
- /* Coefficient for collecting feature weights. */
74
- floatval_t c;
75
- /* The difference vector [K]. */
76
- floatval_t *delta;
77
- /* The number of features. */
78
- int K;
79
- } delta_t;
80
-
81
- static int delta_init(delta_t *dc, const int K)
82
- {
83
- memset(dc, 0, sizeof(*dc));
84
- dc->used = (char*)calloc(K, sizeof(char));
85
- dc->delta = (floatval_t*)calloc(K, sizeof(floatval_t));
86
- dc->K = K;
87
- if (dc->delta == NULL || dc->used == NULL) {
88
- return 1;
89
- }
90
- return 0;
91
- }
92
-
93
- static void delta_finish(delta_t *dc)
94
- {
95
- free(dc->actives);
96
- free(dc->used);
97
- free(dc->delta);
98
- memset(dc, 0, sizeof(*dc));
99
- }
100
-
101
- static void delta_reset(delta_t *dc)
102
- {
103
- int i;
104
- for (i = 0;i < dc->num_actives;++i) {
105
- int k = dc->actives[i];
106
- dc->delta[k] = 0;
107
- }
108
- dc->num_actives = 0;
109
- }
110
-
111
- static void delta_collect(void *instance, int fid, floatval_t value)
112
- {
113
- delta_t *dc = (delta_t*)instance;
114
-
115
- /* Expand the active feature list if necessary. */
116
- if (dc->cap_actives <= dc->num_actives) {
117
- ++dc->cap_actives;
118
- dc->cap_actives *= 2;
119
- dc->actives = (int*)realloc(dc->actives, sizeof(int) * dc->cap_actives);
120
- }
121
-
122
- dc->actives[dc->num_actives++] = fid;
123
- dc->delta[fid] += dc->c * value;
124
- }
125
-
126
- static void delta_finalize(delta_t *dc)
127
- {
128
- int i, j = 0, k;
129
-
130
- /* Collapse the duplicated indices. */
131
- for (i = 0;i < dc->num_actives;++i) {
132
- k = dc->actives[i];
133
- if (!dc->used[k]) {
134
- dc->actives[j++] = k;
135
- dc->used[k] = 1;
136
- }
137
- }
138
- dc->num_actives = j; /* This is the distinct number of indices. */
139
-
140
- /* Reset the used flag. */
141
- for (i = 0;i < dc->num_actives;++i) {
142
- k = dc->actives[i];
143
- dc->used[k] = 0;
144
- }
145
- }
146
-
147
- static floatval_t delta_norm2(delta_t *dc)
148
- {
149
- int i;
150
- floatval_t norm2 = 0.;
151
-
152
- for (i = 0;i < dc->num_actives;++i) {
153
- int k = dc->actives[i];
154
- norm2 += dc->delta[k] * dc->delta[k];
155
- }
156
- return norm2;
157
- }
158
-
159
- static void delta_add(delta_t *dc, floatval_t *w, floatval_t *ws, const floatval_t tau, const floatval_t u)
160
- {
161
- int i;
162
- const floatval_t tauu = tau * u;
163
-
164
- for (i = 0;i < dc->num_actives;++i) {
165
- int k = dc->actives[i];
166
- w[k] += tau * dc->delta[k];
167
- ws[k] += tauu * dc->delta[k];
168
- }
169
- }
170
-
171
- static int diff(int *x, int *y, int n)
172
- {
173
- int i, d = 0;
174
- for (i = 0;i < n;++i) {
175
- if (x[i] != y[i]) {
176
- ++d;
177
- }
178
- }
179
- return d;
180
- }
181
-
182
- static floatval_t cost_insensitive(floatval_t err, floatval_t d)
183
- {
184
- return err + 1.;
185
- }
186
-
187
- static floatval_t cost_sensitive(floatval_t err, floatval_t d)
188
- {
189
- return err + sqrt(d);
190
- }
191
-
192
- static floatval_t tau0(floatval_t cost, floatval_t norm, floatval_t c)
193
- {
194
- return cost / norm;
195
- }
196
-
197
- static floatval_t tau1(floatval_t cost, floatval_t norm, floatval_t c)
198
- {
199
- return MIN(c, cost / norm);
200
- }
201
-
202
- static floatval_t tau2(floatval_t cost, floatval_t norm, floatval_t c)
203
- {
204
- return cost / (norm + 0.5 / c);
205
- }
206
-
207
- static int exchange_options(crfsuite_params_t* params, training_option_t* opt, int mode)
208
- {
209
- BEGIN_PARAM_MAP(params, mode)
210
- DDX_PARAM_INT(
211
- "type", opt->type, 1,
212
- "The strategy for updating feature weights: {\n"
213
- " 0: PA without slack variables,\n"
214
- " 1: PA type I,\n"
215
- " 2: PA type II\n"
216
- "}.\n"
217
- )
218
- DDX_PARAM_FLOAT(
219
- "c", opt->c, 1.,
220
- "The aggressiveness parameter."
221
- )
222
- DDX_PARAM_INT(
223
- "error_sensitive", opt->error_sensitive, 1,
224
- "Consider the number of incorrect labels to the cost function."
225
- )
226
- DDX_PARAM_INT(
227
- "averaging", opt->averaging, 1,
228
- "Compute the average of feature weights (similarly to Averaged Perceptron)."
229
- )
230
- DDX_PARAM_INT(
231
- "max_iterations", opt->max_iterations, 100,
232
- "The maximum number of iterations."
233
- )
234
- DDX_PARAM_FLOAT(
235
- "epsilon", opt->epsilon, 0.,
236
- "The stopping criterion (the mean loss)."
237
- )
238
- END_PARAM_MAP()
239
-
240
- return 0;
241
- }
242
-
243
- void crfsuite_train_passive_aggressive_init(crfsuite_params_t* params)
244
- {
245
- exchange_options(params, NULL, 0);
246
- }
247
-
248
- int crfsuite_train_passive_aggressive(
249
- encoder_t *gm,
250
- dataset_t *trainset,
251
- dataset_t *testset,
252
- crfsuite_params_t *params,
253
- logging_t *lg,
254
- floatval_t **ptr_w
255
- )
256
- {
257
- int n, i, u, ret = 0;
258
- int *viterbi = NULL;
259
- floatval_t *w = NULL, *ws = NULL, *wa = NULL;
260
- const int N = trainset->num_instances;
261
- const int K = gm->num_features;
262
- const int T = gm->cap_items;
263
- training_option_t opt;
264
- delta_t dc;
265
- clock_t begin = clock();
266
- floatval_t (*cost_function)(floatval_t err, floatval_t d) = NULL;
267
- floatval_t (*tau_function)(floatval_t cost, floatval_t norm, floatval_t c) = NULL;
268
-
269
- /* Initialize the variable. */
270
- if (delta_init(&dc, K) != 0) {
271
- ret = CRFSUITEERR_OUTOFMEMORY;
272
- goto error_exit;
273
- }
274
-
275
- /* Obtain parameter values. */
276
- exchange_options(params, &opt, -1);
277
-
278
- /* Allocate arrays. */
279
- w = (floatval_t*)calloc(sizeof(floatval_t), K);
280
- ws = (floatval_t*)calloc(sizeof(floatval_t), K);
281
- wa = (floatval_t*)calloc(sizeof(floatval_t), K);
282
- viterbi = (int*)calloc(sizeof(int), T);
283
- if (w == NULL || ws == NULL || wa == NULL || viterbi == NULL) {
284
- ret = CRFSUITEERR_OUTOFMEMORY;
285
- goto error_exit;
286
- }
287
-
288
- /* Set the cost function for instances. */
289
- if (opt.error_sensitive) {
290
- cost_function = cost_sensitive;
291
- } else {
292
- cost_function = cost_insensitive;
293
- }
294
-
295
- /* Set the routine for computing tau (i.e., PA, PA-I, PA-II). */
296
- if (opt.type == 1) {
297
- tau_function = tau1;
298
- } else if (opt.type == 2) {
299
- tau_function = tau2;
300
- } else {
301
- tau_function = tau0;
302
- }
303
-
304
- /* Show the parameters. */
305
- logging(lg, "Passive Aggressive\n");
306
- logging(lg, "type: %d\n", opt.type);
307
- logging(lg, "c: %f\n", opt.c);
308
- logging(lg, "error_sensitive: %d\n", opt.error_sensitive);
309
- logging(lg, "averaging: %d\n", opt.averaging);
310
- logging(lg, "max_iterations: %d\n", opt.max_iterations);
311
- logging(lg, "epsilon: %f\n", opt.epsilon);
312
- logging(lg, "\n");
313
-
314
- u = 1;
315
-
316
- /* Loop for epoch. */
317
- for (i = 0;i < opt.max_iterations;++i) {
318
- floatval_t norm = 0., sum_loss = 0.;
319
- clock_t iteration_begin = clock();
320
-
321
- /* Shuffle the instances. */
322
- dataset_shuffle(trainset);
323
-
324
- /* Loop for each instance. */
325
- for (n = 0;n < N;++n) {
326
- int d = 0;
327
- floatval_t sv;
328
- const crfsuite_instance_t *inst = dataset_get(trainset, n);
329
-
330
- /* Set the feature weights to the encoder. */
331
- gm->set_weights(gm, w, 1.);
332
- gm->set_instance(gm, inst);
333
-
334
- /* Tag the sequence with the current model. */
335
- gm->viterbi(gm, viterbi, &sv);
336
-
337
- /* Compute the number of different labels. */
338
- d = diff(inst->labels, viterbi, inst->num_items);
339
- if (0 < d) {
340
- floatval_t sc, norm2;
341
- floatval_t tau, cost;
342
-
343
- /*
344
- Compute the cost of this instance.
345
- */
346
- gm->score(gm, inst->labels, &sc);
347
- cost = cost_function(sv - sc, (double)d);
348
-
349
- /* Initialize delta[k] = 0. */
350
- delta_reset(&dc);
351
-
352
- /*
353
- For every feature k on the correct path:
354
- delta[k] += 1;
355
- */
356
- dc.c = 1;
357
- gm->features_on_path(gm, inst, inst->labels, delta_collect, &dc);
358
-
359
- /*
360
- For every feature k on the Viterbi path:
361
- delta[k] -= 1;
362
- */
363
- dc.c = -1;
364
- gm->features_on_path(gm, inst, viterbi, delta_collect, &dc);
365
-
366
- delta_finalize(&dc);
367
-
368
- /*
369
- Compute tau (dpending on PA, PA-I, and PA-II).
370
- */
371
- norm2 = delta_norm2(&dc);
372
- tau = tau_function(cost, norm2, opt.c);
373
-
374
- /*
375
- Update the feature weights:
376
- w[k] += tau * delta[k]
377
- ws[k] += tau * u * delta[k]
378
- */
379
- delta_add(&dc, w, ws, tau, u);
380
-
381
- sum_loss += cost;
382
- }
383
- ++u;
384
- }
385
-
386
- if (opt.averaging) {
387
- /* Perform averaging to wa. */
388
- veccopy(wa, w, K);
389
- vecasub(wa, 1./u, ws, K);
390
- } else {
391
- /* Simply copy the weights to wa. */
392
- veccopy(wa, w, K);
393
- }
394
-
395
- /* Output the progress. */
396
- logging(lg, "***** Iteration #%d *****\n", i+1);
397
- logging(lg, "Loss: %f\n", sum_loss);
398
- logging(lg, "Feature norm: %f\n", sqrt(vecdot(w, w, K)));
399
- logging(lg, "Seconds required for this iteration: %.3f\n", (clock() - iteration_begin) / (double)CLOCKS_PER_SEC);
400
-
401
- /* Holdout evaluation if necessary. */
402
- if (testset != NULL) {
403
- holdout_evaluation(gm, testset, wa, lg);
404
- }
405
-
406
- logging(lg, "\n");
407
-
408
- /* Convergence test. */
409
- if (sum_loss / N < opt.epsilon) {
410
- logging(lg, "Terminated with the stopping criterion\n");
411
- logging(lg, "\n");
412
- break;
413
- }
414
- }
415
-
416
- logging(lg, "Total seconds required for training: %.3f\n", (clock() - begin) / (double)CLOCKS_PER_SEC);
417
- logging(lg, "\n");
418
-
419
- free(viterbi);
420
- free(ws);
421
- free(w);
422
- *ptr_w = wa;
423
- delta_finish(&dc);
424
- return ret;
425
-
426
- error_exit:
427
- free(viterbi);
428
- free(wa);
429
- free(ws);
430
- free(w);
431
- *ptr_w = NULL;
432
- delta_finish(&dc);
433
-
434
- return ret;
435
- }