opener-opinion-detector-basic 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +30 -0
- data/bin/opinion-detector-basic +19 -0
- data/bin/opinion-detector-basic-server +10 -0
- data/config.ru +4 -0
- data/core/opinion_detector_basic_multi.py +499 -0
- data/core/packages/KafNafParser-1.3.tar.gz +0 -0
- data/core/packages/VUA_pylib-1.4.tar.gz +0 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
- data/core/vendor/src/crfsuite/AUTHORS +1 -0
- data/core/vendor/src/crfsuite/COPYING +27 -0
- data/core/vendor/src/crfsuite/ChangeLog +103 -0
- data/core/vendor/src/crfsuite/INSTALL +236 -0
- data/core/vendor/src/crfsuite/Makefile.am +19 -0
- data/core/vendor/src/crfsuite/Makefile.in +783 -0
- data/core/vendor/src/crfsuite/README +183 -0
- data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
- data/core/vendor/src/crfsuite/autogen.sh +38 -0
- data/core/vendor/src/crfsuite/compile +143 -0
- data/core/vendor/src/crfsuite/config.guess +1502 -0
- data/core/vendor/src/crfsuite/config.h.in +198 -0
- data/core/vendor/src/crfsuite/config.sub +1714 -0
- data/core/vendor/src/crfsuite/configure +14273 -0
- data/core/vendor/src/crfsuite/configure.in +149 -0
- data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
- data/core/vendor/src/crfsuite/depcomp +630 -0
- data/core/vendor/src/crfsuite/example/chunking.py +49 -0
- data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
- data/core/vendor/src/crfsuite/example/ner.py +270 -0
- data/core/vendor/src/crfsuite/example/pos.py +78 -0
- data/core/vendor/src/crfsuite/example/template.py +88 -0
- data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
- data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
- data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
- data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
- data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
- data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
- data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
- data/core/vendor/src/crfsuite/frontend/main.c +137 -0
- data/core/vendor/src/crfsuite/frontend/option.c +93 -0
- data/core/vendor/src/crfsuite/frontend/option.h +86 -0
- data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
- data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
- data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
- data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
- data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
- data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
- data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
- data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
- data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
- data/core/vendor/src/crfsuite/include/os.h +61 -0
- data/core/vendor/src/crfsuite/install-sh +520 -0
- data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
- data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
- data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
- data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
- data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
- data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
- data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
- data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
- data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
- data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
- data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
- data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
- data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
- data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
- data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
- data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
- data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
- data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
- data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
- data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
- data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
- data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
- data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
- data/core/vendor/src/crfsuite/missing +376 -0
- data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
- data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
- data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
- data/core/vendor/src/crfsuite/swig/export.i +32 -0
- data/core/vendor/src/crfsuite/swig/python/README +92 -0
- data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
- data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
- data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
- data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
- data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
- data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
- data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
- data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
- data/core/vendor/src/liblbfgs/AUTHORS +1 -0
- data/core/vendor/src/liblbfgs/COPYING +22 -0
- data/core/vendor/src/liblbfgs/ChangeLog +120 -0
- data/core/vendor/src/liblbfgs/INSTALL +231 -0
- data/core/vendor/src/liblbfgs/Makefile.am +10 -0
- data/core/vendor/src/liblbfgs/Makefile.in +638 -0
- data/core/vendor/src/liblbfgs/NEWS +0 -0
- data/core/vendor/src/liblbfgs/README +71 -0
- data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
- data/core/vendor/src/liblbfgs/autogen.sh +38 -0
- data/core/vendor/src/liblbfgs/config.guess +1411 -0
- data/core/vendor/src/liblbfgs/config.h.in +64 -0
- data/core/vendor/src/liblbfgs/config.sub +1500 -0
- data/core/vendor/src/liblbfgs/configure +21146 -0
- data/core/vendor/src/liblbfgs/configure.in +107 -0
- data/core/vendor/src/liblbfgs/depcomp +522 -0
- data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
- data/core/vendor/src/liblbfgs/install-sh +322 -0
- data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
- data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
- data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
- data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
- data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
- data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
- data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
- data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
- data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
- data/core/vendor/src/liblbfgs/missing +353 -0
- data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
- data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
- data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
- data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
- data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
- data/core/vendor/src/svm_light/LICENSE.txt +59 -0
- data/core/vendor/src/svm_light/Makefile +105 -0
- data/core/vendor/src/svm_light/kernel.h +40 -0
- data/core/vendor/src/svm_light/svm_classify.c +197 -0
- data/core/vendor/src/svm_light/svm_common.c +985 -0
- data/core/vendor/src/svm_light/svm_common.h +301 -0
- data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
- data/core/vendor/src/svm_light/svm_learn.c +4147 -0
- data/core/vendor/src/svm_light/svm_learn.h +169 -0
- data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
- data/core/vendor/src/svm_light/svm_loqo.c +211 -0
- data/ext/hack/Rakefile +17 -0
- data/ext/hack/support.rb +88 -0
- data/lib/opener/opinion_detector_basic.rb +91 -0
- data/lib/opener/opinion_detector_basic/public/markdown.css +284 -0
- data/lib/opener/opinion_detector_basic/server.rb +16 -0
- data/lib/opener/opinion_detector_basic/version.rb +5 -0
- data/lib/opener/opinion_detector_basic/views/index.erb +97 -0
- data/lib/opener/opinion_detector_basic/views/result.erb +15 -0
- data/opener-opinion-detector-basic.gemspec +36 -0
- data/pre_build_requirements.txt +1 -0
- metadata +309 -0
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Online training with averaged perceptron.
|
|
3
|
+
*
|
|
4
|
+
* Copyright (c) 2007-2010, Naoaki Okazaki
|
|
5
|
+
* All rights reserved.
|
|
6
|
+
*
|
|
7
|
+
* Redistribution and use in source and binary forms, with or without
|
|
8
|
+
* modification, are permitted provided that the following conditions are met:
|
|
9
|
+
* * Redistributions of source code must retain the above copyright
|
|
10
|
+
* notice, this list of conditions and the following disclaimer.
|
|
11
|
+
* * Redistributions in binary form must reproduce the above copyright
|
|
12
|
+
* notice, this list of conditions and the following disclaimer in the
|
|
13
|
+
* documentation and/or other materials provided with the distribution.
|
|
14
|
+
* * Neither the names of the authors nor the names of its contributors
|
|
15
|
+
* may be used to endorse or promote products derived from this
|
|
16
|
+
* software without specific prior written permission.
|
|
17
|
+
*
|
|
18
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
19
|
+
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
20
|
+
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
21
|
+
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
|
22
|
+
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
23
|
+
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
24
|
+
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
25
|
+
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
26
|
+
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
27
|
+
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
28
|
+
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
/* $Id$ */
|
|
32
|
+
|
|
33
|
+
#ifdef HAVE_CONFIG_H
|
|
34
|
+
#include <config.h>
|
|
35
|
+
#endif/*HAVE_CONFIG_H*/
|
|
36
|
+
|
|
37
|
+
#include <os.h>
|
|
38
|
+
|
|
39
|
+
#include <stdio.h>
|
|
40
|
+
#include <stdlib.h>
|
|
41
|
+
#include <time.h>
|
|
42
|
+
|
|
43
|
+
#include <crfsuite.h>
|
|
44
|
+
#include "crfsuite_internal.h"
|
|
45
|
+
#include "logging.h"
|
|
46
|
+
#include "params.h"
|
|
47
|
+
#include "vecmath.h"
|
|
48
|
+
|
|
49
|
+
/**
 * Training parameters for the averaged perceptron
 * (configurable with crfsuite_params_t interface).
 */
typedef struct {
    int max_iterations;     /* Maximum number of training epochs (default: 100). */
    floatval_t epsilon;     /* Stopping criterion: terminate when the per-epoch
                               ratio of incorrectly predicted labels falls below
                               this value (default: 0, i.e. run all epochs). */
} training_option_t;
|
|
56
|
+
|
|
57
|
+
/**
 * Internal data structure for updating (averaging) feature weights.
 * Passed as the opaque callback argument to update_weights(): each fired
 * feature receives w[fid] += c * value and ws[fid] += cs * value.  The
 * caller sets cs to +/- the update counter so that the averaged weights
 * can later be recovered as wa = w - ws / c.
 */
typedef struct {
    floatval_t *w;      /* Current weight vector. */
    floatval_t *ws;     /* Accumulated updates scaled by the update counter
                           (used to compute the running average). */
    floatval_t c;       /* Coefficient applied to updates of w (+1 or -1). */
    floatval_t cs;      /* Coefficient applied to updates of ws (+/- counter). */
} update_data;
|
|
66
|
+
|
|
67
|
+
/*
 * Callback invoked for every feature on a path: applies the scaled update
 * to both the current weights (w) and the averaging accumulator (ws).
 * The instance pointer is an update_data supplied by the training loop.
 */
static void update_weights(void *instance, int fid, floatval_t value)
{
    update_data *data = (update_data*)instance;
    const floatval_t delta = data->c * value;
    const floatval_t delta_avg = data->cs * value;

    data->w[fid] += delta;
    data->ws[fid] += delta_avg;
}
|
|
73
|
+
|
|
74
|
+
/*
 * Returns the number of positions at which the two label sequences
 * x and y (each of length n) disagree.
 */
static int diff(int *x, int *y, int n)
{
    int pos, mismatches = 0;

    for (pos = 0; pos < n; ++pos) {
        mismatches += (x[pos] != y[pos]);
    }

    return mismatches;
}
|
|
84
|
+
|
|
85
|
+
/*
 * Exchanges training options between the parameter store and the
 * training_option_t structure; the transfer direction is selected by
 * `mode` (interpreted by the BEGIN_PARAM_MAP/DDX_PARAM_* macros in
 * params.h).  The init function calls this with opt == NULL and mode 0,
 * presumably to register the parameters and their defaults with the
 * store -- confirm against params.h.
 */
static int exchange_options(crfsuite_params_t* params, training_option_t* opt, int mode)
{
    BEGIN_PARAM_MAP(params, mode)
        DDX_PARAM_INT(
            "max_iterations", opt->max_iterations, 100,
            "The maximum number of iterations."
            )
        DDX_PARAM_FLOAT(
            "epsilon", opt->epsilon, 0.,
            "The stopping criterion (the ratio of incorrect label predictions)."
            )
    END_PARAM_MAP()

    return 0;
}
|
|
100
|
+
|
|
101
|
+
/*
 * Initialization hook for the averaged-perceptron trainer.  Invokes
 * exchange_options() with opt == NULL and mode 0, which (via the
 * parameter-map macros in params.h) exposes the trainer's options to
 * the crfsuite_params_t interface.
 */
void crfsuite_train_averaged_perceptron_init(crfsuite_params_t* params)
{
    exchange_options(params, NULL, 0);
}
|
|
105
|
+
|
|
106
|
+
int crfsuite_train_averaged_perceptron(
|
|
107
|
+
encoder_t *gm,
|
|
108
|
+
dataset_t *trainset,
|
|
109
|
+
dataset_t *testset,
|
|
110
|
+
crfsuite_params_t *params,
|
|
111
|
+
logging_t *lg,
|
|
112
|
+
floatval_t **ptr_w
|
|
113
|
+
)
|
|
114
|
+
{
|
|
115
|
+
int n, i, c, ret = 0;
|
|
116
|
+
int *viterbi = NULL;
|
|
117
|
+
floatval_t *w = NULL;
|
|
118
|
+
floatval_t *ws = NULL;
|
|
119
|
+
floatval_t *wa = NULL;
|
|
120
|
+
const int N = trainset->num_instances;
|
|
121
|
+
const int K = gm->num_features;
|
|
122
|
+
const int T = gm->cap_items;
|
|
123
|
+
training_option_t opt;
|
|
124
|
+
update_data ud;
|
|
125
|
+
clock_t begin = clock();
|
|
126
|
+
|
|
127
|
+
/* Initialize the variable. */
|
|
128
|
+
memset(&ud, 0, sizeof(ud));
|
|
129
|
+
|
|
130
|
+
/* Obtain parameter values. */
|
|
131
|
+
exchange_options(params, &opt, -1);
|
|
132
|
+
|
|
133
|
+
/* Allocate arrays. */
|
|
134
|
+
w = (floatval_t*)calloc(sizeof(floatval_t), K);
|
|
135
|
+
ws = (floatval_t*)calloc(sizeof(floatval_t), K);
|
|
136
|
+
wa = (floatval_t*)calloc(sizeof(floatval_t), K);
|
|
137
|
+
viterbi = (int*)calloc(sizeof(int), T);
|
|
138
|
+
if (w == NULL || ws == NULL || wa == NULL || viterbi == NULL) {
|
|
139
|
+
ret = CRFSUITEERR_OUTOFMEMORY;
|
|
140
|
+
goto error_exit;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
/* Show the parameters. */
|
|
144
|
+
logging(lg, "Averaged perceptron\n");
|
|
145
|
+
logging(lg, "max_iterations: %d\n", opt.max_iterations);
|
|
146
|
+
logging(lg, "epsilon: %f\n", opt.epsilon);
|
|
147
|
+
logging(lg, "\n");
|
|
148
|
+
|
|
149
|
+
c = 1;
|
|
150
|
+
ud.w = w;
|
|
151
|
+
ud.ws = ws;
|
|
152
|
+
|
|
153
|
+
/* Loop for epoch. */
|
|
154
|
+
for (i = 0;i < opt.max_iterations;++i) {
|
|
155
|
+
floatval_t norm = 0., loss = 0.;
|
|
156
|
+
clock_t iteration_begin = clock();
|
|
157
|
+
|
|
158
|
+
/* Shuffle the instances. */
|
|
159
|
+
dataset_shuffle(trainset);
|
|
160
|
+
|
|
161
|
+
/* Loop for each instance. */
|
|
162
|
+
for (n = 0;n < N;++n) {
|
|
163
|
+
int d = 0;
|
|
164
|
+
floatval_t score;
|
|
165
|
+
const crfsuite_instance_t *inst = dataset_get(trainset, n);
|
|
166
|
+
|
|
167
|
+
/* Set the feature weights to the encoder. */
|
|
168
|
+
gm->set_weights(gm, w, 1.);
|
|
169
|
+
gm->set_instance(gm, inst);
|
|
170
|
+
|
|
171
|
+
/* Tag the sequence with the current model. */
|
|
172
|
+
gm->viterbi(gm, viterbi, &score);
|
|
173
|
+
|
|
174
|
+
/* Compute the number of different labels. */
|
|
175
|
+
d = diff(inst->labels, viterbi, inst->num_items);
|
|
176
|
+
if (0 < d) {
|
|
177
|
+
/*
|
|
178
|
+
For every feature k on the correct path:
|
|
179
|
+
w[k] += 1; ws[k] += c;
|
|
180
|
+
*/
|
|
181
|
+
ud.c = 1;
|
|
182
|
+
ud.cs = c;
|
|
183
|
+
gm->features_on_path(gm, inst, inst->labels, update_weights, &ud);
|
|
184
|
+
|
|
185
|
+
/*
|
|
186
|
+
For every feature k on the Viterbi path:
|
|
187
|
+
w[k] -= 1; ws[k] -= c;
|
|
188
|
+
*/
|
|
189
|
+
ud.c = -1;
|
|
190
|
+
ud.cs = -c;
|
|
191
|
+
gm->features_on_path(gm, inst, viterbi, update_weights, &ud);
|
|
192
|
+
|
|
193
|
+
/* We define the loss as the ratio of wrongly predicted labels. */
|
|
194
|
+
loss += d / (floatval_t)inst->num_items;
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
++c;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
/* Perform averaging to wa. */
|
|
201
|
+
veccopy(wa, w, K);
|
|
202
|
+
vecasub(wa, 1./c, ws, K);
|
|
203
|
+
|
|
204
|
+
/* Output the progress. */
|
|
205
|
+
logging(lg, "***** Iteration #%d *****\n", i+1);
|
|
206
|
+
logging(lg, "Loss: %f\n", loss);
|
|
207
|
+
logging(lg, "Feature norm: %f\n", sqrt(vecdot(wa, wa, K)));
|
|
208
|
+
logging(lg, "Seconds required for this iteration: %.3f\n", (clock() - iteration_begin) / (double)CLOCKS_PER_SEC);
|
|
209
|
+
|
|
210
|
+
/* Holdout evaluation if necessary. */
|
|
211
|
+
if (testset != NULL) {
|
|
212
|
+
holdout_evaluation(gm, testset, wa, lg);
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
logging(lg, "\n");
|
|
216
|
+
|
|
217
|
+
/* Convergence test. */
|
|
218
|
+
if (loss / N < opt.epsilon) {
|
|
219
|
+
logging(lg, "Terminated with the stopping criterion\n");
|
|
220
|
+
logging(lg, "\n");
|
|
221
|
+
break;
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
logging(lg, "Total seconds required for training: %.3f\n", (clock() - begin) / (double)CLOCKS_PER_SEC);
|
|
226
|
+
logging(lg, "\n");
|
|
227
|
+
|
|
228
|
+
free(viterbi);
|
|
229
|
+
free(ws);
|
|
230
|
+
free(w);
|
|
231
|
+
*ptr_w = wa;
|
|
232
|
+
return ret;
|
|
233
|
+
|
|
234
|
+
error_exit:
|
|
235
|
+
free(viterbi);
|
|
236
|
+
free(wa);
|
|
237
|
+
free(ws);
|
|
238
|
+
free(w);
|
|
239
|
+
*ptr_w = NULL;
|
|
240
|
+
|
|
241
|
+
return ret;
|
|
242
|
+
}
|
|
@@ -0,0 +1,507 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Online training with L2-regularized Stochastic Gradient Descent (SGD).
|
|
3
|
+
*
|
|
4
|
+
* Copyright (c) 2007-2010, Naoaki Okazaki
|
|
5
|
+
* All rights reserved.
|
|
6
|
+
*
|
|
7
|
+
* Redistribution and use in source and binary forms, with or without
|
|
8
|
+
* modification, are permitted provided that the following conditions are met:
|
|
9
|
+
* * Redistributions of source code must retain the above copyright
|
|
10
|
+
* notice, this list of conditions and the following disclaimer.
|
|
11
|
+
* * Redistributions in binary form must reproduce the above copyright
|
|
12
|
+
* notice, this list of conditions and the following disclaimer in the
|
|
13
|
+
* documentation and/or other materials provided with the distribution.
|
|
14
|
+
* * Neither the names of the authors nor the names of its contributors
|
|
15
|
+
* may be used to endorse or promote products derived from this
|
|
16
|
+
* software without specific prior written permission.
|
|
17
|
+
*
|
|
18
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
19
|
+
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
20
|
+
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
21
|
+
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
|
22
|
+
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
23
|
+
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
24
|
+
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
25
|
+
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
26
|
+
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
27
|
+
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
28
|
+
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
/* $Id$ */
|
|
32
|
+
|
|
33
|
+
/*
|
|
34
|
+
SGD for L2-regularized MAP estimation.
|
|
35
|
+
|
|
36
|
+
The iterative algorithm is inspired by Pegasos:
|
|
37
|
+
|
|
38
|
+
Shai Shalev-Shwartz, Yoram Singer, and Nathan Srebro.
|
|
39
|
+
Pegasos: Primal Estimated sub-GrAdient SOlver for SVM.
|
|
40
|
+
In Proc. of ICML 2007, pp 807-814, 2007.
|
|
41
|
+
|
|
42
|
+
The calibration strategy is inspired by the implementation of sgd:
|
|
43
|
+
http://leon.bottou.org/projects/sgd
|
|
44
|
+
written by Léon Bottou.
|
|
45
|
+
|
|
46
|
+
The objective function to minimize is:
|
|
47
|
+
|
|
48
|
+
f(w) = (lambda/2) * ||w||^2 + (1/N) * \sum_i^N log P^i(y|x)
|
|
49
|
+
lambda = 2 * C / N
|
|
50
|
+
|
|
51
|
+
The original version of the Pegasos algorithm.
|
|
52
|
+
|
|
53
|
+
0) Initialization
|
|
54
|
+
t = t0
|
|
55
|
+
k = [the batch size]
|
|
56
|
+
1) Computing the learning rate (eta).
|
|
57
|
+
eta = 1 / (lambda * t)
|
|
58
|
+
2) Updating feature weights.
|
|
59
|
+
w = (1 - eta * lambda) w - (eta / k) \sum_i (oexp - mexp)
|
|
60
|
+
3) Projecting feature weights within an L2-ball.
|
|
61
|
+
w = min{1, (1/sqrt(lambda))/||w||} * w
|
|
62
|
+
4) Goto 1 until convergence.
|
|
63
|
+
|
|
64
|
+
This implementation omit the step 3) because it makes the source code
|
|
65
|
+
tricky (in order to maintain L2-norm of feature weights at any time) and
|
|
66
|
+
because the project step does not have a strong impact to the quality of
|
|
67
|
+
solution.
|
|
68
|
+
|
|
69
|
+
A naive implementation requires O(K) computations for steps 2,
|
|
70
|
+
where K is the total number of features. This code implements the procedure
|
|
71
|
+
in an efficient way:
|
|
72
|
+
|
|
73
|
+
0) Initialization
|
|
74
|
+
decay = 1
|
|
75
|
+
1) Computing various factors
|
|
76
|
+
eta = 1 / (lambda * t)
|
|
77
|
+
decay *= (1 - eta * lambda)
|
|
78
|
+
gain = (eta / k) / decay
|
|
79
|
+
2) Updating feature weights
|
|
80
|
+
Updating feature weights from observation expectation:
|
|
81
|
+
delta = gain * (1.0) * f(x,y)
|
|
82
|
+
w += delta
|
|
83
|
+
Updating feature weights from model expectation:
|
|
84
|
+
delta = gain * (-P(y|x)) * f(x,y)
|
|
85
|
+
w += delta
|
|
86
|
+
4) Goto 1 until convergence.
|
|
87
|
+
*/
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
#ifdef HAVE_CONFIG_H
|
|
91
|
+
#include <config.h>
|
|
92
|
+
#endif/*HAVE_CONFIG_H*/
|
|
93
|
+
|
|
94
|
+
#include <os.h>
|
|
95
|
+
|
|
96
|
+
#include <float.h>
|
|
97
|
+
#include <stdio.h>
|
|
98
|
+
#include <stdlib.h>
|
|
99
|
+
#include <string.h>
|
|
100
|
+
#include <time.h>
|
|
101
|
+
#include <math.h>
|
|
102
|
+
|
|
103
|
+
#include <crfsuite.h>
|
|
104
|
+
#include "crfsuite_internal.h"
|
|
105
|
+
|
|
106
|
+
#include "logging.h"
|
|
107
|
+
#include "params.h"
|
|
108
|
+
#include "crf1d.h"
|
|
109
|
+
#include "vecmath.h"
|
|
110
|
+
|
|
111
|
+
/* Minimum of two values; arguments may be evaluated twice. */
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/**
 * Training parameters for L2-regularized SGD
 * (configurable with crfsuite_params_t interface).
 */
typedef struct {
    floatval_t c2;                  /* L2 regularization coefficient (presumably
                                       the C in lambda = 2 * C / N from the file
                                       header -- confirm in exchange_options). */
    floatval_t lambda;              /* Regularization factor used in the update
                                       schedule eta = 1 / (lambda * (t0 + t)). */
    floatval_t t0;                  /* Offset of the learning-rate schedule,
                                       determined by calibration. */
    int max_iterations;             /* Maximum number of epochs. */
    int period;                     /* Window (in epochs) over which the loss
                                       improvement ratio is measured. */
    floatval_t delta;               /* NOTE(review): not referenced in the code
                                       visible in this chunk -- meaning unclear. */
    floatval_t calibration_eta;     /* Initial learning rate tried by calibration. */
    floatval_t calibration_rate;    /* Multiplicative step between candidate etas. */
    int calibration_samples;        /* Max number of instances used for calibration. */
    int calibration_candidates;     /* Number of acceptable candidates to evaluate. */
    int calibration_max_trials;     /* Upper bound on calibration trials. */
} training_option_t;
|
|
126
|
+
|
|
127
|
+
/*
 * Core SGD loop for L2-regularized training (see the file-header comment
 * for the efficient decay/gain formulation that avoids O(K) work per
 * instance).
 *
 * Parameters:
 *   gm          feature encoder (weights, instances, gradients).
 *   trainset    training instances.
 *   testset     optional holdout set evaluated each epoch (may be NULL).
 *   w           [in/out] weight vector of gm->num_features elements; zeroed
 *               on entry, holds the best epoch's weights on return (when not
 *               calibrating).
 *   lg          logger.
 *   N           number of training instances to use per epoch.
 *   t0          offset of the learning-rate schedule eta = 1/(lambda*(t0+t)).
 *   lambda      L2 regularization factor (lambda = 2 * C / N).
 *   num_epochs  maximum number of epochs.
 *   calibration non-zero when invoked from l2sgd_calibration(): suppresses
 *               shuffling, logging, best-weight tracking, and the stopping
 *               criterion.
 *   period      window (epochs) for the improvement-ratio stopping test.
 *   epsilon     stopping threshold on the improvement ratio.
 *   ptr_loss    [out] final objective value (may be NULL).
 *
 * Returns 0 on success, CRFSUITEERR_OUTOFMEMORY or CRFSUITEERR_OVERFLOW
 * on failure.
 */
static int l2sgd(
    encoder_t *gm,
    dataset_t *trainset,
    dataset_t *testset,
    floatval_t *w,
    logging_t *lg,
    const int N,
    const floatval_t t0,
    const floatval_t lambda,
    const int num_epochs,
    int calibration,
    int period,
    const floatval_t epsilon,
    floatval_t *ptr_loss
    )
{
    int i, epoch, ret = 0;
    floatval_t t = 0;                       /* Total number of updates so far. */
    floatval_t loss = 0, sum_loss = 0;
    floatval_t best_sum_loss = DBL_MAX;
    floatval_t eta, gain, decay = 1.;       /* decay tracks prod(1 - eta*lambda). */
    floatval_t improvement = 0.;
    floatval_t norm2 = 0.;
    floatval_t *pf = NULL;                  /* Ring buffer of the last `period` losses. */
    floatval_t *best_w = NULL;              /* Weights of the best epoch seen. */
    /* NOTE(review): clk_begin is assigned but never read in this function. */
    clock_t clk_prev, clk_begin = clock();
    const int K = gm->num_features;

    /* The loss history and best-weight buffers are only needed for real
       training, not for calibration runs. */
    if (!calibration) {
        pf = (floatval_t*)malloc(sizeof(floatval_t) * period);
        best_w = (floatval_t*)calloc(K, sizeof(floatval_t));
        if (pf == NULL || best_w == NULL) {
            ret = CRFSUITEERR_OUTOFMEMORY;
            goto error_exit;
        }
    }

    /* Initialize the feature weights. */
    vecset(w, 0, K);

    /* Loop for epochs. */
    for (epoch = 1;epoch <= num_epochs;++epoch) {
        clk_prev = clock();

        if (!calibration) {
            logging(lg, "***** Epoch #%d *****\n", epoch);
            /* Shuffle the training instances. */
            dataset_shuffle(trainset);
        }

        /* Loop for instances. */
        sum_loss = 0.;
        for (i = 0;i < N;++i) {
            const crfsuite_instance_t *inst = dataset_get(trainset, i);

            /* Update the learning rate and the lazy-scaling factors
               (see the header comment: gain compensates for decay so
               that w can be scaled only once per epoch). */
            eta = 1 / (lambda * (t0 + t));
            decay *= (1.0 - eta * lambda);
            gain = eta / decay;

            /* Compute the loss and gradients for the instance. */
            gm->set_weights(gm, w, decay);
            gm->set_instance(gm, inst);
            gm->objective_and_gradients(gm, &loss, w, gain);

            sum_loss += loss;
            ++t;
        }

        /* Terminate when the loss is abnormal (NaN, -Inf, +Inf). */
        if (!isfinite(loss)) {
            logging(lg, "ERROR: overflow loss\n");
            ret = CRFSUITEERR_OVERFLOW;
            sum_loss = loss;
            goto error_exit;
        }

        /* Fold the accumulated decay into the feature weights. */
        vecscale(w, decay, K);
        decay = 1.;

        /* Include the L2 norm of feature weights to the objective. */
        /* The factor N is necessary because lambda = 2 * C / N. */
        norm2 = vecdot(w, w, K);
        sum_loss += 0.5 * lambda * norm2 * N;

        /* One epoch finished. */
        if (!calibration) {
            /* Check if the current epoch is the best. */
            if (sum_loss < best_sum_loss) {
                /* Store the feature weights to best_w. */
                best_sum_loss = sum_loss;
                veccopy(best_w, w, K);
            }

            /* We don't test the stopping criterion while period < epoch. */
            if (period < epoch) {
                improvement = (pf[(epoch-1) % period] - sum_loss) / sum_loss;
            } else {
                improvement = epsilon;
            }

            /* Store the current value of the objective function. */
            pf[(epoch-1) % period] = sum_loss;

            logging(lg, "Loss: %f\n", sum_loss);
            if (period < epoch) {
                logging(lg, "Improvement ratio: %f\n", improvement);
            }
            logging(lg, "Feature L2-norm: %f\n", sqrt(norm2));
            logging(lg, "Learning rate (eta): %f\n", eta);
            logging(lg, "Total number of feature updates: %.0f\n", t);
            logging(lg, "Seconds required for this iteration: %.3f\n", (clock() - clk_prev) / (double)CLOCKS_PER_SEC);

            /* Holdout evaluation if necessary. */
            if (testset != NULL) {
                holdout_evaluation(gm, testset, w, lg);
            }
            logging(lg, "\n");

            /* Check for the stopping criterion. */
            if (improvement < epsilon) {
                ret = 0;
                break;
            }
        }
    }

    /* Output the optimization result. */
    if (!calibration) {
        if (ret == 0) {
            if (epoch < num_epochs) {
                logging(lg, "SGD terminated with the stopping criteria\n");
            } else {
                logging(lg, "SGD terminated with the maximum number of iterations\n");
            }
        } else {
            /* NOTE(review): ret != 0 jumps to error_exit above, so this
               branch appears unreachable as written. */
            logging(lg, "SGD terminated with error code (%d)\n", ret);
        }
    }

    /* Restore the best weights (best_w is NULL during calibration). */
    if (best_w != NULL) {
        sum_loss = best_sum_loss;
        veccopy(w, best_w, K);
    }

error_exit:
    free(best_w);
    free(pf);
    if (ptr_loss != NULL) {
        *ptr_loss = sum_loss;
    }
    return ret;
}
|
|
282
|
+
|
|
283
|
+
/*
 * Searches for a good initial learning rate (eta) by running single-epoch
 * SGD trials on a small sample of the data (strategy inspired by Leon
 * Bottou's sgd project, per the file header).  Starting from
 * calibration_eta, eta is repeatedly multiplied by calibration_rate while
 * trials improve on the initial loss; it then switches direction and
 * divides by the rate.  The best eta found is converted to the schedule
 * offset t0 = 1 / (lambda * eta), which is the return value.
 *
 * Side effects: shuffles ds and zeroes w (w is used as scratch space).
 */
static floatval_t
l2sgd_calibration(
    encoder_t *gm,
    dataset_t *ds,
    floatval_t *w,
    logging_t *lg,
    const training_option_t* opt
    )
{
    /* NOTE(review): `s` is declared but never used in this function. */
    int i, s;
    int dec = 0, ok, trials = 1;    /* dec: set once eta starts decreasing. */
    int num = opt->calibration_candidates;
    clock_t clk_begin = clock();
    floatval_t loss = 0.;
    floatval_t init_loss = 0.;
    floatval_t best_loss = DBL_MAX;
    floatval_t eta = opt->calibration_eta;
    floatval_t best_eta = opt->calibration_eta;
    const int N = ds->num_instances;
    const int S = MIN(N, opt->calibration_samples);  /* Sample size for trials. */
    const int K = gm->num_features;
    const floatval_t init_eta = opt->calibration_eta;
    const floatval_t rate = opt->calibration_rate;
    const floatval_t lambda = opt->lambda;

    logging(lg, "Calibrating the learning rate (eta)\n");
    logging(lg, "calibration.eta: %f\n", eta);
    logging(lg, "calibration.rate: %f\n", rate);
    logging(lg, "calibration.samples: %d\n", S);
    logging(lg, "calibration.candidates: %d\n", num);
    logging(lg, "calibration.max_trials: %d\n", opt->calibration_max_trials);

    /* Initialize a permutation that shuffles the instances. */
    dataset_shuffle(ds);

    /* Initialize feature weights as zero. */
    vecset(w, 0, K);

    /* Compute the loss of the zero-weight model over the sample:
       -log P(y|x) summed per instance, plus the L2 term. */
    gm->set_weights(gm, w, 1.);
    init_loss = 0;
    for (i = 0;i < S;++i) {
        floatval_t score;
        const crfsuite_instance_t *inst = dataset_get(ds, i);
        gm->set_instance(gm, inst);
        gm->score(gm, inst->labels, &score);
        init_loss -= score;
        gm->partition_factor(gm, &score);
        init_loss += score;
    }
    init_loss += 0.5 * lambda * vecdot(w, w, K) * N;
    logging(lg, "Initial loss: %f\n", init_loss);

    /* Try candidate etas until `num` acceptable ones have been seen in
       the decreasing phase. */
    while (num > 0 || !dec) {
        logging(lg, "Trial #%d (eta = %f): ", trials, eta);

        /* Perform SGD for one epoch (calibration mode: no shuffling,
           logging, or stopping test inside l2sgd). */
        l2sgd(
            gm,
            ds,
            NULL,
            w,
            lg,
            S, 1.0 / (lambda * eta), lambda, 1, 1, 1, 0., &loss);

        /* Make sure that the learning rate decreases the log-likelihood. */
        ok = isfinite(loss) && (loss < init_loss);
        if (ok) {
            logging(lg, "%f\n", loss);
            --num;
        } else {
            logging(lg, "%f (worse)\n", loss);
        }

        if (isfinite(loss) && loss < best_loss) {
            best_loss = loss;
            best_eta = eta;
        }

        if (!dec) {
            if (ok && 0 < num) {
                /* Still improving: keep increasing eta. */
                eta *= rate;
            } else {
                /* Switch to the decreasing phase, restarting just below
                   the initial eta with a fresh candidate budget. */
                dec = 1;
                num = opt->calibration_candidates;
                eta = init_eta / rate;
            }
        } else {
            eta /= rate;
        }

        ++trials;
        if (opt->calibration_max_trials <= trials) {
            break;
        }
    }

    eta = best_eta;
    logging(lg, "Best learning rate (eta): %f\n", eta);
    logging(lg, "Seconds required: %.3f\n", (clock() - clk_begin) / (double)CLOCKS_PER_SEC);
    logging(lg, "\n");

    /* Convert the best eta into the schedule offset t0. */
    return 1.0 / (lambda * eta);
}
|
|
387
|
+
|
|
388
|
+
/*
 * Exchanges the SGD training options between the CRFSuite parameter
 * object and the training_option_t structure.  The direction of the
 * exchange is controlled by |mode|, which is interpreted by the
 * BEGIN_PARAM_MAP/DDX_PARAM_* macros (NOTE(review): presumably mode < 0
 * copies parameter values into |opt| and other modes declare the
 * parameters with their defaults on |params| — confirm against the
 * macro definitions).  The third argument of each DDX_PARAM_* entry is
 * that option's default value; the last is its help text.
 *
 * @param params  CRFSuite parameter object.
 * @param opt     Training option structure (may be NULL in declare mode,
 *                as done by crfsuite_train_l2sgd_init()).
 * @param mode    Exchange direction selector.
 * @return 0 always.
 */
int exchange_options(crfsuite_params_t* params, training_option_t* opt, int mode)
{
    BEGIN_PARAM_MAP(params, mode)
        /* L2 regularization strength; lambda is later derived from this
           as 2 * c2 / N (see crfsuite_train_l2sgd). */
        DDX_PARAM_FLOAT(
            "c2", opt->c2, 1.,
            "Coefficient for L2 regularization."
            )
        DDX_PARAM_INT(
            "max_iterations", opt->max_iterations, 1000,
            "The maximum number of iterations (epochs) for SGD optimization."
            )
        DDX_PARAM_INT(
            "period", opt->period, 10,
            "The duration of iterations to test the stopping criterion."
            )
        DDX_PARAM_FLOAT(
            "delta", opt->delta, 1e-6,
            "The threshold for the stopping criterion; an optimization process stops when\n"
            "the improvement of the log likelihood over the last ${period} iterations is no\n"
            "greater than this threshold."
            )
        /* The calibration.* options drive l2sgd_calibration(). */
        DDX_PARAM_FLOAT(
            "calibration.eta", opt->calibration_eta, 0.1,
            "The initial value of learning rate (eta) used for calibration."
            )
        DDX_PARAM_FLOAT(
            "calibration.rate", opt->calibration_rate, 2.,
            "The rate of increase/decrease of learning rate for calibration."
            )
        DDX_PARAM_INT(
            "calibration.samples", opt->calibration_samples, 1000,
            "The number of instances used for calibration."
            )
        DDX_PARAM_INT(
            "calibration.candidates", opt->calibration_candidates, 10,
            "The number of candidates of learning rate."
            )
        DDX_PARAM_INT(
            "calibration.max_trials", opt->calibration_max_trials, 20,
            "The maximum number of trials of learning rates for calibration."
            )
    END_PARAM_MAP()

    return 0;
}
|
|
433
|
+
|
|
434
|
+
/*
 * Registers the SGD training options (with their default values) on the
 * given parameter object by calling exchange_options() with a NULL
 * option structure and mode 0.
 *
 * @param params  CRFSuite parameter object to populate.
 */
void crfsuite_train_l2sgd_init(crfsuite_params_t* params)
{
    exchange_options(params, NULL, 0);
}
|
|
438
|
+
|
|
439
|
+
int crfsuite_train_l2sgd(
|
|
440
|
+
encoder_t *gm,
|
|
441
|
+
dataset_t *trainset,
|
|
442
|
+
dataset_t *testset,
|
|
443
|
+
crfsuite_params_t *params,
|
|
444
|
+
logging_t *lg,
|
|
445
|
+
floatval_t **ptr_w
|
|
446
|
+
)
|
|
447
|
+
{
|
|
448
|
+
int ret = 0;
|
|
449
|
+
floatval_t *w = NULL;
|
|
450
|
+
clock_t clk_begin;
|
|
451
|
+
floatval_t loss = 0;
|
|
452
|
+
const int N = trainset->num_instances;
|
|
453
|
+
const int K = gm->num_features;
|
|
454
|
+
const int T = gm->cap_items;
|
|
455
|
+
training_option_t opt;
|
|
456
|
+
|
|
457
|
+
/* Obtain parameter values. */
|
|
458
|
+
exchange_options(params, &opt, -1);
|
|
459
|
+
|
|
460
|
+
/* Allocate arrays. */
|
|
461
|
+
w = (floatval_t*)calloc(sizeof(floatval_t), K);
|
|
462
|
+
if (w == NULL) {
|
|
463
|
+
ret = CRFSUITEERR_OUTOFMEMORY;
|
|
464
|
+
goto error_exit;
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
opt.lambda = 2. * opt.c2 / N;
|
|
468
|
+
|
|
469
|
+
logging(lg, "Stochastic Gradient Descent (SGD)\n");
|
|
470
|
+
logging(lg, "c2: %f\n", opt.c2);
|
|
471
|
+
logging(lg, "max_iterations: %d\n", opt.max_iterations);
|
|
472
|
+
logging(lg, "period: %d\n", opt.period);
|
|
473
|
+
logging(lg, "delta: %f\n", opt.delta);
|
|
474
|
+
logging(lg, "\n");
|
|
475
|
+
clk_begin = clock();
|
|
476
|
+
|
|
477
|
+
/* Calibrate the training rate (eta). */
|
|
478
|
+
opt.t0 = l2sgd_calibration(gm, trainset, w, lg, &opt);
|
|
479
|
+
|
|
480
|
+
/* Perform stochastic gradient descent. */
|
|
481
|
+
ret = l2sgd(
|
|
482
|
+
gm,
|
|
483
|
+
trainset,
|
|
484
|
+
testset,
|
|
485
|
+
w,
|
|
486
|
+
lg,
|
|
487
|
+
N,
|
|
488
|
+
opt.t0,
|
|
489
|
+
opt.lambda,
|
|
490
|
+
opt.max_iterations,
|
|
491
|
+
0,
|
|
492
|
+
opt.period,
|
|
493
|
+
opt.delta,
|
|
494
|
+
&loss
|
|
495
|
+
);
|
|
496
|
+
|
|
497
|
+
logging(lg, "Loss: %f\n", loss);
|
|
498
|
+
logging(lg, "Total seconds required for training: %.3f\n", (clock() - clk_begin) / (double)CLOCKS_PER_SEC);
|
|
499
|
+
logging(lg, "\n");
|
|
500
|
+
|
|
501
|
+
*ptr_w = w;
|
|
502
|
+
return ret;
|
|
503
|
+
|
|
504
|
+
error_exit:
|
|
505
|
+
free(w);
|
|
506
|
+
return ret;
|
|
507
|
+
}
|