opener-opinion-detector-basic 2.0.0 → 2.0.1

Files changed (148)
  1. checksums.yaml +4 -4
  2. data/README.md +2 -0
  3. data/ext/hack/Rakefile +0 -2
  4. data/lib/opener/opinion_detector_basic/version.rb +1 -1
  5. data/opener-opinion-detector-basic.gemspec +0 -1
  6. data/task/compile.rake +1 -1
  7. data/task/requirements.rake +0 -1
  8. metadata +2 -142
  9. data/core/vendor/src/crfsuite/AUTHORS +0 -1
  10. data/core/vendor/src/crfsuite/COPYING +0 -27
  11. data/core/vendor/src/crfsuite/ChangeLog +0 -103
  12. data/core/vendor/src/crfsuite/INSTALL +0 -236
  13. data/core/vendor/src/crfsuite/Makefile.am +0 -19
  14. data/core/vendor/src/crfsuite/Makefile.in +0 -783
  15. data/core/vendor/src/crfsuite/README +0 -183
  16. data/core/vendor/src/crfsuite/aclocal.m4 +0 -9018
  17. data/core/vendor/src/crfsuite/autogen.sh +0 -38
  18. data/core/vendor/src/crfsuite/compile +0 -143
  19. data/core/vendor/src/crfsuite/config.guess +0 -1502
  20. data/core/vendor/src/crfsuite/config.h.in +0 -198
  21. data/core/vendor/src/crfsuite/config.sub +0 -1714
  22. data/core/vendor/src/crfsuite/configure +0 -14273
  23. data/core/vendor/src/crfsuite/configure.in +0 -149
  24. data/core/vendor/src/crfsuite/crfsuite.sln +0 -42
  25. data/core/vendor/src/crfsuite/depcomp +0 -630
  26. data/core/vendor/src/crfsuite/example/chunking.py +0 -49
  27. data/core/vendor/src/crfsuite/example/crfutils.py +0 -179
  28. data/core/vendor/src/crfsuite/example/ner.py +0 -270
  29. data/core/vendor/src/crfsuite/example/pos.py +0 -78
  30. data/core/vendor/src/crfsuite/example/template.py +0 -88
  31. data/core/vendor/src/crfsuite/frontend/Makefile.am +0 -29
  32. data/core/vendor/src/crfsuite/frontend/Makefile.in +0 -640
  33. data/core/vendor/src/crfsuite/frontend/dump.c +0 -116
  34. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +0 -129
  35. data/core/vendor/src/crfsuite/frontend/iwa.c +0 -273
  36. data/core/vendor/src/crfsuite/frontend/iwa.h +0 -65
  37. data/core/vendor/src/crfsuite/frontend/learn.c +0 -439
  38. data/core/vendor/src/crfsuite/frontend/main.c +0 -137
  39. data/core/vendor/src/crfsuite/frontend/option.c +0 -93
  40. data/core/vendor/src/crfsuite/frontend/option.h +0 -86
  41. data/core/vendor/src/crfsuite/frontend/readdata.h +0 -38
  42. data/core/vendor/src/crfsuite/frontend/reader.c +0 -136
  43. data/core/vendor/src/crfsuite/frontend/tag.c +0 -427
  44. data/core/vendor/src/crfsuite/genbinary.sh.in +0 -15
  45. data/core/vendor/src/crfsuite/include/Makefile.am +0 -11
  46. data/core/vendor/src/crfsuite/include/Makefile.in +0 -461
  47. data/core/vendor/src/crfsuite/include/crfsuite.h +0 -1063
  48. data/core/vendor/src/crfsuite/include/crfsuite.hpp +0 -555
  49. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +0 -400
  50. data/core/vendor/src/crfsuite/include/os.h +0 -61
  51. data/core/vendor/src/crfsuite/install-sh +0 -520
  52. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +0 -28
  53. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +0 -21
  54. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +0 -549
  55. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +0 -86
  56. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +0 -524
  57. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +0 -587
  58. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +0 -976
  59. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +0 -46
  60. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +0 -721
  61. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +0 -216
  62. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +0 -353
  63. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +0 -705
  64. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +0 -943
  65. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +0 -352
  66. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +0 -994
  67. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +0 -550
  68. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +0 -492
  69. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +0 -236
  70. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +0 -272
  71. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +0 -106
  72. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +0 -118
  73. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +0 -80
  74. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +0 -91
  75. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +0 -48
  76. data/core/vendor/src/crfsuite/lib/crf/src/params.c +0 -335
  77. data/core/vendor/src/crfsuite/lib/crf/src/params.h +0 -80
  78. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +0 -172
  79. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +0 -46
  80. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +0 -1107
  81. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +0 -160
  82. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +0 -408
  83. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +0 -242
  84. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +0 -507
  85. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +0 -338
  86. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +0 -435
  87. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +0 -341
  88. data/core/vendor/src/crfsuite/ltmain.sh +0 -8413
  89. data/core/vendor/src/crfsuite/missing +0 -376
  90. data/core/vendor/src/crfsuite/swig/Makefile.am +0 -13
  91. data/core/vendor/src/crfsuite/swig/Makefile.in +0 -365
  92. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +0 -2
  93. data/core/vendor/src/crfsuite/swig/export.i +0 -32
  94. data/core/vendor/src/crfsuite/swig/python/README +0 -92
  95. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +0 -329
  96. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +0 -14355
  97. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +0 -63
  98. data/core/vendor/src/crfsuite/swig/python/prepare.sh +0 -9
  99. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +0 -52
  100. data/core/vendor/src/crfsuite/swig/python/sample_train.py +0 -68
  101. data/core/vendor/src/crfsuite/swig/python/setup.py +0 -44
  102. data/core/vendor/src/crfsuite/win32/stdint.h +0 -679
  103. data/core/vendor/src/liblbfgs/AUTHORS +0 -1
  104. data/core/vendor/src/liblbfgs/COPYING +0 -22
  105. data/core/vendor/src/liblbfgs/ChangeLog +0 -120
  106. data/core/vendor/src/liblbfgs/INSTALL +0 -231
  107. data/core/vendor/src/liblbfgs/Makefile.am +0 -10
  108. data/core/vendor/src/liblbfgs/Makefile.in +0 -638
  109. data/core/vendor/src/liblbfgs/NEWS +0 -0
  110. data/core/vendor/src/liblbfgs/README +0 -71
  111. data/core/vendor/src/liblbfgs/aclocal.m4 +0 -6985
  112. data/core/vendor/src/liblbfgs/autogen.sh +0 -38
  113. data/core/vendor/src/liblbfgs/config.guess +0 -1411
  114. data/core/vendor/src/liblbfgs/config.h.in +0 -64
  115. data/core/vendor/src/liblbfgs/config.sub +0 -1500
  116. data/core/vendor/src/liblbfgs/configure +0 -21146
  117. data/core/vendor/src/liblbfgs/configure.in +0 -107
  118. data/core/vendor/src/liblbfgs/depcomp +0 -522
  119. data/core/vendor/src/liblbfgs/include/lbfgs.h +0 -745
  120. data/core/vendor/src/liblbfgs/install-sh +0 -322
  121. data/core/vendor/src/liblbfgs/lbfgs.sln +0 -26
  122. data/core/vendor/src/liblbfgs/lib/Makefile.am +0 -24
  123. data/core/vendor/src/liblbfgs/lib/Makefile.in +0 -499
  124. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +0 -133
  125. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +0 -294
  126. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +0 -298
  127. data/core/vendor/src/liblbfgs/lib/lbfgs.c +0 -1371
  128. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +0 -95
  129. data/core/vendor/src/liblbfgs/ltmain.sh +0 -6426
  130. data/core/vendor/src/liblbfgs/missing +0 -353
  131. data/core/vendor/src/liblbfgs/sample/Makefile.am +0 -15
  132. data/core/vendor/src/liblbfgs/sample/Makefile.in +0 -433
  133. data/core/vendor/src/liblbfgs/sample/sample.c +0 -81
  134. data/core/vendor/src/liblbfgs/sample/sample.cpp +0 -126
  135. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +0 -105
  136. data/core/vendor/src/svm_light/LICENSE.txt +0 -59
  137. data/core/vendor/src/svm_light/Makefile +0 -105
  138. data/core/vendor/src/svm_light/kernel.h +0 -40
  139. data/core/vendor/src/svm_light/svm_classify.c +0 -197
  140. data/core/vendor/src/svm_light/svm_common.c +0 -985
  141. data/core/vendor/src/svm_light/svm_common.h +0 -301
  142. data/core/vendor/src/svm_light/svm_hideo.c +0 -1062
  143. data/core/vendor/src/svm_light/svm_learn.c +0 -4147
  144. data/core/vendor/src/svm_light/svm_learn.h +0 -169
  145. data/core/vendor/src/svm_light/svm_learn_main.c +0 -397
  146. data/core/vendor/src/svm_light/svm_loqo.c +0 -211
  147. data/task/c.rake +0 -36
  148. data/task/submodules.rake +0 -5
data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h
@@ -1,160 +0,0 @@
- /*
- * RumAVL - Threaded AVL Tree Implementation
- *
- * Copyright (c) 2005-2007 Jesse Long <jpl@unknown.za.net>
- * All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * 1. The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * 2. The origin of the Software must not be misrepresented; you must not
- * claim that you wrote the original Software.
- * 3. Altered source versions of the Software must be plainly marked as
- * such, and must not be misrepresented as being the original Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
- /*
- * Please see the `README' file, the documentation in the `doc' directory and
- * the `rumavl.c' source file for more information.
- */
-
- #ifndef RUMAVL_H
- #define RUMAVL_H 1
-
- #ifdef __cplusplus
- extern "C" {
- #endif
-
- #include <stddef.h> /* size_t */
-
-
-
-
- /*----------------------------------------------------------------------------
- * DATA TYPES
- *--------------------------------------------------------------------------*/
-
- /* Opaque context handle for the tree */
- typedef struct rumavl RUMAVL;
-
- /* Node type - used for iterating */
- typedef struct rumavl_node RUMAVL_NODE;
-
-
-
-
- /*----------------------------------------------------------------------------
- * FUNDEMENTAL FUNCTIONS
- *--------------------------------------------------------------------------*/
-
- /* Create a new RumAVL tree */
- RUMAVL *rumavl_new (size_t reclen,
- int (*cmp)(const void *, const void *, size_t, void *),
- void *(*alloc)(void *, size_t, void *),
- void *udata);
-
- /* Destroy a RumAVL tree */
- void rumavl_destroy (RUMAVL *tree);
-
- /* This function returns the size of each record in a tree */
- size_t rumavl_record_size (RUMAVL *tree);
-
- /* Get a pointer to the udata pointer */
- void **rumavl_udata (RUMAVL *tree);
-
- /* Insert a record into a tree, overwriting an existing record necessary */
- int rumavl_set (RUMAVL *tree, const void *record);
- /* Insert a record into a tree, never overwrites an existing record */
- int rumavl_insert (RUMAVL *tree, const void *record);
-
- /* Retrieve record from tree, or NULL */
- void *rumavl_find (RUMAVL *tree, const void *find);
-
- /* Remove record from tree */
- int rumavl_delete (RUMAVL *tree, const void *record);
-
-
-
-
- /*----------------------------------------------------------------------------
- * ITERATOR FUNCTIONS
- *--------------------------------------------------------------------------*/
-
- /* Get a pointer to the node containing a specific record */
- RUMAVL_NODE *rumavl_node_find (RUMAVL *tree, const void *find, void **record);
-
- /* Get the next node in sequence after a specific node, in a specific
- * direction, or get the first node on either end of a tree */
- RUMAVL_NODE *rumavl_node_next (RUMAVL *tree, RUMAVL_NODE *node, int dir,
- void **record);
- /* Possible directions */
- #define RUMAVL_DESC (-1)
- #define RUMAVL_ASC (+1)
-
- /* Get a record held by a specific node */
- void *rumavl_node_record (RUMAVL_NODE *node);
-
- /* Pass each record in a tree to a user defined callback function */
- extern int rumavl_foreach (RUMAVL *tree, int dir,
- int (*cbfn)(RUMAVL *, void *, void *), void *udata);
-
-
-
-
- /*----------------------------------------------------------------------------
- * CALLBACK FUNCTIONS
- *
- * Functions giving you more control over the actions of this library.
- *--------------------------------------------------------------------------*/
-
- int (**rumavl_owcb(RUMAVL *tree))(RUMAVL *, RUMAVL_NODE *, void *,
- const void *, void *);
- int (**rumavl_delcb(RUMAVL *tree))(RUMAVL *, RUMAVL_NODE *, void *, void *);
-
-
-
-
- /*----------------------------------------------------------------------------
- * MEMORY MANAGEMENT
- *
- * The rumavl_mem struct is used to define how a RUMAVL object allocates
- * and frees memory.
- *--------------------------------------------------------------------------*/
- void *(**rumavl_alloc(RUMAVL *tree))(void *ptr, size_t size, void *udata);
-
-
-
- /*----------------------------------------------------------------------------
- * ERROR CODES
- *
- * The functions returning int's will return these errors
- *--------------------------------------------------------------------------*/
-
- #define RUMAVL_ERR_INVAL (-1) /* Invalid argument */
- #define RUMAVL_ERR_NOMEM (-2) /* Insufficient memory */
- #define RUMAVL_ERR_NOENT (-3) /* Entry does not exist */
- #define RUMAVL_ERR_EORNG (-5) /* No nodes left in range */
- #define RUMAVL_ERR_EXIST (-6) /* Entry already exists */
-
- /* returns static string describing error number */
- extern const char *rumavl_strerror (int errno);
-
- #ifdef __cplusplus
- }
- #endif
-
- #endif /* ifndef RUMAVL_H */
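
For orientation, the removed rumavl.h declares a small record-oriented AVL-tree API: create a tree for fixed-size records, then set/insert/find/delete by record. The snippet below is a hedged sketch of how that API is driven, using only the declarations visible in the hunk above; the include path, the record layout, and the realloc-style allocator contract are assumptions for illustration, not something this gem documents.

#include <stdio.h>
#include <stdlib.h>
#include "rumavl.h"   /* include path assumed; the gem vendored it under lib/crf/src */

typedef struct { int key; double score; } rec_t;   /* illustrative record layout */

/* Order records by key; reclen and udata are unused in this sketch. */
static int rec_cmp(const void *a, const void *b, size_t reclen, void *udata)
{
    const rec_t *x = a, *y = b;
    (void)reclen; (void)udata;
    return (x->key > y->key) - (x->key < y->key);
}

/* Assumed realloc-style allocator: size == 0 frees, otherwise (re)allocates. */
static void *rec_alloc(void *ptr, size_t size, void *udata)
{
    (void)udata;
    if (size == 0) { free(ptr); return NULL; }
    return realloc(ptr, size);
}

int main(void)
{
    RUMAVL *tree = rumavl_new(sizeof(rec_t), rec_cmp, rec_alloc, NULL);
    rec_t r = { 42, 0.87 };

    rumavl_insert(tree, &r);                 /* never overwrites; rumavl_set would */

    rec_t probe = { 42, 0.0 };
    rec_t *hit = rumavl_find(tree, &probe);  /* NULL when the key is absent */
    if (hit != NULL) {
        printf("key %d -> %.2f\n", hit->key, hit->score);
    }

    rumavl_delete(tree, &probe);
    rumavl_destroy(tree);
    return 0;
}

Iteration would go through rumavl_node_find/rumavl_node_next with RUMAVL_ASC or RUMAVL_DESC, and the int-returning calls report the RUMAVL_ERR_* codes listed in the header.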
data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c
@@ -1,408 +0,0 @@
- /*
- * Online training with Adaptive Regularization of Weights (AROW).
- *
- * Copyright (c) 2007-2010, Naoaki Okazaki
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the names of the authors nor the names of its contributors
- * may be used to endorse or promote products derived from this
- * software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
- * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
- /* $Id$ */
-
- #ifdef HAVE_CONFIG_H
- #include <config.h>
- #endif/*HAVE_CONFIG_H*/
-
- #include <os.h>
-
- #include <stdio.h>
- #include <stdlib.h>
- #include <time.h>
-
- #include <crfsuite.h>
- #include "crfsuite_internal.h"
- #include "logging.h"
- #include "params.h"
- #include "vecmath.h"
-
- #define MIN(a, b) ((a) < (b) ? (a) : (b))
-
- /**
- * Training parameters (configurable with crfsuite_params_t interface).
- */
- typedef struct {
- floatval_t variance;
- floatval_t gamma;
- int max_iterations;
- floatval_t epsilon;
- } training_option_t;
-
- /**
- * Internal data structure for computing the sparse vector F(x, y) - F(x, y').
- */
- typedef struct {
- /* An array of feature indices relevant to the instance. */
- int *actives;
- int num_actives;
- int cap_actives;
- char *used;
-
- /* Coefficient for collecting feature weights. */
- floatval_t c;
- /* The difference vector [K]. */
- floatval_t *delta;
- /* The number of features. */
- int K;
- } delta_t;
-
- static int delta_init(delta_t *dc, const int K)
- {
- memset(dc, 0, sizeof(*dc));
- dc->used = (char*)calloc(K, sizeof(char));
- dc->delta = (floatval_t*)calloc(K, sizeof(floatval_t));
- dc->K = K;
- if (dc->delta == NULL || dc->used == NULL) {
- return 1;
- }
- return 0;
- }
-
- static void delta_finish(delta_t *dc)
- {
- free(dc->actives);
- free(dc->used);
- free(dc->delta);
- memset(dc, 0, sizeof(*dc));
- }
-
- static void delta_reset(delta_t *dc)
- {
- int i;
- for (i = 0;i < dc->num_actives;++i) {
- int k = dc->actives[i];
- dc->delta[k] = 0;
- }
- dc->num_actives = 0;
- }
-
- static void delta_collect(void *instance, int fid, floatval_t value)
- {
- delta_t *dc = (delta_t*)instance;
-
- /* Expand the active feature list if necessary. */
- if (dc->cap_actives <= dc->num_actives) {
- ++dc->cap_actives;
- dc->cap_actives *= 2;
- dc->actives = (int*)realloc(dc->actives, sizeof(int) * dc->cap_actives);
- }
-
- dc->actives[dc->num_actives++] = fid;
- dc->delta[fid] += dc->c * value;
- }
-
- static void delta_finalize(delta_t *dc)
- {
- int i, j = 0, k;
-
- /* Collapse the duplicated indices. */
- for (i = 0;i < dc->num_actives;++i) {
- k = dc->actives[i];
- if (!dc->used[k]) {
- dc->actives[j++] = k;
- dc->used[k] = 1;
- }
- }
- dc->num_actives = j; /* This is the distinct number of indices. */
-
- /* Reset the used flag. */
- for (i = 0;i < dc->num_actives;++i) {
- k = dc->actives[i];
- dc->used[k] = 0;
- }
- }
-
- static floatval_t delta_norm2(delta_t *dc)
- {
- int i;
- floatval_t norm2 = 0.;
-
- for (i = 0;i < dc->num_actives;++i) {
- int k = dc->actives[i];
- norm2 += dc->delta[k] * dc->delta[k];
- }
- return norm2;
- }
-
- static void delta_add(delta_t *dc, floatval_t *w, floatval_t *ws, const floatval_t tau, const floatval_t u)
- {
- int i;
- const floatval_t tauu = tau * u;
-
- for (i = 0;i < dc->num_actives;++i) {
- int k = dc->actives[i];
- w[k] += tau * dc->delta[k];
- ws[k] += tauu * dc->delta[k];
- }
- }
-
- static int diff(int *x, int *y, int n)
- {
- int i, d = 0;
- for (i = 0;i < n;++i) {
- if (x[i] != y[i]) {
- ++d;
- }
- }
- return d;
- }
-
- static floatval_t cost_insensitive(floatval_t err, floatval_t d)
- {
- return err + 1.;
- }
-
- static floatval_t cost_sensitive(floatval_t err, floatval_t d)
- {
- return err + sqrt(d);
- }
-
- static floatval_t tau0(floatval_t cost, floatval_t norm, floatval_t c)
- {
- return cost / norm;
- }
-
- static floatval_t tau1(floatval_t cost, floatval_t norm, floatval_t c)
- {
- return MIN(c, cost / norm);
- }
-
- static floatval_t tau2(floatval_t cost, floatval_t norm, floatval_t c)
- {
- return cost / (norm + 0.5 / c);
- }
-
- static int exchange_options(crfsuite_params_t* params, training_option_t* opt, int mode)
- {
- BEGIN_PARAM_MAP(params, mode)
- DDX_PARAM_FLOAT(
- "variance", opt->variance, 1.,
- "The initial variance of every feature weight."
- )
- DDX_PARAM_FLOAT(
- "gamma", opt->gamma, 1.,
- "Tradeoff parameter."
- )
- DDX_PARAM_INT(
- "max_iterations", opt->max_iterations, 100,
- "The maximum number of iterations."
- )
- DDX_PARAM_FLOAT(
- "epsilon", opt->epsilon, 0.,
- "The stopping criterion (the mean loss)."
- )
- END_PARAM_MAP()
-
- return 0;
- }
-
- void crfsuite_train_arow_init(crfsuite_params_t* params)
- {
- exchange_options(params, NULL, 0);
- }
-
- int crfsuite_train_arow(
- encoder_t *gm,
- dataset_t *trainset,
- dataset_t *testset,
- crfsuite_params_t *params,
- logging_t *lg,
- floatval_t **ptr_w
- )
- {
- int n, i, j, k, ret = 0;
- int *viterbi = NULL;
- floatval_t beta;
- floatval_t *mean = NULL, *cov = NULL, *prod = NULL;
- const int N = trainset->num_instances;
- const int K = gm->num_features;
- const int T = gm->cap_items;
- training_option_t opt;
- delta_t dc;
- clock_t begin = clock();
-
- /* Initialize the variable. */
- if (delta_init(&dc, K) != 0) {
- ret = CRFSUITEERR_OUTOFMEMORY;
- goto error_exit;
- }
-
- /* Obtain parameter values. */
- exchange_options(params, &opt, -1);
-
- /* Allocate arrays. */
- mean = (floatval_t*)calloc(sizeof(floatval_t), K);
- cov = (floatval_t*)calloc(sizeof(floatval_t), K);
- prod = (floatval_t*)calloc(sizeof(floatval_t), K);
- viterbi = (int*)calloc(sizeof(int), T);
- if (mean == NULL || cov == NULL || prod == NULL || viterbi == NULL) {
- ret = CRFSUITEERR_OUTOFMEMORY;
- goto error_exit;
- }
-
- /* Initialize the covariance vector (diagnal matrix). */
- vecset(cov, opt.variance, K);
-
- /* Show the parameters. */
- logging(lg, "Adaptive Regularization of Weights (AROW)\n");
- logging(lg, "variance: %f\n", opt.variance);
- logging(lg, "gamma: %f\n", opt.gamma);
- logging(lg, "max_iterations: %d\n", opt.max_iterations);
- logging(lg, "epsilon: %f\n", opt.epsilon);
- logging(lg, "\n");
-
- beta = 1.0 / opt.gamma;
-
- /* Loop for epoch. */
- for (i = 0;i < opt.max_iterations;++i) {
- floatval_t norm = 0., sum_loss = 0.;
- clock_t iteration_begin = clock();
-
- /* Shuffle the instances. */
- dataset_shuffle(trainset);
-
- /* Loop for each instance. */
- for (n = 0;n < N;++n) {
- int d = 0;
- floatval_t sv;
- const crfsuite_instance_t *inst = dataset_get(trainset, n);
-
- /* Set the feature weights to the encoder. */
- gm->set_weights(gm, mean, 1.);
- gm->set_instance(gm, inst);
-
- /* Tag the sequence with the current model. */
- gm->viterbi(gm, viterbi, &sv);
-
- /* Compute the number of different labels. */
- d = diff(inst->labels, viterbi, inst->num_items);
- if (0 < d) {
- floatval_t alpha, frac;
- floatval_t sc, norm2;
- floatval_t tau, cost;
-
- /*
- Compute the cost of this instance.
- */
- gm->score(gm, inst->labels, &sc);
- cost = sv - sc + (double)d;
-
- /* Initialize delta[k] = 0. */
- delta_reset(&dc);
-
- /*
- For every feature k on the correct path:
- delta[k] += 1;
- */
- dc.c = 1;
- gm->features_on_path(gm, inst, inst->labels, delta_collect, &dc);
-
- /*
- For every feature k on the Viterbi path:
- delta[k] -= 1;
- */
- dc.c = -1;
- gm->features_on_path(gm, inst, viterbi, delta_collect, &dc);
-
- delta_finalize(&dc);
-
- /* Compute prod[k] = delta[k] * delta[k]. */
- for (j = 0;j < dc.num_actives;++j) {
- k = dc.actives[j];
- prod[k] = dc.delta[k] * dc.delta[k];
- }
-
- /*
- Compute alpha.
- */
- frac = opt.gamma;
- for (j = 0;j < dc.num_actives;++j) {
- k = dc.actives[j];
- frac += prod[k] * cov[k];
- }
- alpha = cost / frac;
-
- /*
- Update.
- */
- for (j = 0;j < dc.num_actives;++j) {
- k = dc.actives[j];
- mean[k] += alpha * cov[k] * dc.delta[k];
- cov[k] = 1.0 / ((1.0 / cov[k]) + prod[k] / opt.gamma);
- }
-
- sum_loss += cost;
- }
- }
-
- /* Output the progress. */
- logging(lg, "***** Iteration #%d *****\n", i+1);
- logging(lg, "Loss: %f\n", sum_loss);
- logging(lg, "Feature norm: %f\n", sqrt(vecdot(mean, mean, K)));
- logging(lg, "Seconds required for this iteration: %.3f\n", (clock() - iteration_begin) / (double)CLOCKS_PER_SEC);
-
- /* Holdout evaluation if necessary. */
- if (testset != NULL) {
- holdout_evaluation(gm, testset, mean, lg);
- }
-
- logging(lg, "\n");
-
- /* Convergence test. */
- if (sum_loss / N <= opt.epsilon) {
- logging(lg, "Terminated with the stopping criterion\n");
- logging(lg, "\n");
- break;
- }
- }
-
- logging(lg, "Total seconds required for training: %.3f\n", (clock() - begin) / (double)CLOCKS_PER_SEC);
- logging(lg, "\n");
-
- free(viterbi);
- free(prod);
- free(cov);
- *ptr_w = mean;
- delta_finish(&dc);
- return ret;
-
- error_exit:
- free(viterbi);
- free(prod);
- free(cov);
- free(mean);
- *ptr_w = NULL;
- delta_finish(&dc);
-
- return ret;
- }
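
The heart of the removed trainer is the per-instance AROW update inside the epoch loop above. Pulled out of the CRFsuite plumbing, the arithmetic is just the following; this is a sketch with illustrative names, double standing in for floatval_t, and the caller assumed to have already built the active-feature difference vector.

/* delta holds F(x, y) - F(x, y') for the active feature indices,
 * cost is the margin-rescaled loss (sv - sc + d in the code above),
 * gamma is the tradeoff parameter, mean/cov are the per-feature
 * weight means and variances. */
void arow_update(double *mean, double *cov, const double *delta,
                 const int *actives, int num_actives,
                 double cost, double gamma)
{
    int j;
    double frac = gamma, alpha;

    /* alpha = cost / (gamma + sum_k delta[k]^2 * cov[k]) */
    for (j = 0; j < num_actives; ++j) {
        int k = actives[j];
        frac += delta[k] * delta[k] * cov[k];
    }
    alpha = cost / frac;

    /* mean[k] += alpha * cov[k] * delta[k];
     * cov[k]   = 1 / (1/cov[k] + delta[k]^2 / gamma)  -- variance only shrinks */
    for (j = 0; j < num_actives; ++j) {
        int k = actives[j];
        mean[k] += alpha * cov[k] * delta[k];
        cov[k] = 1.0 / ((1.0 / cov[k]) + (delta[k] * delta[k]) / gamma);
    }
}

Each per-feature confidence starts at the configured variance and can only shrink, so frequently updated features receive progressively smaller, more conservative corrections; that adaptivity is what distinguishes AROW from a plain passive-aggressive step.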