opener-opinion-detector-basic 2.0.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (148) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -0
  3. data/ext/hack/Rakefile +0 -2
  4. data/lib/opener/opinion_detector_basic/version.rb +1 -1
  5. data/opener-opinion-detector-basic.gemspec +0 -1
  6. data/task/compile.rake +1 -1
  7. data/task/requirements.rake +0 -1
  8. metadata +2 -142
  9. data/core/vendor/src/crfsuite/AUTHORS +0 -1
  10. data/core/vendor/src/crfsuite/COPYING +0 -27
  11. data/core/vendor/src/crfsuite/ChangeLog +0 -103
  12. data/core/vendor/src/crfsuite/INSTALL +0 -236
  13. data/core/vendor/src/crfsuite/Makefile.am +0 -19
  14. data/core/vendor/src/crfsuite/Makefile.in +0 -783
  15. data/core/vendor/src/crfsuite/README +0 -183
  16. data/core/vendor/src/crfsuite/aclocal.m4 +0 -9018
  17. data/core/vendor/src/crfsuite/autogen.sh +0 -38
  18. data/core/vendor/src/crfsuite/compile +0 -143
  19. data/core/vendor/src/crfsuite/config.guess +0 -1502
  20. data/core/vendor/src/crfsuite/config.h.in +0 -198
  21. data/core/vendor/src/crfsuite/config.sub +0 -1714
  22. data/core/vendor/src/crfsuite/configure +0 -14273
  23. data/core/vendor/src/crfsuite/configure.in +0 -149
  24. data/core/vendor/src/crfsuite/crfsuite.sln +0 -42
  25. data/core/vendor/src/crfsuite/depcomp +0 -630
  26. data/core/vendor/src/crfsuite/example/chunking.py +0 -49
  27. data/core/vendor/src/crfsuite/example/crfutils.py +0 -179
  28. data/core/vendor/src/crfsuite/example/ner.py +0 -270
  29. data/core/vendor/src/crfsuite/example/pos.py +0 -78
  30. data/core/vendor/src/crfsuite/example/template.py +0 -88
  31. data/core/vendor/src/crfsuite/frontend/Makefile.am +0 -29
  32. data/core/vendor/src/crfsuite/frontend/Makefile.in +0 -640
  33. data/core/vendor/src/crfsuite/frontend/dump.c +0 -116
  34. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +0 -129
  35. data/core/vendor/src/crfsuite/frontend/iwa.c +0 -273
  36. data/core/vendor/src/crfsuite/frontend/iwa.h +0 -65
  37. data/core/vendor/src/crfsuite/frontend/learn.c +0 -439
  38. data/core/vendor/src/crfsuite/frontend/main.c +0 -137
  39. data/core/vendor/src/crfsuite/frontend/option.c +0 -93
  40. data/core/vendor/src/crfsuite/frontend/option.h +0 -86
  41. data/core/vendor/src/crfsuite/frontend/readdata.h +0 -38
  42. data/core/vendor/src/crfsuite/frontend/reader.c +0 -136
  43. data/core/vendor/src/crfsuite/frontend/tag.c +0 -427
  44. data/core/vendor/src/crfsuite/genbinary.sh.in +0 -15
  45. data/core/vendor/src/crfsuite/include/Makefile.am +0 -11
  46. data/core/vendor/src/crfsuite/include/Makefile.in +0 -461
  47. data/core/vendor/src/crfsuite/include/crfsuite.h +0 -1063
  48. data/core/vendor/src/crfsuite/include/crfsuite.hpp +0 -555
  49. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +0 -400
  50. data/core/vendor/src/crfsuite/include/os.h +0 -61
  51. data/core/vendor/src/crfsuite/install-sh +0 -520
  52. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +0 -28
  53. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +0 -21
  54. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +0 -549
  55. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +0 -86
  56. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +0 -524
  57. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +0 -587
  58. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +0 -976
  59. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +0 -46
  60. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +0 -721
  61. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +0 -216
  62. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +0 -353
  63. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +0 -705
  64. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +0 -943
  65. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +0 -352
  66. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +0 -994
  67. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +0 -550
  68. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +0 -492
  69. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +0 -236
  70. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +0 -272
  71. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +0 -106
  72. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +0 -118
  73. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +0 -80
  74. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +0 -91
  75. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +0 -48
  76. data/core/vendor/src/crfsuite/lib/crf/src/params.c +0 -335
  77. data/core/vendor/src/crfsuite/lib/crf/src/params.h +0 -80
  78. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +0 -172
  79. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +0 -46
  80. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +0 -1107
  81. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +0 -160
  82. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +0 -408
  83. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +0 -242
  84. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +0 -507
  85. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +0 -338
  86. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +0 -435
  87. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +0 -341
  88. data/core/vendor/src/crfsuite/ltmain.sh +0 -8413
  89. data/core/vendor/src/crfsuite/missing +0 -376
  90. data/core/vendor/src/crfsuite/swig/Makefile.am +0 -13
  91. data/core/vendor/src/crfsuite/swig/Makefile.in +0 -365
  92. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +0 -2
  93. data/core/vendor/src/crfsuite/swig/export.i +0 -32
  94. data/core/vendor/src/crfsuite/swig/python/README +0 -92
  95. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +0 -329
  96. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +0 -14355
  97. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +0 -63
  98. data/core/vendor/src/crfsuite/swig/python/prepare.sh +0 -9
  99. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +0 -52
  100. data/core/vendor/src/crfsuite/swig/python/sample_train.py +0 -68
  101. data/core/vendor/src/crfsuite/swig/python/setup.py +0 -44
  102. data/core/vendor/src/crfsuite/win32/stdint.h +0 -679
  103. data/core/vendor/src/liblbfgs/AUTHORS +0 -1
  104. data/core/vendor/src/liblbfgs/COPYING +0 -22
  105. data/core/vendor/src/liblbfgs/ChangeLog +0 -120
  106. data/core/vendor/src/liblbfgs/INSTALL +0 -231
  107. data/core/vendor/src/liblbfgs/Makefile.am +0 -10
  108. data/core/vendor/src/liblbfgs/Makefile.in +0 -638
  109. data/core/vendor/src/liblbfgs/NEWS +0 -0
  110. data/core/vendor/src/liblbfgs/README +0 -71
  111. data/core/vendor/src/liblbfgs/aclocal.m4 +0 -6985
  112. data/core/vendor/src/liblbfgs/autogen.sh +0 -38
  113. data/core/vendor/src/liblbfgs/config.guess +0 -1411
  114. data/core/vendor/src/liblbfgs/config.h.in +0 -64
  115. data/core/vendor/src/liblbfgs/config.sub +0 -1500
  116. data/core/vendor/src/liblbfgs/configure +0 -21146
  117. data/core/vendor/src/liblbfgs/configure.in +0 -107
  118. data/core/vendor/src/liblbfgs/depcomp +0 -522
  119. data/core/vendor/src/liblbfgs/include/lbfgs.h +0 -745
  120. data/core/vendor/src/liblbfgs/install-sh +0 -322
  121. data/core/vendor/src/liblbfgs/lbfgs.sln +0 -26
  122. data/core/vendor/src/liblbfgs/lib/Makefile.am +0 -24
  123. data/core/vendor/src/liblbfgs/lib/Makefile.in +0 -499
  124. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +0 -133
  125. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +0 -294
  126. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +0 -298
  127. data/core/vendor/src/liblbfgs/lib/lbfgs.c +0 -1371
  128. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +0 -95
  129. data/core/vendor/src/liblbfgs/ltmain.sh +0 -6426
  130. data/core/vendor/src/liblbfgs/missing +0 -353
  131. data/core/vendor/src/liblbfgs/sample/Makefile.am +0 -15
  132. data/core/vendor/src/liblbfgs/sample/Makefile.in +0 -433
  133. data/core/vendor/src/liblbfgs/sample/sample.c +0 -81
  134. data/core/vendor/src/liblbfgs/sample/sample.cpp +0 -126
  135. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +0 -105
  136. data/core/vendor/src/svm_light/LICENSE.txt +0 -59
  137. data/core/vendor/src/svm_light/Makefile +0 -105
  138. data/core/vendor/src/svm_light/kernel.h +0 -40
  139. data/core/vendor/src/svm_light/svm_classify.c +0 -197
  140. data/core/vendor/src/svm_light/svm_common.c +0 -985
  141. data/core/vendor/src/svm_light/svm_common.h +0 -301
  142. data/core/vendor/src/svm_light/svm_hideo.c +0 -1062
  143. data/core/vendor/src/svm_light/svm_learn.c +0 -4147
  144. data/core/vendor/src/svm_light/svm_learn.h +0 -169
  145. data/core/vendor/src/svm_light/svm_learn_main.c +0 -397
  146. data/core/vendor/src/svm_light/svm_loqo.c +0 -211
  147. data/task/c.rake +0 -36
  148. data/task/submodules.rake +0 -5
@@ -1,242 +0,0 @@
1
- /*
2
- * Online training with averaged perceptron.
3
- *
4
- * Copyright (c) 2007-2010, Naoaki Okazaki
5
- * All rights reserved.
6
- *
7
- * Redistribution and use in source and binary forms, with or without
8
- * modification, are permitted provided that the following conditions are met:
9
- * * Redistributions of source code must retain the above copyright
10
- * notice, this list of conditions and the following disclaimer.
11
- * * Redistributions in binary form must reproduce the above copyright
12
- * notice, this list of conditions and the following disclaimer in the
13
- * documentation and/or other materials provided with the distribution.
14
- * * Neither the names of the authors nor the names of its contributors
15
- * may be used to endorse or promote products derived from this
16
- * software without specific prior written permission.
17
- *
18
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22
- * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
- */
30
-
31
- /* $Id$ */
32
-
33
- #ifdef HAVE_CONFIG_H
34
- #include <config.h>
35
- #endif/*HAVE_CONFIG_H*/
36
-
37
- #include <os.h>
38
-
39
- #include <stdio.h>
40
- #include <stdlib.h>
41
- #include <time.h>
42
-
43
- #include <crfsuite.h>
44
- #include "crfsuite_internal.h"
45
- #include "logging.h"
46
- #include "params.h"
47
- #include "vecmath.h"
48
-
49
/**
 * Training parameters (configurable with crfsuite_params_t interface).
 *
 * Both fields are exchanged with the parameter store by exchange_options().
 */
typedef struct {
    int         max_iterations; /* Upper bound on the number of training epochs. */
    floatval_t  epsilon;        /* Stopping threshold; described to users as the
                                   ratio of incorrect label predictions. */
} training_option_t;
56
-
57
/**
 * Internal data structure for updating (averaging) feature weights.
 *
 * Passed as the opaque `instance` argument to update_weights().
 */
typedef struct {
    floatval_t *w;      /* Weight array indexed by feature id; scaled by `c` on update. */
    floatval_t *ws;     /* Second accumulator indexed by feature id; scaled by `cs` on update. */
    floatval_t c;       /* Multiplier applied to the feature value when updating `w`. */
    floatval_t cs;      /* Multiplier applied to the feature value when updating `ws`. */
} update_data;
66
-
67
- static void update_weights(void *instance, int fid, floatval_t value)
68
- {
69
- update_data *ud = (update_data*)instance;
70
- ud->w[fid] += ud->c * value;
71
- ud->ws[fid] += ud->cs * value;
72
- }
73
-
74
/**
 * Counts the positions at which two integer sequences disagree.
 *
 * The arrays are only read, so they are taken as pointers to const; this lets
 * callers pass read-only label sequences without a cast.
 *
 *  @param  x   First sequence (at least n elements when n > 0).
 *  @param  y   Second sequence (at least n elements when n > 0).
 *  @param  n   Number of leading elements to compare; n <= 0 compares nothing.
 *  @return     The number of indices i in [0, n) with x[i] != y[i].
 */
static int diff(const int *x, const int *y, int n)
{
    int i, d = 0;
    for (i = 0;i < n;++i) {
        if (x[i] != y[i]) {
            ++d;
        }
    }
    return d;
}
84
-
85
/**
 * Describes the trainer's two parameters to the parameter-map macros.
 *
 * The work is done entirely by BEGIN_PARAM_MAP / DDX_PARAM_* / END_PARAM_MAP,
 * which are defined outside this file. Within this file the function is called
 * with (opt = NULL, mode = 0) from the init routine and with (&opt, mode = -1)
 * from the trainer.
 * NOTE(review): presumably mode 0 registers the parameters with their defaults
 * and a negative mode reads stored values into *opt; confirm against params.h.
 *
 *  @param  params  Parameter store handed to BEGIN_PARAM_MAP.
 *  @param  opt     Option struct whose fields are named in the map; may be NULL
 *                  (the init routine passes NULL).
 *  @param  mode    Exchange mode handed to BEGIN_PARAM_MAP.
 *  @return         Always 0 from the visible code path.
 */
static int exchange_options(crfsuite_params_t* params, training_option_t* opt, int mode)
{
    BEGIN_PARAM_MAP(params, mode)
        /* Integer parameter "max_iterations"; 100 is presumably its default. */
        DDX_PARAM_INT(
            "max_iterations", opt->max_iterations, 100,
            "The maximum number of iterations."
            )
        /* Floating-point parameter "epsilon"; 0. is presumably its default. */
        DDX_PARAM_FLOAT(
            "epsilon", opt->epsilon, 0.,
            "The stopping criterion (the ratio of incorrect label predictions)."
            )
    END_PARAM_MAP()

    return 0;
}
100
-
101
/**
 * Entry point for setting up the averaged-perceptron parameters on `params`.
 *
 * Delegates to exchange_options() with no option struct (NULL) and mode 0.
 * NOTE(review): presumably this registers "max_iterations" and "epsilon" with
 * their default values; confirm against the parameter-map macros.
 *
 *  @param  params  Parameter store to operate on.
 */
void crfsuite_train_averaged_perceptron_init(crfsuite_params_t* params)
{
    exchange_options(params, NULL, 0);
}
105
-
106
- int crfsuite_train_averaged_perceptron(
107
- encoder_t *gm,
108
- dataset_t *trainset,
109
- dataset_t *testset,
110
- crfsuite_params_t *params,
111
- logging_t *lg,
112
- floatval_t **ptr_w
113
- )
114
- {
115
- int n, i, c, ret = 0;
116
- int *viterbi = NULL;
117
- floatval_t *w = NULL;
118
- floatval_t *ws = NULL;
119
- floatval_t *wa = NULL;
120
- const int N = trainset->num_instances;
121
- const int K = gm->num_features;
122
- const int T = gm->cap_items;
123
- training_option_t opt;
124
- update_data ud;
125
- clock_t begin = clock();
126
-
127
- /* Initialize the variable. */
128
- memset(&ud, 0, sizeof(ud));
129
-
130
- /* Obtain parameter values. */
131
- exchange_options(params, &opt, -1);
132
-
133
- /* Allocate arrays. */
134
- w = (floatval_t*)calloc(sizeof(floatval_t), K);
135
- ws = (floatval_t*)calloc(sizeof(floatval_t), K);
136
- wa = (floatval_t*)calloc(sizeof(floatval_t), K);
137
- viterbi = (int*)calloc(sizeof(int), T);
138
- if (w == NULL || ws == NULL || wa == NULL || viterbi == NULL) {
139
- ret = CRFSUITEERR_OUTOFMEMORY;
140
- goto error_exit;
141
- }
142
-
143
- /* Show the parameters. */
144
- logging(lg, "Averaged perceptron\n");
145
- logging(lg, "max_iterations: %d\n", opt.max_iterations);
146
- logging(lg, "epsilon: %f\n", opt.epsilon);
147
- logging(lg, "\n");
148
-
149
- c = 1;
150
- ud.w = w;
151
- ud.ws = ws;
152
-
153
- /* Loop for epoch. */
154
- for (i = 0;i < opt.max_iterations;++i) {
155
- floatval_t norm = 0., loss = 0.;
156
- clock_t iteration_begin = clock();
157
-
158
- /* Shuffle the instances. */
159
- dataset_shuffle(trainset);
160
-
161
- /* Loop for each instance. */
162
- for (n = 0;n < N;++n) {
163
- int d = 0;
164
- floatval_t score;
165
- const crfsuite_instance_t *inst = dataset_get(trainset, n);
166
-
167
- /* Set the feature weights to the encoder. */
168
- gm->set_weights(gm, w, 1.);
169
- gm->set_instance(gm, inst);
170
-
171
- /* Tag the sequence with the current model. */
172
- gm->viterbi(gm, viterbi, &score);
173
-
174
- /* Compute the number of different labels. */
175
- d = diff(inst->labels, viterbi, inst->num_items);
176
- if (0 < d) {
177
- /*
178
- For every feature k on the correct path:
179
- w[k] += 1; ws[k] += c;
180
- */
181
- ud.c = 1;
182
- ud.cs = c;
183
- gm->features_on_path(gm, inst, inst->labels, update_weights, &ud);
184
-
185
- /*
186
- For every feature k on the Viterbi path:
187
- w[k] -= 1; ws[k] -= c;
188
- */
189
- ud.c = -1;
190
- ud.cs = -c;
191
- gm->features_on_path(gm, inst, viterbi, update_weights, &ud);
192
-
193
- /* We define the loss as the ratio of wrongly predicted labels. */
194
- loss += d / (floatval_t)inst->num_items;
195
- }
196
-
197
- ++c;
198
- }
199
-
200
- /* Perform averaging to wa. */
201
- veccopy(wa, w, K);
202
- vecasub(wa, 1./c, ws, K);
203
-
204
- /* Output the progress. */
205
- logging(lg, "***** Iteration #%d *****\n", i+1);
206
- logging(lg, "Loss: %f\n", loss);
207
- logging(lg, "Feature norm: %f\n", sqrt(vecdot(wa, wa, K)));
208
- logging(lg, "Seconds required for this iteration: %.3f\n", (clock() - iteration_begin) / (double)CLOCKS_PER_SEC);
209
-
210
- /* Holdout evaluation if necessary. */
211
- if (testset != NULL) {
212
- holdout_evaluation(gm, testset, wa, lg);
213
- }
214
-
215
- logging(lg, "\n");
216
-
217
- /* Convergence test. */
218
- if (loss / N < opt.epsilon) {
219
- logging(lg, "Terminated with the stopping criterion\n");
220
- logging(lg, "\n");
221
- break;
222
- }
223
- }
224
-
225
- logging(lg, "Total seconds required for training: %.3f\n", (clock() - begin) / (double)CLOCKS_PER_SEC);
226
- logging(lg, "\n");
227
-
228
- free(viterbi);
229
- free(ws);
230
- free(w);
231
- *ptr_w = wa;
232
- return ret;
233
-
234
- error_exit:
235
- free(viterbi);
236
- free(wa);
237
- free(ws);
238
- free(w);
239
- *ptr_w = NULL;
240
-
241
- return ret;
242
- }