opener-opinion-detector-basic 2.0.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (148) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -0
  3. data/ext/hack/Rakefile +0 -2
  4. data/lib/opener/opinion_detector_basic/version.rb +1 -1
  5. data/opener-opinion-detector-basic.gemspec +0 -1
  6. data/task/compile.rake +1 -1
  7. data/task/requirements.rake +0 -1
  8. metadata +2 -142
  9. data/core/vendor/src/crfsuite/AUTHORS +0 -1
  10. data/core/vendor/src/crfsuite/COPYING +0 -27
  11. data/core/vendor/src/crfsuite/ChangeLog +0 -103
  12. data/core/vendor/src/crfsuite/INSTALL +0 -236
  13. data/core/vendor/src/crfsuite/Makefile.am +0 -19
  14. data/core/vendor/src/crfsuite/Makefile.in +0 -783
  15. data/core/vendor/src/crfsuite/README +0 -183
  16. data/core/vendor/src/crfsuite/aclocal.m4 +0 -9018
  17. data/core/vendor/src/crfsuite/autogen.sh +0 -38
  18. data/core/vendor/src/crfsuite/compile +0 -143
  19. data/core/vendor/src/crfsuite/config.guess +0 -1502
  20. data/core/vendor/src/crfsuite/config.h.in +0 -198
  21. data/core/vendor/src/crfsuite/config.sub +0 -1714
  22. data/core/vendor/src/crfsuite/configure +0 -14273
  23. data/core/vendor/src/crfsuite/configure.in +0 -149
  24. data/core/vendor/src/crfsuite/crfsuite.sln +0 -42
  25. data/core/vendor/src/crfsuite/depcomp +0 -630
  26. data/core/vendor/src/crfsuite/example/chunking.py +0 -49
  27. data/core/vendor/src/crfsuite/example/crfutils.py +0 -179
  28. data/core/vendor/src/crfsuite/example/ner.py +0 -270
  29. data/core/vendor/src/crfsuite/example/pos.py +0 -78
  30. data/core/vendor/src/crfsuite/example/template.py +0 -88
  31. data/core/vendor/src/crfsuite/frontend/Makefile.am +0 -29
  32. data/core/vendor/src/crfsuite/frontend/Makefile.in +0 -640
  33. data/core/vendor/src/crfsuite/frontend/dump.c +0 -116
  34. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +0 -129
  35. data/core/vendor/src/crfsuite/frontend/iwa.c +0 -273
  36. data/core/vendor/src/crfsuite/frontend/iwa.h +0 -65
  37. data/core/vendor/src/crfsuite/frontend/learn.c +0 -439
  38. data/core/vendor/src/crfsuite/frontend/main.c +0 -137
  39. data/core/vendor/src/crfsuite/frontend/option.c +0 -93
  40. data/core/vendor/src/crfsuite/frontend/option.h +0 -86
  41. data/core/vendor/src/crfsuite/frontend/readdata.h +0 -38
  42. data/core/vendor/src/crfsuite/frontend/reader.c +0 -136
  43. data/core/vendor/src/crfsuite/frontend/tag.c +0 -427
  44. data/core/vendor/src/crfsuite/genbinary.sh.in +0 -15
  45. data/core/vendor/src/crfsuite/include/Makefile.am +0 -11
  46. data/core/vendor/src/crfsuite/include/Makefile.in +0 -461
  47. data/core/vendor/src/crfsuite/include/crfsuite.h +0 -1063
  48. data/core/vendor/src/crfsuite/include/crfsuite.hpp +0 -555
  49. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +0 -400
  50. data/core/vendor/src/crfsuite/include/os.h +0 -61
  51. data/core/vendor/src/crfsuite/install-sh +0 -520
  52. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +0 -28
  53. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +0 -21
  54. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +0 -549
  55. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +0 -86
  56. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +0 -524
  57. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +0 -587
  58. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +0 -976
  59. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +0 -46
  60. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +0 -721
  61. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +0 -216
  62. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +0 -353
  63. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +0 -705
  64. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +0 -943
  65. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +0 -352
  66. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +0 -994
  67. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +0 -550
  68. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +0 -492
  69. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +0 -236
  70. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +0 -272
  71. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +0 -106
  72. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +0 -118
  73. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +0 -80
  74. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +0 -91
  75. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +0 -48
  76. data/core/vendor/src/crfsuite/lib/crf/src/params.c +0 -335
  77. data/core/vendor/src/crfsuite/lib/crf/src/params.h +0 -80
  78. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +0 -172
  79. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +0 -46
  80. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +0 -1107
  81. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +0 -160
  82. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +0 -408
  83. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +0 -242
  84. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +0 -507
  85. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +0 -338
  86. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +0 -435
  87. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +0 -341
  88. data/core/vendor/src/crfsuite/ltmain.sh +0 -8413
  89. data/core/vendor/src/crfsuite/missing +0 -376
  90. data/core/vendor/src/crfsuite/swig/Makefile.am +0 -13
  91. data/core/vendor/src/crfsuite/swig/Makefile.in +0 -365
  92. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +0 -2
  93. data/core/vendor/src/crfsuite/swig/export.i +0 -32
  94. data/core/vendor/src/crfsuite/swig/python/README +0 -92
  95. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +0 -329
  96. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +0 -14355
  97. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +0 -63
  98. data/core/vendor/src/crfsuite/swig/python/prepare.sh +0 -9
  99. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +0 -52
  100. data/core/vendor/src/crfsuite/swig/python/sample_train.py +0 -68
  101. data/core/vendor/src/crfsuite/swig/python/setup.py +0 -44
  102. data/core/vendor/src/crfsuite/win32/stdint.h +0 -679
  103. data/core/vendor/src/liblbfgs/AUTHORS +0 -1
  104. data/core/vendor/src/liblbfgs/COPYING +0 -22
  105. data/core/vendor/src/liblbfgs/ChangeLog +0 -120
  106. data/core/vendor/src/liblbfgs/INSTALL +0 -231
  107. data/core/vendor/src/liblbfgs/Makefile.am +0 -10
  108. data/core/vendor/src/liblbfgs/Makefile.in +0 -638
  109. data/core/vendor/src/liblbfgs/NEWS +0 -0
  110. data/core/vendor/src/liblbfgs/README +0 -71
  111. data/core/vendor/src/liblbfgs/aclocal.m4 +0 -6985
  112. data/core/vendor/src/liblbfgs/autogen.sh +0 -38
  113. data/core/vendor/src/liblbfgs/config.guess +0 -1411
  114. data/core/vendor/src/liblbfgs/config.h.in +0 -64
  115. data/core/vendor/src/liblbfgs/config.sub +0 -1500
  116. data/core/vendor/src/liblbfgs/configure +0 -21146
  117. data/core/vendor/src/liblbfgs/configure.in +0 -107
  118. data/core/vendor/src/liblbfgs/depcomp +0 -522
  119. data/core/vendor/src/liblbfgs/include/lbfgs.h +0 -745
  120. data/core/vendor/src/liblbfgs/install-sh +0 -322
  121. data/core/vendor/src/liblbfgs/lbfgs.sln +0 -26
  122. data/core/vendor/src/liblbfgs/lib/Makefile.am +0 -24
  123. data/core/vendor/src/liblbfgs/lib/Makefile.in +0 -499
  124. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +0 -133
  125. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +0 -294
  126. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +0 -298
  127. data/core/vendor/src/liblbfgs/lib/lbfgs.c +0 -1371
  128. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +0 -95
  129. data/core/vendor/src/liblbfgs/ltmain.sh +0 -6426
  130. data/core/vendor/src/liblbfgs/missing +0 -353
  131. data/core/vendor/src/liblbfgs/sample/Makefile.am +0 -15
  132. data/core/vendor/src/liblbfgs/sample/Makefile.in +0 -433
  133. data/core/vendor/src/liblbfgs/sample/sample.c +0 -81
  134. data/core/vendor/src/liblbfgs/sample/sample.cpp +0 -126
  135. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +0 -105
  136. data/core/vendor/src/svm_light/LICENSE.txt +0 -59
  137. data/core/vendor/src/svm_light/Makefile +0 -105
  138. data/core/vendor/src/svm_light/kernel.h +0 -40
  139. data/core/vendor/src/svm_light/svm_classify.c +0 -197
  140. data/core/vendor/src/svm_light/svm_common.c +0 -985
  141. data/core/vendor/src/svm_light/svm_common.h +0 -301
  142. data/core/vendor/src/svm_light/svm_hideo.c +0 -1062
  143. data/core/vendor/src/svm_light/svm_learn.c +0 -4147
  144. data/core/vendor/src/svm_light/svm_learn.h +0 -169
  145. data/core/vendor/src/svm_light/svm_learn_main.c +0 -397
  146. data/core/vendor/src/svm_light/svm_loqo.c +0 -211
  147. data/task/c.rake +0 -36
  148. data/task/submodules.rake +0 -5
@@ -1,705 +0,0 @@
1
- /*
2
- * CRF1d context (forward-backward, viterbi, etc).
3
- *
4
- * Copyright (c) 2007-2010, Naoaki Okazaki
5
- * All rights reserved.
6
- *
7
- * Redistribution and use in source and binary forms, with or without
8
- * modification, are permitted provided that the following conditions are met:
9
- * * Redistributions of source code must retain the above copyright
10
- * notice, this list of conditions and the following disclaimer.
11
- * * Redistributions in binary form must reproduce the above copyright
12
- * notice, this list of conditions and the following disclaimer in the
13
- * documentation and/or other materials provided with the distribution.
14
- * * Neither the names of the authors nor the names of its contributors
15
- * may be used to endorse or promote products derived from this
16
- * software without specific prior written permission.
17
- *
18
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22
- * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
- */
30
-
31
- /* $Id$ */
32
-
33
- #ifdef HAVE_CONFIG_H
34
- #include <config.h>
35
- #endif/*HAVE_CONFIG_H*/
36
-
37
- #include <os.h>
38
-
39
- #include <float.h>
40
- #include <math.h>
41
- #include <stdio.h>
42
- #include <stdlib.h>
43
-
44
- #include <crfsuite.h>
45
-
46
- #include "crf1d.h"
47
- #include "vecmath.h"
48
-
49
-
50
-
51
- crf1d_context_t* crf1dc_new(int flag, int L, int T)
52
- {
53
- int ret = 0;
54
- crf1d_context_t* ctx = NULL;
55
-
56
- ctx = (crf1d_context_t*)calloc(1, sizeof(crf1d_context_t));
57
- if (ctx != NULL) {
58
- ctx->flag = flag;
59
- ctx->num_labels = L;
60
-
61
- ctx->trans = (floatval_t*)calloc(L * L, sizeof(floatval_t));
62
- if (ctx->trans == NULL) goto error_exit;
63
-
64
- if (ctx->flag & CTXF_MARGINALS) {
65
- ctx->exp_trans = (floatval_t*)_aligned_malloc((L * L + 4) * sizeof(floatval_t), 16);
66
- if (ctx->exp_trans == NULL) goto error_exit;
67
- ctx->mexp_trans = (floatval_t*)calloc(L * L, sizeof(floatval_t));
68
- if (ctx->mexp_trans == NULL) goto error_exit;
69
- }
70
-
71
- if (ret = crf1dc_set_num_items(ctx, T)) {
72
- goto error_exit;
73
- }
74
-
75
- /* T gives the 'hint' for maximum length of items. */
76
- ctx->num_items = 0;
77
- }
78
-
79
- return ctx;
80
-
81
- error_exit:
82
- crf1dc_delete(ctx);
83
- return NULL;
84
- }
85
-
86
- int crf1dc_set_num_items(crf1d_context_t* ctx, int T)
87
- {
88
- const int L = ctx->num_labels;
89
-
90
- ctx->num_items = T;
91
-
92
- if (ctx->cap_items < T) {
93
- free(ctx->backward_edge);
94
- free(ctx->mexp_state);
95
- _aligned_free(ctx->exp_state);
96
- free(ctx->scale_factor);
97
- free(ctx->row);
98
- free(ctx->beta_score);
99
- free(ctx->alpha_score);
100
-
101
- ctx->alpha_score = (floatval_t*)calloc(T * L, sizeof(floatval_t));
102
- if (ctx->alpha_score == NULL) return CRFSUITEERR_OUTOFMEMORY;
103
- ctx->beta_score = (floatval_t*)calloc(T * L, sizeof(floatval_t));
104
- if (ctx->beta_score == NULL) return CRFSUITEERR_OUTOFMEMORY;
105
- ctx->scale_factor = (floatval_t*)calloc(T, sizeof(floatval_t));
106
- if (ctx->scale_factor == NULL) return CRFSUITEERR_OUTOFMEMORY;
107
- ctx->row = (floatval_t*)calloc(L, sizeof(floatval_t));
108
- if (ctx->row == NULL) return CRFSUITEERR_OUTOFMEMORY;
109
-
110
- if (ctx->flag & CTXF_VITERBI) {
111
- ctx->backward_edge = (int*)calloc(T * L, sizeof(int));
112
- if (ctx->backward_edge == NULL) return CRFSUITEERR_OUTOFMEMORY;
113
- }
114
-
115
- ctx->state = (floatval_t*)calloc(T * L, sizeof(floatval_t));
116
- if (ctx->state == NULL) return CRFSUITEERR_OUTOFMEMORY;
117
-
118
- if (ctx->flag & CTXF_MARGINALS) {
119
- ctx->exp_state = (floatval_t*)_aligned_malloc((T * L + 4) * sizeof(floatval_t), 16);
120
- if (ctx->exp_state == NULL) return CRFSUITEERR_OUTOFMEMORY;
121
- ctx->mexp_state = (floatval_t*)calloc(T * L, sizeof(floatval_t));
122
- if (ctx->mexp_state == NULL) return CRFSUITEERR_OUTOFMEMORY;
123
- }
124
-
125
- ctx->cap_items = T;
126
- }
127
-
128
- return 0;
129
- }
130
-
131
- void crf1dc_delete(crf1d_context_t* ctx)
132
- {
133
- if (ctx != NULL) {
134
- free(ctx->backward_edge);
135
- free(ctx->mexp_state);
136
- _aligned_free(ctx->exp_state);
137
- free(ctx->state);
138
- free(ctx->scale_factor);
139
- free(ctx->row);
140
- free(ctx->beta_score);
141
- free(ctx->alpha_score);
142
- free(ctx->mexp_trans);
143
- _aligned_free(ctx->exp_trans);
144
- free(ctx->trans);
145
- }
146
- free(ctx);
147
- }
148
-
149
- void crf1dc_reset(crf1d_context_t* ctx, int flag)
150
- {
151
- const int T = ctx->num_items;
152
- const int L = ctx->num_labels;
153
-
154
- if (flag & RF_STATE) {
155
- veczero(ctx->state, T*L);
156
- }
157
- if (flag & RF_TRANS) {
158
- veczero(ctx->trans, L*L);
159
- }
160
-
161
- if (ctx->flag & CTXF_MARGINALS) {
162
- veczero(ctx->mexp_state, T*L);
163
- veczero(ctx->mexp_trans, L*L);
164
- ctx->log_norm = 0;
165
- }
166
- }
167
-
168
- void crf1dc_exp_state(crf1d_context_t* ctx)
169
- {
170
- const int T = ctx->num_items;
171
- const int L = ctx->num_labels;
172
-
173
- veccopy(ctx->exp_state, ctx->state, L * T);
174
- vecexp(ctx->exp_state, L * T);
175
- }
176
-
177
- void crf1dc_exp_transition(crf1d_context_t* ctx)
178
- {
179
- const int L = ctx->num_labels;
180
-
181
- veccopy(ctx->exp_trans, ctx->trans, L * L);
182
- vecexp(ctx->exp_trans, L * L);
183
- }
184
-
185
- void crf1dc_alpha_score(crf1d_context_t* ctx)
186
- {
187
- int i, t;
188
- floatval_t sum, *cur = NULL;
189
- floatval_t *scale = &ctx->scale_factor[0];
190
- const floatval_t *prev = NULL, *trans = NULL, *state = NULL;
191
- const int T = ctx->num_items;
192
- const int L = ctx->num_labels;
193
-
194
- /* Compute the alpha scores on nodes (0, *).
195
- alpha[0][j] = state[0][j]
196
- */
197
- cur = ALPHA_SCORE(ctx, 0);
198
- state = EXP_STATE_SCORE(ctx, 0);
199
- veccopy(cur, state, L);
200
- sum = vecsum(cur, L);
201
- *scale = (sum != 0.) ? 1. / sum : 1.;
202
- vecscale(cur, *scale, L);
203
- ++scale;
204
-
205
- /* Compute the alpha scores on nodes (t, *).
206
- alpha[t][j] = state[t][j] * \sum_{i} alpha[t-1][i] * trans[i][j]
207
- */
208
- for (t = 1;t < T;++t) {
209
- prev = ALPHA_SCORE(ctx, t-1);
210
- cur = ALPHA_SCORE(ctx, t);
211
- state = EXP_STATE_SCORE(ctx, t);
212
-
213
- veczero(cur, L);
214
- for (i = 0;i < L;++i) {
215
- trans = EXP_TRANS_SCORE(ctx, i);
216
- vecaadd(cur, prev[i], trans, L);
217
- }
218
- vecmul(cur, state, L);
219
- sum = vecsum(cur, L);
220
- *scale = (sum != 0.) ? 1. / sum : 1.;
221
- vecscale(cur, *scale, L);
222
- ++scale;
223
- }
224
-
225
- /* Compute the logarithm of the normalization factor here.
226
- norm = 1. / (C[0] * C[1] ... * C[T-1])
227
- log(norm) = - \sum_{t = 0}^{T-1} log(C[t]).
228
- */
229
- ctx->log_norm = -vecsumlog(ctx->scale_factor, T);
230
- }
231
-
232
- void crf1dc_beta_score(crf1d_context_t* ctx)
233
- {
234
- int i, t;
235
- floatval_t *cur = NULL;
236
- floatval_t *row = ctx->row;
237
- const floatval_t *next = NULL, *state = NULL, *trans = NULL;
238
- const int T = ctx->num_items;
239
- const int L = ctx->num_labels;
240
- const floatval_t *scale = &ctx->scale_factor[T-1];
241
-
242
- /* Compute the beta scores at (T-1, *). */
243
- cur = BETA_SCORE(ctx, T-1);
244
- vecset(cur, *scale, L);
245
- --scale;
246
-
247
- /* Compute the beta scores at (t, *). */
248
- for (t = T-2;0 <= t;--t) {
249
- cur = BETA_SCORE(ctx, t);
250
- next = BETA_SCORE(ctx, t+1);
251
- state = EXP_STATE_SCORE(ctx, t+1);
252
-
253
- veccopy(row, next, L);
254
- vecmul(row, state, L);
255
-
256
- /* Compute the beta score at (t, i). */
257
- for (i = 0;i < L;++i) {
258
- trans = EXP_TRANS_SCORE(ctx, i);
259
- cur[i] = vecdot(trans, row, L);
260
- }
261
- vecscale(cur, *scale, L);
262
- --scale;
263
- }
264
- }
265
-
266
- void crf1dc_marginals(crf1d_context_t* ctx)
267
- {
268
- int i, j, t;
269
- const int T = ctx->num_items;
270
- const int L = ctx->num_labels;
271
-
272
- /*
273
- Compute the model expectations of states.
274
- p(t,i) = fwd[t][i] * bwd[t][i] / norm
275
- = (1. / C[t]) * fwd'[t][i] * bwd'[t][i]
276
- */
277
- for (t = 0;t < T;++t) {
278
- floatval_t *fwd = ALPHA_SCORE(ctx, t);
279
- floatval_t *bwd = BETA_SCORE(ctx, t);
280
- floatval_t *prob = STATE_MEXP(ctx, t);
281
- veccopy(prob, fwd, L);
282
- vecmul(prob, bwd, L);
283
- vecscale(prob, 1. / ctx->scale_factor[t], L);
284
- }
285
-
286
- /*
287
- Compute the model expectations of transitions.
288
- p(t,i,t+1,j)
289
- = fwd[t][i] * edge[i][j] * state[t+1][j] * bwd[t+1][j] / norm
290
- = (fwd'[t][i] / (C[0] ... C[t])) * edge[i][j] * state[t+1][j] * (bwd'[t+1][j] / (C[t+1] ... C[T-1])) * (C[0] * ... * C[T-1])
291
- = fwd'[t][i] * edge[i][j] * state[t+1][j] * bwd'[t+1][j]
292
- The model expectation of a transition (i -> j) is the sum of the marginal
293
- probabilities p(t,i,t+1,j) over t.
294
- */
295
- for (t = 0;t < T-1;++t) {
296
- floatval_t *fwd = ALPHA_SCORE(ctx, t);
297
- floatval_t *state = EXP_STATE_SCORE(ctx, t+1);
298
- floatval_t *bwd = BETA_SCORE(ctx, t+1);
299
- floatval_t *row = ctx->row;
300
-
301
- /* row[j] = state[t+1][j] * bwd'[t+1][j] */
302
- veccopy(row, bwd, L);
303
- vecmul(row, state, L);
304
-
305
- for (i = 0;i < L;++i) {
306
- floatval_t *edge = EXP_TRANS_SCORE(ctx, i);
307
- floatval_t *prob = TRANS_MEXP(ctx, i);
308
- for (j = 0;j < L;++j) {
309
- prob[j] += fwd[i] * edge[j] * row[j];
310
- }
311
- }
312
- }
313
- }
314
-
315
- floatval_t crf1dc_marginal_point(crf1d_context_t *ctx, int l, int t)
316
- {
317
- floatval_t *fwd = ALPHA_SCORE(ctx, t);
318
- floatval_t *bwd = BETA_SCORE(ctx, t);
319
- return fwd[l] * bwd[l] / ctx->scale_factor[t];
320
- }
321
-
322
- floatval_t crf1dc_marginal_path(crf1d_context_t *ctx, const int *path, int begin, int end)
323
- {
324
- int t;
325
- /*
326
- Compute the marginal probability of a (partial) path.
327
- a = path[begin], b = path[begin+1], ..., y = path[end-2], z = path[end-1]
328
- fwd[begin][a] = (fwd'[begin][a] / (C[0] ... C[begin])
329
- bwd[end-1][z] = (bwd'[end-1][z] / (C[end-1] ... C[T-1]))
330
- norm = 1 / (C[0] * ... * C[T-1])
331
- p(a, b, ..., z)
332
- = fwd[begin][a] * edge[a][b] * state[begin+1][b] * ... * edge[y][z] * state[end-1][z] * bwd[end-1][z] / norm
333
- = fwd'[begin][a] * edge[a][b] * state[begin+1][b] * ... * edge[y][z] * state[end-1][z] * bwd'[end-1][z] * (C[begin+1] * ... * C[end-2])
334
- */
335
- floatval_t *fwd = ALPHA_SCORE(ctx, begin);
336
- floatval_t *bwd = BETA_SCORE(ctx, end-1);
337
- floatval_t prob = fwd[path[begin]] * bwd[path[end-1]] / ctx->scale_factor[begin];
338
-
339
- for (t = begin;t < end-1;++t) {
340
- floatval_t *state = EXP_STATE_SCORE(ctx, t+1);
341
- floatval_t *edge = EXP_TRANS_SCORE(ctx, path[t]);
342
- prob *= (edge[path[t+1]] * state[path[t+1]] * ctx->scale_factor[t]);
343
- }
344
-
345
- return prob;
346
- }
347
-
348
#if 0
/* Disabled: this was found to be slower than the forward-backward algorithm. */

#define ADJACENCY(ctx, i) \
    (&MATRIX(ctx->adj, ctx->num_labels, 0, i))

/*
 * Computes the model expectations of states and transitions from the alpha
 * scores alone, walking from the last item back to the first.
 */
void crf1dc_marginal_without_beta(crf1d_context_t* ctx)
{
    int src, pos;
    floatval_t *marginal = NULL;
    floatval_t *factor = ctx->row;
    const floatval_t *alpha = NULL;
    const int T = ctx->num_items;
    const int L = ctx->num_labels;

    /* Marginals of the last item: p(T-1,j) = fwd'[T-1][j]. */
    alpha = ALPHA_SCORE(ctx, T-1);
    marginal = STATE_MEXP(ctx, T-1);
    veccopy(marginal, alpha, L);

    /*
        For t = T-1, T-2, ..., 1:
            1) compute p(t-1,i,t,j) using p(t,j);
            2) compute p(t-1,i) using p(t-1,i,t,j).
     */
    for (pos = T-1; 0 < pos; --pos) {
        floatval_t *previous = STATE_MEXP(ctx, pos-1);

        alpha = ALPHA_SCORE(ctx, pos-1);
        marginal = STATE_MEXP(ctx, pos);

        veczero(ctx->adj, L*L);
        veczero(factor, L);

        /*
            adj[i][j] = fwd'[t-1][i] * edge[i][j]
            factor[j] = \sum_{i} adj[i][j]
         */
        for (src = 0; src < L; ++src) {
            floatval_t *adj = ADJACENCY(ctx, src);
            floatval_t *edge = EXP_TRANS_SCORE(ctx, src);
            vecaadd(adj, alpha[src], edge, L);
            vecadd(factor, adj, L);
        }

        /*
            Find z such that z * \sum_{i} adj[i][j] = p(t,j), i.e.
            z = p(t,j) / factor[j]; factor is overwritten with z.
         */
        vecinv(factor, L);
        vecmul(factor, marginal, L);

        for (src = 0; src < L; ++src) {
            floatval_t *adj = ADJACENCY(ctx, src);
            floatval_t *expect = TRANS_MEXP(ctx, src);

            /* Apply the partition factor: adj[i][j] becomes p(t-1,i,t,j). */
            vecmul(adj, factor, L);

            /* Accumulate the model expectations of transitions. */
            vecadd(expect, adj, L);

            /* p(t-1,i) = \sum_{j} p(t-1,i,t,j) */
            previous[src] = vecsum(adj, L);
        }
    }
}
#endif
432
-
433
- floatval_t crf1dc_score(crf1d_context_t* ctx, const int *labels)
434
- {
435
- int i, j, t;
436
- floatval_t ret = 0;
437
- const floatval_t *state = NULL, *cur = NULL, *trans = NULL;
438
- const int T = ctx->num_items;
439
- const int L = ctx->num_labels;
440
-
441
- /* Stay at (0, labels[0]). */
442
- i = labels[0];
443
- state = STATE_SCORE(ctx, 0);
444
- ret = state[i];
445
-
446
- /* Loop over the rest of items. */
447
- for (t = 1;t < T;++t) {
448
- j = labels[t];
449
- trans = TRANS_SCORE(ctx, i);
450
- state = STATE_SCORE(ctx, t);
451
-
452
- /* Transit from (t-1, i) to (t, j). */
453
- ret += trans[j];
454
- ret += state[j];
455
- i = j;
456
- }
457
- return ret;
458
- }
459
-
460
- floatval_t crf1dc_lognorm(crf1d_context_t* ctx)
461
- {
462
- return ctx->log_norm;
463
- }
464
-
465
- floatval_t crf1dc_viterbi(crf1d_context_t* ctx, int *labels)
466
- {
467
- int i, j, t;
468
- int *back = NULL;
469
- floatval_t max_score, score, *cur = NULL;
470
- const floatval_t *prev = NULL, *state = NULL, *trans = NULL;
471
- const int T = ctx->num_items;
472
- const int L = ctx->num_labels;
473
-
474
- /*
475
- This function assumes state and trans scores to be in the logarithm domain.
476
- */
477
-
478
- /* Compute the scores at (0, *). */
479
- cur = ALPHA_SCORE(ctx, 0);
480
- state = STATE_SCORE(ctx, 0);
481
- for (j = 0;j < L;++j) {
482
- cur[j] = state[j];
483
- }
484
-
485
- /* Compute the scores at (t, *). */
486
- for (t = 1;t < T;++t) {
487
- prev = ALPHA_SCORE(ctx, t-1);
488
- cur = ALPHA_SCORE(ctx, t);
489
- state = STATE_SCORE(ctx, t);
490
- back = BACKWARD_EDGE_AT(ctx, t);
491
-
492
- /* Compute the score of (t, j). */
493
- for (j = 0;j < L;++j) {
494
- max_score = -FLOAT_MAX;
495
-
496
- for (i = 0;i < L;++i) {
497
- /* Transit from (t-1, i) to (t, j). */
498
- trans = TRANS_SCORE(ctx, i);
499
- score = prev[i] + trans[j];
500
-
501
- /* Store this path if it has the maximum score. */
502
- if (max_score < score) {
503
- max_score = score;
504
- /* Backward link (#t, #j) -> (#t-1, #i). */
505
- back[j] = i;
506
- }
507
- }
508
- /* Add the state score on (t, j). */
509
- cur[j] = max_score + state[j];
510
- }
511
- }
512
-
513
- /* Find the node (#T, #i) that reaches EOS with the maximum score. */
514
- max_score = -FLOAT_MAX;
515
- prev = ALPHA_SCORE(ctx, T-1);
516
- for (i = 0;i < L;++i) {
517
- if (max_score < prev[i]) {
518
- max_score = prev[i];
519
- labels[T-1] = i; /* Tag the item #T. */
520
- }
521
- }
522
-
523
- /* Tag labels by tracing the backward links. */
524
- for (t = T-2;0 <= t;--t) {
525
- back = BACKWARD_EDGE_AT(ctx, t+1);
526
- labels[t] = back[labels[t+1]];
527
- }
528
-
529
- /* Return the maximum score (without the normalization factor subtracted). */
530
- return max_score;
531
- }
532
-
533
- static void check_values(FILE *fp, floatval_t cv, floatval_t tv)
534
- {
535
- if (fabs(cv - tv) < 1e-9) {
536
- fprintf(fp, "OK (%f)\n", cv);
537
- } else {
538
- fprintf(fp, "FAIL: %f (%f)\n", cv, tv);
539
- }
540
- }
541
-
542
- void crf1dc_debug_context(FILE *fp)
543
- {
544
- int y1, y2, y3;
545
- floatval_t norm = 0;
546
- const int L = 3;
547
- const int T = 3;
548
- crf1d_context_t *ctx = crf1dc_new(CTXF_MARGINALS, L, T);
549
- floatval_t *trans = NULL, *state = NULL;
550
- floatval_t scores[3][3][3];
551
- int labels[3];
552
-
553
- /* Initialize the state scores. */
554
- state = EXP_STATE_SCORE(ctx, 0);
555
- state[0] = .4; state[1] = .5; state[2] = .1;
556
- state = EXP_STATE_SCORE(ctx, 1);
557
- state[0] = .4; state[1] = .1; state[2] = .5;
558
- state = EXP_STATE_SCORE(ctx, 2);
559
- state[0] = .4; state[1] = .1; state[2] = .5;
560
-
561
- /* Initialize the transition scores. */
562
- trans = EXP_TRANS_SCORE(ctx, 0);
563
- trans[0] = .3; trans[1] = .1; trans[2] = .4;
564
- trans = EXP_TRANS_SCORE(ctx, 1);
565
- trans[0] = .6; trans[1] = .2; trans[2] = .1;
566
- trans = EXP_TRANS_SCORE(ctx, 2);
567
- trans[0] = .5; trans[1] = .2; trans[2] = .1;
568
-
569
- ctx->num_items = ctx->cap_items;
570
- crf1dc_alpha_score(ctx);
571
- crf1dc_beta_score(ctx);
572
-
573
- /* Compute the score of every label sequence. */
574
- for (y1 = 0;y1 < L;++y1) {
575
- floatval_t s1 = EXP_STATE_SCORE(ctx, 0)[y1];
576
- for (y2 = 0;y2 < L;++y2) {
577
- floatval_t s2 = s1;
578
- s2 *= EXP_TRANS_SCORE(ctx, y1)[y2];
579
- s2 *= EXP_STATE_SCORE(ctx, 1)[y2];
580
- for (y3 = 0;y3 < L;++y3) {
581
- floatval_t s3 = s2;
582
- s3 *= EXP_TRANS_SCORE(ctx, y2)[y3];
583
- s3 *= EXP_STATE_SCORE(ctx, 2)[y3];
584
- scores[y1][y2][y3] = s3;
585
- }
586
- }
587
- }
588
-
589
- /* Compute the partition factor. */
590
- norm = 0.;
591
- for (y1 = 0;y1 < L;++y1) {
592
- for (y2 = 0;y2 < L;++y2) {
593
- for (y3 = 0;y3 < L;++y3) {
594
- norm += scores[y1][y2][y3];
595
- }
596
- }
597
- }
598
-
599
- /* Check the partition factor. */
600
- fprintf(fp, "Check for the partition factor... ");
601
- check_values(fp, exp(ctx->log_norm), norm);
602
-
603
- /* Compute the sequence probabilities. */
604
- for (y1 = 0;y1 < L;++y1) {
605
- for (y2 = 0;y2 < L;++y2) {
606
- for (y3 = 0;y3 < L;++y3) {
607
- floatval_t logp;
608
-
609
- labels[0] = y1;
610
- labels[1] = y2;
611
- labels[2] = y3;
612
- logp = crf1dc_score(ctx, labels) - crf1dc_lognorm(ctx);
613
-
614
- fprintf(fp, "Check for the sequence %d-%d-%d... ", y1, y2, y3);
615
- check_values(fp, exp(logp), scores[y1][y2][y3] / norm);
616
- }
617
- }
618
- }
619
-
620
- /* Compute the marginal probability at t=0 */
621
- for (y1 = 0;y1 < L;++y1) {
622
- floatval_t a, b, c, s = 0.;
623
- for (y2 = 0;y2 < L;++y2) {
624
- for (y3 = 0;y3 < L;++y3) {
625
- s += scores[y1][y2][y3];
626
- }
627
- }
628
-
629
- a = ALPHA_SCORE(ctx, 0)[y1];
630
- b = BETA_SCORE(ctx, 0)[y1];
631
- c = 1. / ctx->scale_factor[0];
632
-
633
- fprintf(fp, "Check for the marginal probability (0,%d)... ", y1);
634
- check_values(fp, a * b * c, s / norm);
635
- }
636
-
637
- /* Compute the marginal probability at t=1 */
638
- for (y2 = 0;y2 < L;++y2) {
639
- floatval_t a, b, c, s = 0.;
640
- for (y1 = 0;y1 < L;++y1) {
641
- for (y3 = 0;y3 < L;++y3) {
642
- s += scores[y1][y2][y3];
643
- }
644
- }
645
-
646
- a = ALPHA_SCORE(ctx, 1)[y2];
647
- b = BETA_SCORE(ctx, 1)[y2];
648
- c = 1. / ctx->scale_factor[1];
649
-
650
- fprintf(fp, "Check for the marginal probability (1,%d)... ", y2);
651
- check_values(fp, a * b * c, s / norm);
652
- }
653
-
654
- /* Compute the marginal probability at t=2 */
655
- for (y3 = 0;y3 < L;++y3) {
656
- floatval_t a, b, c, s = 0.;
657
- for (y1 = 0;y1 < L;++y1) {
658
- for (y2 = 0;y2 < L;++y2) {
659
- s += scores[y1][y2][y3];
660
- }
661
- }
662
-
663
- a = ALPHA_SCORE(ctx, 2)[y3];
664
- b = BETA_SCORE(ctx, 2)[y3];
665
- c = 1. / ctx->scale_factor[2];
666
-
667
- fprintf(fp, "Check for the marginal probability (2,%d)... ", y3);
668
- check_values(fp, a * b * c, s / norm);
669
- }
670
-
671
- /* Compute the marginal probabilities of transitions. */
672
- for (y1 = 0;y1 < L;++y1) {
673
- for (y2 = 0;y2 < L;++y2) {
674
- floatval_t a, b, s, t, p = 0.;
675
- for (y3 = 0;y3 < L;++y3) {
676
- p += scores[y1][y2][y3];
677
- }
678
-
679
- a = ALPHA_SCORE(ctx, 0)[y1];
680
- b = BETA_SCORE(ctx, 1)[y2];
681
- s = EXP_STATE_SCORE(ctx, 1)[y2];
682
- t = EXP_TRANS_SCORE(ctx, y1)[y2];
683
-
684
- fprintf(fp, "Check for the marginal probability (0,%d)-(1,%d)... ", y1, y2);
685
- check_values(fp, a * t * s * b, p / norm);
686
- }
687
- }
688
-
689
- for (y2 = 0;y2 < L;++y2) {
690
- for (y3 = 0;y3 < L;++y3) {
691
- floatval_t a, b, s, t, p = 0.;
692
- for (y1 = 0;y1 < L;++y1) {
693
- p += scores[y1][y2][y3];
694
- }
695
-
696
- a = ALPHA_SCORE(ctx, 1)[y2];
697
- b = BETA_SCORE(ctx, 2)[y3];
698
- s = EXP_STATE_SCORE(ctx, 2)[y3];
699
- t = EXP_TRANS_SCORE(ctx, y2)[y3];
700
-
701
- fprintf(fp, "Check for the marginal probability (1,%d)-(2,%d)... ", y2, y3);
702
- check_values(fp, a * t * s * b, p / norm);
703
- }
704
- }
705
- }