chaine 4.0.0b2__cp314-cp314-musllinux_1_2_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. chaine/__init__.py +2 -0
  2. chaine/_core/crf.cpp +19496 -0
  3. chaine/_core/crf.cpython-314-x86_64-linux-musl.so +0 -0
  4. chaine/_core/crfsuite/include/crfsuite.h +1077 -0
  5. chaine/_core/crfsuite/include/crfsuite_api.hpp +406 -0
  6. chaine/_core/crfsuite/lib/cqdb/src/cqdb.c +639 -0
  7. chaine/_core/crfsuite/lib/cqdb/src/lookup3.c +1271 -0
  8. chaine/_core/crfsuite/lib/crf/src/crf1d_context.c +788 -0
  9. chaine/_core/crfsuite/lib/crf/src/crf1d_encode.c +1020 -0
  10. chaine/_core/crfsuite/lib/crf/src/crf1d_feature.c +382 -0
  11. chaine/_core/crfsuite/lib/crf/src/crf1d_model.c +1085 -0
  12. chaine/_core/crfsuite/lib/crf/src/crf1d_tag.c +582 -0
  13. chaine/_core/crfsuite/lib/crf/src/crfsuite.c +500 -0
  14. chaine/_core/crfsuite/lib/crf/src/crfsuite_train.c +302 -0
  15. chaine/_core/crfsuite/lib/crf/src/dataset.c +115 -0
  16. chaine/_core/crfsuite/lib/crf/src/dictionary.c +127 -0
  17. chaine/_core/crfsuite/lib/crf/src/holdout.c +83 -0
  18. chaine/_core/crfsuite/lib/crf/src/json.c +1497 -0
  19. chaine/_core/crfsuite/lib/crf/src/logging.c +85 -0
  20. chaine/_core/crfsuite/lib/crf/src/params.c +370 -0
  21. chaine/_core/crfsuite/lib/crf/src/quark.c +180 -0
  22. chaine/_core/crfsuite/lib/crf/src/rumavl.c +1178 -0
  23. chaine/_core/crfsuite/lib/crf/src/train_arow.c +409 -0
  24. chaine/_core/crfsuite/lib/crf/src/train_averaged_perceptron.c +237 -0
  25. chaine/_core/crfsuite/lib/crf/src/train_l2sgd.c +491 -0
  26. chaine/_core/crfsuite/lib/crf/src/train_lbfgs.c +323 -0
  27. chaine/_core/crfsuite/lib/crf/src/train_passive_aggressive.c +442 -0
  28. chaine/_core/crfsuite/swig/crfsuite.cpp +1 -0
  29. chaine/_core/liblbfgs/lib/lbfgs.c +1531 -0
  30. chaine/_core/tagger_wrapper.hpp +58 -0
  31. chaine/_core/trainer_wrapper.cpp +32 -0
  32. chaine/_core/trainer_wrapper.hpp +26 -0
  33. chaine/crf.py +505 -0
  34. chaine/logging.py +214 -0
  35. chaine/optimization/__init__.py +10 -0
  36. chaine/optimization/metrics.py +129 -0
  37. chaine/optimization/spaces.py +394 -0
  38. chaine/optimization/trial.py +103 -0
  39. chaine/optimization/utils.py +119 -0
  40. chaine/training.py +184 -0
  41. chaine/typing.py +18 -0
  42. chaine/validation.py +43 -0
  43. chaine-4.0.0b2.dist-info/METADATA +343 -0
  44. chaine-4.0.0b2.dist-info/RECORD +50 -0
  45. chaine-4.0.0b2.dist-info/WHEEL +5 -0
  46. chaine-4.0.0b2.dist-info/licenses/LICENSE +22 -0
  47. chaine-4.0.0b2.dist-info/sboms/auditwheel.cdx.json +1 -0
  48. chaine-4.0.0b2.dist-info/top_level.txt +1 -0
  49. chaine.libs/libgcc_s-0cd532bd.so.1 +0 -0
  50. chaine.libs/libstdc++-5d72f927.so.6.0.33 +0 -0
chaine/_core/crfsuite/lib/crf/src/train_l2sgd.c
@@ -0,0 +1,491 @@
+ /*
+ * Online training with L2-regularized Stochastic Gradient Descent (SGD).
+ *
+ * Copyright (c) 2007-2010, Naoaki Okazaki
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the names of the authors nor the names of its contributors
+ *       may be used to endorse or promote products derived from this
+ *       software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ /* $Id$ */
+
+ /*
+ SGD for L2-regularized MAP estimation.
+
+ The iterative algorithm is inspired by Pegasos:
+
+     Shai Shalev-Shwartz, Yoram Singer, and Nathan Srebro.
+     Pegasos: Primal Estimated sub-GrAdient SOlver for SVM.
+     In Proc. of ICML 2007, pp 807-814, 2007.
+
+ The calibration strategy is inspired by the implementation of sgd:
+     http://leon.bottou.org/projects/sgd
+ written by Léon Bottou.
+
+ The objective function to minimize is:
+
+     f(w) = (lambda/2) * ||w||^2 - (1/N) * \sum_i^N log P^i(y|x)
+     lambda = 2 * C / N
+
+ The original version of the Pegasos algorithm is:
+
+ 0) Initialization
+     t = t0
+     k = [the batch size]
+ 1) Computing the learning rate (eta).
+     eta = 1 / (lambda * t)
+ 2) Updating feature weights.
+     w = (1 - eta * lambda) w + (eta / k) \sum_i (oexp - mexp)
+ 3) Projecting feature weights within an L2-ball.
+     w = min{1, (1/sqrt(lambda))/||w||} * w
+ 4) Goto 1 until convergence.
+
+ This implementation omits step 3) because it makes the source code
+ tricky (in order to maintain the L2-norm of the feature weights at any
+ time) and because the projection step does not have a strong impact on
+ the quality of the solution.
+
+ A naive implementation requires O(K) computations for step 2,
+ where K is the total number of features. This code implements the procedure
+ in an efficient way:
+
+ 0) Initialization
+     decay = 1
+ 1) Computing various factors
+     eta = 1 / (lambda * t)
+     decay *= (1 - eta * lambda)
+     gain = (eta / k) / decay
+ 2) Updating feature weights
+     Updating feature weights from observation expectation:
+         delta = gain * (1.0) * f(x,y)
+         w += delta
+     Updating feature weights from model expectation:
+         delta = gain * (-P(y|x)) * f(x,y)
+         w += delta
+ 3) Goto 1 until convergence.
+ */
+
+ #ifdef HAVE_CONFIG_H
+ #include <config.h>
+ #endif /*HAVE_CONFIG_H*/
+
+ #include <os.h>
+
+ #include <float.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <time.h>
+ #include <math.h>
+
+ #include <crfsuite.h>
+ #include "crfsuite_internal.h"
+
+ #include "logging.h"
+ #include "params.h"
+ #include "crf1d.h"
+ #include "vecmath.h"
+
+ #define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+ typedef struct
+ {
+     floatval_t c2;
+     floatval_t lambda;
+     floatval_t t0;
+     int max_iterations;
+     int period;
+     floatval_t delta;
+     floatval_t calibration_eta;
+     floatval_t calibration_rate;
+     int calibration_samples;
+     int calibration_candidates;
+     int calibration_max_trials;
+ } training_option_t;
+
+ static int l2sgd(
+     encoder_t *gm,
+     dataset_t *trainset,
+     dataset_t *testset,
+     floatval_t *w,
+     logging_t *lg,
+     const int N,
+     const floatval_t t0,
+     const floatval_t lambda,
+     const int num_epochs,
+     int calibration,
+     int period,
+     const floatval_t epsilon,
+     floatval_t *ptr_loss)
+ {
+     int i, epoch, ret = 0;
+     floatval_t t = 0;
+     floatval_t loss = 0, sum_loss = 0;
+     floatval_t best_sum_loss = DBL_MAX;
+     floatval_t eta, gain, decay = 1.;
+     floatval_t improvement = 0.;
+     floatval_t norm2 = 0.;
+     floatval_t *pf = NULL;
+     floatval_t *best_w = NULL;
+     clock_t clk_prev, clk_begin = clock();
+     const int K = gm->num_features;
+
+     if (!calibration)
+     {
+         pf = (floatval_t *)malloc(sizeof(floatval_t) * period);
+         best_w = (floatval_t *)calloc(K, sizeof(floatval_t));
+         if (pf == NULL || best_w == NULL)
+         {
+             ret = CRFSUITEERR_OUTOFMEMORY;
+             goto error_exit;
+         }
+     }
+
+     /* Initialize the feature weights. */
+     vecset(w, 0, K);
+
+     /* Loop for epochs. */
+     for (epoch = 1; epoch <= num_epochs; ++epoch)
+     {
+         clk_prev = clock();
+
+         if (!calibration)
+         {
+             /* Shuffle the training instances. */
+             dataset_shuffle(trainset);
+         }
+
+         /* Loop for instances. */
+         sum_loss = 0.;
+         for (i = 0; i < N; ++i)
+         {
+             const crfsuite_instance_t *inst = dataset_get(trainset, i);
+
+             /* Update various factors. */
+             eta = 1 / (lambda * (t0 + t));
+             decay *= (1.0 - eta * lambda);
+             gain = eta / decay;
+
+             /* Compute the loss and gradients for the instance. */
+             gm->set_weights(gm, w, decay);
+             gm->set_instance(gm, inst);
+             gm->objective_and_gradients(gm, &loss, w, gain, inst->weight);
+
+             sum_loss += loss;
+             ++t;
+         }
+
+         /* Terminate when the loss is abnormal (NaN, -Inf, +Inf). */
+         if (!isfinite(loss))
+         {
+             logging(lg, "Loss is abnormal");
+             ret = CRFSUITEERR_OVERFLOW;
+             sum_loss = loss;
+             goto error_exit;
+         }
+
+         /* Scale the feature weights. */
+         vecscale(w, decay, K);
+         decay = 1.;
+
+         /* Include the L2 norm of feature weights to the objective. */
+         /* The factor N is necessary because lambda = 2 * C / N. */
+         norm2 = vecdot(w, w, K);
+         sum_loss += 0.5 * lambda * norm2 * N;
+
+         /* One epoch finished. */
+         if (!calibration)
+         {
+             /* Check if the current epoch is the best. */
+             if (sum_loss < best_sum_loss)
+             {
+                 /* Store the feature weights to best_w. */
+                 best_sum_loss = sum_loss;
+                 veccopy(best_w, w, K);
+             }
+
+             /* We don't test the stopping criterion until period < epoch. */
+             if (period < epoch)
+             {
+                 improvement = (pf[(epoch - 1) % period] - sum_loss) / sum_loss;
+             }
+             else
+             {
+                 improvement = epsilon;
+             }
+
+             /* Store the current value of the objective function. */
+             pf[(epoch - 1) % period] = sum_loss;
+
+             logging(lg, "Epoch %d, learning rate: %f, training loss: %f", epoch, eta, sum_loss);
+
+             /* Check for the stopping criterion. */
+             if (improvement < epsilon)
+             {
+                 ret = 0;
+                 break;
+             }
+         }
+     }
+
+     /* Output the optimization result. */
+     if (!calibration)
+     {
+         if (ret == 0)
+         {
+             if (epoch < num_epochs)
+             {
+                 logging(lg, "Loss has converged, terminating training");
+             }
+             else
+             {
+                 logging(lg, "Reached maximum number of iterations, terminating training");
+             }
+         }
+     }
+
+     /* Restore the best weights. */
+     if (best_w != NULL)
+     {
+         sum_loss = best_sum_loss;
+         veccopy(w, best_w, K);
+     }
+
+ error_exit:
+     free(best_w);
+     free(pf);
+     if (ptr_loss != NULL)
+     {
+         *ptr_loss = sum_loss;
+     }
+     return ret;
+ }
+
+ static floatval_t
+ l2sgd_calibration(
+     encoder_t *gm,
+     dataset_t *ds,
+     floatval_t *w,
+     logging_t *lg,
+     const training_option_t *opt)
+ {
+     int i;
+     int dec = 0, ok, trials = 1;
+     int num = opt->calibration_candidates;
+     clock_t clk_begin = clock();
+     floatval_t loss = 0.;
+     floatval_t init_loss = 0.;
+     floatval_t best_loss = DBL_MAX;
+     floatval_t eta = opt->calibration_eta;
+     floatval_t best_eta = opt->calibration_eta;
+     const int N = ds->num_instances;
+     const int S = MIN(N, opt->calibration_samples);
+     const int K = gm->num_features;
+     const floatval_t init_eta = opt->calibration_eta;
+     const floatval_t rate = opt->calibration_rate;
+     const floatval_t lambda = opt->lambda;
+
+     logging(lg, "Calibrating learning rate");
+
+     /* Initialize a permutation that shuffles the instances. */
+     dataset_shuffle(ds);
+
+     /* Initialize feature weights as zero. */
+     vecset(w, 0, K);
+
+     /* Compute the initial loss. */
+     gm->set_weights(gm, w, 1.);
+     init_loss = 0;
+     for (i = 0; i < S; ++i)
+     {
+         floatval_t score;
+         const crfsuite_instance_t *inst = dataset_get(ds, i);
+         gm->set_instance(gm, inst);
+         gm->score(gm, inst->labels, &score);
+         init_loss -= score;
+         gm->partition_factor(gm, &score);
+         init_loss += score;
+     }
+     init_loss += 0.5 * lambda * vecdot(w, w, K) * N;
+     logging(lg, "Initial training loss: %f", init_loss);
+
+     while (num > 0 || !dec)
+     {
+         /* Perform SGD for one epoch. */
+         l2sgd(
+             gm,
+             ds,
+             NULL,
+             w,
+             lg,
+             S, 1.0 / (lambda * eta), lambda, 1, 1, 1, 0., &loss);
+
+         /* Make sure that the learning rate decreases the log-likelihood. */
+         ok = isfinite(loss) && (loss < init_loss);
+
+         logging(lg, "Trial %d, learning rate %f, training loss: %f", trials, eta, loss);
+
+         if (ok)
+         {
+             --num;
+         }
+
+         if (isfinite(loss) && loss < best_loss)
+         {
+             best_loss = loss;
+             best_eta = eta;
+         }
+
+         if (!dec)
+         {
+             if (ok && 0 < num)
+             {
+                 eta *= rate;
+             }
+             else
+             {
+                 dec = 1;
+                 num = opt->calibration_candidates;
+                 eta = init_eta / rate;
+             }
+         }
+         else
+         {
+             eta /= rate;
+         }
+
+         ++trials;
+         if (opt->calibration_max_trials <= trials)
+         {
+             break;
+         }
+     }
+
+     eta = best_eta;
+     logging(lg, "Best learning rate: %f", eta);
+
+     return 1.0 / (lambda * eta);
+ }
+
+ int exchange_options(crfsuite_params_t *params, training_option_t *opt, int mode)
+ {
+     BEGIN_PARAM_MAP(params, mode)
+     DDX_PARAM_FLOAT(
+         "c2", opt->c2, 1.,
+         "Coefficient for L2 regularization.")
+     DDX_PARAM_INT(
+         "max_iterations", opt->max_iterations, 1000,
+         "The maximum number of iterations (epochs) for SGD optimization.")
+     DDX_PARAM_INT(
+         "period", opt->period, 10,
+         "The duration of iterations to test the stopping criterion.")
+     DDX_PARAM_FLOAT(
+         "delta", opt->delta, 1e-6,
+         "The threshold for the stopping criterion; an optimization process stops when\n"
+         "the improvement of the log likelihood over the last ${period} iterations is no\n"
+         "greater than this threshold.")
+     DDX_PARAM_FLOAT(
+         "calibration.eta", opt->calibration_eta, 0.1,
+         "The initial value of learning rate (eta) used for calibration.")
+     DDX_PARAM_FLOAT(
+         "calibration.rate", opt->calibration_rate, 2.,
+         "The rate of increase/decrease of learning rate for calibration.")
+     DDX_PARAM_INT(
+         "calibration.samples", opt->calibration_samples, 1000,
+         "The number of instances used for calibration.")
+     DDX_PARAM_INT(
+         "calibration.candidates", opt->calibration_candidates, 10,
+         "The number of candidates of learning rate.")
+     DDX_PARAM_INT(
+         "calibration.max_trials", opt->calibration_max_trials, 20,
+         "The maximum number of trials of learning rates for calibration.")
+     END_PARAM_MAP()
+
+     return 0;
+ }
+
+ void crfsuite_train_l2sgd_init(crfsuite_params_t *params)
+ {
+     exchange_options(params, NULL, 0);
+ }
+
+ int crfsuite_train_l2sgd(
+     encoder_t *gm,
+     dataset_t *trainset,
+     dataset_t *testset,
+     crfsuite_params_t *params,
+     logging_t *lg,
+     floatval_t **ptr_w)
+ {
+     int ret = 0;
+     floatval_t *w = NULL;
+     clock_t clk_begin;
+     floatval_t loss = 0;
+     const int N = trainset->num_instances;
+     const int K = gm->num_features;
+     const int T = gm->cap_items;
+     training_option_t opt;
+
+     /* Obtain parameter values. */
+     exchange_options(params, &opt, -1);
+
+     /* Allocate arrays. */
+     w = (floatval_t *)calloc(sizeof(floatval_t), K);
+     if (w == NULL)
+     {
+         ret = CRFSUITEERR_OUTOFMEMORY;
+         goto error_exit;
+     }
+
+     opt.lambda = 2. * opt.c2 / N;
+
+     logging(lg, "Start training with SGD");
+     clk_begin = clock();
+
+     /* Calibrate the training rate (eta). */
+     opt.t0 = l2sgd_calibration(gm, trainset, w, lg, &opt);
+
+     /* Perform stochastic gradient descent. */
+     ret = l2sgd(
+         gm,
+         trainset,
+         testset,
+         w,
+         lg,
+         N,
+         opt.t0,
+         opt.lambda,
+         opt.max_iterations,
+         0,
+         opt.period,
+         opt.delta,
+         &loss);
+
+     *ptr_w = w;
+     return ret;
+
+ error_exit:
+     free(w);
+     return ret;
+ }
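
The header comment of train_l2sgd.c above describes a lazy-scaling trick: instead of shrinking every weight by (1 - eta * lambda) at each step, the shrinkage is accumulated in a single "decay" scalar and gradient updates are rescaled by gain = eta / decay, so a step only has to touch the features that fire in the current instance. The following standalone sketch, which is not part of the chaine wheel, shows that bookkeeping on a toy L2-regularized logistic-regression problem; all data, constants, and names in it are illustrative assumptions, with logistic loss standing in for the CRF loss.

/* Standalone sketch (illustration only, not shipped in the wheel):
 * the decay/gain bookkeeping from train_l2sgd.c applied to a toy
 * L2-regularized logistic-regression problem. Compile with: cc sketch.c -lm */
#include <stdio.h>
#include <math.h>

#define K 3 /* number of features  */
#define N 6 /* number of instances */

int main(void)
{
    /* Toy instances x[i] with labels y[i] in {-1, +1}. */
    double x[N][K] = {
        {1.0, 0.2, 0.0}, {0.9, 0.1, 0.1}, {0.8, 0.3, 0.0},
        {0.1, 0.9, 1.0}, {0.0, 1.0, 0.8}, {0.2, 0.8, 0.9},
    };
    int y[N] = {+1, +1, +1, -1, -1, -1};

    double w[K] = {0.0, 0.0, 0.0}; /* stored weights; true weights = decay * w */
    double decay = 1.0;            /* accumulated (1 - eta * lambda) shrinkage  */
    double lambda = 0.01;          /* plays the role of 2 * C / N               */
    double t0 = 10.0, t = 0.0;
    int epoch, i, k;

    for (epoch = 1; epoch <= 100; ++epoch)
    {
        for (i = 0; i < N; ++i)
        {
            /* Same factors as in l2sgd(): eta, decay, gain. */
            double eta = 1.0 / (lambda * (t0 + t));
            double gain, margin, g;
            decay *= (1.0 - eta * lambda);
            gain = eta / decay;

            /* Gradient of log(1 + exp(-y * W.x)) w.r.t. the true weights W = decay * w. */
            margin = 0.0;
            for (k = 0; k < K; ++k)
                margin += decay * w[k] * x[i][k];
            g = -y[i] / (1.0 + exp(y[i] * margin));

            /* Lazy update: only the gradient term is applied per feature;
               the uniform L2 shrinkage is carried entirely by `decay`. */
            for (k = 0; k < K; ++k)
                w[k] -= gain * g * x[i][k];
            t += 1.0;
        }
        /* Fold the accumulated decay back into w once per epoch,
           mirroring the vecscale(w, decay, K) call in l2sgd(). */
        for (k = 0; k < K; ++k)
            w[k] *= decay;
        decay = 1.0;
    }

    printf("learned weights: %f %f %f\n", w[0], w[1], w[2]);
    return 0;
}

The point of deferring the shrinkage is that a sparse instance costs time proportional to its active features rather than to the total number of features K; l2sgd() then folds decay back into the weight vector once per epoch so the scalar stays well conditioned.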