RubyGems - wapiti - Versions diffs - 0.0.5 → 0.1.0 - Mend

wapiti 0.0.5 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53)

checksums.yaml +7 -0
data/.simplecov +3 -0
data/Gemfile +25 -2
data/HISTORY.md +5 -1
data/LICENSE +14 -13
data/README.md +9 -16
data/Rakefile +38 -8
data/ext/wapiti/bcd.c +126 -124
data/ext/wapiti/decoder.c +203 -124
data/ext/wapiti/decoder.h +6 -4
data/ext/wapiti/extconf.rb +2 -2
data/ext/wapiti/gradient.c +491 -320
data/ext/wapiti/gradient.h +52 -34
data/ext/wapiti/lbfgs.c +74 -33
data/ext/wapiti/model.c +47 -37
data/ext/wapiti/model.h +22 -20
data/ext/wapiti/native.c +850 -839
data/ext/wapiti/native.h +1 -1
data/ext/wapiti/options.c +52 -20
data/ext/wapiti/options.h +37 -30
data/ext/wapiti/pattern.c +35 -33
data/ext/wapiti/pattern.h +12 -11
data/ext/wapiti/progress.c +14 -13
data/ext/wapiti/progress.h +3 -2
data/ext/wapiti/quark.c +14 -16
data/ext/wapiti/quark.h +6 -5
data/ext/wapiti/reader.c +83 -69
data/ext/wapiti/reader.h +11 -9
data/ext/wapiti/rprop.c +84 -43
data/ext/wapiti/sequence.h +18 -16
data/ext/wapiti/sgdl1.c +45 -43
data/ext/wapiti/thread.c +19 -17
data/ext/wapiti/thread.h +5 -4
data/ext/wapiti/tools.c +7 -7
data/ext/wapiti/tools.h +3 -4
data/ext/wapiti/trainers.h +1 -1
data/ext/wapiti/vmath.c +40 -38
data/ext/wapiti/vmath.h +12 -11
data/ext/wapiti/wapiti.c +159 -37
data/ext/wapiti/wapiti.h +18 -4
data/lib/wapiti.rb +15 -15
data/lib/wapiti/errors.rb +15 -15
data/lib/wapiti/model.rb +92 -84
data/lib/wapiti/options.rb +123 -124
data/lib/wapiti/utility.rb +14 -14
data/lib/wapiti/version.rb +2 -2
data/spec/spec_helper.rb +29 -9
data/spec/wapiti/model_spec.rb +230 -194
data/spec/wapiti/native_spec.rb +7 -8
data/spec/wapiti/options_spec.rb +184 -174
data/wapiti.gemspec +22 -8
metadata +38 -42
data/.gitignore +0 -5

data/ext/wapiti/thread.c CHANGED

@@ -1,7 +1,7 @@
 /*
  *      Wapiti - A linear-chain CRF tool
  *
- * Copyright (c) 2009-2011  CNRS
+ * Copyright (c) 2009-2013  CNRS
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -25,6 +25,8 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
+#include <stdint.h>
 #include "model.h"
 #include "tools.h"
 #include "thread.h"
@@ -51,10 +53,10 @@
  ******************************************************************************/
 #ifdef MTH_ANSI
 struct job_s {
-	size_t size;
+	uint32_t size;
 };
-bool mth_getjob(job_t *job, size_t *cnt, size_t *pos) {
+bool mth_getjob(job_t *job, uint32_t *cnt, uint32_t *pos) {
 	if (job->size == 0)
 		return false;
 	*cnt = job->size;
@@ -63,7 +65,7 @@ bool mth_getjob(job_t *job, size_t *cnt, size_t *pos) {
 	return true;
 }
-void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch) {
+void mth_spawn(func_t *f, uint32_t W, void *ud[W], uint32_t size, uint32_t batch) {
 	unused(batch);
 	if (size == 0) {
 		f(NULL, 0, 1, ud[0]);
@@ -78,19 +80,19 @@ void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch) {
 #include <pthread.h>
 struct job_s {
-	size_t size;
-	size_t send;
-	size_t batch;
+	uint32_t size;
+	uint32_t send;
+	uint32_t batch;
 	pthread_mutex_t lock;
 };
 typedef struct mth_s mth_t;
 struct mth_s {
-	job_t  *job;
-	int     id;
-	int     cnt;
-	func_t *f;
-	void   *ud;
+	job_t    *job;
+	uint32_t  id;
+	uint32_t  cnt;
+	func_t   *f;
+	void     *ud;
 };
 /* mth_getjob:
@@ -100,7 +102,7 @@ struct mth_s {
  *   This function use a lock to ensure thread safety as it will be called by
  *   the multiple workers threads.
  */
-bool mth_getjob(job_t *job, size_t *cnt, size_t *pos) {
+bool mth_getjob(job_t *job, uint32_t *cnt, uint32_t *pos) {
 	if (job == NULL)
 		return false;
 	if (job->send == job->size)
@@ -124,7 +126,7 @@ static void *mth_stub(void *ud) {
  *   will get a unique identifier between 0 and W-1 and a user data from the
  *   'ud' array.
  */
-void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch) {
+void mth_spawn(func_t *f, uint32_t W, void *ud[W], uint32_t size, uint32_t batch) {
 	// First prepare the jobs scheduler
 	job_t job, *pjob = NULL;
 	if (size != 0) {
@@ -144,7 +146,7 @@ void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch) {
 	// We prepare the parameters structures that will be send to the threads
 	// with informations for calling the user function.
 	mth_t p[W];
-	for (int w = 0; w < W; w++) {
+	for (uint32_t w = 0; w < W; w++) {
 		p[w].job = pjob;
 		p[w].id  = w;
 		p[w].cnt = W;
@@ -159,10 +161,10 @@ void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch) {
 	pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
 	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
 	pthread_t th[W];
-	for (int w = 0; w < W; w++)
+	for (uint32_t w = 0; w < W; w++)
 		if (pthread_create(&th[w], &attr, &mth_stub, &p[w]) != 0)
 			fatal("failed to create thread");
-	for (int w = 0; w < W; w++)
+	for (uint32_t w = 0; w < W; w++)
 		if (pthread_join(th[w], NULL) != 0)
 			fatal("failed to join thread");
 	pthread_attr_destroy(&attr);

data/ext/wapiti/thread.h CHANGED

@@ -1,7 +1,7 @@
 /*
  *      Wapiti - A linear-chain CRF tool
  *
- * Copyright (c) 2009-2011  CNRS
+ * Copyright (c) 2009-2013  CNRS
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -28,15 +28,16 @@
 #ifndef thread_h
 #define thread_h
+#include <stdint.h>
 #include <pthread.h>
 #include "model.h"
 typedef struct job_s job_t;
-typedef void (func_t)(job_t *job, int id, int cnt, void *ud);
+typedef void (func_t)(job_t *job, uint32_t id, uint32_t cnt, void *ud);
-bool mth_getjob(job_t *job, size_t *cnt, size_t *pos);
-void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch);
+bool mth_getjob(job_t *job, uint32_t *cnt, uint32_t *pos);
+void mth_spawn(func_t *f, uint32_t W, void *ud[W], uint32_t size, uint32_t batch);
 #endif

data/ext/wapiti/tools.c CHANGED

@@ -69,7 +69,7 @@ void fatal(const char *msg, ...) {
 	rb_raise(cNativeError, msg, args);
-	va_end(args);
+	va_end(args);
 }
 /* pfatal:
@@ -80,7 +80,7 @@ void fatal(const char *msg, ...) {
  *   calling pfatal.
  */
 void pfatal(const char *msg, ...) {
-	const char *err = strerror(errno);
+	// const char *err = strerror(errno);
 	va_list args;
 	va_start(args, msg);
@@ -123,7 +123,7 @@ void info(const char *msg, ...) {
 	va_end(args);
 }
-/* wapiti_xmalloc:
+/*  wapiti_xmalloc:
  *   A simple wrapper around malloc who violently fail if memory cannot be
  *   allocated, so it will never return NULL.
  */
@@ -134,8 +134,8 @@ void *wapiti_xmalloc(size_t size) {
 	return ptr;
 }
-/* wapiti_xrealloc:
- *   As wapiti_xmalloc, this is a simple wrapper around realloc who fail on memory
+/*  wapiti_xrealloc:
+ *   As  wapiti_xmalloc, this is a simple wrapper around realloc who fail on memory
  *   error and so never return NULL.
  */
 void *wapiti_xrealloc(void *ptr, size_t size) {
@@ -151,7 +151,7 @@ void *wapiti_xrealloc(void *ptr, size_t size) {
  */
 char *xstrdup(const char *str) {
 	const int len = strlen(str) + 1;
-	char *res = wapiti_xmalloc(sizeof(char) * len);
+	char *res =  wapiti_xmalloc(sizeof(char) * len);
 	memcpy(res, str, len);
 	return res;
 }
@@ -175,7 +175,7 @@ char *ns_readstr(FILE *file) {
 	int len;
 	if (fscanf(file, "%d:", &len) != 1)
 		pfatal("cannot read from file");
-	char *buf = wapiti_xmalloc(len + 1);
+	char *buf =  wapiti_xmalloc(len + 1);
 	if (fread(buf, len, 1, file) != 1)
 		pfatal("cannot read from file");
 	if (fgetc(file) != ',')

data/ext/wapiti/tools.h CHANGED

@@ -1,7 +1,7 @@
 /*
  *      Wapiti - A linear-chain CRF tool
  *
- * Copyright (c) 2009-2011  CNRS
+ * Copyright (c) 2009-2013  CNRS
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -29,12 +29,11 @@
 #include <stdarg.h>
 #include <stddef.h>
+#include <stdint.h>
 #include <stdio.h>
-#include <ruby.h>
 #define unused(v) ((void)(v))
-#define none ((size_t)-1)
+#define none ((uint64_t)-1)
 #define min(a, b) ((a) < (b) ? (a) : (b))
 #define max(a, b) ((a) < (b) ? (b) : (a))

data/ext/wapiti/trainers.h CHANGED

@@ -1,7 +1,7 @@
 /*
  *      Wapiti - A linear-chain CRF tool
  *
- * Copyright (c) 2009-2011  CNRS
+ * Copyright (c) 2009-2013  CNRS
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without

data/ext/wapiti/vmath.c CHANGED

@@ -1,7 +1,7 @@
 /*
  *      Wapiti - A linear-chain CRF tool
  *
- * Copyright (c) 2009-2011  CNRS
+ * Copyright (c) 2009-2013  CNRS
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -28,6 +28,7 @@
 #include <assert.h>
 #include <math.h>
 #include <stddef.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include "wapiti.h"
@@ -54,7 +55,7 @@ const char *xvm_mode(void) {
  *   ensure that the vector size contains the need padding. You must only use
  *   vector allocated by this function if you use the optimized code paths.
  */
-double *xvm_new(size_t N) {
+double *xvm_new(uint64_t N) {
 #if defined(__SSE2__) && !defined(XVM_ANSI)
 	if (N % 4 != 0)
 		N += 4 - N % 4;
@@ -82,12 +83,12 @@ void xvm_free(double x[]) {
  *   Return the component-wise negation of the given vector:
  *       r = -x
  */
-void xvm_neg(double r[], const double x[], size_t N) {
+void xvm_neg(double r[], const double x[], uint64_t N) {
 #if defined(__SSE2__) && !defined(XVM_ANSI)
-	assert(r != NULL && ((size_t)r % 16) == 0);
-	assert(x != NULL && ((size_t)x % 16) == 0);
+	assert(r != NULL && ((uintptr_t)r % 16) == 0);
+	assert(x != NULL && ((uintptr_t)x % 16) == 0);
 	const __m128d vz = _mm_setzero_pd();
-	for (size_t n = 0; n < N; n += 4) {
+	for (uint64_t n = 0; n < N; n += 4) {
 		const __m128d x0 = _mm_load_pd(x + n    );
 		const __m128d x1 = _mm_load_pd(x + n + 2);
 		const __m128d r0 = _mm_sub_pd(vz, x0);
@@ -96,7 +97,7 @@ void xvm_neg(double r[], const double x[], size_t N) {
 		_mm_store_pd(r + n + 2, r1);
 	}
 #else
-	for (size_t n = 0; n < N; n++)
+	for (uint64_t n = 0; n < N; n++)
 		r[n] = -x[n];
 #endif
 }
@@ -105,12 +106,12 @@ void xvm_neg(double r[], const double x[], size_t N) {
  *   Return the difference of the two given vector:
  *       r = x .- y
  */
-void xvm_sub(double r[], const double x[], const double y[], size_t N) {
+void xvm_sub(double r[], const double x[], const double y[], uint64_t N) {
 #if defined(__SSE2__) && !defined(XVM_ANSI)
-	assert(r != NULL && ((size_t)r % 16) == 0);
-	assert(x != NULL && ((size_t)x % 16) == 0);
-	assert(y != NULL && ((size_t)y % 16) == 0);
-	for (size_t n = 0; n < N; n += 4) {
+	assert(r != NULL && ((uintptr_t)r % 16) == 0);
+	assert(x != NULL && ((uintptr_t)x % 16) == 0);
+	assert(y != NULL && ((uintptr_t)y % 16) == 0);
+	for (uint64_t n = 0; n < N; n += 4) {
 		const __m128d x0 = _mm_load_pd(x + n    );
 		const __m128d x1 = _mm_load_pd(x + n + 2);
 		const __m128d y0 = _mm_load_pd(y + n    );
@@ -121,7 +122,7 @@ void xvm_sub(double r[], const double x[], const double y[], size_t N) {
 		_mm_store_pd(r + n + 2, r1);
 	}
 #else
-	for (size_t n = 0; n < N; n++)
+	for (uint64_t n = 0; n < N; n++)
 		r[n] = x[n] - y[n];
 #endif
 }
@@ -130,8 +131,8 @@ void xvm_sub(double r[], const double x[], const double y[], size_t N) {
  *   Return the given vector scaled by a constant:
  *     r = a * x
  */
-void xvm_scale(double r[], const double x[], double a, size_t N) {
-	for (size_t n = 0; n < N; n++)
+void xvm_scale(double r[], const double x[], double a, uint64_t N) {
+	for (uint64_t n = 0; n < N; n++)
 		r[n] = x[n] * a;
 }
@@ -139,9 +140,9 @@ void xvm_scale(double r[], const double x[], double a, size_t N) {
  *   Store a normalized copy of the given vector in r and return the
  *   normalization factor.
  */
-double xvm_unit(double r[], const double x[], size_t N) {
+double xvm_unit(double r[], const double x[], uint64_t N) {
 	double sum = 0.0;
-	for (size_t n = 0; n < N; n++)
+	for (uint64_t n = 0; n < N; n++)
 		sum += x[n];
 	const double scale = 1.0 / sum;
 	xvm_scale(r, x, scale, N);
@@ -151,11 +152,11 @@ double xvm_unit(double r[], const double x[], size_t N) {
 /* xvm_norm:
  *   Return the euclidian norm of the given vector.
  */
-double xvm_norm(const double x[], size_t N) {
+double xvm_norm(const double x[], uint64_t N) {
 	double r = 0.0;
 #if defined(__SSE2__) && !defined(XVM_ANSI)
-	assert(x != NULL && ((size_t)x % 16) == 0);
-	size_t n, d = N % 4;
+	assert(x != NULL && ((uintptr_t)x % 16) == 0);
+	uint64_t n, d = N % 4;
 	__m128d s0 = _mm_setzero_pd();
 	__m128d s1 = _mm_setzero_pd();
 	for (n = 0; n < N - d; n += 4) {
@@ -173,7 +174,7 @@ double xvm_norm(const double x[], size_t N) {
 	for ( ; n < N; n++)
 		r += x[n] * x[n];
 #else
-	for (size_t n = 0; n < N; n++)
+	for (uint64_t n = 0; n < N; n++)
 		r += x[n] * x[n];
 #endif
 	return sqrt(r);
@@ -182,12 +183,12 @@ double xvm_norm(const double x[], size_t N) {
 /* xvm_dot:
  *   Return the dot product of the two given vectors.
  */
-double xvm_dot(const double x[], const double y[], size_t N) {
+double xvm_dot(const double x[], const double y[], uint64_t N) {
 	double r = 0.0;
 #if defined(__SSE2__) && !defined(XVM_ANSI)
-	assert(x != NULL && ((size_t)x % 16) == 0);
-	assert(y != NULL && ((size_t)y % 16) == 0);
-	size_t n, d = N % 4;
+	assert(x != NULL && ((uintptr_t)x % 16) == 0);
+	assert(y != NULL && ((uintptr_t)y % 16) == 0);
+	uint64_t n, d = N % 4;
 	__m128d s0 = _mm_setzero_pd();
 	__m128d s1 = _mm_setzero_pd();
 	for (n = 0; n < N - d; n += 4) {
@@ -207,7 +208,7 @@ double xvm_dot(const double x[], const double y[], size_t N) {
 	for ( ; n < N; n++)
 		r += x[n] * y[n];
 #else
-	for (size_t n = 0; n < N; n++)
+	for (uint64_t n = 0; n < N; n++)
 		r += x[n] * y[n];
 #endif
 	return r;
@@ -217,13 +218,14 @@ double xvm_dot(const double x[], const double y[], size_t N) {
  *   Return the sum of x scaled by a and y:
  *       r = a * x + y
  */
-void xvm_axpy(double r[], double a, const double x[], const double y[], size_t N) {
+void xvm_axpy(double r[], double a, const double x[], const double y[],
+		uint64_t N) {
 #if defined(__SSE2__) && !defined(XVM_ANSI)
-	assert(r != NULL && ((size_t)r % 16) == 0);
-	assert(x != NULL && ((size_t)x % 16) == 0);
-	assert(y != NULL && ((size_t)y % 16) == 0);
+	assert(r != NULL && ((uintptr_t)r % 16) == 0);
+	assert(x != NULL && ((uintptr_t)x % 16) == 0);
+	assert(y != NULL && ((uintptr_t)y % 16) == 0);
 	const __m128d va = _mm_set1_pd(a);
-	for (size_t n = 0; n < N; n += 4) {
+	for (uint64_t n = 0; n < N; n += 4) {
 		const __m128d x0 = _mm_load_pd(x + n    );
 		const __m128d x1 = _mm_load_pd(x + n + 2);
 		const __m128d y0 = _mm_load_pd(y + n    );
@@ -236,7 +238,7 @@ void xvm_axpy(double r[], double a, const double x[], const double y[], size_t N
 		_mm_store_pd(r + n + 2, r1);
 	}
 #else
-	for (size_t n = 0; n < N; n++)
+	for (uint64_t n = 0; n < N; n++)
 		r[n] = a * x[n] + y[n];
 #endif
 }
@@ -270,14 +272,14 @@ void xvm_axpy(double r[], double a, const double x[], const double y[], size_t N
  *     -inf or -oor  -->   return  0.0
  *     qNaN or sNaN  -->   return qNaN
  *
- *   This code is copyright 2004-2011 Thomas Lavergne and licenced under the
+ *   This code is copyright 2004-2013 Thomas Lavergne and licenced under the
  *   BSD licence like the remaining of Wapiti.
  */
-void xvm_expma(double r[], const double x[], double a, size_t N) {
+void xvm_expma(double r[], const double x[], double a, uint64_t N) {
 #if defined(__SSE2__) && !defined(XVM_ANSI)
   #define xvm_vconst(v) (_mm_castsi128_pd(_mm_set1_epi64x((v))))
-	assert(r != NULL && ((size_t)r % 16) == 0);
-	assert(x != NULL && ((size_t)x % 16) == 0);
+	assert(r != NULL && ((uintptr_t)r % 16) == 0);
+	assert(x != NULL && ((uintptr_t)x % 16) == 0);
 	const __m128i vl  = _mm_set1_epi64x(0x3ff0000000000000ULL);
 	const __m128d ehi = xvm_vconst(0x4086232bdd7abcd2ULL);
 	const __m128d elo = xvm_vconst(0xc086232bdd7abcd2ULL);
@@ -300,7 +302,7 @@ void xvm_expma(double r[], const double x[], double a, size_t N) {
 	const __m128d p10 = xvm_vconst(0x3e9299068168ac8fULL);
 	const __m128d p11 = xvm_vconst(0x3e5ac52350b60b19ULL);
 	const __m128d va  = _mm_set1_pd(a);
-	for (size_t n = 0; n < N; n += 4) {
+	for (uint64_t n = 0; n < N; n += 4) {
 		__m128d mn1, mn2, mi1, mi2;
 		__m128d t1,  t2,  d1,  d2;
 		__m128d v1,  v2,  w1,  w2;
@@ -365,7 +367,7 @@ void xvm_expma(double r[], const double x[], double a, size_t N) {
 		_mm_store_pd(r + n + 2, v2);
 	}
 #else
-	for (size_t n = 0; n < N; n++)
+	for (uint64_t n = 0; n < N; n++)
 		r[n] = exp(x[n]) - a;
 #endif
 }

data/ext/wapiti/vmath.h CHANGED

@@ -1,7 +1,7 @@
 /*
  *      Wapiti - A linear-chain CRF tool
  *
- * Copyright (c) 2009-2011  CNRS
+ * Copyright (c) 2009-2013  CNRS
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -28,24 +28,25 @@
 #ifndef vmath_h
 #define vmath_h
-#include <stddef.h>
+#include <stdint.h>
 const char *xvm_mode(void);
-double *xvm_new(size_t N);
+double *xvm_new(uint64_t N);
 void    xvm_free(double x[]);
-void xvm_neg(double r[], const double x[], size_t N);
-void xvm_sub(double r[], const double x[], const double y[], size_t N);
-void xvm_scale(double r[], const double x[], double a, size_t N);
-double xvm_unit(double r[], const double x[], size_t N);
+void xvm_neg(double r[], const double x[], uint64_t N);
+void xvm_sub(double r[], const double x[], const double y[], uint64_t N);
+void xvm_scale(double r[], const double x[], double a, uint64_t N);
+double xvm_unit(double r[], const double x[], uint64_t N);
-double xvm_norm(const double x[], size_t N);
-double xvm_dot(const double x[], const double y[], size_t N);
+double xvm_norm(const double x[], uint64_t N);
+double xvm_dot(const double x[], const double y[], uint64_t N);
-void xvm_axpy(double r[], double a, const double x[], const double y[], size_t N);
+void xvm_axpy(double r[], double a, const double x[], const double y[],
+		uint64_t N);
-void xvm_expma(double r[], const double x[], double a, size_t N);
+void xvm_expma(double r[], const double x[], double a, uint64_t N);
 #endif