ruby-esvidi 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 03766101e6f4fa0d77d72a2c30aa1f56170b5632
4
+ data.tar.gz: 24265d2eb3e6ec16464ec5927ce162c2b04a7a7f
5
+ SHA512:
6
+ metadata.gz: 3ed34234dc9d2ff57e2c7dcc5e5d1c8da7df7f94b5cbd8bfc1f77e3a186840c7c75cf18ae29870a5816bf7bae3ec59167488d2e6cab6cf8f89ccffaeba464cb6
7
+ data.tar.gz: fc0100467875e5da37526471adfaa8bc332ffdbc10f9c0b34b069f506ef7aa5662a342919743dbd4975c6c021df8827d0329483a4cd7d512ce797999c0e53d96
data/LICENSE ADDED
@@ -0,0 +1,2 @@
1
+ Public Domain
2
+
data/README.md ADDED
@@ -0,0 +1,38 @@
1
+ Forked from [willcannings/Ruby-SVD](https://github.com/willcannings/Ruby-SVD) to fix some bugs.
2
+
3
+ Ruby SVD
4
+ ========
5
+ Singular Value Decomposition for Ruby with no dependency on GSL or LAPACK.
6
+
7
+ About
8
+ -----
9
+ Ruby SVD provides an interface to the Numerical Recipes C implementation of an SVD matrix decomposer. It also includes an extension to the native Ruby Matrix class with a simple LSA
10
+ method (decomposes the matrix, transposes matrix V, diagonalises the S array into a matrix, then removes all but the two leading terms in S to compress the original matrix to two dimensions).
11
+
12
+ Sample Usage
13
+ ------------
14
+ ```ruby
15
+ require 'ruby-svd'
16
+
17
+ tdm = SVDMatrix.new(4, 2)
18
+ tdm.set_row(0, [1,0])
19
+ tdm.set_row(1, [1,0])
20
+ tdm.set_row(2, [0,1])
21
+ tdm.set_row(3, [0,1])
22
+
23
+ puts "== Term document matrix:"
24
+ p tdm
25
+
26
+ puts "\n== Decomposing matrix:"
27
+ lsa = LSA.new(tdm)
28
+ p lsa
29
+
30
+ puts "\n== Classifying new column vector: [1, 0.5, 0, 0.5]"
31
+ puts "Format is [column, similarity]"
32
+ ranks = lsa.classify_vector([1,0.5,0,0.5])
33
+ p ranks
34
+
35
+ sorted_ranks = ranks.sort_by(&:last).reverse
36
+ puts "\n== Vector most similar to column #{sorted_ranks.first[0]}"
37
+ p tdm.column(sorted_ranks.first[0])
38
+ ```
data/ext/extconf.rb ADDED
@@ -0,0 +1,3 @@
1
# extconf.rb -- generates the Makefile that builds the native `svd` extension.
require 'mkmf'

create_makefile 'svd'
data/ext/nrutil.h ADDED
@@ -0,0 +1,376 @@
1
+ /* nrutil.h */
2
+ #include <stdio.h>
3
+ #include <stddef.h>
4
+ #include <stdlib.h>
5
+ #define NR_END 1
6
+ #define FREE_ARG char*
7
+
8
+
9
+ #ifndef _NR_UTILS_H_
10
+ #define _NR_UTILS_H_
11
+
12
+ static float sqrarg;
13
+ #define SQR(a) ((sqrarg = (a)) == 0.0 ? 0.0 : sqrarg*sqrarg)
14
+
15
+ static double dsqrarg;
16
+ #define DSQR(a) ((dsqrarg = (a)) == 0.0 ? 0.0 : dsqrarg*dsqrarg)
17
+
18
+ static double dmaxarg1, dmaxarg2;
19
+ #define DMAX(a, b) (dmaxarg1 = (a), dmaxarg2 = (b), (dmaxarg1) > (dmaxarg2) ? (dmaxarg1) : (dmaxarg2))
20
+
21
+ static double dminarg1, dminarg2;
22
+ #define DMIN(a, b) (dminarg1 = (a), dminarg2 = (b), (dminarg1) < (dminarg2) ? (dminarg1) : (dminarg2))
23
+
24
+ static float maxarg1, maxarg2;
25
+ #define FMAX(a, b) (maxarg1 = (a), maxarg2 = (b), (maxarg1) > (maxarg2) ? (maxarg1) : (maxarg2))
26
+
27
+ static float minarg1, minarg2;
28
+ #define FMIN(a, b) (minarg1 = (a), minarg2 = (b), (minarg1) < (minarg2) ? (minarg1) : (minarg2))
29
+
30
+ static long lmaxarg1, lmaxarg2;
31
+ #define LMAX(a, b) (lmaxarg1 = (a), lmaxarg2 = (b), (lmaxarg1) > (lmaxarg2) ? (lmaxarg1) : (lmaxarg2))
32
+
33
+ static long lminarg1, lminarg2;
34
+ #define LMIN(a, b) (lminarg1 = (a), lminarg2 = (b), (lminarg1) < (lminarg2) ? (lminarg1) : (lminarg2))
35
+
36
+ static int imaxarg1, imaxarg2;
37
+ #define IMAX(a, b) (imaxarg1 = (a), imaxarg2 = (b), (imaxarg1) > (imaxarg2) ? (imaxarg1) : (imaxarg2))
38
+
39
+ static int iminarg1, iminarg2;
40
+ #define IMIN(a, b) (iminarg1 = (a), iminarg2 = (b), (iminarg1) < (iminarg2) ? (iminarg1) : (iminarg2))
41
+
42
+ #define SIGN(a, b) ((b) >= 0.0 ? fabs(a) : -fabs(a))
43
+
44
+
45
+ /* prototype declaration */
46
+
47
+ #if defined(__STDC__) || defined(ANSI) || defined(NRANSI) /* ANSI */
48
+
49
+ void nrerror(char error_test[]);
50
+ float *vector(long nl, long nh);
51
+ int *ivector(long nl, long nh);
52
+ unsigned char *cvector(long nl, long nh);
53
+ unsigned long *lvector(long nl, long nh);
54
+ double *dvector(long nl, long nh);
55
+ float **matrix(long nrl, long nrh, long ncl, long nch);
56
+ double **dmatrix(long nrl, long nrh, long ncl, long nch);
57
+ int **imatrix(long nrl, long nrh, long ncl, long nch);
58
+ float **submatrix(float **a, long oldrl, long oldrh,
59
+ long oldcl, long oldch, long newrl, long newcl);
60
+ float **convert_matrix(float *a, long nrl, long nrh, long ncl, long nch);
61
+ float ***f3tensor(long nrl, long nrh, long ncl, long nch,
62
+ long ndl, long ndh);
63
+ void free_vector(float *v, long nl, long nh);
64
+ void free_ivector(int *v, long nl, long nh);
65
+ void free_cvector(unsigned char *v, long nl, long nh);
66
+ void free_lvector(unsigned long *v, long nl, long nh);
67
+ void free_dvector(double *v, long nl, long nh);
68
+ void free_matrix(float **m, long nrl, long nrh, long ncl, long nch);
69
+ void free_dmatrix(double **m, long nrl, long nrh, long ncl, long nch);
70
+ void free_imatrix(int **m, long nrl, long nrh, long ncl, long nch);
71
+ void free_submatrix(float **b, long nrl, long nrh, long ncl, long nch);
72
+ void free_convert_matrix(float **b, long nrl, long nrh,
73
+ long ncl, long nch);
74
+ void free_f3tensor(float ***t, long nrl, long nrh, long ncl, long nch,
75
+ long ndl, long ndh);
76
+
77
+ #else /* ANSI */
78
+ /* traditional -K&R */
79
+
80
+ void nrerror();
81
+ float *vector();
82
+
83
+ #endif /* ANSI */
84
+
85
+ #endif /* _NR_UTILS_H_ */
86
+
87
+
88
+ /* function definition */
89
+
90
+
91
/*
 * Numerical Recipes standard error handler.
 * Prints a three-line report containing error_text to stderr and then
 * terminates the whole process with exit status 1; it never returns.
 */
void nrerror(char error_text[])
{
	fprintf(stderr,
	        "Numerical Recipes run-time error...\n"
	        "%s\n"
	        "...now exiting to system...\n",
	        error_text);
	exit(1);
}
99
+
100
+ float *vector(long nl, long nh)
101
+ /* allocate a float vector with subscript range v[nl..nr] */
102
+ {
103
+ float *v;
104
+
105
+ v = (float *)malloc((size_t)((nh - nl + 1 + NR_END) * sizeof(float)));
106
+ if (!v) nrerror("allocation failure in vector()");
107
+ return v - nl + NR_END;
108
+ }
109
+
110
+ int *ivector(long nl, long nh)
111
+ /* allocate an int vector with subscript range v[nl..nr] */
112
+ {
113
+ int *v;
114
+
115
+ v = (int *)malloc((size_t)((nh - nl + 1 + NR_END)*sizeof(int)));
116
+ if (!v) nrerror("allocation failure in ivector()");
117
+ return v - nl + NR_END;
118
+ }
119
+
120
+ unsigned char *cvector(long nl, long nh)
121
+ /* allocate an unsigned char vector with subscript range v[nl..nr] */
122
+ {
123
+ unsigned char *v;
124
+
125
+ v = (unsigned char *)malloc((size_t)((nh - nl + 1 + NR_END)
126
+ * sizeof(unsigned char)));
127
+ if (!v) nrerror("allocation failure in cvector()");
128
+ return v - nl + NR_END;
129
+ }
130
+
131
+ unsigned long *lvector(long nl, long nh)
132
+ /* allocate an unsigned long vector with subscript range v[nl..nr] */
133
+ {
134
+ unsigned long *v;
135
+
136
+ v = (unsigned long *)malloc((size_t)((nh - nl + 1 + NR_END) * sizeof(long)));
137
+ if (!v) nrerror("allocation failure in lvector()");
138
+ return v - nl + NR_END;
139
+ }
140
+
141
+ double *dvector(long nl, long nh)
142
+ /* allocate a double vector with subscript range v[nl..nr] */
143
+ {
144
+ double *v;
145
+
146
+ v = (double *)malloc((size_t)((nh - nl + 1 + NR_END) * sizeof(double)));
147
+ if (!v) nrerror("allocation failure in dvector()");
148
+ return v - nl + NR_END;
149
+ }
150
+
151
+ float **matrix(long nrl, long nrh, long ncl, long nch)
152
+ /* allocate a float matrix with subscript range m[nrl..nrh][ncl, nch] */
153
+ {
154
+ long i, nrow = nrh - nrl + 1, ncol = nch - ncl + 1;
155
+ float **m;
156
+
157
+ /* allocate pointers to rows */
158
+ m = (float **)malloc((size_t)((nrow + NR_END) * sizeof(float *)));
159
+ if (!m) nrerror("allocation failure 1 in matrix()");
160
+ m += NR_END;
161
+ m -= nrl;
162
+
163
+ /* allocate rows and set pointers to them */
164
+ m[nrl] = (float *)malloc((size_t)((nrow * ncol + NR_END) * sizeof(float)));
165
+ if (!m[nrl]) nrerror("allocation failure 2 in matrix()");
166
+ m[nrl] += NR_END;
167
+ m[nrl] -= ncl;
168
+
169
+ for (i = nrl + 1; i <= nrh; i++) m[i] = m[i-1] + ncol;
170
+
171
+ /* return pointer to array of pointers to rows */
172
+ return m;
173
+ }
174
+
175
+ double **dmatrix(long nrl, long nrh, long ncl, long nch)
176
+ /* allocate a double matrix with subscript range m[nrl..nrh][ncl, nch] */
177
+ {
178
+ long i, nrow = nrh - nrl + 1, ncol = nch - ncl + 1;
179
+ double **m;
180
+
181
+ /* allocate pointers to rows */
182
+ m = (double **)malloc((size_t)((nrow + NR_END) * sizeof(double *)));
183
+ if (!m) nrerror("allocation failure 1 in matrix()");
184
+ m += NR_END;
185
+ m -= nrl;
186
+
187
+ /* allocate rows and set pointers to them */
188
+ m[nrl] = (double *)malloc((size_t)((nrow * ncol + NR_END) * sizeof(double)));
189
+ if (!m[nrl]) nrerror("allocation failure 2 in matrix()");
190
+ m[nrl] += NR_END;
191
+ m[nrl] -= ncl;
192
+
193
+ for (i = nrl + 1; i <= nrh; i++) m[i] = m[i-1] + ncol;
194
+
195
+ /* return pointer to array of pointers to rows */
196
+ return m;
197
+ }
198
+
199
+ int **imatrix(long nrl, long nrh, long ncl, long nch)
200
+ /* allocate an int matrix with subscript range m[nrl..nrh][ncl, nch] */
201
+ {
202
+ long i, nrow = nrh - nrl + 1, ncol = nch - ncl + 1;
203
+ int **m;
204
+
205
+ /* allocate pointers to rows */
206
+ m = (int **)malloc((size_t)((nrow + NR_END) * sizeof(int *)));
207
+ if (!m) nrerror("allocation failure 1 in matrix()");
208
+ m += NR_END;
209
+ m -= nrl;
210
+
211
+ /* allocate rows and set pointers to them */
212
+ m[nrl] = (int *)malloc((size_t)((nrow * ncol + NR_END) * sizeof(int)));
213
+ if (!m[nrl]) nrerror("allocation failure 2 in matrix()");
214
+ m[nrl] += NR_END;
215
+ m[nrl] -= ncl;
216
+
217
+ for (i = nrl + 1; i <= nrh; i++) m[i] = m[i-1] + ncol;
218
+
219
+ /* return pointer to array of pointers to rows */
220
+ return m;
221
+ }
222
+
223
+ float **submatrix(float **a, long oldrl, long oldrh, long oldcl, long oldch,
224
+ long newrl, long newcl)
225
+ /* point a submatrix [newrl..][newcl..]
226
+ to a[oldrl..oldrh][oldcl..oldch] */
227
+
228
+ {
229
+ long i, j, nrow = oldrh - oldrl + 1, ncol = oldcl - newcl;
230
+ float **m;
231
+
232
+ /* allocate array of pointers to rows */
233
+ m = (float **)malloc((size_t)((nrow + NR_END) * sizeof(float *)));
234
+ if (!m) nrerror("allocation failure in submatrix()");
235
+ m += NR_END;
236
+ m -= newrl;
237
+
238
+ /* set pointers to rows */
239
+ for (i = oldrl, j = newrl; i <= oldrh; i++, j++) m[j] = a[i] + ncol;
240
+
241
+ /* return pointer to array of pointers to rows */
242
+ return m;
243
+ }
244
+
245
+ float **convert_matrix(float *a, long nrl, long nrh, long ncl, long nch)
246
+ /* allocate a float matrix m[nrl..nrh][ncl..nch] that points to the matrix
247
+ declared in the standard C manner as a[nrow][ncol],
248
+ where nrow = nrh - nrl + 1 and ncol = nch - ncl + 1. The routine should
249
+ be called with the address &a[0][0] as the first argument. */
250
+ {
251
+ long i, j, nrow = nrh -nrl + 1, ncol = nch - ncl + 1;
252
+ float **m;
253
+
254
+ /* allocate pointers to rows */
255
+ m = (float **)malloc((size_t)((nrow + NR_END) * sizeof(float *)));
256
+ if (!m) nrerror("allocation failure in convert_matrix()");
257
+ m += NR_END;
258
+ m -= nrl;
259
+
260
+ /* set pointers to rows */
261
+ m[nrl] = a - ncl;
262
+ for (i = 1, j = nrl + 1; i < nrow; i++, j++) m[j] = m[j - 1] + ncol;
263
+
264
+ /* return pointer to array of pointers to rows */
265
+ return m;
266
+ }
267
+
268
+ float ***f3tensor(long nrl, long nrh, long ncl, long nch, long ndl, long ndh)
269
+ /* allocate a float 3tensor with range t[nrl..nrh][ncl..nch][ndl..ndh] */
270
+ {
271
+ long i, j, nrow = nrh - nrl + 1, ncol = nch - ncl + 1, ndep = ndh - ndl + 1;
272
+ float ***t;
273
+
274
+ /* allocate pointers to pointers to rows */
275
+ t = (float ***)malloc((size_t)((nrow + NR_END) * sizeof(float **)));
276
+ if (!t) nrerror("allocation failure 1 in f3tensor()");
277
+ t += NR_END;
278
+ t -= nrl;
279
+
280
+ /* allocate pointers to rows and set pointers to them */
281
+ t[nrl] = (float **)malloc((size_t)((nrow * ncol + NR_END)
282
+ * sizeof(float *)));
283
+ if (!t[nrl]) nrerror("allocation failure 2 in f3tensor()");
284
+ t[nrl] += NR_END;
285
+ t[nrl] -= ncl;
286
+
287
+ /* allocate rows and set pointers to them */
288
+ t[nrl][ncl] = (float *)malloc((size_t)((nrow * ncol * ndep + NR_END)
289
+ * sizeof(float)));
290
+ if (!t[nrl][ncl]) nrerror("allocation failure 3 in f3tensor()");
291
+ t[nrl][ncl] += NR_END;
292
+ t[nrl][ncl] -= ndl;
293
+
294
+ for (j = ncl + 1; j <= nch; j++) t[nrl][j] = t[nrl][j - 1] + ndep;
295
+ for (i = nrl + 1; i <= nrh; i++) {
296
+ t[i] = t[i - 1] + ncol;
297
+ t[i][ncl] = t[i - 1][ncl] + ncol * ndep;
298
+ for ( j = ncl + 1; j <= nch; j++) t[i][j] = t[i][j - 1] + ndep;
299
+ }
300
+
301
+ /* return pointer to array of pointers to rows */
302
+ return t;
303
+ }
304
+
305
+ void free_vector(float *v, long nl, long nh)
306
+ /* free a float vector allocated with vector() */
307
+ {
308
+ free((FREE_ARG)(v + nl - NR_END));
309
+ }
310
+
311
+ void free_ivector(int *v, long nl, long nh)
312
+ /* free an int vector allocated with ivector() */
313
+ {
314
+ free((FREE_ARG)(v + nl - NR_END));
315
+ }
316
+
317
+ void free_cvector(unsigned char *v, long nl, long nh)
318
+ /* free an unsigned char vector allocated with cvector() */
319
+ {
320
+ free((FREE_ARG)(v + nl - NR_END));
321
+ }
322
+
323
+ void free_lvector(unsigned long *v, long nl, long nh)
324
+ /* free an unsigned long vector allocated with lvector() */
325
+ {
326
+ free((FREE_ARG)(v + nl - NR_END));
327
+ }
328
+
329
+ void free_dvector(double *v, long nl, long nh)
330
+ /* free a double vector allocated with dvector() */
331
+ {
332
+ free((FREE_ARG)(v + nl - NR_END));
333
+ }
334
+
335
+ void free_matrix(float **m, long nrl, long nrh, long ncl, long nch)
336
+ /* free a float matrix allocated by matrix() */
337
+ {
338
+ free((FREE_ARG)(m[nrl] + ncl - NR_END));
339
+ free((FREE_ARG)(m + nrl - NR_END));
340
+ }
341
+
342
+ void free_dmatrix(double **m, long nrl, long nrh, long ncl, long nch)
343
+ /* free a double matrix allocated by dmatrix() */
344
+ {
345
+ free((FREE_ARG)(m[nrl] + ncl - NR_END));
346
+ free((FREE_ARG)(m + nrl - NR_END));
347
+ }
348
+
349
+ void free_imatrix(int **m, long nrl, long nrh, long ncl, long nch)
350
+ /* free an int matrix allocated by imatrix() */
351
+ {
352
+ free((FREE_ARG)(m[nrl] + ncl - NR_END));
353
+ free((FREE_ARG)(m + nrl - NR_END));
354
+ }
355
+
356
+ void free_submatirx(float **b, long nrl, long nrh, long ncl, long nch)
357
+ /* free a submatirx allocated by submatirx() */
358
+ {
359
+ free((FREE_ARG)(b + nrl - NR_END));
360
+ }
361
+
362
+ void free_convert_matirx(float **b, long nrl, long nrh, long ncl, long nch)
363
+ /* free a matirx allocated by convert_matirx() */
364
+ {
365
+ free((FREE_ARG)(b + nrl - NR_END));
366
+ }
367
+
368
+ void free_f3tensor(float ***t, long nrl, long nrh, long ncl, long nch,
369
+ long ndl, long ndh)
370
+ /* free a float f3tensor allocated by f3tensor() */
371
+ {
372
+ free((FREE_ARG)(t[nrl][ncl] + ndl - NR_END));
373
+ free((FREE_ARG)(t[nrl] + ncl - NR_END));
374
+ free((FREE_ARG)(t + nrl - NR_END));
375
+ }
376
+
data/ext/svd.c ADDED
@@ -0,0 +1,66 @@
1
+ #include <stdio.h>
2
+ #include <ruby.h>
3
+ #include "svd.h"
4
+
5
+ VALUE decompose(VALUE module, VALUE matrix_ruby, VALUE m_ruby, VALUE n_ruby) {
6
+ int m = NUM2INT(m_ruby);
7
+ int n = NUM2INT(n_ruby);
8
+ float **u = matrix(1, m, 1, n);
9
+ float **v = matrix(1, m, 1, n);
10
+ float *w = vector(1, n);
11
+ VALUE *matrix_values = RARRAY_PTR(matrix_ruby);
12
+ int offset = 0;
13
+ int i, j;
14
+
15
+ /* output arrays */
16
+ VALUE u_output = rb_ary_new();
17
+ VALUE v_output = rb_ary_new();
18
+ VALUE w_output = rb_ary_new();
19
+ VALUE output = rb_ary_new();
20
+
21
+ /* precondition */
22
+ if((m*n) != RARRAY_LEN(matrix_ruby)) {
23
+ rb_raise(rb_eRangeError, "Size of the array is not equal to m * n");
24
+ return;
25
+ }
26
+
27
+ /* convert to u matrix */
28
+ for(i = 1; i <= m; i++) {
29
+ for(j = 1; j <= n; j++) {
30
+ offset = ((i-1)*n) + (j-1);
31
+ u[i][j] = (float) NUM2DBL(matrix_values[offset]);
32
+ }
33
+ }
34
+
35
+ /* perform SVD */
36
+ svdcmp(u, m, n, w, v);
37
+
38
+ /* create w output array */
39
+ for(i = 1; i <= n; i++)
40
+ rb_ary_push(w_output, rb_float_new(w[i]));
41
+
42
+ /* create u arrays */
43
+ for(i = 1; i <= m; i++) {
44
+ for(j = 1; j <= n; j++) {
45
+ rb_ary_push(u_output, rb_float_new(u[i][j]));
46
+ }
47
+ }
48
+
49
+ /* create v arrays */
50
+ for(i = 1; i <= n; i++) {
51
+ for(j = 1; j <= n; j++) {
52
+ rb_ary_push(v_output, rb_float_new(v[i][j]));
53
+ }
54
+ }
55
+
56
+ rb_ary_push(output, u_output);
57
+ rb_ary_push(output, w_output);
58
+ rb_ary_push(output, v_output);
59
+ return output;
60
+ }
61
+
62
+ void Init_svd()
63
+ {
64
+ VALUE module = rb_define_module("SVD");
65
+ rb_define_module_function(module, "decompose", decompose, 3);
66
+ }
data/ext/svd.h ADDED
@@ -0,0 +1,194 @@
1
+ /* svd.h */
2
+ /* Singular Value Decomposition for solving linear algebraic equations */
3
+ #include <stdio.h>
4
+ #include <stdlib.h>
5
+ #include <math.h>
6
+ #include "nrutil.h"
7
+
8
+ float pythag(float a, float b)
9
+ {
10
+ float absa,absb;
11
+ absa=fabs(a);
12
+ absb=fabs(b);
13
+ if (absa > absb) return absa*sqrt(1.0+SQR(absb/absa));
14
+ else return (absb == 0.0 ? 0.0 : absb*sqrt(1.0+SQR(absa/absb)));
15
+ }
16
+
17
/*
 * Singular value decomposition (Numerical Recipes svdcmp), 1-based indexing.
 *
 * a : m x n input matrix a[1..m][1..n]; overwritten in place with U.
 * w : output vector w[1..n] receiving the singular values.
 * v : output matrix v[1..n][1..n] receiving V (not its transpose).
 *
 * A scratch vector rv1[1..n] is allocated on entry and freed on exit.
 * If a singular value has not converged after 30 iterations, nrerror() is
 * called, which terminates the process.
 */
void svdcmp(float **a, int m, int n, float w[], float **v)
{
	float pythag(float a, float b);
	int flag, i, its, j, jj, k, l, nm;
	float anorm, c, f, g, h, s, scale, x, y, z;
	float *rv1 = vector(1, n);

	g = 0.0;
	scale = 0.0;
	anorm = 0.0;

	/* Phase 1: Householder reduction to bidiagonal form. */
	for (i = 1; i <= n; i++) {
		l = i + 1;
		rv1[i] = scale * g;
		g = s = scale = 0.0;
		if (i <= m) {
			for (k = i; k <= m; k++)
				scale += fabs(a[k][i]);
			if (scale) {
				for (k = i; k <= m; k++) {
					a[k][i] /= scale;
					s += a[k][i] * a[k][i];
				}
				f = a[i][i];
				g = -SIGN(sqrt(s), f);
				h = f * g - s;
				a[i][i] = f - g;
				for (j = l; j <= n; j++) {
					s = 0.0;
					for (k = i; k <= m; k++)
						s += a[k][i] * a[k][j];
					f = s / h;
					for (k = i; k <= m; k++)
						a[k][j] += f * a[k][i];
				}
				for (k = i; k <= m; k++)
					a[k][i] *= scale;
			}
		}
		w[i] = scale * g;
		g = s = scale = 0.0;
		if (i <= m && i != n) {
			for (k = l; k <= n; k++)
				scale += fabs(a[i][k]);
			if (scale) {
				for (k = l; k <= n; k++) {
					a[i][k] /= scale;
					s += a[i][k] * a[i][k];
				}
				f = a[i][l];
				g = -SIGN(sqrt(s), f);
				h = f * g - s;
				a[i][l] = f - g;
				for (k = l; k <= n; k++)
					rv1[k] = a[i][k] / h;
				for (j = l; j <= m; j++) {
					s = 0.0;
					for (k = l; k <= n; k++)
						s += a[j][k] * a[i][k];
					for (k = l; k <= n; k++)
						a[j][k] += s * rv1[k];
				}
				for (k = l; k <= n; k++)
					a[i][k] *= scale;
			}
		}
		anorm = FMAX(anorm, (fabs(w[i]) + fabs(rv1[i])));
	}

	/* Phase 2: accumulate the right-hand transformations into v. */
	for (i = n; i >= 1; i--) {
		if (i < n) {
			if (g) {
				/* double division, as in the original, to limit underflow */
				for (j = l; j <= n; j++)
					v[j][i] = (a[i][j] / a[i][l]) / g;
				for (j = l; j <= n; j++) {
					s = 0.0;
					for (k = l; k <= n; k++)
						s += a[i][k] * v[k][j];
					for (k = l; k <= n; k++)
						v[k][j] += s * v[k][i];
				}
			}
			for (j = l; j <= n; j++)
				v[i][j] = v[j][i] = 0.0;
		}
		v[i][i] = 1.0;
		g = rv1[i];
		l = i;
	}

	/* Phase 3: accumulate the left-hand transformations into a. */
	for (i = IMIN(m, n); i >= 1; i--) {
		l = i + 1;
		g = w[i];
		for (j = l; j <= n; j++)
			a[i][j] = 0.0;
		if (g) {
			g = 1.0 / g;
			for (j = l; j <= n; j++) {
				s = 0.0;
				for (k = l; k <= m; k++)
					s += a[k][i] * a[k][j];
				f = (s / a[i][i]) * g;
				for (k = i; k <= m; k++)
					a[k][j] += f * a[k][i];
			}
			for (j = i; j <= m; j++)
				a[j][i] *= g;
		} else {
			for (j = i; j <= m; j++)
				a[j][i] = 0.0;
		}
		++a[i][i];
	}

	/* Phase 4: diagonalise the bidiagonal form, one singular value (k) at a
	   time, allowing at most 30 iterations each. */
	for (k = n; k >= 1; k--) {
		for (its = 1; its <= 30; its++) {
			/* look for a negligible rv1[l] or w[l-1] to split the problem */
			flag = 1;
			for (l = k; l >= 1; l--) {
				nm = l - 1;
				if ((float)(fabs(rv1[l]) + anorm) == anorm) {
					flag = 0;
					break;
				}
				if ((float)(fabs(w[nm]) + anorm) == anorm)
					break;
			}
			if (flag) {
				/* cancel rv1[l] when l > 1 */
				c = 0.0;
				s = 1.0;
				for (i = l; i <= k; i++) {
					f = s * rv1[i];
					rv1[i] = c * rv1[i];
					if ((float)(fabs(f) + anorm) == anorm)
						break;
					g = w[i];
					h = pythag(f, g);
					w[i] = h;
					h = 1.0 / h;
					c = g * h;
					s = -f * h;
					for (j = 1; j <= m; j++) {
						y = a[j][nm];
						z = a[j][i];
						a[j][nm] = y * c + z * s;
						a[j][i] = z * c - y * s;
					}
				}
			}
			z = w[k];
			if (l == k) {
				/* converged: make the singular value non-negative */
				if (z < 0.0) {
					w[k] = -z;
					for (j = 1; j <= n; j++)
						v[j][k] = -v[j][k];
				}
				break;
			}
			if (its == 30)
				nrerror("no convergence in 30 svdcmp iterations");

			/* shift computed from the bottom 2x2 minor */
			x = w[l];
			nm = k - 1;
			y = w[nm];
			g = rv1[nm];
			h = rv1[k];
			f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
			g = pythag(f, 1.0);
			f = ((x - z) * (x + z) + h * ((y / (f + SIGN(g, f))) - h)) / x;

			/* next QR transformation */
			c = s = 1.0;
			for (j = l; j <= nm; j++) {
				i = j + 1;
				g = rv1[i];
				y = w[i];
				h = s * g;
				g = c * g;
				z = pythag(f, h);
				rv1[j] = z;
				c = f / z;
				s = h / z;
				f = x * c + g * s;
				g = g * c - x * s;
				h = y * s;
				y *= c;
				for (jj = 1; jj <= n; jj++) {
					x = v[jj][j];
					z = v[jj][i];
					v[jj][j] = x * c + z * s;
					v[jj][i] = z * c - x * s;
				}
				z = pythag(f, h);
				w[j] = z;
				/* when z is zero, c and s keep their previous values */
				if (z) {
					z = 1.0 / z;
					c = f * z;
					s = h * z;
				}
				f = c * g + s * y;
				x = c * y - s * g;
				for (jj = 1; jj <= m; jj++) {
					y = a[jj][j];
					z = a[jj][i];
					a[jj][j] = y * c + z * s;
					a[jj][i] = z * c - y * s;
				}
			}
			rv1[l] = 0.0;
			rv1[k] = f;
			w[k] = x;
		}
	}

	free_vector(rv1, 1, n);
}
data/lib/lsa.rb ADDED
@@ -0,0 +1,43 @@
1
+ require 'svd_matrix'
2
+
3
# Latent Semantic Analysis helper built on a rank-2 decomposition of a matrix.
class LSA
  attr_accessor :u, :s, :v

  # matrix must respond to decompose(2) and return the three factors [u, s, v].
  def initialize(matrix)
    @u, @s, @v = matrix.decompose(2)
  end

  # Multi-line dump of the three factors, separated by blank lines.
  def inspect
    ["U:\n#{@u.inspect}", "S:\n#{@s.inspect}", "V:\n#{@v.inspect}"].join("\n\n")
  end

  # Compares a new vector against every row of one of the factors.
  #
  # The vector is projected to two coordinates using whichever of U or V has
  # as many rows as the vector has entries (U wins when both match); the rows
  # of the other factor are then scored by cosine similarity.
  #
  # Returns an array of [row_index, similarity] pairs sorted by descending
  # similarity. Raises RuntimeError when the length matches neither factor.
  def classify_vector(values)
    fits_u = values.size == @u.row_size
    raise "Unsupported vector length" unless fits_u || values.size == @v.row_size

    row = Matrix.row_vector(values)
    projector, candidates = fits_u ? [@u, @v] : [@v, @u]

    projected = row * Matrix[*projector] * @s.inverse
    px = projected[0, 0]
    py = projected[0, 1]

    scored = Array.new(candidates.row_size) do |index|
      [index, cosine_similarity(px, py, candidates[index, 0], candidates[index, 1])]
    end

    scored.sort { |left, right| right[1] <=> left[1] }
  end

  # Cosine similarity between the 2D points (x1, y1) and (x2, y2).
  # Returns 0 when either point has zero magnitude.
  def cosine_similarity(x1, y1, x2, y2)
    dot = (x1 * x2) + (y1 * y2)
    norm1 = Math.sqrt((x1 ** 2) + (y1 ** 2))
    norm2 = Math.sqrt((x2 ** 2) + (y2 ** 2))

    if norm1 == 0 || norm2 == 0
      0
    else
      dot / (norm1 * norm2)
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require 'svd'
2
+ require 'svd_matrix'
3
+ require 'lsa'
data/lib/svd_matrix.rb ADDED
@@ -0,0 +1,70 @@
1
+ require 'mathn'
2
+ require 'svd'
3
+
4
# A Matrix that can be filled in cell by cell or row by row and decomposed
# with the native SVD extension.
class SVDMatrix < Matrix
  public_class_method :new

  # Create a new SVD Matrix with m rows, n columns (all cells nil).
  #
  # The inherited Matrix operations build their results by calling
  # new(rows, column_count), so an Array of rows is also accepted as the
  # first argument; without that, transpose and * cannot return an SVDMatrix.
  def initialize(m, n = nil)
    if m.is_a?(Array)
      @rows = m
      n ||= (m.first ? m.first.size : 0)
    else
      @rows = Array.new(m) { Array.new(n) }
    end
    # Matrix reads @column_count; @column_size is kept for the code below
    # and for older Matrix implementations.
    @column_size = n
    @column_count = n
  end

  # Set the value of the cell i, j
  def []=(i, j, val)
    @rows[i][j] = val
  end

  # Set the value of a row to an array (the array itself is stored, not a copy)
  def set_row(i, row)
    @rows[i] = row
  end

  # Nicely formatted inspect string for the matrix: one row per line
  def inspect
    @rows.collect {|row| row.inspect}.join("\n")
  end

  # Perform SVD and decompose the matrix into three matrices:
  # U, W, and V. You can choose to reduce the dimensionality of
  # the data by keeping only the leading diagonal cells of W. For
  # example, reduce_dimensions_to = 2 keeps this part of a 4x4 W
  # matrix:
  # [NUM, 0, 0, 0]
  # [0, NUM, 0, 0]
  # [ 0, 0, 0, 0 ]
  # [ 0, 0, 0, 0 ]
  # i.e. W is returned as a 2x2 diagonal matrix and U and V keep only
  # their first two columns.
  #
  # Returns [u, w, v]: u and v are SVDMatrix instances, w is a plain
  # diagonal Matrix.
  def decompose(reduce_dimensions_to = nil)
    # one-level flatten of the rows into the row-major array SVD.decompose expects
    input_array = @rows.flatten(1)
    u_array, w_array, v_array = SVD.decompose(input_array, row_size, @column_size)

    # never keep more columns than the decomposition produced
    kept = reduce_dimensions_to ? [reduce_dimensions_to, @column_size].min : @column_size

    # recompose U matrix (row_size rows of @column_size values each)
    u = SVDMatrix.new(row_size, kept)
    row_size.times {|i| u.set_row(i, u_array.slice!(0, @column_size)[0...kept])}

    # recompose V matrix (@column_size rows of @column_size values each)
    v = SVDMatrix.new(@column_size, kept)
    @column_size.times {|i| v.set_row(i, v_array.slice!(0, @column_size)[0...kept])}

    # diagonalise W array as a matrix
    w = Matrix.diagonal(*w_array[0...kept])

    [u, w, v]
  end

  # Reduce the number of dimensions of the data to dimensions.
  # Returns a recombined matrix (conceptually the original
  # matrix dimensionally reduced). For example Latent Semantic
  # Analysis uses 2 dimensions, and commonly tf-idf cell data.
  # The recombined matrix, and the 3 decomposed matrices are
  # returned.
  def reduce_dimensions(dimensions = 2)
    u, w, v = self.decompose(dimensions)
    [(u * w * v.transpose), u, w, v]
  end
end
metadata ADDED
@@ -0,0 +1,56 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby-esvidi
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.2
5
+ platform: ruby
6
+ authors:
7
+ - Will Cannings
8
+ - Patricio Sard
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2017-12-03 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Singular Value Decomposition with no dependency on GSL or LAPACK
15
+ email: me@willcannings.com
16
+ executables: []
17
+ extensions:
18
+ - ext/extconf.rb
19
+ extra_rdoc_files:
20
+ - LICENSE
21
+ - README.md
22
+ files:
23
+ - LICENSE
24
+ - README.md
25
+ - ext/extconf.rb
26
+ - ext/nrutil.h
27
+ - ext/svd.c
28
+ - ext/svd.h
29
+ - lib/lsa.rb
30
+ - lib/ruby-esvidi.rb
31
+ - lib/svd_matrix.rb
32
+ homepage: http://github.com/willcannings/ruby-svd
33
+ licenses: []
34
+ metadata: {}
35
+ post_install_message:
36
+ rdoc_options: []
37
+ require_paths:
38
+ - lib
39
+ - ext
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ requirements: []
51
+ rubyforge_project:
52
+ rubygems_version: 2.6.14
53
+ signing_key:
54
+ specification_version: 3
55
+ summary: SVD for Ruby
56
+ test_files: []