jashmenn-apriori 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +121 -0
- data/README.txt +139 -0
- data/Rakefile +4 -0
- data/TODO.txt +60 -0
- data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
- data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
- data/attic/c_ext_test1/mytest.rb +10 -0
- data/attic/test.c +12 -0
- data/config/hoe.rb +77 -0
- data/config/requirements.rb +15 -0
- data/examples/01_simple_example.rb +23 -0
- data/examples/02_small_file_example.rb +17 -0
- data/examples/03_large_file_example.rb +22 -0
- data/examples/test_data/market_basket_basic_test.dat +9 -0
- data/ext/Apriori.c +149 -0
- data/ext/Makefile +149 -0
- data/ext/apriori/doc/apriori.html +1301 -0
- data/ext/apriori/doc/arem.gp +68 -0
- data/ext/apriori/doc/c_rev.gp +89 -0
- data/ext/apriori/doc/chi2.tex +156 -0
- data/ext/apriori/doc/copying +504 -0
- data/ext/apriori/doc/line.gif +0 -0
- data/ext/apriori/doc/uparrow.gif +0 -0
- data/ext/apriori/ex/flg2set +15 -0
- data/ext/apriori/ex/hdr2set +13 -0
- data/ext/apriori/ex/readme +71 -0
- data/ext/apriori/ex/row2set +7 -0
- data/ext/apriori/ex/rulesort +24 -0
- data/ext/apriori/ex/tab2set +9 -0
- data/ext/apriori/ex/test.app +2 -0
- data/ext/apriori/ex/test.rul +9 -0
- data/ext/apriori/ex/test1.rul +43 -0
- data/ext/apriori/ex/test1.tab +10 -0
- data/ext/apriori/ex/test2.tab +10 -0
- data/ext/apriori/ex/test3.tab +30 -0
- data/ext/apriori/ex/test4.tab +11 -0
- data/ext/apriori/ex/test5.tab +39 -0
- data/ext/apriori/ex/tid2set +23 -0
- data/ext/apriori/ex/xhdr2set +33 -0
- data/ext/apriori/src/apriori.c +750 -0
- data/ext/apriori/src/apriori.dsp +120 -0
- data/ext/apriori/src/apriori.dsw +29 -0
- data/ext/apriori/src/apriori.mak +99 -0
- data/ext/apriori/src/istree.c +1411 -0
- data/ext/apriori/src/istree.h +160 -0
- data/ext/apriori/src/makefile +105 -0
- data/ext/apriori/src/tract.c +870 -0
- data/ext/apriori/src/tract.h +261 -0
- data/ext/apriori_wrapper.c +757 -0
- data/ext/apriori_wrapper.h +10 -0
- data/ext/extconf.rb +32 -0
- data/ext/math/doc/copying +504 -0
- data/ext/math/src/chi2.c +151 -0
- data/ext/math/src/chi2.h +27 -0
- data/ext/math/src/choose.c +71 -0
- data/ext/math/src/choose.h +16 -0
- data/ext/math/src/gamma.c +446 -0
- data/ext/math/src/gamma.h +39 -0
- data/ext/math/src/intexp.c +35 -0
- data/ext/math/src/intexp.h +15 -0
- data/ext/math/src/makefile +164 -0
- data/ext/math/src/math.mak +48 -0
- data/ext/math/src/normal.c +387 -0
- data/ext/math/src/normal.h +44 -0
- data/ext/math/src/radfn.c +113 -0
- data/ext/math/src/radfn.h +34 -0
- data/ext/math/src/zeta.c +49 -0
- data/ext/math/src/zeta.h +15 -0
- data/ext/pre-clean.rb +8 -0
- data/ext/pre-setup.rb +9 -0
- data/ext/util/doc/copying +504 -0
- data/ext/util/src/listops.c +76 -0
- data/ext/util/src/listops.h +26 -0
- data/ext/util/src/makefile +103 -0
- data/ext/util/src/memsys.c +84 -0
- data/ext/util/src/memsys.h +42 -0
- data/ext/util/src/nstats.c +288 -0
- data/ext/util/src/nstats.h +69 -0
- data/ext/util/src/params.c +86 -0
- data/ext/util/src/params.h +19 -0
- data/ext/util/src/parse.c +133 -0
- data/ext/util/src/parse.h +81 -0
- data/ext/util/src/scan.c +767 -0
- data/ext/util/src/scan.h +111 -0
- data/ext/util/src/symtab.c +443 -0
- data/ext/util/src/symtab.h +121 -0
- data/ext/util/src/tabscan.c +279 -0
- data/ext/util/src/tabscan.h +99 -0
- data/ext/util/src/util.mak +91 -0
- data/ext/util/src/vecops.c +317 -0
- data/ext/util/src/vecops.h +42 -0
- data/lib/apriori/adapter.rb +13 -0
- data/lib/apriori/association_rule.rb +85 -0
- data/lib/apriori/version.rb +9 -0
- data/lib/apriori.rb +133 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +82 -0
- data/setup.rb +1585 -0
- data/tasks/apriori.rake +20 -0
- data/tasks/attic.rake +28 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/install.rake +6 -0
- data/tasks/website.rake +17 -0
- data/test/apriori_test.rb +13 -0
- data/test/fixtures/market_basket_results_test.txt +5 -0
- data/test/fixtures/market_basket_string_test.txt +7 -0
- data/test/fixtures/results.txt +2 -0
- data/test/fixtures/sample.txt +7 -0
- data/test/test_helper.rb +5 -0
- data/test/unit/test_apriori.rb +68 -0
- data/test/unit/test_itemsets_and_parsing.rb +82 -0
- data/website/index.html +233 -0
- data/website/index.txt +142 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +142 -0
- data/website/template.html.erb +49 -0
- metadata +188 -0
@@ -0,0 +1,317 @@
|
|
1
|
+
/*----------------------------------------------------------------------
|
2
|
+
File : vecops.c
|
3
|
+
Contents: some special vector operations
|
4
|
+
Author : Christian Borgelt
|
5
|
+
History : 1996.09.16 file created
|
6
|
+
1999.02.04 long int changed to int
|
7
|
+
2001.06.03 function v_shuffle added
|
8
|
+
2002.01.02 functions v_intsort, v_fltsort, v_dblsort added
|
9
|
+
2002.03.03 functions v_reverse, v_intrev etc. added
|
10
|
+
2003.08.21 function v_heapsort added
|
11
|
+
2007.01.16 shuffle functions for basic data types added
|
12
|
+
2007.12.02 bug in reverse functions fixed
|
13
|
+
----------------------------------------------------------------------*/
|
14
|
+
#include <assert.h>
|
15
|
+
#include "vecops.h"
|
16
|
+
|
17
|
+
/*----------------------------------------------------------------------
|
18
|
+
Preprocessor Definitions
|
19
|
+
----------------------------------------------------------------------*/
|
20
|
+
#define TH_INSERT 16 /* threshold for insertion sort */
|
21
|
+
#define BUFSIZE 4096 /* size of buffers for shifting */
|
22
|
+
|
23
|
+
/*----------------------------------------------------------------------
|
24
|
+
Functions
|
25
|
+
----------------------------------------------------------------------*/
|
26
|
+
|
27
|
+
/*----------------------------------------------------------------------
  Recursive partitioning step used by v_sort. Repeatedly splits the
  section vec[0..n-1] around a pivot, recurses into the smaller part
  and loops on the larger one. Sections with fewer than TH_INSERT
  elements are left as they are (not fully sorted) by this function.
  The element order is determined by cmpfn(a, b, data), which receives
  the caller supplied data pointer unchanged.
----------------------------------------------------------------------*/
static void _rec (void **vec, int n, VCMPFN cmpfn, void *data)
{                               /* --- recursive part of sort */
  void **lo, **hi;              /* scan positions from left and right */
  void *pivot, *tmp;            /* pivot element and exchange buffer */
  int  rest;                    /* number of elements in 2nd section */

  do {                          /* sections sort loop */
    lo = vec; hi = vec +n -1;   /* start at left and right boundary */
    if (cmpfn(*lo, *hi, data) > 0) {
      tmp = *lo; *lo = *hi; *hi = tmp;
    }                           /* first and last are now in order */
    pivot = vec[n >> 1];        /* take the middle element as pivot, */
    if      (cmpfn(pivot, *lo, data) < 0) pivot = *lo;
    else if (cmpfn(pivot, *hi, data) > 0) pivot = *hi;
                                /* but not outside [first, last], so */
                                /* both boundary elements stop the */
                                /* scans below (they act as guards) */
    for (;;) {                  /* split and exchange loop */
      do { ++lo; }              /* skip left elements that are */
      while (cmpfn(*lo, pivot, data) < 0);  /* smaller than pivot */
      do { --hi; }              /* skip right elements that are */
      while (cmpfn(*hi, pivot, data) > 0);  /* greater than pivot */
      if (lo >= hi) {           /* if the scan positions have met */
        if (lo == hi) { lo++; hi--; }   /* step over a shared element */
        break;                  /* and stop exchanging */
      }
      tmp = *lo; *lo = *hi; *hi = tmp;  /* otherwise exchange */
    }
    rest = (int)(vec +n -lo);   /* number of elements right of split */
    n    = (int)(hi -vec +1);   /* number of elements left  of split */
    if (n > rest) {             /* if the right section is smaller, */
      if (rest >= TH_INSERT)    /* but not below the threshold, */
        _rec(lo, rest, cmpfn, data);    /* sort it recursively and */
    }                           /* continue with the left section */
    else {                      /* if the left section is smaller, */
      if (n >= TH_INSERT)       /* but not below the threshold, */
        _rec(vec, n, cmpfn, data);      /* sort it recursively, */
      vec = lo; n = rest;       /* then continue with the right one */
    }
  } while (n >= TH_INSERT);     /* while not below the threshold */
}  /* _rec() */
|
61
|
+
|
62
|
+
/*--------------------------------------------------------------------*/
|
63
|
+
|
64
|
+
/*----------------------------------------------------------------------
  Sort a vector of n pointers into ascending order with respect to
  cmpfn(a, b, data). Vectors with at least TH_INSERT elements are first
  pre-partitioned by _rec; the final ordering is always established by
  an insertion sort pass over the whole vector.
  vec   pointer vector to sort (must not be NULL)
  n     number of elements (must not be negative)
  cmpfn comparison function (must not be NULL)
  data  passed through unchanged to every cmpfn call
----------------------------------------------------------------------*/
void v_sort (void *vec, int n, VCMPFN cmpfn, void *data)
{                               /* --- quick sort for pointer vectors */
  void **base = vec;            /* typed vector pointer */
  void **min;                   /* position of the smallest element */
  void **hole;                  /* insertion position */
  void *item;                   /* exchange buffer/element to insert */
  int  i, k;                    /* loop variable, size of 1st section */

  assert(vec && (n >= 0) && cmpfn);  /* check the function arguments */
  if (n <= 1) return;           /* do not sort less than two elements */
  k = n;                        /* by default search all elements */
  if (n >= TH_INSERT) {         /* if the vector is large enough, */
    _rec(base, n, cmpfn, data); /* call the recursive function and */
    k = TH_INSERT -1;           /* search only the first section */
  }
  min = base;                   /* find the smallest element within */
  for (i = 1; i < k; i++)       /* the first k elements */
    if (cmpfn(base[i], *min, data) < 0) min = base +i;
  item = *min; *min = base[0];  /* swap the smallest element */
  base[0] = item;               /* to front as a sentinel */
  for (i = 1; i < n; i++) {     /* insertion sort loop */
    item = base[i];             /* note the element to insert */
    hole = base +i;             /* and shift right all elements */
    while (cmpfn(hole[-1], item, data) > 0) {
      hole[0] = hole[-1]; hole--;       /* that are greater; the */
    }                           /* sentinel at the front ends the scan */
    *hole = item;               /* store the element to insert */
  }                             /* in the place thus found */
}  /* v_sort() */
|
89
|
+
|
90
|
+
/*--------------------------------------------------------------------*/
|
91
|
+
|
92
|
+
/*----------------------------------------------------------------------
  Let the element at index l sift down in the heap stored in vec, where
  r is the index of the last heap element. The successors of index i are
  taken to be 2i+1 and 2i+2; the greater successor (w.r.t. cmpfn) moves
  up as long as it is greater than the sifted element.
  Note: the first successor 2l+1 is accessed without a bounds check, so
  callers must ensure 2l+1 <= r.
----------------------------------------------------------------------*/
static void _sift (void **vec, int l, int r, VCMPFN cmpfn, void *data)
{                               /* --- let element sift down in heap */
  void *item  = vec[l];         /* note the sift element */
  int  child  = 2*l +1;         /* index of first successor in heap */

  for (;;) {                    /* sift loop */
    if ((child < r)             /* if second successor is greater */
    &&  (cmpfn(vec[child], vec[child+1], data) < 0))
      child++;                  /* go to the second successor */
    if (cmpfn(item, vec[child], data) >= 0)
      break;                    /* stop if successor is not greater */
    vec[l] = vec[child];        /* let the successor ascend in heap */
    l      = child;             /* descend to the vacated position */
    child  = 2*child +1;        /* compute index of first successor */
    if (child > r) break;       /* stop when the heap end is reached */
  }
  vec[l] = item;                /* store the sift element */
}  /* _sift() */
|
110
|
+
|
111
|
+
/*--------------------------------------------------------------------*/
|
112
|
+
|
113
|
+
/*----------------------------------------------------------------------
  Sort a vector of n pointers into ascending order with respect to
  cmpfn(a, b, data) using heap sort: first a heap is built with _sift,
  then the greatest element is repeatedly swapped to the end of the
  shrinking heap. Vectors with fewer than two elements are left alone.
----------------------------------------------------------------------*/
void v_heapsort (void *vec, int n, VCMPFN cmpfn, void *data)
{                               /* --- heap sort for pointer vectors */
  void **v = vec;               /* typed vector pointer */
  void *tmp;                    /* exchange buffer */
  int  i, last;                 /* loop variable, last heap index */

  if (n <= 1) return;           /* do not sort less than two elements */
  last = n -1;                  /* at start, only the second half */
  for (i = (n >> 1) -1; i >= 0; i--)    /* has heap structure, so */
    _sift(v, i, last, cmpfn, data);     /* extend it element-wise */
  do {                          /* heap reduction loop */
    tmp = v[0]; v[0] = v[last]; /* swap the greatest element */
    v[last] = tmp;              /* to the end of the vector */
    if (--last > 0)             /* if the heap is not empty, let the */
      _sift(v, 0, last, cmpfn, data);   /* new front element sift down */
  } while (last > 0);           /* until the heap is empty */
}  /* v_heapsort() */
|
131
|
+
|
132
|
+
/*--------------------------------------------------------------------*/
|
133
|
+
|
134
|
+
void v_move (void *vec, int off, int n, int pos, int esz)
|
135
|
+
{ /* --- move a vector section */
|
136
|
+
int i; /* loop variable */
|
137
|
+
int mid, end; /* middle and end index */
|
138
|
+
int *src, *dst; /* to traverse vector */
|
139
|
+
int buf[BUFSIZE]; /* buffer for vector elements */
|
140
|
+
|
141
|
+
assert(vec /* check the function arguments */
|
142
|
+
&& (off >= 0) && (n >= 0) && (pos >= 0) && (esz >= 0));
|
143
|
+
esz /= (int)sizeof(int); /* adapt size, offsets, and counter */
|
144
|
+
pos *= esz; off *= esz; n *= esz;
|
145
|
+
end = off +n; /* normalize vector indices */
|
146
|
+
if (pos <= off) { mid = off; off = pos; }
|
147
|
+
else { mid = end; end = pos; }
|
148
|
+
if (mid -off < end -mid) { /* if first section is smaller */
|
149
|
+
while (mid > off) { /* while there are elements to shift */
|
150
|
+
n = (mid -off < BUFSIZE) ? mid -off : BUFSIZE;
|
151
|
+
src = (int*)vec +mid -n; /* get number of elements and */
|
152
|
+
dst = buf; /* copy source to the buffer */
|
153
|
+
for (i = n; --i >= 0; ) *dst++ = *src++;
|
154
|
+
dst = (int*)vec +mid -n; /* shift down/left second section */
|
155
|
+
for (i = end -mid; --i >= 0; ) *dst++ = *src++;
|
156
|
+
src = buf; /* copy buffer to destination */
|
157
|
+
for (i = n; --i >= 0; ) *dst++ = *src++;
|
158
|
+
mid -= n; end -= n; /* second section has been shifted */
|
159
|
+
} } /* down/left cnt elements */
|
160
|
+
else { /* if second section is smaller */
|
161
|
+
while (end > mid) { /* while there are elements to shift */
|
162
|
+
n = (end -mid < BUFSIZE) ? end -mid : BUFSIZE;
|
163
|
+
src = (int*)vec +mid +n; /* get number of elements and */
|
164
|
+
dst = buf +n; /* copy source to the buffer */
|
165
|
+
for (i = n; --i >= 0; ) *--dst = *--src;
|
166
|
+
dst = (int*)vec +mid +n; /* shift up/right first section */
|
167
|
+
for (i = mid -off; --i >= 0; ) *--dst = *--src;
|
168
|
+
src = buf +n; /* copy buffer to destination */
|
169
|
+
for (i = n; --i >= 0; ) *--dst = *--src;
|
170
|
+
mid += n; off += n; /* first section has been shifted */
|
171
|
+
} /* up/right cnt elements */
|
172
|
+
}
|
173
|
+
} /* v_move() */
|
174
|
+
|
175
|
+
/*--------------------------------------------------------------------*/
|
176
|
+
|
177
|
+
void v_shuffle (void *vec, int n, double randfn (void))
|
178
|
+
{ /* --- shuffle vector entries */
|
179
|
+
int i; /* vector index */
|
180
|
+
void **v = vec, *t; /* vector and exchange buffer */
|
181
|
+
|
182
|
+
while (--n > 0) { /* shuffle loop (n random selections) */
|
183
|
+
i = (int)((n+1) *randfn()); /* compute a random index */
|
184
|
+
if (i > n) i = n; /* in the remaining section and */
|
185
|
+
if (i < 0) i = 0; /* exchange the vector elements */
|
186
|
+
t = v[i]; v[i] = v[n]; v[n] = t;
|
187
|
+
}
|
188
|
+
} /* v_shuffle() */
|
189
|
+
|
190
|
+
/*--------------------------------------------------------------------*/
|
191
|
+
|
192
|
+
/*----------------------------------------------------------------------
  Reverse the order of the n pointers in vec in place by exchanging
  elements pairwise from both ends towards the middle.
----------------------------------------------------------------------*/
void v_reverse (void *vec, int n)
{                               /* --- reverse a pointer vector */
  void **v = vec;               /* typed vector pointer */
  void *tmp;                    /* exchange buffer */
  int  i, j;                    /* indices from front and back */

  for (i = 0, j = n -1; i < j; i++, j--) {
    tmp = v[i]; v[i] = v[j]; v[j] = tmp; }
}  /* v_reverse() */
|
199
|
+
|
200
|
+
/*--------------------------------------------------------------------*/
|
201
|
+
|
202
|
+
#define REC(type,rec) \
|
203
|
+
static void rec (type *vec, int n) \
|
204
|
+
{ /* --- recursive part of sort */ \
|
205
|
+
type *l, *r; /* pointers to exchange positions */ \
|
206
|
+
type x, t; /* pivot element and exchange buffer */\
|
207
|
+
int m; /* number of elements in sections */ \
|
208
|
+
\
|
209
|
+
do { /* sections sort loop */ \
|
210
|
+
l = vec; r = l +n -1; /* start at left and right boundary */ \
|
211
|
+
if (*l > *r) { t = *l; *l = *r; *r = t; } \
|
212
|
+
x = vec[n >> 1]; /* get the middle element as pivot */ \
|
213
|
+
if (x < *l) x = *l; /* compute median of three */ \
|
214
|
+
else if (x > *r) x = *r; /* to find a better pivot */ \
|
215
|
+
while (1) { /* split and exchange loop */ \
|
216
|
+
while (*++l < x) /* skip left elements that are */ \
|
217
|
+
; /* smaller than the pivot element */ \
|
218
|
+
while (*--r > x) /* skip right elements that are */ \
|
219
|
+
; /* greater than the pivot element */ \
|
220
|
+
if (l >= r) { /* if less than two elements left, */ \
|
221
|
+
if (l <= r) { l++; r--; } break; } /* abort the loop */ \
|
222
|
+
t = *l; *l = *r; *r = t; /* otherwise exchange elements */ \
|
223
|
+
} \
|
224
|
+
m = (int)(vec +n -l); /* compute the number of elements */ \
|
225
|
+
n = (int)(r -vec +1); /* right and left of the split */ \
|
226
|
+
if (n > m) { /* if right section is smaller, */ \
|
227
|
+
if (m >= TH_INSERT) /* but larger than the threshold, */ \
|
228
|
+
rec(l, m); } /* sort it by an recursive call */ \
|
229
|
+
else { /* if the left section is smaller, */ \
|
230
|
+
if (n >= TH_INSERT) /* but larger than the threshold, */ \
|
231
|
+
rec(vec, n); /* sort it by an recursive call, */ \
|
232
|
+
vec = l; n = m; /* then switch to the right section */ \
|
233
|
+
} /* keeping its size m in variable n */ \
|
234
|
+
} while (n >= TH_INSERT); /* while greater than threshold */ \
|
235
|
+
} /* rec() */
|
236
|
+
|
237
|
+
/*--------------------------------------------------------------------*/
|
238
|
+
|
239
|
+
#define SORT(type,rec,sort) \
|
240
|
+
void sort (type *vec, int n) \
|
241
|
+
{ /* --- sort a number vector */ \
|
242
|
+
int k; /* size of first section */ \
|
243
|
+
type *l, *r; /* to traverse the vector */ \
|
244
|
+
type t; /* exchange buffer */ \
|
245
|
+
\
|
246
|
+
assert(vec && (n >= 0)); /* check the function arguments */ \
|
247
|
+
if (n <= 1) return; /* do not sort less than two elems. */ \
|
248
|
+
if (n < TH_INSERT) /* if less elements than threshold */ \
|
249
|
+
k = n; /* for insertion sort, note the */ \
|
250
|
+
else { /* number of elements, otherwise */ \
|
251
|
+
rec(vec, n); /* call the recursive sort function */ \
|
252
|
+
k = TH_INSERT -1; /* and get the number of elements */ \
|
253
|
+
} /* in the first vector section */ \
|
254
|
+
for (l = r = vec; --k > 0; ) /* find position of smallest element */\
|
255
|
+
if (*++r < *l) l = r; /* within the first k elements */ \
|
256
|
+
r = vec; /* swap the smallest element */ \
|
257
|
+
t = *l; *l = *r; *r = t; /* to front as a sentinel */ \
|
258
|
+
while (--n > 0) { /* standard insertion sort */ \
|
259
|
+
t = *++r; /* note the number to insert */ \
|
260
|
+
for (l = r; *--l > t; k--) /* shift right all numbers that are */ \
|
261
|
+
l[1] = *l; /* greater than the one to insert */ \
|
262
|
+
l[1] = t; /* and store the number to insert */ \
|
263
|
+
} /* in the place thus found */ \
|
264
|
+
} /* sort() */
|
265
|
+
|
266
|
+
/*--------------------------------------------------------------------*/
|
267
|
+
|
268
|
+
REC (int, _intrec)
|
269
|
+
SORT(int, _intrec, v_intsort)
|
270
|
+
|
271
|
+
/*--------------------------------------------------------------------*/
|
272
|
+
|
273
|
+
REC (float, _fltrec)
|
274
|
+
SORT(float, _fltrec, v_fltsort)
|
275
|
+
|
276
|
+
/*--------------------------------------------------------------------*/
|
277
|
+
|
278
|
+
REC (double, _dblrec)
|
279
|
+
SORT(double, _dblrec, v_dblsort)
|
280
|
+
|
281
|
+
/*--------------------------------------------------------------------*/
|
282
|
+
|
283
|
+
/*----------------------------------------------------------------------
  REVERSE(type,reverse) defines the function reverse(), which reverses
  the order of the n numbers in vec in place by exchanging elements
  pairwise from both ends towards the middle.
----------------------------------------------------------------------*/
#define REVERSE(type,reverse) \
void reverse (type *vec, int n)                                       \
{                               /* --- reverse a number vector */     \
  type tmp;                     /* exchange buffer */                 \
  int  i, j;                    /* indices from front and back */     \
                                                                      \
  for (i = 0, j = n -1; i < j; i++, j--) {                            \
    tmp = vec[i]; vec[i] = vec[j]; vec[j] = tmp; }                    \
}  /* reverse() */

/*--------------------------------------------------------------------*/

REVERSE(int,    v_intrev)
REVERSE(float,  v_fltrev)
REVERSE(double, v_dblrev)
|
296
|
+
|
297
|
+
/*--------------------------------------------------------------------*/
|
298
|
+
|
299
|
+
#define SHUFFLE(type,shuffle) \
|
300
|
+
void shuffle (type *vec, int n, double randfn (void)) \
|
301
|
+
{ /* --- shuffle vector entries */ \
|
302
|
+
int i; /* vector index */ \
|
303
|
+
type t; /* exchange buffer */ \
|
304
|
+
\
|
305
|
+
while (--n > 0) { /* shuffle loop (n selections) */ \
|
306
|
+
i = (int)((n+1) *randfn()); /* compute a random index */ \
|
307
|
+
if (i > n) i = n; /* in the remaining section and */ \
|
308
|
+
if (i < 0) i = 0; /* exchange the vector elements */ \
|
309
|
+
t = vec[i]; vec[i] = vec[n]; vec[n] = t; \
|
310
|
+
} \
|
311
|
+
} /* shuffle() */
|
312
|
+
|
313
|
+
/*--------------------------------------------------------------------*/
|
314
|
+
|
315
|
+
SHUFFLE(int, v_intshfl)
|
316
|
+
SHUFFLE(float, v_fltshfl)
|
317
|
+
SHUFFLE(double, v_dblshfl)
|
@@ -0,0 +1,42 @@
|
|
1
|
+
/*----------------------------------------------------------------------
|
2
|
+
File : vecops.h
|
3
|
+
Contents: some special vector operations
|
4
|
+
Author : Christian Borgelt
|
5
|
+
History : 1996.09.16 file created
|
6
|
+
1999.02.04 long int changed to int
|
7
|
+
2001.06.03 function v_shuffle added
|
8
|
+
2002.01.02 functions v_intsort, v_fltsort, v_dblsort added
|
9
|
+
2002.03.03 functions v_reverse, v_intrev etc. added
|
10
|
+
2003.08.21 function v_heapsort added
|
11
|
+
2007.01.16 shuffle functions for basic data types added
|
12
|
+
----------------------------------------------------------------------*/
|
13
|
+
#ifndef __VECOPS__
|
14
|
+
#define __VECOPS__
|
15
|
+
|
16
|
+
/*----------------------------------------------------------------------
|
17
|
+
Type Definitions
|
18
|
+
----------------------------------------------------------------------*/
|
19
|
+
typedef int VCMPFN (const void *p1, const void *p2, void *data);
|
20
|
+
|
21
|
+
/*----------------------------------------------------------------------
|
22
|
+
Functions
|
23
|
+
----------------------------------------------------------------------*/
|
24
|
+
extern void v_sort (void *vec, int n, VCMPFN cmpfn, void *data);
|
25
|
+
extern void v_heapsort (void *vec, int n, VCMPFN cmpfn, void *data);
|
26
|
+
extern void v_move (void *vec, int off, int n, int pos, int esz);
|
27
|
+
extern void v_shuffle (void *vec, int n, double randfn (void));
|
28
|
+
extern void v_reverse (void *vec, int n);
|
29
|
+
|
30
|
+
extern void v_intsort (int *vec, int n);
|
31
|
+
extern void v_intrev (int *vec, int n);
|
32
|
+
extern void v_intshfl (int *vec, int n, double randfn (void));
|
33
|
+
|
34
|
+
extern void v_fltsort (float *vec, int n);
|
35
|
+
extern void v_fltrev (float *vec, int n);
|
36
|
+
extern void v_fltshfl (float *vec, int n, double randfn (void));
|
37
|
+
|
38
|
+
extern void v_dblsort (double *vec, int n);
|
39
|
+
extern void v_dblrev (double *vec, int n);
|
40
|
+
extern void v_dblshfl (double *vec, int n, double randfn (void));
|
41
|
+
|
42
|
+
#endif
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Apriori
  # This class provides the basic adaptations to the c extension.
  #
  # It mixes in Apriori and forwards an argument vector to +do_apriori+,
  # which is not defined in this file (presumably it is supplied by the
  # compiled extension -- confirm against ext/Apriori.c).
  class Adapter
    include Apriori

    # Call the actual apriori extension.
    #
    # +args+ is the list of command line style arguments (without the
    # program name). The program name "apriori" is put in front of a
    # copy of the list, so the caller's Array is left unmodified.
    # +opts+ is accepted for compatibility but currently unused.
    #
    # Returns whatever +do_apriori+ returns.
    def call_apriori_with_arguments(args, opts={}) #:nodoc:
      do_apriori(["apriori"] + args)
    end

  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module Apriori

  # This class represents a single association rule.
  #
  # From Christian's original documentation:
  #
  # An association rule is a rule like "If a customer buys wine and bread, he often
  # buys cheese, too."
  #
  # An association rule states that if we pick a customer at random and find out
  # that he selected certain items (bought certain products, chose certain options
  # etc.), we can be confident, quantified by a percentage, that he also selected
  # certain other items (bought certain other products, chose certain other options
  # etc.).
  #
  class AssociationRule
    # Matches one rule line: consequent, antecedent items, support,
    # optional number of antecedent transactions, and confidence.
    RULE_PATTERN = /(.+)\s+<-\s+(.+?)\s+\((\d+\.\d)(?:\/(\d+))?,\s+(\d+\.\d)\)/

    # Attributes that define the identity of a rule (used by #== and #hash).
    COMPARED_ATTRIBUTES = %w{antecedent num_antecedent_transactions
                             support consequent confidence}

    attr_accessor :antecedent
    attr_accessor :num_antecedent_transactions
    attr_accessor :support

    attr_accessor :consequent
    attr_accessor :confidence

    class << self
      # Given +filename+ of a file containing rule information returns an
      # Array of <tt>AssociationRule</tt>s. File format must match that of
      # #parse_line; blank lines are skipped.
      #
      # Errors while reading or parsing are reported on standard output and
      # not raised; the rules parsed up to that point are returned.
      def from_file(filename)
        rules = []
        begin
          File.read(filename).each_line do |line|
            next if line.strip.empty?
            rules << parse_line(line)
          end
        rescue => e
          puts "Error reading: #{filename}"
          puts e
        end
        rules
      end

      # Given +line+ returns an AssociationRule.
      # Example of a line:
      #   foo <- bar baz bangle (66.7/4, 75.0)
      #
      # The number of antecedent transactions ("/4" above) is optional and
      # is +nil+ in the result when absent.
      #
      # Raises ArgumentError if +line+ does not have this format.
      def parse_line(line)
        match = RULE_PATTERN.match(line)
        unless match
          raise ArgumentError, "malformed association rule line: #{line.inspect}"
        end
        consequent, antecedent, support, transactions, confidence = match.captures

        rule = new
        rule.consequent = consequent
        rule.antecedent = antecedent.split(/\s+/)
        rule.support = support.to_f
        rule.num_antecedent_transactions = transactions ? transactions.to_i : nil
        rule.confidence = confidence.to_f
        rule
      end
    end

    # Returns the standard form of this rule as a string. For instance:
    #   foo <- bar baz bangle (66.7/4, 75.0)
    def to_s
      "%s <- %s (%0.01f%s, %0.01f)" % [ consequent,
        antecedent.join(" "),
        support,
        num_antecedent_transactions ? "/#{num_antecedent_transactions}" : "", confidence ]
    end

    def eql?(object) #:nodoc:
      self == (object)
    end

    # Hash value derived from the same attributes #== compares, so that
    # rules that are #eql? can be used interchangeably as Hash keys.
    def hash #:nodoc:
      COMPARED_ATTRIBUTES.map { |key| send(key) }.hash
    end

    # Check equality between two <tt>AssociationRule</tt>s. Two rules are
    # equal if they are instances of exactly the same class and all of
    # their attributes are equal.
    def ==(object)
      return true if object.equal?(self)
      return false unless object.instance_of?(self.class)
      COMPARED_ATTRIBUTES.all? { |key| object.send(key) == send(key) }
    end

  end
end
|
data/lib/apriori.rb
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2008 Nate Murray
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
|
23
|
+
$: << File.expand_path(File.join(File.dirname(__FILE__), "../ext"))
|
24
|
+
require 'apriori_ext'
|
25
|
+
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
26
|
+
|
27
|
+
require 'tempfile'
|
28
|
+
require 'apriori/adapter'
|
29
|
+
require 'apriori/association_rule'
|
30
|
+
|
31
|
+
module Apriori
  # Find association rules. Given +input+ and +opts+ returns an Array of AssociationRules.
  # See README.txt if you are unsure why you would want to do this.
  #
  # +input+ can be an Array of Arrays of String objects or a String specifying
  # a path to a transactions file. Any other kind of +input+ raises a
  # RuntimeError ("unknown input").
  #
  # The options are:
  #
  # * <tt>:min_items</tt>: minimal number of items per rule (default: 1)
  # * <tt>:max_items</tt>: maximal number of items per rule (default: no limit)
  # * <tt>:min_support</tt>: minimal support of a rule (default: 10 (percent %))
  # * <tt>:max_support</tt>: maximal support of a rule (default: 100 (percent %))
  # * <tt>:min_confidence</tt>: minimal confidence of a rule (default: 80 (percent %))
  # * <tt>:output_file</tt>: write the rules to this file instead of returning
  #   AssociationRule objects. If this option is specified the path to this file is returned
  #
  # Options that are not given are not passed on, so the defaults listed
  # above are those of the underlying apriori program.
  #
  # Examples:
  #
  # This first example passes in an Array of Arrays of Strings. The idea is
  # that each individual Array of Strings is a transaction and the containing
  # Array is the set of all transactions.
  #
  # In this example, we call #find_association_rules with the default options.
  #
  #   transactions = [  %w{beer doritos},
  #                     %w{apple cheese},
  #                     %w{apple cheese},
  #                     %w{apple doritos} ]
  #
  #   rules = Apriori.find_association_rules(transactions)
  #
  # In this example we read the transactions from a file. The format of the file
  # is one transaction per line, space separated items. For instance:
  #
  #   # save to /path/to/some/file.txt
  #   beer doritos
  #   apple cheese
  #   apple cheese
  #   apple doritos
  #
  # Here is how to call it, using many options:
  #
  #   rules = Apriori.find_association_rules("/path/to/some/file.txt",
  #                             :min_items => 2,
  #                             :max_items => 2,
  #                             :min_support => 0.01,
  #                             :max_support => 100,
  #                             :min_confidence => 20)
  #
  def self.find_association_rules(input, opts={})
    args = []

    # The temporary files get their own variables so that both stay
    # referenced until the extension has finished; an unreferenced
    # Tempfile may be deleted by its finalizer at any time.
    input_tempfile = nil
    output_tempfile = nil

    # create the input file
    if input.kind_of?(String)
      args << input
    elsif input.kind_of?(Array)
      input_tempfile = create_temporary_file_from_transactions(input)
      args << input_tempfile.path
    else
      raise "unknown input"
    end

    # create an output file somewhere
    if opts[:output_file]
      output_file = opts[:output_file]
    else
      output_tempfile = Tempfile.new("transactions_results_#{Process.pid}_#{rand}")
      output_tempfile.close # starts open
      output_file = output_tempfile.path
    end
    args << output_file

    args << "-m#{opts[:min_items]}" if opts[:min_items]
    args << "-n#{opts[:max_items]}" if opts[:max_items]
    args << "-s#{opts[:min_support]}" if opts[:min_support]
    args << "-S#{opts[:max_support]}" if opts[:max_support]
    args << "-c#{opts[:min_confidence]}" if opts[:min_confidence]

    args << "-a"

    Adapter.new.call_apriori_with_arguments(args)

    if opts[:output_file]
      output_file
    else
      AssociationRule.from_file(output_file)
    end
  ensure
    # Only files created here are removed; a user supplied input or
    # output file is never touched.
    input_tempfile.close! if input_tempfile
    output_tempfile.close! if output_tempfile
  end

  # Internal helper: writes +transactions+ (an Array of Arrays of Strings)
  # to a new temporary file, one transaction per line with the items
  # separated by a single space. Returns the closed Tempfile; the caller
  # must keep a reference to it for as long as the file is needed.
  #
  # This method is publicly callable (a bare +private+ would not affect a
  # method defined with <tt>def self.</tt>), but it is not meant to be part
  # of the public API.
  def self.create_temporary_file_from_transactions(transactions)
    tempfile = Tempfile.open("transactions_#{Process.pid}_#{rand}")
    transactions.each do |transaction|
      tempfile.puts transaction.join(" ")
    end
    tempfile.close
    tempfile
  end

end
|
data/script/console
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
#!/usr/bin/env ruby
# File: script/console
#
# Replaces the current process with an irb session that has irb's
# completion support and lib/apriori.rb preloaded.

# On platforms whose RUBY_PLATFORM contains "mswin" or "mingw" the irb
# launcher is named irb.bat.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs =  " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs <<  " -r #{File.dirname(__FILE__) + '/../lib/apriori.rb'}"
puts "Loading apriori gem"
exec "#{irb} #{libs} --simple-prompt"
|
data/script/destroy
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs RubiGen's destroy script for this project with the command line
# arguments given to this script.

# Absolute path of the directory above the one containing this script.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load rubigen directly if possible; otherwise load rubygems first and
# try again.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# A leading help flag is dropped from the arguments before they are
# handed to the script.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV[0])

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Destroy.new.run(ARGV)
|
data/script/generate
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs RubiGen's generate script for this project with the command line
# arguments given to this script.

# Absolute path of the directory above the one containing this script.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load rubigen directly if possible; otherwise load rubygems first and
# try again.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# A leading help flag is dropped from the arguments before they are
# handed to the script.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV[0])

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Generate.new.run(ARGV)
|