jashmenn-apriori 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (122) hide show
  1. data/History.txt +4 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +121 -0
  4. data/README.txt +139 -0
  5. data/Rakefile +4 -0
  6. data/TODO.txt +60 -0
  7. data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
  8. data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
  9. data/attic/c_ext_test1/mytest.rb +10 -0
  10. data/attic/test.c +12 -0
  11. data/config/hoe.rb +77 -0
  12. data/config/requirements.rb +15 -0
  13. data/examples/01_simple_example.rb +23 -0
  14. data/examples/02_small_file_example.rb +17 -0
  15. data/examples/03_large_file_example.rb +22 -0
  16. data/examples/test_data/market_basket_basic_test.dat +9 -0
  17. data/ext/Apriori.c +149 -0
  18. data/ext/Makefile +149 -0
  19. data/ext/apriori/doc/apriori.html +1301 -0
  20. data/ext/apriori/doc/arem.gp +68 -0
  21. data/ext/apriori/doc/c_rev.gp +89 -0
  22. data/ext/apriori/doc/chi2.tex +156 -0
  23. data/ext/apriori/doc/copying +504 -0
  24. data/ext/apriori/doc/line.gif +0 -0
  25. data/ext/apriori/doc/uparrow.gif +0 -0
  26. data/ext/apriori/ex/flg2set +15 -0
  27. data/ext/apriori/ex/hdr2set +13 -0
  28. data/ext/apriori/ex/readme +71 -0
  29. data/ext/apriori/ex/row2set +7 -0
  30. data/ext/apriori/ex/rulesort +24 -0
  31. data/ext/apriori/ex/tab2set +9 -0
  32. data/ext/apriori/ex/test.app +2 -0
  33. data/ext/apriori/ex/test.rul +9 -0
  34. data/ext/apriori/ex/test1.rul +43 -0
  35. data/ext/apriori/ex/test1.tab +10 -0
  36. data/ext/apriori/ex/test2.tab +10 -0
  37. data/ext/apriori/ex/test3.tab +30 -0
  38. data/ext/apriori/ex/test4.tab +11 -0
  39. data/ext/apriori/ex/test5.tab +39 -0
  40. data/ext/apriori/ex/tid2set +23 -0
  41. data/ext/apriori/ex/xhdr2set +33 -0
  42. data/ext/apriori/src/apriori.c +750 -0
  43. data/ext/apriori/src/apriori.dsp +120 -0
  44. data/ext/apriori/src/apriori.dsw +29 -0
  45. data/ext/apriori/src/apriori.mak +99 -0
  46. data/ext/apriori/src/istree.c +1411 -0
  47. data/ext/apriori/src/istree.h +160 -0
  48. data/ext/apriori/src/makefile +105 -0
  49. data/ext/apriori/src/tract.c +870 -0
  50. data/ext/apriori/src/tract.h +261 -0
  51. data/ext/apriori_wrapper.c +757 -0
  52. data/ext/apriori_wrapper.h +10 -0
  53. data/ext/extconf.rb +32 -0
  54. data/ext/math/doc/copying +504 -0
  55. data/ext/math/src/chi2.c +151 -0
  56. data/ext/math/src/chi2.h +27 -0
  57. data/ext/math/src/choose.c +71 -0
  58. data/ext/math/src/choose.h +16 -0
  59. data/ext/math/src/gamma.c +446 -0
  60. data/ext/math/src/gamma.h +39 -0
  61. data/ext/math/src/intexp.c +35 -0
  62. data/ext/math/src/intexp.h +15 -0
  63. data/ext/math/src/makefile +164 -0
  64. data/ext/math/src/math.mak +48 -0
  65. data/ext/math/src/normal.c +387 -0
  66. data/ext/math/src/normal.h +44 -0
  67. data/ext/math/src/radfn.c +113 -0
  68. data/ext/math/src/radfn.h +34 -0
  69. data/ext/math/src/zeta.c +49 -0
  70. data/ext/math/src/zeta.h +15 -0
  71. data/ext/pre-clean.rb +8 -0
  72. data/ext/pre-setup.rb +9 -0
  73. data/ext/util/doc/copying +504 -0
  74. data/ext/util/src/listops.c +76 -0
  75. data/ext/util/src/listops.h +26 -0
  76. data/ext/util/src/makefile +103 -0
  77. data/ext/util/src/memsys.c +84 -0
  78. data/ext/util/src/memsys.h +42 -0
  79. data/ext/util/src/nstats.c +288 -0
  80. data/ext/util/src/nstats.h +69 -0
  81. data/ext/util/src/params.c +86 -0
  82. data/ext/util/src/params.h +19 -0
  83. data/ext/util/src/parse.c +133 -0
  84. data/ext/util/src/parse.h +81 -0
  85. data/ext/util/src/scan.c +767 -0
  86. data/ext/util/src/scan.h +111 -0
  87. data/ext/util/src/symtab.c +443 -0
  88. data/ext/util/src/symtab.h +121 -0
  89. data/ext/util/src/tabscan.c +279 -0
  90. data/ext/util/src/tabscan.h +99 -0
  91. data/ext/util/src/util.mak +91 -0
  92. data/ext/util/src/vecops.c +317 -0
  93. data/ext/util/src/vecops.h +42 -0
  94. data/lib/apriori/adapter.rb +13 -0
  95. data/lib/apriori/association_rule.rb +85 -0
  96. data/lib/apriori/version.rb +9 -0
  97. data/lib/apriori.rb +133 -0
  98. data/script/console +10 -0
  99. data/script/destroy +14 -0
  100. data/script/generate +14 -0
  101. data/script/txt2html +82 -0
  102. data/setup.rb +1585 -0
  103. data/tasks/apriori.rake +20 -0
  104. data/tasks/attic.rake +28 -0
  105. data/tasks/deployment.rake +34 -0
  106. data/tasks/environment.rake +7 -0
  107. data/tasks/install.rake +6 -0
  108. data/tasks/website.rake +17 -0
  109. data/test/apriori_test.rb +13 -0
  110. data/test/fixtures/market_basket_results_test.txt +5 -0
  111. data/test/fixtures/market_basket_string_test.txt +7 -0
  112. data/test/fixtures/results.txt +2 -0
  113. data/test/fixtures/sample.txt +7 -0
  114. data/test/test_helper.rb +5 -0
  115. data/test/unit/test_apriori.rb +68 -0
  116. data/test/unit/test_itemsets_and_parsing.rb +82 -0
  117. data/website/index.html +233 -0
  118. data/website/index.txt +142 -0
  119. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  120. data/website/stylesheets/screen.css +142 -0
  121. data/website/template.html.erb +49 -0
  122. metadata +188 -0
@@ -0,0 +1,317 @@
1
+ /*----------------------------------------------------------------------
2
+ File : vecops.c
3
+ Contents: some special vector operations
4
+ Author : Christian Borgelt
5
+ History : 1996.09.16 file created
6
+ 1999.02.04 long int changed to int
7
+ 2001.06.03 function v_shuffle added
8
+ 2002.01.02 functions v_intsort, v_fltsort, v_dblsort added
9
+ 2002.03.03 functions v_reverse, v_intrev etc. added
10
+ 2003.08.21 function v_heapsort added
11
+ 2007.01.16 shuffle functions for basic data types added
12
+ 2007.12.02 bug in reverse functions fixed
13
+ ----------------------------------------------------------------------*/
14
+ #include <assert.h>
15
+ #include "vecops.h"
16
+
17
+ /*----------------------------------------------------------------------
18
+ Preprocessor Definitions
19
+ ----------------------------------------------------------------------*/
20
+ #define TH_INSERT 16 /* threshold for insertion sort */
21
+ #define BUFSIZE 4096 /* size of buffers for shifting */
22
+
23
+ /*----------------------------------------------------------------------
24
+ Functions
25
+ ----------------------------------------------------------------------*/
26
+
27
+ static void _rec (void **vec, int n, VCMPFN cmpfn, void *data)
28
+ { /* --- recursive part of sort */
29
+ void **l, **r; /* pointers to exchange positions */
30
+ void *x, *t; /* pivot element and exchange buffer */
31
+ int m; /* number of elements in 2nd section */
32
+
33
+ do { /* sections sort loop */
34
+ l = vec; r = l +n -1; /* start at left and right boundary */
35
+ if (cmpfn(*l, *r, data) > 0) { /* bring the first and last */
36
+ t = *l; *l = *r; *r = t; } /* element into proper order */
37
+ x = vec[n >> 1]; /* get the middle element as pivot */
38
+ if (cmpfn(x, *l, data) < 0) x = *l; /* try to find a */
39
+ else if (cmpfn(x, *r, data) > 0) x = *r; /* better pivot */
40
+ while (1) { /* split and exchange loop */
41
+ while (cmpfn(*++l, x, data) < 0) /* skip left elements that */
42
+ ; /* are smaller than the pivot element */
43
+ while (cmpfn(*--r, x, data) > 0) /* skip right elements that */
44
+ ; /* are greater than the pivot element */
45
+ if (l >= r) { /* if less than two elements left, */
46
+ if (l <= r) { l++; r--; } break; } /* abort the loop */
47
+ t = *l; *l = *r; *r = t; /* otherwise exchange elements */
48
+ }
49
+ m = (int)(vec +n -l); /* compute the number of elements */
50
+ n = (int)(r -vec +1); /* right and left of the split */
51
+ if (n > m) { /* if right section is smaller, */
52
+ if (m >= TH_INSERT) /* but larger than the threshold, */
53
+ _rec(l, m, cmpfn, data); } /* sort it by a recursive call, */
54
+ else { /* if the left section is smaller, */
55
+ if (n >= TH_INSERT) /* but larger than the threshold, */
56
+ _rec(vec, n, cmpfn, data); /* sort it by a recursive call, */
57
+ vec = l; n = m; /* then switch to the right section */
58
+ } /* keeping its size m in variable n */
59
+ } while (n >= TH_INSERT); /* while greater than threshold */
60
+ } /* _rec() */
61
+
62
+ /*--------------------------------------------------------------------*/
63
+
64
+ void v_sort (void *vec, int n, VCMPFN cmpfn, void *data)
65
+ { /* --- quick sort for pointer vectors */
66
+ int k; /* size of first section */
67
+ void **l, **r; /* to traverse the vector */
68
+ void *t; /* exchange buffer */
69
+
70
+ assert(vec && (n >= 0) && cmpfn); /* check the function arguments */
71
+ if (n <= 1) return; /* do not sort less than two elements */
72
+ if (n < TH_INSERT) /* if fewer elements than threshold */
73
+ k = n; /* for insertion sort, note the */
74
+ else { /* number of elements, otherwise */
75
+ _rec(vec, n, cmpfn, data); /* call the recursive function */
76
+ k = TH_INSERT -1; /* and get the number of elements */
77
+ } /* in the first vector section */
78
+ for (l = r = vec; --k > 0; ) /* find the smallest element within */
79
+ if (cmpfn(*++r, *l, data) < 0) l = r; /* the first k elements */
80
+ r = vec; /* swap the smallest element */
81
+ t = *l; *l = *r; *r = t; /* to front as a sentinel */
82
+ while (--n > 0) { /* insertion sort loop */
83
+ t = *++r; /* note the element to insert */
84
+ for (l = r; cmpfn(*--l, t, data) > 0; ) /* shift right elements */
85
+ l[1] = *l; /* that are greater than the one to */
86
+ l[1] = t; /* insert and store the element to */
87
+ } /* insert in the place thus found */
88
+ } /* v_sort() */
89
+
90
+ /*--------------------------------------------------------------------*/
91
+
92
+ static void _sift (void **vec, int l, int r, VCMPFN cmpfn, void *data)
93
+ { /* --- let element sift down in heap */
94
+ void *t; /* buffer for element */
95
+ int i; /* index of first successor in heap */
96
+
97
+ t = vec[l]; /* note sift element */
98
+ i = l +l +1; /* compute index of first successor */
99
+ do { /* sift loop */
100
+ if ((i < r) /* if second successor is greater */
101
+ && (cmpfn(vec[i], vec[i+1], data) < 0))
102
+ i++; /* go to the second successor */
103
+ if (cmpfn(t, vec[i], data) >= 0) /* if the successor is greater */
104
+ break; /* than the sift element, */
105
+ vec[l] = vec[i]; /* let the successor ascend in heap */
106
+ l = i; i += i +1; /* compute index of first successor */
107
+ } while (i <= r); /* while still within heap */
108
+ vec[l] = t; /* store the sift element */
109
+ } /* _sift() */
110
+
111
+ /*--------------------------------------------------------------------*/
112
+
113
+ void v_heapsort (void *vec, int n, VCMPFN cmpfn, void *data)
114
+ { /* --- heap sort for pointer vectors */
115
+ int l, r; /* boundaries of heap section */
116
+ void *t, **v; /* exchange buffer, vector */
117
+
118
+ if (n <= 1) return; /* do not sort less than two elements */
119
+ l = n >> 1; /* at start, only the second half */
120
+ r = n -1; /* of the vector has heap structure */
121
+ while (--l >= 0) /* while the heap is not complete, */
122
+ _sift(vec, l, r, cmpfn, data); /* extend it by one element */
123
+ v = vec; /* type the vector pointer */
124
+ while (1) { /* heap reduction loop */
125
+ t = v[0]; v[0] = v[r]; /* swap the greatest element */
126
+ v[r] = t; /* to the end of the vector */
127
+ if (--r <= 0) break; /* if the heap is empty, abort */
128
+ _sift(v, 0, r, cmpfn, data);
129
+ } /* let the element that has been */
130
+ } /* v_heapsort() */ /* swapped to front sift down */
131
+
132
+ /*--------------------------------------------------------------------*/
133
+
134
+ void v_move (void *vec, int off, int n, int pos, int esz)
135
+ { /* --- move a vector section */
136
+ int i; /* loop variable */
137
+ int mid, end; /* middle and end index */
138
+ int *src, *dst; /* to traverse vector */
139
+ int buf[BUFSIZE]; /* buffer for vector elements */
140
+
141
+ assert(vec /* check the function arguments */
142
+ && (off >= 0) && (n >= 0) && (pos >= 0) && (esz >= 0));
143
+ esz /= (int)sizeof(int); /* adapt size, offsets, and counter */
144
+ pos *= esz; off *= esz; n *= esz;
145
+ end = off +n; /* normalize vector indices */
146
+ if (pos <= off) { mid = off; off = pos; }
147
+ else { mid = end; end = pos; }
148
+ if (mid -off < end -mid) { /* if first section is smaller */
149
+ while (mid > off) { /* while there are elements to shift */
150
+ n = (mid -off < BUFSIZE) ? mid -off : BUFSIZE;
151
+ src = (int*)vec +mid -n; /* get number of elements and */
152
+ dst = buf; /* copy source to the buffer */
153
+ for (i = n; --i >= 0; ) *dst++ = *src++;
154
+ dst = (int*)vec +mid -n; /* shift down/left second section */
155
+ for (i = end -mid; --i >= 0; ) *dst++ = *src++;
156
+ src = buf; /* copy buffer to destination */
157
+ for (i = n; --i >= 0; ) *dst++ = *src++;
158
+ mid -= n; end -= n; /* second section has been shifted */
159
+ } } /* down/left cnt elements */
160
+ else { /* if second section is smaller */
161
+ while (end > mid) { /* while there are elements to shift */
162
+ n = (end -mid < BUFSIZE) ? end -mid : BUFSIZE;
163
+ src = (int*)vec +mid +n; /* get number of elements and */
164
+ dst = buf +n; /* copy source to the buffer */
165
+ for (i = n; --i >= 0; ) *--dst = *--src;
166
+ dst = (int*)vec +mid +n; /* shift up/right first section */
167
+ for (i = mid -off; --i >= 0; ) *--dst = *--src;
168
+ src = buf +n; /* copy buffer to destination */
169
+ for (i = n; --i >= 0; ) *--dst = *--src;
170
+ mid += n; off += n; /* first section has been shifted */
171
+ } /* up/right cnt elements */
172
+ }
173
+ } /* v_move() */
174
+
175
+ /*--------------------------------------------------------------------*/
176
+
177
+ void v_shuffle (void *vec, int n, double randfn (void))
178
+ { /* --- shuffle vector entries */
179
+ int i; /* vector index */
180
+ void **v = vec, *t; /* vector and exchange buffer */
181
+
182
+ while (--n > 0) { /* shuffle loop (n random selections) */
183
+ i = (int)((n+1) *randfn()); /* compute a random index */
184
+ if (i > n) i = n; /* in the remaining section and */
185
+ if (i < 0) i = 0; /* exchange the vector elements */
186
+ t = v[i]; v[i] = v[n]; v[n] = t;
187
+ }
188
+ } /* v_shuffle() */
189
+
190
+ /*--------------------------------------------------------------------*/
191
+
192
+ void v_reverse (void *vec, int n)
193
+ { /* --- reverse a pointer vector */
194
+ void **v, *t; /* vector and exchange buffer */
195
+
196
+ for (v = vec; --n > 0; ) { /* reverse the order of the elements */
197
+ t = v[n]; v[n--] = v[0]; *v++ = t; }
198
+ } /* v_reverse() */
199
+
200
+ /*--------------------------------------------------------------------*/
201
+
202
+ #define REC(type,rec) \
203
+ static void rec (type *vec, int n) \
204
+ { /* --- recursive part of sort */ \
205
+ type *l, *r; /* pointers to exchange positions */ \
206
+ type x, t; /* pivot element and exchange buffer */\
207
+ int m; /* number of elements in sections */ \
208
+ \
209
+ do { /* sections sort loop */ \
210
+ l = vec; r = l +n -1; /* start at left and right boundary */ \
211
+ if (*l > *r) { t = *l; *l = *r; *r = t; } \
212
+ x = vec[n >> 1]; /* get the middle element as pivot */ \
213
+ if (x < *l) x = *l; /* compute median of three */ \
214
+ else if (x > *r) x = *r; /* to find a better pivot */ \
215
+ while (1) { /* split and exchange loop */ \
216
+ while (*++l < x) /* skip left elements that are */ \
217
+ ; /* smaller than the pivot element */ \
218
+ while (*--r > x) /* skip right elements that are */ \
219
+ ; /* greater than the pivot element */ \
220
+ if (l >= r) { /* if less than two elements left, */ \
221
+ if (l <= r) { l++; r--; } break; } /* abort the loop */ \
222
+ t = *l; *l = *r; *r = t; /* otherwise exchange elements */ \
223
+ } \
224
+ m = (int)(vec +n -l); /* compute the number of elements */ \
225
+ n = (int)(r -vec +1); /* right and left of the split */ \
226
+ if (n > m) { /* if right section is smaller, */ \
227
+ if (m >= TH_INSERT) /* but larger than the threshold, */ \
228
+ rec(l, m); } /* sort it by an recursive call */ \
229
+ else { /* if the left section is smaller, */ \
230
+ if (n >= TH_INSERT) /* but larger than the threshold, */ \
231
+ rec(vec, n); /* sort it by an recursive call, */ \
232
+ vec = l; n = m; /* then switch to the right section */ \
233
+ } /* keeping its size m in variable n */ \
234
+ } while (n >= TH_INSERT); /* while greater than threshold */ \
235
+ } /* rec() */
236
+
237
+ /*--------------------------------------------------------------------*/
238
+
239
+ #define SORT(type,rec,sort) \
240
+ void sort (type *vec, int n) \
241
+ { /* --- sort a number vector */ \
242
+ int k; /* size of first section */ \
243
+ type *l, *r; /* to traverse the vector */ \
244
+ type t; /* exchange buffer */ \
245
+ \
246
+ assert(vec && (n >= 0)); /* check the function arguments */ \
247
+ if (n <= 1) return; /* do not sort less than two elems. */ \
248
+ if (n < TH_INSERT) /* if less elements than threshold */ \
249
+ k = n; /* for insertion sort, note the */ \
250
+ else { /* number of elements, otherwise */ \
251
+ rec(vec, n); /* call the recursive sort function */ \
252
+ k = TH_INSERT -1; /* and get the number of elements */ \
253
+ } /* in the first vector section */ \
254
+ for (l = r = vec; --k > 0; ) /* find position of smallest element */\
255
+ if (*++r < *l) l = r; /* within the first k elements */ \
256
+ r = vec; /* swap the smallest element */ \
257
+ t = *l; *l = *r; *r = t; /* to front as a sentinel */ \
258
+ while (--n > 0) { /* standard insertion sort */ \
259
+ t = *++r; /* note the number to insert */ \
260
+ for (l = r; *--l > t; k--) /* shift right all numbers that are */ \
261
+ l[1] = *l; /* greater than the one to insert */ \
262
+ l[1] = t; /* and store the number to insert */ \
263
+ } /* in the place thus found */ \
264
+ } /* sort() */
265
+
266
+ /*--------------------------------------------------------------------*/
267
+
268
+ REC (int, _intrec)
269
+ SORT(int, _intrec, v_intsort)
270
+
271
+ /*--------------------------------------------------------------------*/
272
+
273
+ REC (float, _fltrec)
274
+ SORT(float, _fltrec, v_fltsort)
275
+
276
+ /*--------------------------------------------------------------------*/
277
+
278
+ REC (double, _dblrec)
279
+ SORT(double, _dblrec, v_dblsort)
280
+
281
+ /*--------------------------------------------------------------------*/
282
+
283
+ #define REVERSE(type,reverse) \
284
+ void reverse (type *vec, int n) \
285
+ { /* --- reverse a number vector */ \
286
+ type t; /* exchange buffer */ \
287
+ while (--n > 0) { /* reverse the order of the elems. */ \
288
+ t = vec[n]; vec[n--] = vec[0]; *vec++ = t; } \
289
+ } /* reverse() */
290
+
291
+ /*--------------------------------------------------------------------*/
292
+
293
+ REVERSE(int, v_intrev)
294
+ REVERSE(float, v_fltrev)
295
+ REVERSE(double, v_dblrev)
296
+
297
+ /*--------------------------------------------------------------------*/
298
+
299
+ #define SHUFFLE(type,shuffle) \
300
+ void shuffle (type *vec, int n, double randfn (void)) \
301
+ { /* --- shuffle vector entries */ \
302
+ int i; /* vector index */ \
303
+ type t; /* exchange buffer */ \
304
+ \
305
+ while (--n > 0) { /* shuffle loop (n selections) */ \
306
+ i = (int)((n+1) *randfn()); /* compute a random index */ \
307
+ if (i > n) i = n; /* in the remaining section and */ \
308
+ if (i < 0) i = 0; /* exchange the vector elements */ \
309
+ t = vec[i]; vec[i] = vec[n]; vec[n] = t; \
310
+ } \
311
+ } /* shuffle() */
312
+
313
+ /*--------------------------------------------------------------------*/
314
+
315
+ SHUFFLE(int, v_intshfl)
316
+ SHUFFLE(float, v_fltshfl)
317
+ SHUFFLE(double, v_dblshfl)
@@ -0,0 +1,42 @@
1
+ /*----------------------------------------------------------------------
2
+ File : vecops.h
3
+ Contents: some special vector operations
4
+ Author : Christian Borgelt
5
+ History : 1996.09.16 file created
6
+ 1999.02.04 long int changed to int
7
+ 2001.06.03 function v_shuffle added
8
+ 2002.01.02 functions v_intsort, v_fltsort, v_dblsort added
9
+ 2002.03.03 functions v_reverse, v_intrev etc. added
10
+ 2003.08.21 function v_heapsort added
11
+ 2007.01.16 shuffle functions for basic data types added
12
+ ----------------------------------------------------------------------*/
13
+ #ifndef __VECOPS__
14
+ #define __VECOPS__
15
+
16
+ /*----------------------------------------------------------------------
17
+ Type Definitions
18
+ ----------------------------------------------------------------------*/
19
+ typedef int VCMPFN (const void *p1, const void *p2, void *data);
20
+
21
+ /*----------------------------------------------------------------------
22
+ Functions
23
+ ----------------------------------------------------------------------*/
24
+ extern void v_sort (void *vec, int n, VCMPFN cmpfn, void *data);
25
+ extern void v_heapsort (void *vec, int n, VCMPFN cmpfn, void *data);
26
+ extern void v_move (void *vec, int off, int n, int pos, int esz);
27
+ extern void v_shuffle (void *vec, int n, double randfn (void));
28
+ extern void v_reverse (void *vec, int n);
29
+
30
+ extern void v_intsort (int *vec, int n);
31
+ extern void v_intrev (int *vec, int n);
32
+ extern void v_intshfl (int *vec, int n, double randfn (void));
33
+
34
+ extern void v_fltsort (float *vec, int n);
35
+ extern void v_fltrev (float *vec, int n);
36
+ extern void v_fltshfl (float *vec, int n, double randfn (void));
37
+
38
+ extern void v_dblsort (double *vec, int n);
39
+ extern void v_dblrev (double *vec, int n);
40
+ extern void v_dblshfl (double *vec, int n, double randfn (void));
41
+
42
+ #endif
@@ -0,0 +1,13 @@
1
module Apriori
  # This class provides the basic adaptations to the c extension.
  class Adapter
    include Apriori

    # Call the actual apriori extension.
    #
    # +args+ is the list of command line style arguments (input path, output
    # path, option flags). The program name "apriori" is prepended to form an
    # argv-like list, which is handed to +do_apriori+.
    #
    # The caller's array is left untouched: a new array is built instead of
    # unshifting into +args+, so the same argument list can safely be reused
    # for several calls.
    #
    # +opts+ is accepted for interface compatibility and is currently unused.
    #
    # NOTE(review): +do_apriori+ is not defined in this file; it is presumably
    # supplied by the compiled extension through the included Apriori module.
    def call_apriori_with_arguments(args, opts={}) #:nodoc:
      do_apriori(["apriori"] + args)
    end

  end
end
@@ -0,0 +1,85 @@
1
module Apriori

  # This class represents a single association rule.
  #
  # From Christian's original documentation:
  #
  # An association rule is a rule like "If a customer buys wine and bread, he often
  # buys cheese, too."
  #
  # An association rule states that if we pick a customer at random and find out
  # that he selected certain items (bought certain products, chose certain options
  # etc.), we can be confident, quantified by a percentage, that he also selected
  # certain other items (bought certain other products, chose certain other options
  # etc.).
  #
  class AssociationRule
    # Pattern of a single rule line, for instance:
    #   foo <- bar baz bangle (66.7/4, 75.0)
    # Captures, in order: consequent, antecedent items, support, the optional
    # number of antecedent transactions, and confidence.
    RULE_PATTERN = /(.+)\s+<-\s+(.+?)\s+\((\d+\.\d)(?:\/(\d+))?,\s+(\d+\.\d)\)/

    attr_accessor :antecedent
    attr_accessor :num_antecedent_transactions
    attr_accessor :support

    attr_accessor :consequent
    attr_accessor :confidence

    class << self
      # Given +filename+ of a file containing rule information returns an
      # Array of <tt>AssociationRule</tt>s. File format must match that of
      # #parse_line. Lines that do not describe a rule (blank lines, for
      # example) are skipped.
      #
      # Errors while reading (e.g. a missing file) are reported on standard
      # output; the rules collected so far are still returned.
      def from_file(filename)
        rules = []
        begin
          File.read(filename).each_line do |line|
            rule = parse_line(line)
            rules << rule if rule
          end
        rescue => e
          puts "Error reading: #{filename}"
          puts e
        end
        rules
      end

      # Given +line+ returns an AssociationRule, or +nil+ if the line does
      # not match the expected format.
      # Example of a line:
      #   foo <- bar baz bangle (66.7/4, 75.0)
      def parse_line(line)
        # Use local match data rather than the $1..$5 globals, and bail out
        # early instead of calling methods on nil captures.
        match = RULE_PATTERN.match(line)
        return nil unless match

        rule = new
        rule.consequent = match[1]
        rule.antecedent = match[2].split(/\s+/)
        rule.support    = match[3].to_f
        rule.num_antecedent_transactions = match[4] ? match[4].to_i : nil
        rule.confidence = match[5].to_f
        rule
      end
    end

    # Returns the standard form of this rule as a string. For instance:
    #   foo <- bar baz bangle (66.7/4, 75.0)
    def to_s
      "%s <- %s (%0.01f%s, %0.01f)" % [ consequent,
        antecedent.join(" "),
        support,
        num_antecedent_transactions ? "/#{num_antecedent_transactions}" : "", confidence ]
    end

    def eql?(object) #:nodoc:
      self == (object)
    end

    # Hash value consistent with #== and #eql?, so that equal rules collapse
    # in <tt>Array#uniq</tt> and behave as the same Hash key.
    def hash #:nodoc:
      [antecedent, num_antecedent_transactions, support, consequent, confidence].hash
    end

    # Check equality between two <tt>AssociationRule</tt>s. Two rules are
    # equal when they are of the same class and all attributes are equal.
    def ==(object)
      return true if object.equal?(self)
      if object.instance_of?(self.class)
        %w{antecedent num_antecedent_transactions
           support consequent confidence}.each do |key|
          return false unless object.send(key) == self.send(key)
        end
        return true
      else
        return false
      end
    end

  end
end
@@ -0,0 +1,9 @@
1
module Apriori
  # Version information of the gem, split into its numeric components.
  module VERSION #:nodoc:
    MAJOR, MINOR, TINY = 0, 1, 0

    # Dotted version string assembled from the components above.
    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end
data/lib/apriori.rb ADDED
@@ -0,0 +1,133 @@
1
+ #--
2
+ # Copyright (c) 2008 Nate Murray
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+
23
+ $: << File.expand_path(File.join(File.dirname(__FILE__), "../ext"))
24
+ require 'apriori_ext'
25
+ $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
26
+
27
+ require 'tempfile'
28
+ require 'apriori/adapter'
29
+ require 'apriori/association_rule'
30
+
31
module Apriori
  # Find association rules. Given +input+ and +opts+ returns an Array of AssociationRules.
  # See README.txt if you are unsure why you would want to do this.
  #
  # +input+ can be an Array of Arrays of String objects or a String specifying
  # the path to a transactions file. Any other kind of +input+ raises a
  # RuntimeError ("unknown input").
  #
  # The options are:
  #
  # * <tt>:min_items</tt>: minimal number of items per rule (default: 1)
  # * <tt>:max_items</tt>: maximal number of items per rule (default: no limit)
  # * <tt>:min_support</tt>: minimal support of a rule (default: 10 (percent %))
  # * <tt>:max_support</tt>: maximal support of a rule (default: 100 (percent %))
  # * <tt>:min_confidence</tt>: minimal confidence of a rule (default: 80 (percent %))
  # * <tt>:output_file</tt>: write the rules to this file instead of returning
  #   AssociationRule objects. If this option is specified the path to this file is returned
  #
  # Examples:
  #
  # This first example passes in an Array of Arrays of Strings. The idea is
  # that each individual Array of Strings is a transaction and the containing
  # Array is the set of all transactions.
  #
  # In this example, we call #find_association_rules with the default options.
  #
  #   transactions = [ %w{beer doritos},
  #                    %w{apple cheese},
  #                    %w{apple cheese},
  #                    %w{apple doritos} ]
  #
  #   rules = Apriori.find_association_rules(transactions)
  #
  # In this example we read the transactions from a file. The format of the file
  # is one transaction per line, space separated items. For instance:
  #
  #   # save to /path/to/some/file.txt
  #   beer doritos
  #   apple cheese
  #   apple cheese
  #   apple doritos
  #
  # Here is how to call it, using many options:
  #
  #   rules = Apriori.find_association_rules("/path/to/some/file.txt",
  #                                          :min_items => 2,
  #                                          :max_items => 2,
  #                                          :min_support => 0.01,
  #                                          :max_support => 100,
  #                                          :min_confidence => 20)
  #
  def self.find_association_rules(input, opts={})
    # Keep a separate reference to each Tempfile for the whole call: a
    # Tempfile that is no longer referenced may be garbage-collected, and its
    # file removed, before the extension gets to read or write it.
    input_tempfile  = nil
    output_tempfile = nil

    # create the input file
    if input.kind_of?(String)
      input_path = input
    elsif input.kind_of?(Array)
      input_tempfile = create_temporary_file_from_transactions(input)
      input_path = input_tempfile.path
    else
      raise "unknown input"
    end

    # create an output file somewhere
    if opts[:output_file]
      output_file = opts[:output_file]
    else
      output_tempfile = Tempfile.new("transactions_results_#{$$}_")
      output_tempfile.close # starts open
      output_file = output_tempfile.path
    end

    args = [input_path, output_file]
    args << "-m#{opts[:min_items]}" if opts[:min_items]
    args << "-n#{opts[:max_items]}" if opts[:max_items]
    args << "-s#{opts[:min_support]}" if opts[:min_support]
    args << "-S#{opts[:max_support]}" if opts[:max_support]
    args << "-c#{opts[:min_confidence]}" if opts[:min_confidence]

    args << "-a"

    adapter = Adapter.new
    adapter.call_apriori_with_arguments(args)

    if opts[:output_file]
      output_file
    else
      AssociationRule.from_file(output_file)
    end
  ensure
    # Remove the temporary files deterministically instead of waiting for
    # garbage collection. A caller-supplied :output_file is never touched.
    input_tempfile.unlink if input_tempfile
    output_tempfile.unlink if output_tempfile
  end

  # Writes +transactions+ (an Array of Arrays of Strings) to a temporary file,
  # one transaction per line with space separated items, and returns the
  # closed Tempfile. The caller must keep a reference to the returned object
  # for as long as the file is needed.
  #
  # Intended for internal use. It remains publicly callable, as it always has
  # been: a bare +private+ does not apply to methods defined with +def self.+.
  def self.create_temporary_file_from_transactions(transactions)
    tempfile = Tempfile.new("transactions_#{$$}_")
    transactions.each do |transaction|
      tempfile.puts transaction.join(" ")
    end
    tempfile.close
    tempfile
  end

end
data/script/console ADDED
@@ -0,0 +1,10 @@
1
#!/usr/bin/env ruby
# File: script/console
#
# Replaces this process with an irb session that has irb/completion and
# lib/apriori.rb preloaded.

# On mswin/mingw builds of Ruby the irb launcher is a batch file.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.dirname(__FILE__) + '/../lib/apriori.rb'}"
puts "Loading apriori gem"
exec "#{irb} #{libs} --simple-prompt"
data/script/destroy ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs the RubiGen "destroy" script with the command line arguments.
# NOTE(review): presumably this removes files created by script/generate;
# confirm against the RubiGen documentation.

# Absolute path of the project root (the parent of this script's directory).
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try a plain require first; fall back to loading rubygems beforehand.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# A leading --help / -h is dropped before the arguments are handed over.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Destroy.new.run(ARGV)
data/script/generate ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs the RubiGen "generate" script with the command line arguments.
# NOTE(review): the available generators come from the component sources
# registered below; confirm details against the RubiGen documentation.

# Absolute path of the project root (the parent of this script's directory).
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try a plain require first; fall back to loading rubygems beforehand.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# A leading --help / -h is dropped before the arguments are handed over.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Generate.new.run(ARGV)