nysol-take 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (161) hide show
  1. checksums.yaml +7 -0
  2. data/bin/mbiclique.rb +317 -0
  3. data/bin/mbipolish.rb +362 -0
  4. data/bin/mccomp.rb +235 -0
  5. data/bin/mclique.rb +295 -0
  6. data/bin/mclique2g.rb +105 -0
  7. data/bin/mcliqueInfo.rb +203 -0
  8. data/bin/mfriends.rb +202 -0
  9. data/bin/mgdiff.rb +252 -0
  10. data/bin/mhifriend.rb +456 -0
  11. data/bin/mhipolish.rb +465 -0
  12. data/bin/mitemset.rb +168 -0
  13. data/bin/mpal.rb +410 -0
  14. data/bin/mpolishing.rb +399 -0
  15. data/bin/msequence.rb +165 -0
  16. data/bin/mtra2g.rb +476 -0
  17. data/bin/mtra2gc.rb +360 -0
  18. data/ext/grhfilrun/extconf.rb +12 -0
  19. data/ext/grhfilrun/grhfilrun.c +85 -0
  20. data/ext/grhfilrun/src/_sspc.c +358 -0
  21. data/ext/grhfilrun/src/aheap.c +545 -0
  22. data/ext/grhfilrun/src/aheap.h +251 -0
  23. data/ext/grhfilrun/src/base.c +92 -0
  24. data/ext/grhfilrun/src/base.h +59 -0
  25. data/ext/grhfilrun/src/fstar.c +497 -0
  26. data/ext/grhfilrun/src/fstar.h +80 -0
  27. data/ext/grhfilrun/src/grhfil.c +214 -0
  28. data/ext/grhfilrun/src/itemset.c +713 -0
  29. data/ext/grhfilrun/src/itemset.h +170 -0
  30. data/ext/grhfilrun/src/problem.c +415 -0
  31. data/ext/grhfilrun/src/problem.h +179 -0
  32. data/ext/grhfilrun/src/queue.c +533 -0
  33. data/ext/grhfilrun/src/queue.h +182 -0
  34. data/ext/grhfilrun/src/sample.c +19 -0
  35. data/ext/grhfilrun/src/sspc.c +597 -0
  36. data/ext/grhfilrun/src/sspc2.c +491 -0
  37. data/ext/grhfilrun/src/stdlib2.c +1482 -0
  38. data/ext/grhfilrun/src/stdlib2.h +892 -0
  39. data/ext/grhfilrun/src/trsact.c +817 -0
  40. data/ext/grhfilrun/src/trsact.h +160 -0
  41. data/ext/grhfilrun/src/vec.c +745 -0
  42. data/ext/grhfilrun/src/vec.h +172 -0
  43. data/ext/lcmrun/extconf.rb +20 -0
  44. data/ext/lcmrun/lcmrun.cpp +99 -0
  45. data/ext/lcmrun/src/aheap.c +216 -0
  46. data/ext/lcmrun/src/aheap.h +111 -0
  47. data/ext/lcmrun/src/base.c +92 -0
  48. data/ext/lcmrun/src/base.h +59 -0
  49. data/ext/lcmrun/src/itemset.c +496 -0
  50. data/ext/lcmrun/src/itemset.h +157 -0
  51. data/ext/lcmrun/src/lcm.c +427 -0
  52. data/ext/lcmrun/src/problem.c +349 -0
  53. data/ext/lcmrun/src/problem.h +177 -0
  54. data/ext/lcmrun/src/queue.c +528 -0
  55. data/ext/lcmrun/src/queue.h +176 -0
  56. data/ext/lcmrun/src/sgraph.c +359 -0
  57. data/ext/lcmrun/src/sgraph.h +173 -0
  58. data/ext/lcmrun/src/stdlib2.c +1282 -0
  59. data/ext/lcmrun/src/stdlib2.h +823 -0
  60. data/ext/lcmrun/src/trsact.c +747 -0
  61. data/ext/lcmrun/src/trsact.h +159 -0
  62. data/ext/lcmrun/src/vec.c +731 -0
  63. data/ext/lcmrun/src/vec.h +171 -0
  64. data/ext/lcmseq0run/extconf.rb +20 -0
  65. data/ext/lcmseq0run/lcmseq0run.cpp +59 -0
  66. data/ext/lcmseq0run/src/aheap.c +216 -0
  67. data/ext/lcmseq0run/src/aheap.h +111 -0
  68. data/ext/lcmseq0run/src/base.c +92 -0
  69. data/ext/lcmseq0run/src/base.h +59 -0
  70. data/ext/lcmseq0run/src/itemset.c +518 -0
  71. data/ext/lcmseq0run/src/itemset.h +157 -0
  72. data/ext/lcmseq0run/src/itemset_zero.c +522 -0
  73. data/ext/lcmseq0run/src/lcm_seq.c +446 -0
  74. data/ext/lcmseq0run/src/lcm_seq_zero.c +446 -0
  75. data/ext/lcmseq0run/src/problem.c +439 -0
  76. data/ext/lcmseq0run/src/problem.h +179 -0
  77. data/ext/lcmseq0run/src/problem_zero.c +439 -0
  78. data/ext/lcmseq0run/src/queue.c +533 -0
  79. data/ext/lcmseq0run/src/queue.h +182 -0
  80. data/ext/lcmseq0run/src/stdlib2.c +1350 -0
  81. data/ext/lcmseq0run/src/stdlib2.h +864 -0
  82. data/ext/lcmseq0run/src/trsact.c +747 -0
  83. data/ext/lcmseq0run/src/trsact.h +159 -0
  84. data/ext/lcmseq0run/src/vec.c +779 -0
  85. data/ext/lcmseq0run/src/vec.h +172 -0
  86. data/ext/lcmseqrun/extconf.rb +20 -0
  87. data/ext/lcmseqrun/lcmseqrun.cpp +101 -0
  88. data/ext/lcmseqrun/src/aheap.c +216 -0
  89. data/ext/lcmseqrun/src/aheap.h +111 -0
  90. data/ext/lcmseqrun/src/base.c +92 -0
  91. data/ext/lcmseqrun/src/base.h +59 -0
  92. data/ext/lcmseqrun/src/itemset.c +518 -0
  93. data/ext/lcmseqrun/src/itemset.h +157 -0
  94. data/ext/lcmseqrun/src/itemset_zero.c +522 -0
  95. data/ext/lcmseqrun/src/lcm_seq.c +447 -0
  96. data/ext/lcmseqrun/src/lcm_seq_zero.c +446 -0
  97. data/ext/lcmseqrun/src/problem.c +439 -0
  98. data/ext/lcmseqrun/src/problem.h +179 -0
  99. data/ext/lcmseqrun/src/problem_zero.c +439 -0
  100. data/ext/lcmseqrun/src/queue.c +533 -0
  101. data/ext/lcmseqrun/src/queue.h +182 -0
  102. data/ext/lcmseqrun/src/stdlib2.c +1350 -0
  103. data/ext/lcmseqrun/src/stdlib2.h +864 -0
  104. data/ext/lcmseqrun/src/trsact.c +747 -0
  105. data/ext/lcmseqrun/src/trsact.h +159 -0
  106. data/ext/lcmseqrun/src/vec.c +779 -0
  107. data/ext/lcmseqrun/src/vec.h +172 -0
  108. data/ext/lcmtransrun/extconf.rb +18 -0
  109. data/ext/lcmtransrun/lcmtransrun.cpp +264 -0
  110. data/ext/macerun/extconf.rb +20 -0
  111. data/ext/macerun/macerun.cpp +57 -0
  112. data/ext/macerun/src/aheap.c +217 -0
  113. data/ext/macerun/src/aheap.h +112 -0
  114. data/ext/macerun/src/itemset.c +491 -0
  115. data/ext/macerun/src/itemset.h +158 -0
  116. data/ext/macerun/src/mace.c +503 -0
  117. data/ext/macerun/src/problem.c +346 -0
  118. data/ext/macerun/src/problem.h +174 -0
  119. data/ext/macerun/src/queue.c +529 -0
  120. data/ext/macerun/src/queue.h +177 -0
  121. data/ext/macerun/src/sgraph.c +360 -0
  122. data/ext/macerun/src/sgraph.h +174 -0
  123. data/ext/macerun/src/stdlib2.c +993 -0
  124. data/ext/macerun/src/stdlib2.h +811 -0
  125. data/ext/macerun/src/vec.c +634 -0
  126. data/ext/macerun/src/vec.h +170 -0
  127. data/ext/sspcrun/extconf.rb +20 -0
  128. data/ext/sspcrun/src/_sspc.c +358 -0
  129. data/ext/sspcrun/src/aheap.c +545 -0
  130. data/ext/sspcrun/src/aheap.h +251 -0
  131. data/ext/sspcrun/src/base.c +92 -0
  132. data/ext/sspcrun/src/base.h +59 -0
  133. data/ext/sspcrun/src/fstar.c +496 -0
  134. data/ext/sspcrun/src/fstar.h +80 -0
  135. data/ext/sspcrun/src/grhfil.c +213 -0
  136. data/ext/sspcrun/src/itemset.c +713 -0
  137. data/ext/sspcrun/src/itemset.h +170 -0
  138. data/ext/sspcrun/src/problem.c +415 -0
  139. data/ext/sspcrun/src/problem.h +179 -0
  140. data/ext/sspcrun/src/queue.c +533 -0
  141. data/ext/sspcrun/src/queue.h +182 -0
  142. data/ext/sspcrun/src/sample.c +19 -0
  143. data/ext/sspcrun/src/sspc.c +598 -0
  144. data/ext/sspcrun/src/sspc2.c +491 -0
  145. data/ext/sspcrun/src/stdlib2.c +1482 -0
  146. data/ext/sspcrun/src/stdlib2.h +892 -0
  147. data/ext/sspcrun/src/trsact.c +817 -0
  148. data/ext/sspcrun/src/trsact.h +160 -0
  149. data/ext/sspcrun/src/vec.c +745 -0
  150. data/ext/sspcrun/src/vec.h +172 -0
  151. data/ext/sspcrun/sspcrun.cpp +54 -0
  152. data/lib/nysol/enumLcmEp.rb +338 -0
  153. data/lib/nysol/enumLcmEsp.rb +284 -0
  154. data/lib/nysol/enumLcmIs.rb +275 -0
  155. data/lib/nysol/enumLcmSeq.rb +143 -0
  156. data/lib/nysol/items.rb +201 -0
  157. data/lib/nysol/seqDB.rb +256 -0
  158. data/lib/nysol/take.rb +39 -0
  159. data/lib/nysol/taxonomy.rb +113 -0
  160. data/lib/nysol/traDB.rb +257 -0
  161. metadata +239 -0
@@ -0,0 +1,491 @@
1
+ /* itemset search input/output common routines
2
+ 25/Nov/2007 by Takeaki Uno e-mail:uno@nii.jp,
3
+ homepage: http://research.nii.ac.jp/~uno/index.html */
4
+ /* This program is available for only academic use, basically.
5
+ Anyone can modify this program, but he/she has to write down
6
+ the change of the modification on the top of the source code.
7
+ Neither contact nor appointment to Takeaki Uno is needed.
8
+ If one wants to re-distribute this code, do not forget to
9
+ refer the newest code, and show the link to homepage of
10
+ Takeaki Uno, to notify the news about the codes for the users.
11
+ For the commercial use, please make a contact to Takeaki Uno. */
12
+
13
+ /* routines for itemset mining */
14
+
15
+ #ifndef _itemset_c_
16
+ #define _itemset_c_
17
+
18
+ #include"itemset.h"
19
+ #include"queue.c"
20
+ #include"aheap.c"
21
+
22
+ /* flush the write buffer, available for multi-core mode */
23
+ void ITEMSET_flush (ITEMSET *I, FILE2 *fp){
24
+ if ( !(I->flag&ITEMSET_MULTI_OUTPUT) || (fp->buf-fp->buf_org) > FILE2_BUFSIZ/2 ){
25
+ SPIN_LOCK(I->multi_core, I->lock_output);
26
+ FILE2_flush (fp);
27
+ SPIN_UNLOCK(I->multi_core, I->lock_output);
28
+ }
29
+ }
30
+
31
+ /* Output information about ITEMSET structure. flag&1: print frequency constraint */
32
+ void ITEMSET_print (ITEMSET *I, int flag){
33
+ if ( I->lb>0 || I->ub<INTHUGE ){
34
+ if ( I->lb > 0 ) print_err ("%d <= ", I->lb);
35
+ print_err ("itemsets ");
36
+ if ( I->ub < INTHUGE ) print_err (" <= %d\n", I->ub);
37
+ print_err ("\n");
38
+ }
39
+ if ( flag&1 ){
40
+ if ( I->frq_lb > -WEIGHTHUGE ) print_err (WEIGHTF" <=", I->frq_lb);
41
+ print_err (" frequency ");
42
+ if ( I->frq_ub < WEIGHTHUGE ) print_err (" <="WEIGHTF, I->frq_ub);
43
+ print_err ("\n");
44
+ }
45
+ }
46
+
47
+ /* ITEMSET initialization */
48
+ void ITEMSET_init (ITEMSET *I){
49
+ I->flag = 0;
50
+ I->progress = 0;
51
+ I->iters = I->iters2 = I->iters3 = 0;
52
+ I->solutions = I->solutions2 = I->max_solutions = I->outputs = I->outputs2 = 0;
53
+ I->topk.end = 0;
54
+ I->item_max = I->item_max_org = 0;
55
+ I->ub = I->len_ub = I->gap_ub = INTHUGE;
56
+ I->lb = I->len_lb = I->gap_lb = 0;
57
+ I->frq = I->pfrq = I->total_weight = 0;
58
+ I->ratio = I->prob = 0.0;
59
+ I->posi_ub = I->nega_ub = I->frq_ub = WEIGHTHUGE;
60
+ I->posi_lb = I->nega_lb = I->frq_lb = I->setrule_lb = -WEIGHTHUGE;
61
+ I->dir = 0;
62
+ I->target = INTHUGE;
63
+ I->prob_ub = I->ratio_ub = I->rposi_ub = 1;
64
+ I->prob_lb = I->ratio_lb = I->rposi_lb = 0;
65
+ I->itemflag = NULL;
66
+ I->perm = NULL;
67
+ I->item_frq = NULL;
68
+ I->sc = NULL;
69
+ I->X = NULL;
70
+ I->fp = NULL;
71
+ I->separator = ' ';
72
+ I->topk = INIT_AHEAP;
73
+ I->itemset = I->add = INIT_QUEUE;
74
+ I->set_weight = NULL;
75
+ I->set_occ = NULL;
76
+
77
+ I->multi_iters = I->multi_iters2 = I->multi_iters3 = NULL;
78
+ I->multi_outputs = I->multi_outputs2 = NULL;
79
+ I->multi_solutions = I->multi_solutions2 = NULL;
80
+ I->multi_fp = NULL;
81
+
82
+ I->multi_core = 0;
83
+ }
84
+
85
+
86
+ /* second initialization
87
+ topk.end>0 => initialize heap for topk mining */
88
+ /* all pointers will be set to 0, but not for */
89
+ /* if topK mining, set topk.end to "K" */
90
+ void ITEMSET_alloc (ITEMSET *I, char *fname, PERM *perm, QUEUE_INT item_max, size_t item_max_org){
91
+ LONG i;
92
+ size_t siz = (I->flag&ITEMSET_USE_ORG)?item_max_org+2: item_max+2;
93
+ int j;
94
+
95
+ I->prob = I->ratio = 1.0;
96
+ I->frq = 0;
97
+ I->perm = perm;
98
+ if ( I->topk.end>0 ){
99
+ AHEAP_alloc (&I->topk, I->topk.end);
100
+ FLOOP (i, 0, I->topk.end) AHEAP_chg (&I->topk, (AHEAP_ID)i, -WEIGHTHUGE);
101
+ I->frq_lb = -WEIGHTHUGE;
102
+ } else I->topk.v = NULL;
103
+ QUEUE_alloc (&I->itemset, (QUEUE_ID)siz); I->itemset.end = (QUEUE_ID)siz;
104
+ if ( I->flag&ITEMSET_ADD ) QUEUE_alloc (&I->add, (QUEUE_ID)siz);
105
+ calloc2 (I->sc, siz+2, goto ERR);
106
+ if ( I->flag&ITEMSET_SET_RULE ){
107
+ calloc2 (I->set_weight, siz, goto ERR);
108
+ if ( I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT) )
109
+ calloc2 (I->set_occ, siz, goto ERR);
110
+ }
111
+ I->iters = I->iters2 = I->solutions = 0;
112
+ I->item_max = item_max;
113
+ I->item_max_org = (QUEUE_INT)item_max_org;
114
+ if ( fname ){
115
+ if ( strcmp (fname, "-") == 0 ) I->fp = stdout;
116
+ else fopen2 (I->fp, fname, (I->flag&ITEMSET_APPEND)?"a":"w", goto ERR);
117
+ } else I->fp = 0;
118
+ if ( I->flag&ITEMSET_ITEMFRQ )
119
+ malloc2 (I->item_frq, item_max+2, goto ERR);
120
+ if ( I->flag&ITEMSET_RULE ){
121
+ calloc2 (I->itemflag, item_max+2, goto ERR);
122
+ }
123
+ I->total_weight = 1;
124
+ j = MAX(I->multi_core,1);
125
+ calloc2 (I->multi_iters, j*7, goto ERR);
126
+ I->multi_iters2 = I->multi_iters + j;
127
+ I->multi_iters3 = I->multi_iters2 + j;
128
+ I->multi_outputs = I->multi_iters3 + j;
129
+ I->multi_outputs2 = I->multi_outputs + j;
130
+ I->multi_solutions = I->multi_outputs2 + j;
131
+ I->multi_solutions2 = I->multi_solutions + j;
132
+
133
+ malloc2 (I->multi_fp, j, goto ERR);
134
+ FLOOP (i, 0, j)
135
+ FILE2_open_ (I->multi_fp[i], I->fp, goto ERR);
136
+ #ifdef MULTI_CORE
137
+ if ( I->multi_core > 0 ){
138
+ pthread_spin_init (&I->lock_counter, PTHREAD_PROCESS_PRIVATE);
139
+ pthread_spin_init (&I->lock_sc, PTHREAD_PROCESS_PRIVATE);
140
+ pthread_spin_init (&I->lock_output, PTHREAD_PROCESS_PRIVATE);
141
+ }
142
+ #endif
143
+ return;
144
+ ERR:;
145
+ ITEMSET_end (I);
146
+ EXIT;
147
+ }
148
+
149
+ /* sum the counters computed by each thread */
150
+ void ITEMSET_merge_counters (ITEMSET *I){
151
+ int i;
152
+ FLOOP (i, 0, MAX(I->multi_core,1)){
153
+ I->iters += I->multi_iters[i];
154
+ I->iters2 += I->multi_iters2[i];
155
+ I->iters3 += I->multi_iters3[i];
156
+ I->outputs += I->multi_outputs[i];
157
+ I->outputs2 += I->multi_outputs2[i];
158
+ I->solutions += I->multi_solutions[i];
159
+ I->solutions2 += I->multi_solutions2[i];
160
+ if ( I->multi_fp[i].buf ) FILE2_flush_last (&I->multi_fp[i]);
161
+ }
162
+ ARY_FILL (I->multi_iters, 0, MAX(I->multi_core,1)*7, 0);
163
+ }
164
+
165
+ /*******************************************************************/
166
+ /* termination of ITEMSET */
167
+ /*******************************************************************/
168
+ void ITEMSET_end (ITEMSET *I){
169
+ int i;
170
+ QUEUE_end (&I->itemset);
171
+ QUEUE_end (&I->add);
172
+ AHEAP_end (&I->topk);
173
+ fclose2 (I->fp);
174
+ mfree (I->sc, I->item_frq, I->itemflag, I->perm, I->set_weight, I->set_occ);
175
+
176
+ if ( I->multi_fp )
177
+ FLOOP (i, 0, MAX(I->multi_core,1)) free2 (I->multi_fp[i].buf);
178
+ mfree (I->multi_iters, I->multi_fp);
179
+ #ifdef MULTI_CORE
180
+ if ( I->multi_core>0 ){
181
+ pthread_spin_destroy(&I->lock_counter);
182
+ pthread_spin_destroy(&I->lock_sc);
183
+ pthread_spin_destroy(&I->lock_output);
184
+ }
185
+ #endif
186
+ ITEMSET_init (I);
187
+ }
188
+
189
+ /*******************************************************************/
190
+ /* output at the termination of the algorithm */
191
+ /* print #of itemsets of size k, for each k */
192
+ /*******************************************************************/
193
+ void ITEMSET_last_output (ITEMSET *I){
194
+ QUEUE_ID i;
195
+ LONG n=0, nn=0;
196
+
197
+ ITEMSET_merge_counters (I);
198
+ if ( !(I->flag&SHOW_MESSAGE) ) return; // "no message" is specified
199
+ if ( I->topk.end > 0 ){
200
+ i = AHEAP_findmin_head (&I->topk);
201
+ fprint_WEIGHT (stdout, AHEAP_H (I->topk, i));
202
+ printf ("\n");
203
+ return;
204
+ }
205
+ FLOOP (i, 0, I->itemset.end+1){
206
+ n += I->sc[i];
207
+ if ( I->sc[i] != 0 ) nn = i;
208
+ }
209
+ if ( n!=0 ){
210
+ printf (LONGF "\n", n);
211
+ FLOOP (i, 0, nn+1) printf (LONGF "\n", I->sc[i]);
212
+ }
213
+ print_err ("iters=" LONGF, I->iters);
214
+ if ( I->flag&ITEMSET_ITERS2 ) print_err (", iters2=" LONGF, I->iters2);
215
+ print_err ("\n");
216
+ }
217
+
218
+ /* output frequency, coverage */
219
+ void ITEMSET_output_frequency (ITEMSET *I, int core_id){
220
+ FILE2 *fp = &I->multi_fp[core_id];
221
+ if ( I->flag&(ITEMSET_FREQ+ITEMSET_PRE_FREQ) ){
222
+ if ( I->flag&ITEMSET_FREQ ) FILE2_putc (fp, ' ');
223
+ FILE2_print_WEIGHT (fp, I->frq, 4, '(');
224
+ FILE2_putc (fp, ')');
225
+ if ( I->flag&ITEMSET_PRE_FREQ ) FILE2_putc (fp, ' ');
226
+ }
227
+ if ( I->flag&ITEMSET_OUTPUT_POSINEGA ){ // output positive sum, negative sum in the occurrence
228
+ FILE2_putc (fp, ' ');
229
+ FILE2_print_WEIGHT (fp, I->pfrq, 4, '(');
230
+ FILE2_print_WEIGHT (fp, I->pfrq-I->frq, 4, ',');
231
+ FILE2_print_WEIGHT (fp, I->pfrq/(2*I->pfrq-I->frq), 4, ',');
232
+ FILE2_putc (fp, ')');
233
+ }
234
+ }
235
+
236
#ifdef _trsact_h_
/* Output the occurrences in occ as one line to the buffer of core_id.
   I->X is used as the TRSACT the occurrences refer to; every entry of occ is
   occ_unit bytes wide.  The first component of an entry is printed through
   trperm when that array exists.  With ITEMSET_MULTI_OCC_PRINT alone all
   further components are printed too; together with ITEMSET_TRSACT_ID only
   the second one.  With ITEMSET_RM_DUP_TRSACT an entry whose first component
   equals that of the preceding entry is skipped.  The buffer is offered for
   flushing after every 256 entries. */
void ITEMSET_output_occ (ITEMSET *I, QUEUE *occ, int core_id){
  FILE2 *out = &I->multi_fp[core_id];
  TRSACT *trs = (TRSACT *)(I->X);
  int mode = I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT);
  int rm_dup = (I->flag&ITEMSET_RM_DUP_TRSACT) != 0;
  VEC_ID c, prev = trs->rows_org;
  QUEUE_ID cnt = 0;
  QUEUE_INT *x;

  MQUE_FLOOP_ (*occ, x, trs->occ_unit){
    if ( !rm_dup || *x != prev ){
      FILE2_print_int (out, trs->trperm? trs->trperm[*x]: *x, I->separator);
      if ( mode == ITEMSET_MULTI_OCC_PRINT ){
        FLOOP (c, 1, (VEC_ID)(trs->occ_unit/sizeof(QUEUE_INT)))
          FILE2_print_int (out, *(x+c), I->separator);
      } else if ( mode == (ITEMSET_MULTI_OCC_PRINT+ITEMSET_TRSACT_ID) ){
        FILE2_print_int (out, *(x+1), I->separator);
      }
    }
    prev = *x;
    cnt++;
    if ( cnt%256 == 0 ) ITEMSET_flush (I, out);
  }
  FILE2_putc (out, '\n');
}
#endif
261
+
262
+ /* output an itemset to the output file */
263
+ void ITEMSET_output_itemset (ITEMSET *I, QUEUE *occ, int core_id){
264
+ QUEUE_ID i;
265
+ QUEUE_INT e;
266
+ #ifdef _agraph_h_
267
+ QUEUE_INT ee;
268
+ #endif
269
+
270
+ FILE2 *fp = &I->multi_fp[core_id];
271
+
272
+ I->multi_outputs[core_id]++;
273
+ if ( (I->flag&SHOW_PROGRESS ) && (I->multi_outputs[core_id]%(ITEMSET_INTERVAL) == 0) )
274
+ print_err ("---- " LONGF " solutions in " LONGF " candidates\n",
275
+ I->multi_solutions[core_id], I->multi_outputs[core_id]);
276
+ if ( I->itemset.t < I->lb || I->itemset.t > I->ub ) return;
277
+ if ( (I->flag&ITEMSET_IGNORE_BOUND)==0 && (I->frq < I->frq_lb || I->frq > I->frq_ub) ) return;
278
+ if ( (I->flag&ITEMSET_IGNORE_BOUND)==0 && (I->pfrq < I->posi_lb || I->pfrq > I->posi_ub || (I->frq - I->pfrq) > I->nega_ub || (I->frq - I->pfrq) < I->nega_lb) ) return;
279
+
280
+ I->multi_solutions[core_id]++;
281
+ if ( I->max_solutions>0 && I->multi_solutions[core_id] > I->max_solutions ){
282
+ ITEMSET_last_output (I);
283
+ ERROR_MES = "reached to maximum number of solutions";
284
+ EXIT;
285
+ }
286
+ if ( I->topk.v ){
287
+ e = AHEAP_findmin_head (&(I->topk));
288
+ if ( I->frq > AHEAP_H (I->topk, e) ){
289
+ AHEAP_chg (&(I->topk), e, I->frq);
290
+ e = AHEAP_findmin_head (&(I->topk));
291
+ I->frq_lb = AHEAP_H (I->topk, e);
292
+ }
293
+ } else if ( I->fp ){
294
+ if ( I->flag&ITEMSET_PRE_FREQ ) ITEMSET_output_frequency (I, core_id);
295
+ if ( (I->flag & ITEMSET_NOT_ITEMSET) == 0 ){
296
+ #ifdef _agraph_h_
297
+ if ( I->flag&ITEMSET_OUTPUT_EDGE ){
298
+ FLOOP (i, 0, I->itemset.t){
299
+ e = I->itemset.v[i];
300
+ ee = AGRAPH_INC_FROM(*((AGRAPH *)(I->X)), e, I->dir);
301
+ FILE2_print_int (fp, I->perm? I->perm[ee]: ee, '(' );
302
+ ee = AGRAPH_INC_TO(*((AGRAPH *)(I->X)), e, I->dir);
303
+ FILE2_print_int (fp, I->perm? I->perm[ee]: ee, I->separator);
304
+ FILE2_putc (fp, ')');
305
+ if ( i<I->itemset.t-1 ) FILE2_putc (fp, I->separator);
306
+ if ( (i+1)%256==0 ) ITEMSET_flush (I, fp);
307
+ }
308
+ goto NEXT;
309
+ }
310
+ #endif
311
+ FLOOP (i, 0, I->itemset.t){
312
+ e = I->itemset.v[i];
313
+ FILE2_print_int (fp, I->perm? I->perm[e]: e, i==0? 0: I->separator);
314
+ if ( (i+1)%256==0 ) ITEMSET_flush (I, fp);
315
+ }
316
+ #ifdef _agraph_h_
317
+ NEXT:;
318
+ #endif
319
+ }
320
+ if ( !(I->flag&ITEMSET_PRE_FREQ) ) ITEMSET_output_frequency (I, core_id);
321
+ if ( ((I->flag & ITEMSET_NOT_ITEMSET) == 0) || (I->flag&ITEMSET_FREQ) || (I->flag&ITEMSET_PRE_FREQ) ) FILE2_putc (fp, '\n');
322
+
323
+ #ifdef _trsact_h_
324
+ if (I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT)) ITEMSET_output_occ (I, occ, core_id);
325
+ #endif
326
+ }
327
+ I->sc[I->itemset.t]++;
328
+ ITEMSET_flush (I, fp);
329
+ }
330
+
331
+ /* output itemsets with adding all combination of "add"
332
+ at the first call, i has to be "add->t" */
333
+ void ITEMSET_solution_iter (ITEMSET *I, QUEUE *occ, int core_id){
334
+ QUEUE_ID t=I->add.t;
335
+ if ( I->itemset.t > I->ub ) return;
336
+ ITEMSET_output_itemset (I, occ, core_id);
337
+ if ( ERROR_MES ) return;
338
+ BLOOP (I->add.t, I->add.t, 0){
339
+ ARY_INS (I->itemset, I->add.v[I->add.t]);
340
+ ITEMSET_solution_iter (I, occ, core_id);
341
+ if ( ERROR_MES ) return;
342
+ I->itemset.t--;
343
+ }
344
+ I->add.t = t;
345
+ }
346
+
347
+ void ITEMSET_solution (ITEMSET *I, QUEUE *occ, int core_id){
348
+ QUEUE_ID i;
349
+ LONG s;
350
+ if ( I->itemset.t > I->ub ) return;
351
+ if ( I->flag & ITEMSET_ALL ){
352
+ if ( I->fp || I->topk.v ) ITEMSET_solution_iter (I, occ, core_id);
353
+ else {
354
+ s=1; FLOOP (i, 0, I->add.t+1){
355
+ I->sc[I->itemset.t+i] += s;
356
+ s = s*(I->add.t-i)/(i+1);
357
+ }
358
+ }
359
+ } else {
360
+ FLOOP (i, 0, I->add.t) ARY_INS (I->itemset, I->add.v[i]);
361
+ ITEMSET_output_itemset (I, occ, core_id);
362
+ I->itemset.t -= I->add.t;
363
+ }
364
+ }
365
+
366
+ /*************************************************************************/
367
+ /* ourput a rule */
368
+ /*************************************************************************/
369
+ void ITEMSET_output_rule (ITEMSET *I, QUEUE *occ, double p1, double p2, size_t item, int core_id){
370
+ FILE2 *fp = &I->multi_fp[core_id];
371
+ if ( fp->fp && !(I->topk.v) ){
372
+ FILE2_print_real (fp, p1, 4, '(');
373
+ FILE2_print_real (fp, p2, 4, ',');
374
+ FILE2_putc (fp, ')');
375
+ FILE2_print_int (fp, I->perm[item], I->separator);
376
+ FILE2_puts (fp, " <= ");
377
+ }
378
+ if ( I->flag & ITEMSET_RULE ){
379
+ if ( I->flag & ITEMSET_RULE_ADD ) ITEMSET_solution (I, occ, core_id);
380
+ else ITEMSET_output_itemset (I, occ, core_id);
381
+ } else ITEMSET_solution (I, occ, core_id);
382
+ }
383
+ /*************************************************************************/
384
+ /* check all rules for a pair of itemset and item */
385
+ /*************************************************************************/
386
+ void ITEMSET_check_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, size_t item, int core_id){
387
+ double p = w[item]/I->frq, pp, ff;
388
+ // printf ("[ratio] %f, p=%f, (%f/ %f), %d(%d) <= ", I->ratio_lb, p, w[item], I->frq, I->perm[item], I->itemflag[item]);
389
+ if ( I->itemflag[item]==1 ) return;
390
+ if ( w[item] <= -WEIGHTHUGE ) p = 0;
391
+ pp = p; ff = I->item_frq[item];
392
+ if ( I->flag & ITEMSET_RULE_SUPP ){ pp = w[item]; ff *= I->total_weight; }
393
+
394
+ if ( I->flag & (ITEMSET_RULE_FRQ+ITEMSET_RULE_INFRQ)){
395
+ if ( (I->flag & ITEMSET_RULE_FRQ) && p < I->ratio_lb ) return;
396
+ if ( (I->flag & ITEMSET_RULE_INFRQ) && p > I->ratio_ub ) return;
397
+ ITEMSET_output_rule (I, occ, pp, ff, item, core_id);
398
+ } else if ( I->flag & (ITEMSET_RULE_RFRQ+ITEMSET_RULE_RINFRQ) ){
399
+ if ( (I->flag & ITEMSET_RULE_RFRQ) && (1-p) > I->ratio_lb * (1-I->item_frq[item]) ) return;
400
+ if ( (I->flag & ITEMSET_RULE_RINFRQ) && p > I->ratio_ub * I->item_frq[item] ) return;
401
+ ITEMSET_output_rule (I, occ, pp, ff, item, core_id);
402
+ }
403
+ }
404
+
405
+ /*************************************************************************/
406
+ /* check all rules for an itemset and all items */
407
+ /*************************************************************************/
408
+ void ITEMSET_check_all_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, QUEUE *jump, WEIGHT total, int core_id){
409
+ QUEUE_ID i, t;
410
+ QUEUE_INT e, f=0, *x;
411
+ WEIGHT d = I->frq/total;
412
+
413
+ // checking out of range for itemset size and (posi/nega) frequency
414
+ if ( I->itemset.t+I->add.t < I->lb || I->itemset.t>I->ub || (!(I->flag&ITEMSET_ALL) && I->itemset.t+I->add.t>I->ub)) return;
415
+ if ( !(I->flag&ITEMSET_IGNORE_BOUND) && (I->frq < I->frq_lb || I->frq > I->frq_ub) ) return;
416
+ if ( !(I->flag&ITEMSET_IGNORE_BOUND) && (I->pfrq < I->posi_lb || I->pfrq > I->posi_ub || (I->frq - I->pfrq) > I->nega_ub || (I->frq - I->pfrq) < I->nega_lb) ) return;
417
+
418
+ if ( I->flag&ITEMSET_SET_RULE ){ // itemset->itemset rule for sequence mining
419
+ FLOOP (i, 0, I->itemset.t-1){
420
+ if ( I->frq/I->set_weight[i] >= I->setrule_lb && I->fp ){
421
+ I->sc[i]++;
422
+ if ( I->flag&ITEMSET_PRE_FREQ ) ITEMSET_output_frequency (I, core_id);
423
+ FLOOP (t, 0, I->itemset.t){
424
+ FILE2_print_int (&I->multi_fp[core_id], I->itemset.v[t], t?I->separator:0);
425
+ if ( t == i ){
426
+ FILE2_putc (&I->multi_fp[core_id], ' ');
427
+ FILE2_putc (&I->multi_fp[core_id], '=');
428
+ FILE2_putc (&I->multi_fp[core_id], '>');
429
+ }
430
+ }
431
+ if ( !(I->flag&ITEMSET_PRE_FREQ) ) ITEMSET_output_frequency ( I, core_id);
432
+ FILE2_putc (&I->multi_fp[core_id], ' ');
433
+ FILE2_print_real (&I->multi_fp[core_id], I->frq/I->set_weight[i], 4, '(');
434
+ FILE2_putc (&I->multi_fp[core_id], ')');
435
+ FILE2_putc (&I->multi_fp[core_id], '\n');
436
+ #ifdef _trsact_h_
437
+ if ( I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT) )
438
+ ITEMSET_output_occ (I, I->set_occ[i], core_id);
439
+ #endif
440
+ ITEMSET_flush (I, &I->multi_fp[core_id]);
441
+ }
442
+ }
443
+ }
444
+ // constraint of relational frequency
445
+ if ( ((I->flag&ITEMSET_RFRQ)==0 || d >= I->prob_lb * I->prob )
446
+ && ((I->flag&ITEMSET_RINFRQ)==0 || d <= I->prob * I->prob_ub) ){
447
+ if ( I->flag&ITEMSET_RULE ){ // rule mining routines
448
+ if ( I->itemset.t == 0 ) return;
449
+ if ( I->target < I->item_max ){
450
+ ITEMSET_check_rule (I, w, occ, I->target, core_id); if (ERROR_MES) return;
451
+ } else {
452
+ if ( I->flag & (ITEMSET_RULE_FRQ + ITEMSET_RULE_RFRQ) ){
453
+ if ( I->add.t>0 ){
454
+ // if ( I->itemflag[I->add.v[0]] ) // for POSI_EQUISUPP (occ_w[e] may not be 100%, in the case)
455
+ f = I->add.v[I->add.t-1]; t = I->add.t; I->add.t--;
456
+ FLOOP (i, 0, t){
457
+ e = I->add.v[i];
458
+ I->add.v[i] = f;
459
+ ITEMSET_check_rule (I, w, occ, e, core_id); if (ERROR_MES) return;
460
+ I->add.v[i] = e;
461
+ }
462
+ I->add.t++;
463
+ }
464
+ MQUE_FLOOP (*jump, x)
465
+ ITEMSET_check_rule (I, w, occ, *x, core_id); if (ERROR_MES) return;
466
+ } else {
467
+ if ( I->flag & (ITEMSET_RULE_INFRQ + ITEMSET_RULE_RINFRQ) ){
468
+ // ARY_FLOOP ( *jump, i, e ) I->itemflag[e]--;
469
+ FLOOP (i, 0, I->item_max){
470
+ if ( I->itemflag[i] != 1 ){
471
+ ITEMSET_check_rule (I, w, occ, i, core_id); if (ERROR_MES) return;
472
+ }
473
+ }
474
+ // ARY_FLOOP ( *jump, i, e ) I->itemflag[e]++;
475
+ // }
476
+ // ARY_FLOOP ( *jump, i, e ) ITEMSET_check_rule (I, w, occ, e);
477
+ }
478
+ }
479
+ }
480
+ } else { // usual mining (not rule mining)
481
+ if ( I->fp && (I->flag&(ITEMSET_RFRQ+ITEMSET_RINFRQ))){
482
+ FILE2_print_real (&I->multi_fp[core_id], d, 4, '[');
483
+ FILE2_print_real (&I->multi_fp[core_id], I->prob, 4, ',');
484
+ FILE2_putc (&I->multi_fp[core_id], ']');
485
+ }
486
+ ITEMSET_solution (I, occ, core_id);
487
+ }
488
+ }
489
+ }
490
+
491
+ #endif
@@ -0,0 +1,158 @@
1
+ /* itemset search input/output common routines
2
+ 25/Nov/2007 by Takeaki Uno e-mail:uno@nii.jp,
3
+ homepage: http://research.nii.ac.jp/~uno/index.html */
4
+ /* This program is available for only academic use, basically.
5
+ Anyone can modify this program, but he/she has to write down
6
+ the change of the modification on the top of the source code.
7
+ Neither contact nor appointment to Takeaki Uno is needed.
8
+ If one wants to re-distribute this code, do not forget to
9
+ refer the newest code, and show the link to homepage of
10
+ Takeaki Uno, to notify the news about the codes for the users.
11
+ For the commercial use, please make a contact to Takeaki Uno. */
12
+
13
+ /* routines for itemset mining */
14
+
15
+ #ifndef _itemset_h_
16
+ #define _itemset_h_
17
+
18
+ #include"stdlib2.h"
19
+ #include"queue.h"
20
+ #define AHEAP_KEY_WEIGHT
21
+ #include"aheap.h"
22
+
23
+
24
+ typedef struct { // state, constraints and output settings shared by the ITEMSET_* routines
25
+ int a;
26
+ QUEUE itemset; // current operating itemset
27
+ QUEUE add; // for equisupport (hypercube decomposition)
28
+ int ub, lb; // upper/lower bounds for the itemset size
29
+ WEIGHT frq, pfrq, frq_ub, frq_lb; // frequency (frq) and positive-weight sum (pfrq) of the current itemset; upper/lower bounds for the frequency
30
+ WEIGHT rposi_lb, rposi_ub, posi_lb, posi_ub, nega_ub, nega_lb; // upper/lower bounds for the sum of positive/negative weights
31
+ WEIGHT setrule_lb; // frequency lower bound for set rule
32
+ double ratio, prob; // confidence and independent probability of the current pattern
33
+ double ratio_ub, ratio_lb, prob_ub, prob_lb; // upper/lower bounds for confidence and independent probability
34
+ QUEUE_INT target; // target item for rule mining
35
+ char *itemflag; // 1 if it is included in the pattern (and 2 if included in add)
36
+ WEIGHT *item_frq; // frequency of each item
37
+ WEIGHT total_weight; // total weight of the input database
38
+ int len_ub, len_lb; // upper/lower bounds for the length of the pattern
39
+ int gap_ub, gap_lb; // upper/lower bounds for the gaps in the pattern
40
+ LONG *sc; // #itemsets classified by the sizes
41
+ QUEUE_INT item_max, item_max_org; // (original) maximum item
42
+ AHEAP topk; // heap for topk mining. in use if topk.v is not NULL (set topk.end to K before ITEMSET_alloc)
43
+ int flag; // flag for various functions (ITEMSET_* bits)
44
+ PERM *perm; // permutation array for output itemset: item => original item (released by ITEMSET_end)
45
+ FILE *fp; // file pointer to the output file
46
+ char separator; // separator of items output
47
+ int progress;
48
+ LONG iters, iters2, iters3; // iterations
49
+ LONG solutions, solutions2; // number of solutions output
50
+ LONG outputs, outputs2; // #calls of ITEMSET_output_itemset or ITEMSET_solution
51
+ LONG max_solutions; // maximum solutions to be output (no limit if 0)
52
+ void *X; // pointer to the original data (used as TRSACT or AGRAPH by the output routines)
53
+ int dir; // direction flag for AGRAPH & SGRAPH
54
+
55
+ int multi_core; // number of processors
56
+ LONG *multi_iters, *multi_iters2, *multi_iters3; // iterations, counted per core (multi_iters heads the one block holding all per-core counters)
57
+ LONG *multi_solutions, *multi_solutions2; // number of solutions output, counted per core
58
+ LONG *multi_outputs, *multi_outputs2; // #calls of ITEMSET_output_itemset or ITEMSET_solution, counted per core
59
+ FILE2 *multi_fp; // output file2 pointer for multi-core mode (one entry per core)
60
+ WEIGHT *set_weight; // the frequency of each prefix of current itemset
61
+ QUEUE **set_occ; // the occurrence of each prefix of current itemset
62
+
63
+ #ifdef MULTI_CORE
64
+ pthread_spinlock_t lock_counter; // lock for the jump counter
65
+ pthread_spinlock_t lock_sc; // lock for the score counter
66
+ pthread_spinlock_t lock_output; // lock for the output
67
+ #endif
68
+ } ITEMSET;
69
+
70
+ /* parameters for ITEMSET.flag (bit flags, to be combined by + or |) */
71
+
72
+ #define ITEMSET_ITERS2 4 // output #iters2
73
+ #define ITEMSET_PRE_FREQ 8 // output frequency preceding to each itemset
74
+ #define ITEMSET_FREQ 16 // output frequency following to each itemset
75
+ #define ITEMSET_ALL 32 // concat all combinations of "add" to each itemset
76
+
77
+ #define ITEMSET_TRSACT_ID 64 // output transaction ID's in occurrences
78
+ #define ITEMSET_OUTPUT_EDGE 128 // output itemset as edge set (refer AGRAPH)
79
+ #define ITEMSET_IGNORE_BOUND 256 // ignore constraint for frequency
80
+ #define ITEMSET_RM_DUP_TRSACT 512 // remove duplicated transaction ID's
81
+ #define ITEMSET_MULTI_OCC_PRINT 1024 // print each component of occ
82
+ // TRSACT_ID+MULTI_OCC_PRINT means print first two components of occ
83
+ #define ITEMSET_NOT_ITEMSET 2048 // do not print itemset to the output file
84
+ #define ITEMSET_RULE_SUPP 4096 // output confidence and item frequency by absolute value
85
+ #define ITEMSET_OUTPUT_POSINEGA 8192 // output negative/positive frequencies
86
+ #define ITEMSET_MULTI_OUTPUT 16384 // for multi-core mode
87
+ #define ITEMSET_USE_ORG 32768 // use item_max_org to the size of use
88
+ #define ITEMSET_ITEMFRQ 65536 // allocate item_frq
89
+ #define ITEMSET_ADD 131072 // allocate add
90
+
91
+ #define ITEMSET_RULE_FRQ 262144 // rule mining: discard rules whose ratio w[item]/frq is below ratio_lb
92
+ #define ITEMSET_RULE_INFRQ 524288 // rule mining: discard rules whose ratio w[item]/frq is above ratio_ub
93
+ #define ITEMSET_RULE_RFRQ 1048576 // as RULE_FRQ, with the bound taken relative to item_frq (see ITEMSET_check_rule)
94
+ #define ITEMSET_RULE_RINFRQ 2097152 // as RULE_INFRQ, with the bound taken relative to item_frq (see ITEMSET_check_rule)
95
+ #define ITEMSET_RFRQ 4194304 // require frq/total >= prob_lb * prob
96
+ #define ITEMSET_RINFRQ 8388608 // require frq/total <= prob * prob_ub
97
+ #define ITEMSET_POSI_RATIO 16777216
98
+ #define ITEMSET_SET_RULE 134217728 // itemset->itemset rule for sequence mining
99
+
100
+ #define ITEMSET_APPEND 268435456 // append the output to the file
101
+ #define ITEMSET_RULE_ADD 536870912 // append items in add to the solution, for rule output
102
+
103
+ //#define ITEMSET_RULE (ITEMSET_RULE_FRQ + ITEMSET_RULE_INFRQ + ITEMSET_RULE_RFRQ + ITEMSET_RULE_RINFRQ + ITEMSET_RFRQ + ITEMSET_RINFRQ + ITEMSET_SET_RULE) // for check any rule is true
104
+ #define ITEMSET_RULE (ITEMSET_RULE_FRQ + ITEMSET_RULE_INFRQ + ITEMSET_RULE_RFRQ + ITEMSET_RULE_RINFRQ + ITEMSET_SET_RULE) // mask to check whether any rule flag is set
105
+
106
+ #ifndef ITEMSET_INTERVAL
107
+ #define ITEMSET_INTERVAL 500000 // a progress message is printed every this many calls of ITEMSET_output_itemset
108
+ #endif
109
+
110
+ /* Output information about ITEMSET structure. flag&1: print frequency constraint */
111
+ void ITEMSET_print (ITEMSET *II, int flag);
112
+
113
+ /* topk.end>0 => initialize heap for topk mining */
114
+ /* all pointers will be set to 0, but not for */
115
+ /* if topK mining, set topk.end to "K" */
116
+ void ITEMSET_init (ITEMSET *I);
117
+ void ITEMSET_alloc (ITEMSET *I, char *fname, PERM *perm, QUEUE_INT item_max, size_t item_max_org);
118
+ void ITEMSET_end (ITEMSET *I);
119
+
120
+ /* sum the counters computed by each thread */
121
+ void ITEMSET_merge_counters (ITEMSET *I);
122
+
123
+ /*******************************************************************/
124
+ /* output at the termination of the algorithm */
125
+ /* print #of itemsets of size k, for each k */
126
+ /*******************************************************************/
127
+ void ITEMSET_last_output (ITEMSET *I);
128
+
129
+ /* output frequency, coverage */
130
+ void ITEMSET_output_frequency (ITEMSET *I, int core_id);
131
+
132
+ /* output an itemset to the output file */
133
+ void ITEMSET_output_itemset (ITEMSET *I, QUEUE *occ, int core_id);
134
+
135
+ /* output itemsets with adding all combination of "add"
136
+ at the first call, i has to be "add->t" */
137
+ void ITEMSET_solution (ITEMSET *I, QUEUE *occ, int core_id);
138
+
139
+ /*************************************************************************/
140
+ /* output a rule */
141
+ /*************************************************************************/
142
+ void ITEMSET_output_rule (ITEMSET *I, QUEUE *occ, double p1, double p2, size_t item, int core_id);
143
+
144
+ /*************************************************************************/
145
+ /* check all rules for a pair of itemset and item */
146
+ /*************************************************************************/
147
+ void ITEMSET_check_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, size_t item, int core_id);
148
+
149
+ /*************************************************************************/
150
+ /* check all rules for an itemset and all items */
151
+ /*************************************************************************/
152
+ void ITEMSET_check_all_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, QUEUE *jump, WEIGHT total, int core_id);
153
+
154
+ #endif
155
+
156
+
157
+
158
+