nysol-take 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (161) hide show
  1. checksums.yaml +7 -0
  2. data/bin/mbiclique.rb +317 -0
  3. data/bin/mbipolish.rb +362 -0
  4. data/bin/mccomp.rb +235 -0
  5. data/bin/mclique.rb +295 -0
  6. data/bin/mclique2g.rb +105 -0
  7. data/bin/mcliqueInfo.rb +203 -0
  8. data/bin/mfriends.rb +202 -0
  9. data/bin/mgdiff.rb +252 -0
  10. data/bin/mhifriend.rb +456 -0
  11. data/bin/mhipolish.rb +465 -0
  12. data/bin/mitemset.rb +168 -0
  13. data/bin/mpal.rb +410 -0
  14. data/bin/mpolishing.rb +399 -0
  15. data/bin/msequence.rb +165 -0
  16. data/bin/mtra2g.rb +476 -0
  17. data/bin/mtra2gc.rb +360 -0
  18. data/ext/grhfilrun/extconf.rb +12 -0
  19. data/ext/grhfilrun/grhfilrun.c +85 -0
  20. data/ext/grhfilrun/src/_sspc.c +358 -0
  21. data/ext/grhfilrun/src/aheap.c +545 -0
  22. data/ext/grhfilrun/src/aheap.h +251 -0
  23. data/ext/grhfilrun/src/base.c +92 -0
  24. data/ext/grhfilrun/src/base.h +59 -0
  25. data/ext/grhfilrun/src/fstar.c +497 -0
  26. data/ext/grhfilrun/src/fstar.h +80 -0
  27. data/ext/grhfilrun/src/grhfil.c +214 -0
  28. data/ext/grhfilrun/src/itemset.c +713 -0
  29. data/ext/grhfilrun/src/itemset.h +170 -0
  30. data/ext/grhfilrun/src/problem.c +415 -0
  31. data/ext/grhfilrun/src/problem.h +179 -0
  32. data/ext/grhfilrun/src/queue.c +533 -0
  33. data/ext/grhfilrun/src/queue.h +182 -0
  34. data/ext/grhfilrun/src/sample.c +19 -0
  35. data/ext/grhfilrun/src/sspc.c +597 -0
  36. data/ext/grhfilrun/src/sspc2.c +491 -0
  37. data/ext/grhfilrun/src/stdlib2.c +1482 -0
  38. data/ext/grhfilrun/src/stdlib2.h +892 -0
  39. data/ext/grhfilrun/src/trsact.c +817 -0
  40. data/ext/grhfilrun/src/trsact.h +160 -0
  41. data/ext/grhfilrun/src/vec.c +745 -0
  42. data/ext/grhfilrun/src/vec.h +172 -0
  43. data/ext/lcmrun/extconf.rb +20 -0
  44. data/ext/lcmrun/lcmrun.cpp +99 -0
  45. data/ext/lcmrun/src/aheap.c +216 -0
  46. data/ext/lcmrun/src/aheap.h +111 -0
  47. data/ext/lcmrun/src/base.c +92 -0
  48. data/ext/lcmrun/src/base.h +59 -0
  49. data/ext/lcmrun/src/itemset.c +496 -0
  50. data/ext/lcmrun/src/itemset.h +157 -0
  51. data/ext/lcmrun/src/lcm.c +427 -0
  52. data/ext/lcmrun/src/problem.c +349 -0
  53. data/ext/lcmrun/src/problem.h +177 -0
  54. data/ext/lcmrun/src/queue.c +528 -0
  55. data/ext/lcmrun/src/queue.h +176 -0
  56. data/ext/lcmrun/src/sgraph.c +359 -0
  57. data/ext/lcmrun/src/sgraph.h +173 -0
  58. data/ext/lcmrun/src/stdlib2.c +1282 -0
  59. data/ext/lcmrun/src/stdlib2.h +823 -0
  60. data/ext/lcmrun/src/trsact.c +747 -0
  61. data/ext/lcmrun/src/trsact.h +159 -0
  62. data/ext/lcmrun/src/vec.c +731 -0
  63. data/ext/lcmrun/src/vec.h +171 -0
  64. data/ext/lcmseq0run/extconf.rb +20 -0
  65. data/ext/lcmseq0run/lcmseq0run.cpp +59 -0
  66. data/ext/lcmseq0run/src/aheap.c +216 -0
  67. data/ext/lcmseq0run/src/aheap.h +111 -0
  68. data/ext/lcmseq0run/src/base.c +92 -0
  69. data/ext/lcmseq0run/src/base.h +59 -0
  70. data/ext/lcmseq0run/src/itemset.c +518 -0
  71. data/ext/lcmseq0run/src/itemset.h +157 -0
  72. data/ext/lcmseq0run/src/itemset_zero.c +522 -0
  73. data/ext/lcmseq0run/src/lcm_seq.c +446 -0
  74. data/ext/lcmseq0run/src/lcm_seq_zero.c +446 -0
  75. data/ext/lcmseq0run/src/problem.c +439 -0
  76. data/ext/lcmseq0run/src/problem.h +179 -0
  77. data/ext/lcmseq0run/src/problem_zero.c +439 -0
  78. data/ext/lcmseq0run/src/queue.c +533 -0
  79. data/ext/lcmseq0run/src/queue.h +182 -0
  80. data/ext/lcmseq0run/src/stdlib2.c +1350 -0
  81. data/ext/lcmseq0run/src/stdlib2.h +864 -0
  82. data/ext/lcmseq0run/src/trsact.c +747 -0
  83. data/ext/lcmseq0run/src/trsact.h +159 -0
  84. data/ext/lcmseq0run/src/vec.c +779 -0
  85. data/ext/lcmseq0run/src/vec.h +172 -0
  86. data/ext/lcmseqrun/extconf.rb +20 -0
  87. data/ext/lcmseqrun/lcmseqrun.cpp +101 -0
  88. data/ext/lcmseqrun/src/aheap.c +216 -0
  89. data/ext/lcmseqrun/src/aheap.h +111 -0
  90. data/ext/lcmseqrun/src/base.c +92 -0
  91. data/ext/lcmseqrun/src/base.h +59 -0
  92. data/ext/lcmseqrun/src/itemset.c +518 -0
  93. data/ext/lcmseqrun/src/itemset.h +157 -0
  94. data/ext/lcmseqrun/src/itemset_zero.c +522 -0
  95. data/ext/lcmseqrun/src/lcm_seq.c +447 -0
  96. data/ext/lcmseqrun/src/lcm_seq_zero.c +446 -0
  97. data/ext/lcmseqrun/src/problem.c +439 -0
  98. data/ext/lcmseqrun/src/problem.h +179 -0
  99. data/ext/lcmseqrun/src/problem_zero.c +439 -0
  100. data/ext/lcmseqrun/src/queue.c +533 -0
  101. data/ext/lcmseqrun/src/queue.h +182 -0
  102. data/ext/lcmseqrun/src/stdlib2.c +1350 -0
  103. data/ext/lcmseqrun/src/stdlib2.h +864 -0
  104. data/ext/lcmseqrun/src/trsact.c +747 -0
  105. data/ext/lcmseqrun/src/trsact.h +159 -0
  106. data/ext/lcmseqrun/src/vec.c +779 -0
  107. data/ext/lcmseqrun/src/vec.h +172 -0
  108. data/ext/lcmtransrun/extconf.rb +18 -0
  109. data/ext/lcmtransrun/lcmtransrun.cpp +264 -0
  110. data/ext/macerun/extconf.rb +20 -0
  111. data/ext/macerun/macerun.cpp +57 -0
  112. data/ext/macerun/src/aheap.c +217 -0
  113. data/ext/macerun/src/aheap.h +112 -0
  114. data/ext/macerun/src/itemset.c +491 -0
  115. data/ext/macerun/src/itemset.h +158 -0
  116. data/ext/macerun/src/mace.c +503 -0
  117. data/ext/macerun/src/problem.c +346 -0
  118. data/ext/macerun/src/problem.h +174 -0
  119. data/ext/macerun/src/queue.c +529 -0
  120. data/ext/macerun/src/queue.h +177 -0
  121. data/ext/macerun/src/sgraph.c +360 -0
  122. data/ext/macerun/src/sgraph.h +174 -0
  123. data/ext/macerun/src/stdlib2.c +993 -0
  124. data/ext/macerun/src/stdlib2.h +811 -0
  125. data/ext/macerun/src/vec.c +634 -0
  126. data/ext/macerun/src/vec.h +170 -0
  127. data/ext/sspcrun/extconf.rb +20 -0
  128. data/ext/sspcrun/src/_sspc.c +358 -0
  129. data/ext/sspcrun/src/aheap.c +545 -0
  130. data/ext/sspcrun/src/aheap.h +251 -0
  131. data/ext/sspcrun/src/base.c +92 -0
  132. data/ext/sspcrun/src/base.h +59 -0
  133. data/ext/sspcrun/src/fstar.c +496 -0
  134. data/ext/sspcrun/src/fstar.h +80 -0
  135. data/ext/sspcrun/src/grhfil.c +213 -0
  136. data/ext/sspcrun/src/itemset.c +713 -0
  137. data/ext/sspcrun/src/itemset.h +170 -0
  138. data/ext/sspcrun/src/problem.c +415 -0
  139. data/ext/sspcrun/src/problem.h +179 -0
  140. data/ext/sspcrun/src/queue.c +533 -0
  141. data/ext/sspcrun/src/queue.h +182 -0
  142. data/ext/sspcrun/src/sample.c +19 -0
  143. data/ext/sspcrun/src/sspc.c +598 -0
  144. data/ext/sspcrun/src/sspc2.c +491 -0
  145. data/ext/sspcrun/src/stdlib2.c +1482 -0
  146. data/ext/sspcrun/src/stdlib2.h +892 -0
  147. data/ext/sspcrun/src/trsact.c +817 -0
  148. data/ext/sspcrun/src/trsact.h +160 -0
  149. data/ext/sspcrun/src/vec.c +745 -0
  150. data/ext/sspcrun/src/vec.h +172 -0
  151. data/ext/sspcrun/sspcrun.cpp +54 -0
  152. data/lib/nysol/enumLcmEp.rb +338 -0
  153. data/lib/nysol/enumLcmEsp.rb +284 -0
  154. data/lib/nysol/enumLcmIs.rb +275 -0
  155. data/lib/nysol/enumLcmSeq.rb +143 -0
  156. data/lib/nysol/items.rb +201 -0
  157. data/lib/nysol/seqDB.rb +256 -0
  158. data/lib/nysol/take.rb +39 -0
  159. data/lib/nysol/taxonomy.rb +113 -0
  160. data/lib/nysol/traDB.rb +257 -0
  161. metadata +239 -0
@@ -0,0 +1,157 @@
1
+ /* itemset search input/output common routines
2
+ 25/Nov/2007 by Takeaki Uno e-mail:uno@nii.jp,
3
+ homepage: http://research.nii.ac.jp/~uno/index.html */
4
+ /* This program is available for only academic use, basically.
5
+ Anyone can modify this program, but he/she has to write down
6
+ the change of the modification on the top of the source code.
7
+ Neither contact nor appointment to Takeaki Uno is needed.
8
+ If one wants to re-distribute this code, please
9
+ refer the newest code, and show the link to homepage of
10
+ Takeaki Uno, to notify the news about the codes for the users. */
11
+
12
+ /* routines for itemset mining */
13
+
14
+ #ifndef _itemset_h_
15
+ #define _itemset_h_
16
+
17
+ #include"stdlib2.h"
18
+ #include"queue.h"
19
+ #define AHEAP_KEY_WEIGHT
20
+ #include"aheap.h"
21
+
22
+
23
+ typedef struct { // state shared by the itemset output/counting routines (ITEMSET_*)
24
+ int a; // NOTE(review): purpose not evident from this header -- confirm before relying on it
25
+ QUEUE itemset; // current operating itemset
26
+ QUEUE add; // for equisupport (hypercube decomposition)
27
+ int ub, lb; // upper/lower bounds for the itemset size
28
+ WEIGHT frq, pfrq, frq_ub, frq_lb; // frequency / positive frequency of the current itemset, and upper/lower bounds for the frequency
29
+ WEIGHT rposi_lb, rposi_ub, posi_lb, posi_ub, nega_ub, nega_lb; // upper/lower bounds for the sum of positive/negative weights
30
+ WEIGHT setrule_lb; // frequency lower bound for set rule
31
+ double ratio, prob; // confidence and independent probability of the current pattern
32
+ double ratio_ub, ratio_lb, prob_ub, prob_lb; // upper/lower bounds for confidence and independent probability
33
+ QUEUE_INT target; // target item for rule mining
34
+ char *itemflag; // 1 if the item is included in the pattern (and 2 if included in add)
35
+ WEIGHT *item_frq; // frequency of each item
36
+ WEIGHT total_weight; // total weight of the input database
37
+ int len_ub, len_lb; // upper/lower bounds for the length of the pattern
38
+ int gap_ub, gap_lb; // upper/lower bounds for the gaps in the pattern
39
+ LONG *sc; // number of itemsets, classified by size
40
+ QUEUE_INT item_max, item_max_org; // (original) maximum item
41
+ AHEAP topk; // heap for topk mining; set topk.end to K before ITEMSET_alloc to enable it
42
+ int flag; // bitwise OR of the ITEMSET_* flag constants
43
+ PERM *perm; // permutation array for output itemset: item => original item
44
+ FILE *fp; // file pointer to the output file
45
+ char separator; // separator of items output
46
+ int progress; // NOTE(review): only reset to 0 by ITEMSET_init in the visible code
47
+ LONG iters, iters2, iters3; // iteration counters
48
+ LONG solutions, solutions2; // number of solutions output
49
+ LONG outputs, outputs2; // #calls of ITEMSET_output_itemset or ITEMSET_solution
50
+ LONG max_solutions; // maximum solutions to be output
51
+ void *X; // pointer to the original data
52
+ int dir; // direction flag for AGRAPH & SGRAPH
53
+
54
+ int multi_core; // number of processors
55
+ LONG *multi_iters, *multi_iters2, *multi_iters3; // per-core iteration counters
56
+ LONG *multi_solutions, *multi_solutions2; // per-core number of solutions output
57
+ LONG *multi_outputs, *multi_outputs2; // per-core #calls of ITEMSET_output_itemset or ITEMSET_solution
58
+ FILE2 *multi_fp; // output file2 pointer for multi-core mode (one FILE2 per core)
59
+ WEIGHT *set_weight; // the frequency of each prefix of current itemset
60
+ QUEUE **set_occ; // the occurrence of each prefix of current itemset
61
+
62
+ #ifdef MULTI_CORE
63
+ pthread_spinlock_t lock_counter; // counter lock for jump counter
64
+ pthread_spinlock_t lock_sc; // counter lock for score counter
65
+ pthread_spinlock_t lock_output; // counter lock for #output
66
+ #endif
67
+ } ITEMSET;
68
+
69
+ /* parameters for ITEMSET.flag */
70
+
71
+ #define ITEMSET_ITERS2 4 // output #iters2
72
+ #define ITEMSET_PRE_FREQ 8 // output frequency preceding to each itemset
73
+ #define ITEMSET_FREQ 16 // output frequency following to each itemset
74
+ #define ITEMSET_ALL 32 // concat all combinations of "add" to each itemset
75
+
76
+ #define ITEMSET_TRSACT_ID 64 // output transaction ID's in occurrences
77
+ #define ITEMSET_OUTPUT_EDGE 128 // output itemset as edge set (refer AGRAPH)
78
+ #define ITEMSET_IGNORE_BOUND 256 // ignore constraint for frequency
79
+ #define ITEMSET_RM_DUP_TRSACT 512 // remove duplicated transaction ID's
80
+ #define ITEMSET_MULTI_OCC_PRINT 1024 // print each component of occ
81
+ // TRSACT_ID+MULTI_OCC_PRINT means print first two components of occ
82
+ #define ITEMSET_NOT_ITEMSET 2048 // do not print itemset to the output file
83
+ #define ITEMSET_RULE_SUPP 4096 // output confidence and item frequency by absolute value
84
+ #define ITEMSET_OUTPUT_POSINEGA 8192 // output negative/positive frequencies
85
+ #define ITEMSET_MULTI_OUTPUT 16384 // for multi-core mode
86
+ #define ITEMSET_USE_ORG 32768 // size the itemset buffers by item_max_org instead of item_max
87
+ #define ITEMSET_ITEMFRQ 65536 // allocate item_frq
88
+ #define ITEMSET_ADD 131072 // allocate add
89
+
90
+ #define ITEMSET_RULE_FRQ 262144 // rule mining: drop rules whose confidence is below ratio_lb
91
+ #define ITEMSET_RULE_INFRQ 524288 // rule mining: drop rules whose confidence is above ratio_ub
92
+ #define ITEMSET_RULE_RFRQ 1048576 // rule mining: lower confidence bound taken relative to the item's own frequency
93
+ #define ITEMSET_RULE_RINFRQ 2097152 // rule mining: upper confidence bound taken relative to the item's own frequency
94
+ #define ITEMSET_RFRQ 4194304 // require frq/total >= prob_lb * prob
95
+ #define ITEMSET_RINFRQ 8388608 // require frq/total <= prob * prob_ub
96
+ #define ITEMSET_POSI_RATIO 16777216
97
+ #define ITEMSET_SET_RULE 134217728 // itemset => itemset rules (used for sequence mining)
98
+
99
+ #define ITEMSET_APPEND 268435456 // append the output to the file
100
+ #define ITEMSET_RULE_ADD 536870912 // append items in add to the solution, for rule output
101
+
102
+ //#define ITEMSET_RULE (ITEMSET_RULE_FRQ + ITEMSET_RULE_INFRQ + ITEMSET_RULE_RFRQ + ITEMSET_RULE_RINFRQ + ITEMSET_RFRQ + ITEMSET_RINFRQ + ITEMSET_SET_RULE) // for check any rule is true
103
+ #define ITEMSET_RULE (ITEMSET_RULE_FRQ + ITEMSET_RULE_INFRQ + ITEMSET_RULE_RFRQ + ITEMSET_RULE_RINFRQ + ITEMSET_SET_RULE) // mask for testing whether any rule-mining flag is set
104
+
105
+ #ifndef ITEMSET_INTERVAL
106
+ #define ITEMSET_INTERVAL 500000 // with SHOW_PROGRESS, a progress line is printed every this many candidates
107
+ #endif
108
+
109
+ /* Output information about ITEMSET structure. flag&1: print frequency constraint */
110
+ void ITEMSET_print (ITEMSET *II, int flag);
111
+
112
+ /* topk.end>0 => initialize heap for topk mining */
113
+ /* all pointers will be set to 0, but not for */
114
+ /* if topK mining, set topk.end to "K" */
115
+ void ITEMSET_init (ITEMSET *I);
116
+ void ITEMSET_alloc (ITEMSET *I, char *fname, PERM *perm, QUEUE_INT item_max, size_t item_max_org);
117
+ void ITEMSET_end (ITEMSET *I);
118
+
119
+ /* sum the counters computed by each thread */
120
+ void ITEMSET_merge_counters (ITEMSET *I);
121
+
122
+ /*******************************************************************/
123
+ /* output at the termination of the algorithm */
124
+ /* print #of itemsets of size k, for each k */
125
+ /*******************************************************************/
126
+ void ITEMSET_last_output (ITEMSET *I);
127
+
128
+ /* output frequency, coverage */
129
+ void ITEMSET_output_frequency (ITEMSET *I, int core_id);
130
+
131
+ /* output an itemset to the output file */
132
+ void ITEMSET_output_itemset (ITEMSET *I, QUEUE *occ, int core_id);
133
+
134
+ /* output itemsets with adding all combination of "add"
135
+ at the first call, i has to be "add->t" */
136
+ void ITEMSET_solution (ITEMSET *I, QUEUE *occ, int core_id);
137
+
138
+ /*************************************************************************/
139
+ /* output a rule */
140
+ /*************************************************************************/
141
+ void ITEMSET_output_rule (ITEMSET *I, QUEUE *occ, double p1, double p2, size_t item, int core_id);
142
+
143
+ /*************************************************************************/
144
+ /* check all rules for a pair of itemset and item */
145
+ /*************************************************************************/
146
+ void ITEMSET_check_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, size_t item, int core_id);
147
+
148
+ /*************************************************************************/
149
+ /* check all rules for an itemset and all items */
150
+ /*************************************************************************/
151
+ void ITEMSET_check_all_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, QUEUE *jump, WEIGHT total, int core_id);
152
+
153
+ #endif
154
+
155
+
156
+
157
+
@@ -0,0 +1,522 @@
1
+ /* itemset search input/output common routines
2
+ 25/Nov/2007 by Takeaki Uno e-mail:uno@nii.jp,
3
+ homepage: http://research.nii.ac.jp/~uno/index.html */
4
+ /* This program is available for only academic use, basically.
5
+ Anyone can modify this program, but he/she has to write down
6
+ the change of the modification on the top of the source code.
7
+ Neither contact nor appointment to Takeaki Uno is needed.
8
+ If one wants to re-distribute this code, please
9
+ refer the newest code, and show the link to homepage of
10
+ Takeaki Uno, to notify the news about the codes for the users. */
11
+
12
+ /* routines for itemset mining */
13
+
14
+ #ifndef _itemset_c_
15
+ #define _itemset_c_
16
+
17
+ #include"itemset.h"
18
+ #include"queue.c"
19
+ #include"aheap.c"
20
+
21
+ /* flush the write buffer, available for multi-core mode */
22
+ void ITEMSET_flush (ITEMSET *I, FILE2 *fp){
23
+ if ( !(I->flag&ITEMSET_MULTI_OUTPUT) || (fp->buf-fp->buf_org) > FILE2_BUFSIZ/2 ){
24
+ SPIN_LOCK(I->multi_core, I->lock_output);
25
+ FILE2_flush (fp);
26
+ SPIN_UNLOCK(I->multi_core, I->lock_output);
27
+ }
28
+ }
29
+
30
+ /* Output information about ITEMSET structure. flag&1: print frequency constraint */
31
+ void ITEMSET_print (ITEMSET *I, int flag){
32
+ if ( I->lb>0 || I->ub<INTHUGE ){
33
+ if ( I->lb > 0 ) print_err ("%d <= ", I->lb);
34
+ print_err ("itemsets ");
35
+ if ( I->ub < INTHUGE ) print_err (" <= %d\n", I->ub);
36
+ print_err ("\n");
37
+ }
38
+ if ( flag&1 ){
39
+ if ( I->frq_lb > -WEIGHTHUGE ) print_err (WEIGHTF" <=", I->frq_lb);
40
+ print_err (" frequency ");
41
+ if ( I->frq_ub < WEIGHTHUGE ) print_err (" <="WEIGHTF, I->frq_ub);
42
+ print_err ("\n");
43
+ }
44
+ }
45
+
46
+ /* ITEMSET initialization */
47
+ void ITEMSET_init (ITEMSET *I){
48
+ I->flag = 0;
49
+ I->progress = 0;
50
+ I->iters = I->iters2 = I->iters3 = 0;
51
+ I->solutions = I->solutions2 = I->max_solutions = I->outputs = I->outputs2 = 0;
52
+ I->topk.end = 0;
53
+ I->item_max = I->item_max_org = 0;
54
+ I->ub = I->len_ub = I->gap_ub = INTHUGE;
55
+ I->lb = I->len_lb = I->gap_lb = 0;
56
+ I->frq = I->pfrq = I->total_weight = 0;
57
+ I->ratio = I->prob = 0.0;
58
+ I->posi_ub = I->nega_ub = I->frq_ub = WEIGHTHUGE;
59
+ I->posi_lb = I->nega_lb = I->frq_lb = I->setrule_lb = -WEIGHTHUGE;
60
+ I->dir = 0;
61
+ I->target = INTHUGE;
62
+ I->prob_ub = I->ratio_ub = I->rposi_ub = 1;
63
+ I->prob_lb = I->ratio_lb = I->rposi_lb = 0;
64
+ I->itemflag = NULL;
65
+ I->perm = NULL;
66
+ I->item_frq = NULL;
67
+ I->sc = NULL;
68
+ I->X = NULL;
69
+ I->fp = NULL;
70
+ I->separator = ' ';
71
+ I->topk = INIT_AHEAP;
72
+ I->itemset = I->add = INIT_QUEUE;
73
+ I->set_weight = NULL;
74
+ I->set_occ = NULL;
75
+
76
+ I->multi_iters = I->multi_iters2 = I->multi_iters3 = NULL;
77
+ I->multi_outputs = I->multi_outputs2 = NULL;
78
+ I->multi_solutions = I->multi_solutions2 = NULL;
79
+ I->multi_fp = NULL;
80
+
81
+ I->multi_core = 0;
82
+ }
83
+
84
+
85
+ /* second initialization
86
+ topk.end>0 => initialize heap for topk mining */
87
+ /* all pointers will be set to 0, but not for */
88
+ /* if topK mining, set topk.end to "K" */
89
+ void ITEMSET_alloc (ITEMSET *I, char *fname, PERM *perm, QUEUE_INT item_max, size_t item_max_org){
90
+ LONG i;
91
+ size_t siz = (I->flag&ITEMSET_USE_ORG)?item_max_org+2: item_max+2;
92
+ int j;
93
+
94
+ I->prob = I->ratio = 1.0;
95
+ I->frq = 0;
96
+ I->perm = perm;
97
+ if ( I->topk.end>0 ){
98
+ AHEAP_alloc (&I->topk, I->topk.end);
99
+ FLOOP (i, 0, I->topk.end) AHEAP_chg (&I->topk, (AHEAP_ID)i, -WEIGHTHUGE);
100
+ I->frq_lb = -WEIGHTHUGE;
101
+ } else I->topk.v = NULL;
102
+ QUEUE_alloc (&I->itemset, (QUEUE_ID)siz); I->itemset.end = (QUEUE_ID)siz;
103
+ if ( I->flag&ITEMSET_ADD ) QUEUE_alloc (&I->add, (QUEUE_ID)siz);
104
+ calloc2 (I->sc, siz+2, goto ERR);
105
+ if ( I->flag&ITEMSET_SET_RULE ){
106
+ calloc2 (I->set_weight, siz, goto ERR);
107
+ if ( I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT) )
108
+ calloc2 (I->set_occ, siz, goto ERR);
109
+ }
110
+ I->iters = I->iters2 = I->solutions = 0;
111
+ I->item_max = item_max;
112
+ I->item_max_org = (QUEUE_INT)item_max_org;
113
+ if ( fname ){
114
+ #ifdef _FILE2_LOAD_FROM_MEMORY_
115
+ I->fp = (FILE *)1;
116
+ #else
117
+ if ( strcmp (fname, "-") == 0 ) I->fp = stdout;
118
+ else fopen2 (I->fp, fname, (I->flag&ITEMSET_APPEND)?"a":"w", goto ERR);
119
+ #endif
120
+ } else I->fp = 0;
121
+ if ( I->flag&ITEMSET_ITEMFRQ )
122
+ malloc2 (I->item_frq, item_max+2, goto ERR);
123
+ if ( I->flag&ITEMSET_RULE ){
124
+ calloc2 (I->itemflag, item_max+2, goto ERR);
125
+ }
126
+ I->total_weight = 1;
127
+ j = MAX(I->multi_core,1);
128
+ calloc2 (I->multi_iters, j*7, goto ERR);
129
+ I->multi_iters2 = I->multi_iters + j;
130
+ I->multi_iters3 = I->multi_iters2 + j;
131
+ I->multi_outputs = I->multi_iters3 + j;
132
+ I->multi_outputs2 = I->multi_outputs + j;
133
+ I->multi_solutions = I->multi_outputs2 + j;
134
+ I->multi_solutions2 = I->multi_solutions + j;
135
+
136
+ calloc2 (I->multi_fp, j, goto ERR);
137
+ FLOOP (i, 0, j)
138
+ FILE2_open_ (I->multi_fp[i], I->fp, goto ERR);
139
+ #ifdef MULTI_CORE
140
+ if ( I->multi_core > 0 ){
141
+ pthread_spin_init (&I->lock_counter, PTHREAD_PROCESS_PRIVATE);
142
+ pthread_spin_init (&I->lock_sc, PTHREAD_PROCESS_PRIVATE);
143
+ pthread_spin_init (&I->lock_output, PTHREAD_PROCESS_PRIVATE);
144
+ }
145
+ #endif
146
+ return;
147
+ ERR:;
148
+ ITEMSET_end (I);
149
+ EXIT;
150
+ }
151
+
152
+ /* sum the counters computed by each thread */
153
+ void ITEMSET_merge_counters (ITEMSET *I){
154
+ int i;
155
+ FLOOP (i, 0, MAX(I->multi_core,1)){
156
+ I->iters += I->multi_iters[i];
157
+ I->iters2 += I->multi_iters2[i];
158
+ I->iters3 += I->multi_iters3[i];
159
+ I->outputs += I->multi_outputs[i];
160
+ I->outputs2 += I->multi_outputs2[i];
161
+ I->solutions += I->multi_solutions[i];
162
+ I->solutions2 += I->multi_solutions2[i];
163
+ if ( I->multi_fp[i].buf ) FILE2_flush_last (&I->multi_fp[i]);
164
+ }
165
+ ARY_FILL (I->multi_iters, 0, MAX(I->multi_core,1)*7, 0);
166
+ }
167
+
168
+ /*******************************************************************/
169
+ /* termination of ITEMSET */
170
+ /*******************************************************************/
171
+ void ITEMSET_end (ITEMSET *I){
172
+ int i;
173
+ QUEUE_end (&I->itemset);
174
+ QUEUE_end (&I->add);
175
+ AHEAP_end (&I->topk);
176
+ #ifndef _FILE2_LOAD_FROM_MEMORY_
177
+ fclose2 (I->fp);
178
+ #endif
179
+ mfree (I->sc, I->item_frq, I->itemflag, I->perm, I->set_weight, I->set_occ);
180
+
181
+ if ( I->multi_fp )
182
+ FLOOP (i, 0, MAX(I->multi_core,1)) free2 (I->multi_fp[i].buf_org);
183
+ mfree (I->multi_iters, I->multi_fp);
184
+ #ifdef MULTI_CORE
185
+ if ( I->multi_core>0 ){
186
+ pthread_spin_destroy(&I->lock_counter);
187
+ pthread_spin_destroy(&I->lock_sc);
188
+ pthread_spin_destroy(&I->lock_output);
189
+ }
190
+ #endif
191
+ ITEMSET_init (I);
192
+ }
193
+
194
+ /*******************************************************************/
195
+ /* output at the termination of the algorithm */
196
+ /* print #of itemsets of size k, for each k */
197
+ /*******************************************************************/
198
+ void ITEMSET_last_output (ITEMSET *I){
199
+ QUEUE_ID i;
200
+ LONG n=0, nn=0;
201
+
202
+ ITEMSET_merge_counters (I);
203
+ if ( !(I->flag&SHOW_MESSAGE) ) return; // "no message" is specified
204
+ if ( I->topk.end > 0 ){
205
+ i = AHEAP_findmin_head (&I->topk);
206
+ fprint_WEIGHT (stdout, AHEAP_H (I->topk, i));
207
+ printf ("\n");
208
+ return;
209
+ }
210
+ FLOOP (i, 0, I->itemset.end+1){
211
+ n += I->sc[i];
212
+ if ( I->sc[i] != 0 ) nn = i;
213
+ }
214
+ if ( n!=0 ){
215
+ printf (LONGF "\n", n);
216
+ FLOOP (i, 0, nn+1) printf (LONGF "\n", I->sc[i]);
217
+ }
218
+ print_err ("iters=" LONGF, I->iters);
219
+ if ( I->flag&ITEMSET_ITERS2 ) print_err (", iters2=" LONGF, I->iters2);
220
+ print_err ("\n");
221
+ }
222
+
223
+ /* output frequency, coverage */
224
+ void ITEMSET_output_frequency (ITEMSET *I, int core_id){
225
+ FILE2 *fp = &I->multi_fp[core_id];
226
+ if ( I->flag&(ITEMSET_FREQ+ITEMSET_PRE_FREQ) ){
227
+ if ( I->flag&ITEMSET_FREQ ) FILE2_putc (fp, ' ');
228
+ FILE2_print_WEIGHT (fp, I->frq, 4, '(');
229
+ FILE2_putc (fp, ')');
230
+ if ( I->flag&ITEMSET_PRE_FREQ ) FILE2_putc (fp, ' ');
231
+ }
232
+ if ( I->flag&ITEMSET_OUTPUT_POSINEGA ){ // output positive sum, negative sum in the occurrence
233
+ FILE2_putc (fp, ' ');
234
+ FILE2_print_WEIGHT (fp, I->pfrq, 4, '(');
235
+ FILE2_print_WEIGHT (fp, I->pfrq-I->frq, 4, ',');
236
+ FILE2_print_WEIGHT (fp, I->pfrq/(2*I->pfrq-I->frq), 4, ',');
237
+ FILE2_putc (fp, ')');
238
+ }
239
+ }
240
+
241
#ifdef _trsact_h_
/* Write the occurrence list occ to the buffer of core_id.
   Each entry is printed through TT->trperm when that array exists.
   With ITEMSET_RM_DUP_TRSACT an entry equal to its predecessor is skipped.
   ITEMSET_MULTI_OCC_PRINT alone prints every further component of an entry;
   together with ITEMSET_TRSACT_ID only the second component is added.
   The buffer is offered to ITEMSET_flush after every 256 entries. */
void ITEMSET_output_occ (ITEMSET *I, QUEUE *occ, int core_id){
  QUEUE_ID seen = 0;
  QUEUE_INT *x;
  FILE2 *fp = &I->multi_fp[core_id];
  TRSACT *TT = (TRSACT *)(I->X);
  VEC_ID k, prev = TT->rows_org;  // rows_org serves as the "no predecessor" value
  int mode = I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT);
  int drop_dup = (I->flag&ITEMSET_RM_DUP_TRSACT) != 0;

  MQUE_FLOOP_ (*occ, x, TT->occ_unit){
    if ( !drop_dup || *x != prev ){
      FILE2_print_int (fp, TT->trperm? TT->trperm[*x]: *x, I->separator);
      if ( mode == ITEMSET_MULTI_OCC_PRINT ){
        FLOOP (k, 1, (VEC_ID)(TT->occ_unit/sizeof(QUEUE_INT))){
          FILE2_print_int (fp, *(x+k), I->separator);
        }
      } else if ( mode == (ITEMSET_MULTI_OCC_PRINT+ITEMSET_TRSACT_ID) ){
        FILE2_print_int (fp, *(x+1), I->separator);
      }
    }
    prev = *x;
    seen++;
    if ( seen%256==0 ) ITEMSET_flush (I, fp);
  }
#ifdef _FILE2_LOAD_FROM_MEMORY_
  *((int *)__write_to_memory__) = INTHUGE;
  __write_to_memory__ = (char *)(((int *)__write_to_memory__) + 1);
#else
  FILE2_putc (fp, '\n');
#endif
}
#endif
271
+
272
+ /* output an itemset to the output file */
273
+ void ITEMSET_output_itemset (ITEMSET *I, QUEUE *occ, int core_id){
274
+ QUEUE_ID i;
275
+ QUEUE_INT e;
276
+ #ifdef _agraph_h_
277
+ QUEUE_INT ee;
278
+ #endif
279
+
280
+ FLOOP (i, 0, I->itemset.t){ // add by NYSOL
281
+ if( I->itemset.v[i]==0) return; // add by NYSOL
282
+ } // add by NYSOL
283
+
284
+ FILE2 *fp = &I->multi_fp[core_id];
285
+
286
+ I->multi_outputs[core_id]++;
287
+ if ( (I->flag&SHOW_PROGRESS ) && (I->multi_outputs[core_id]%(ITEMSET_INTERVAL) == 0) )
288
+ print_err ("---- " LONGF " solutions in " LONGF " candidates\n",
289
+ I->multi_solutions[core_id], I->multi_outputs[core_id]);
290
+ if ( I->itemset.t < I->lb || I->itemset.t > I->ub ) return;
291
+ if ( (I->flag&ITEMSET_IGNORE_BOUND)==0 && (I->frq < I->frq_lb || I->frq > I->frq_ub) ) return;
292
+ if ( (I->flag&ITEMSET_IGNORE_BOUND)==0 && (I->pfrq < I->posi_lb || I->pfrq > I->posi_ub || (I->frq - I->pfrq) > I->nega_ub || (I->frq - I->pfrq) < I->nega_lb) ) return;
293
+
294
+ I->multi_solutions[core_id]++;
295
+ if ( I->max_solutions>0 && I->multi_solutions[core_id] > I->max_solutions ){
296
+ ITEMSET_last_output (I);
297
+ ERROR_MES = "reached to maximum number of solutions";
298
+ EXIT;
299
+ }
300
+ if ( I->topk.v ){
301
+ e = AHEAP_findmin_head (&(I->topk));
302
+ if ( I->frq > AHEAP_H (I->topk, e) ){
303
+ AHEAP_chg (&(I->topk), e, I->frq);
304
+ e = AHEAP_findmin_head (&(I->topk));
305
+ I->frq_lb = AHEAP_H (I->topk, e);
306
+ }
307
+ } else if ( I->fp ){
308
+ if ( I->flag&ITEMSET_PRE_FREQ ) ITEMSET_output_frequency (I, core_id);
309
+ if ( (I->flag & ITEMSET_NOT_ITEMSET) == 0 ){
310
+ #ifdef _agraph_h_
311
+ if ( I->flag&ITEMSET_OUTPUT_EDGE ){
312
+ FLOOP (i, 0, I->itemset.t){
313
+ e = I->itemset.v[i];
314
+ ee = AGRAPH_INC_FROM(*((AGRAPH *)(I->X)), e, I->dir);
315
+ FILE2_print_int (fp, I->perm? I->perm[ee]: ee, '(' );
316
+ ee = AGRAPH_INC_TO(*((AGRAPH *)(I->X)), e, I->dir);
317
+ FILE2_print_int (fp, I->perm? I->perm[ee]: ee, I->separator);
318
+ #ifdef _FILE2_LOAD_FROM_MEMORY_
319
+ FILE2_putc (fp, ')');
320
+ #endif
321
+ if ( i<I->itemset.t-1 ) FILE2_putc (fp, I->separator);
322
+ if ( (i+1)%256==0 ) ITEMSET_flush (I, fp);
323
+ }
324
+ goto NEXT;
325
+ }
326
+ #endif
327
+ FLOOP (i, 0, I->itemset.t){
328
+ e = I->itemset.v[i];
329
+ FILE2_print_int (fp, I->perm? I->perm[e]: e, i==0? 0: I->separator);
330
+ if ( (i+1)%256==0 ) ITEMSET_flush (I, fp);
331
+ }
332
+ #ifdef _agraph_h_
333
+ NEXT:;
334
+ #endif
335
+ }
336
+ if ( !(I->flag&ITEMSET_PRE_FREQ) ) ITEMSET_output_frequency (I, core_id);
337
+ if ( ((I->flag & ITEMSET_NOT_ITEMSET) == 0) || (I->flag&ITEMSET_FREQ) || (I->flag&ITEMSET_PRE_FREQ) ){
338
+ #ifdef _FILE2_LOAD_FROM_MEMORY_
339
+ FILE2_WRITE_MEMORY (QUEUE_INT, FILE2_LOAD_FROM_MEMORY_END);
340
+ #else
341
+ FILE2_putc (fp, '\n');
342
+ #endif
343
+ }
344
+ #ifdef _trsact_h_
345
+ if (I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT)) ITEMSET_output_occ (I, occ, core_id);
346
+ #endif
347
+ }
348
+ I->sc[I->itemset.t]++;
349
+ ITEMSET_flush (I, fp);
350
+ }
351
+
352
+ /* output itemsets with adding all combination of "add"
353
+ at the first call, i has to be "add->t" */
354
+ void ITEMSET_solution_iter (ITEMSET *I, QUEUE *occ, int core_id){
355
+ QUEUE_ID t=I->add.t;
356
+ if ( I->itemset.t > I->ub ) return;
357
+ ITEMSET_output_itemset (I, occ, core_id);
358
+ if ( ERROR_MES ) return;
359
+ BLOOP (I->add.t, I->add.t, 0){
360
+ QUE_INS (I->itemset, I->add.v[I->add.t]);
361
+ ITEMSET_solution_iter (I, occ, core_id);
362
+ if ( ERROR_MES ) return;
363
+ I->itemset.t--;
364
+ }
365
+ I->add.t = t;
366
+ }
367
+
368
+ void ITEMSET_solution (ITEMSET *I, QUEUE *occ, int core_id){
369
+ QUEUE_ID i;
370
+ LONG s;
371
+ if ( I->itemset.t > I->ub ) return;
372
+ if ( I->flag & ITEMSET_ALL ){
373
+ if ( I->fp || I->topk.v ) ITEMSET_solution_iter (I, occ, core_id);
374
+ else {
375
+ s=1; FLOOP (i, 0, I->add.t+1){
376
+ I->sc[I->itemset.t+i] += s;
377
+ s = s*(I->add.t-i)/(i+1);
378
+ }
379
+ }
380
+ } else {
381
+ FLOOP (i, 0, I->add.t) QUE_INS (I->itemset, I->add.v[i]);
382
+ ITEMSET_output_itemset (I, occ, core_id);
383
+ I->itemset.t -= I->add.t;
384
+ }
385
+ }
386
+
387
+ /*************************************************************************/
388
+ /* ourput a rule */
389
+ /*************************************************************************/
390
+ void ITEMSET_output_rule (ITEMSET *I, QUEUE *occ, double p1, double p2, size_t item, int core_id){
391
+ FILE2 *fp = &I->multi_fp[core_id];
392
+ if ( fp->fp && !(I->topk.v) ){
393
+ FILE2_print_real (fp, p1, 4, '(');
394
+ FILE2_print_real (fp, p2, 4, ',');
395
+ FILE2_putc (fp, ')');
396
+ FILE2_print_int (fp, I->perm[item], I->separator);
397
+ FILE2_puts (fp, " <= ");
398
+ }
399
+ if ( I->flag & ITEMSET_RULE ){
400
+ if ( I->flag & ITEMSET_RULE_ADD ) ITEMSET_solution (I, occ, core_id);
401
+ else ITEMSET_output_itemset (I, occ, core_id);
402
+ } else ITEMSET_solution (I, occ, core_id);
403
+ }
404
+ /*************************************************************************/
405
+ /* check all rules for a pair of itemset and item */
406
+ /*************************************************************************/
407
+ void ITEMSET_check_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, size_t item, int core_id){
408
+ double p = w[item]/I->frq, pp, ff;
409
+ // printf ("[ratio] %f, p=%f, (%f/ %f), %d(%d) <= ", I->ratio_lb, p, w[item], I->frq, I->perm[item], I->itemflag[item]);
410
+ if ( I->itemflag[item]==1 ) return;
411
+ if ( w[item] <= -WEIGHTHUGE ) p = 0;
412
+ pp = p; ff = I->item_frq[item];
413
+ if ( I->flag & ITEMSET_RULE_SUPP ){ pp = w[item]; ff *= I->total_weight; }
414
+
415
+ if ( I->flag & (ITEMSET_RULE_FRQ+ITEMSET_RULE_INFRQ)){
416
+ if ( (I->flag & ITEMSET_RULE_FRQ) && p < I->ratio_lb ) return;
417
+ if ( (I->flag & ITEMSET_RULE_INFRQ) && p > I->ratio_ub ) return;
418
+ ITEMSET_output_rule (I, occ, p, ff, item, core_id);
419
+ } else if ( I->flag & (ITEMSET_RULE_RFRQ+ITEMSET_RULE_RINFRQ) ){
420
+ if ( (I->flag & ITEMSET_RULE_RFRQ) && (1-p) > I->ratio_lb * (1-I->item_frq[item]) ) return;
421
+ if ( (I->flag & ITEMSET_RULE_RINFRQ) && p > I->ratio_ub * I->item_frq[item] ) return;
422
+ ITEMSET_output_rule (I, occ, pp, ff, item, core_id);
423
+ }
424
+ }
425
+
426
+ /*************************************************************************/
427
+ /* check all rules for an itemset and all items */
428
+ /*************************************************************************/
429
+ void ITEMSET_check_all_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, QUEUE *jump, WEIGHT total, int core_id){
430
+ QUEUE_ID i, t;
431
+ QUEUE_INT e, f=0, *x;
432
+ WEIGHT d = I->frq/total;
433
+
434
+ // checking out of range for itemset size and (posi/nega) frequency
435
+ if ( I->itemset.t+I->add.t < I->lb || I->itemset.t>I->ub || (!(I->flag&ITEMSET_ALL) && I->itemset.t+I->add.t>I->ub)) return;
436
+ if ( !(I->flag&ITEMSET_IGNORE_BOUND) && (I->frq < I->frq_lb || I->frq > I->frq_ub) ) return;
437
+ if ( !(I->flag&ITEMSET_IGNORE_BOUND) && (I->pfrq < I->posi_lb || I->pfrq > I->posi_ub || (I->frq - I->pfrq) > I->nega_ub || (I->frq - I->pfrq) < I->nega_lb) ) return;
438
+
439
+ if ( I->flag&ITEMSET_SET_RULE ){ // itemset->itemset rule for sequence mining
440
+ FLOOP (i, 0, I->itemset.t-1){
441
+ if ( I->frq/I->set_weight[i] >= I->setrule_lb && I->fp ){
442
+ I->sc[i]++;
443
+ if ( I->flag&ITEMSET_PRE_FREQ ) ITEMSET_output_frequency (I, core_id);
444
+ FLOOP (t, 0, I->itemset.t){
445
+ FILE2_print_int (&I->multi_fp[core_id], I->itemset.v[t], t?I->separator:0);
446
+ if ( t == i ){
447
+ FILE2_putc (&I->multi_fp[core_id], ' ');
448
+ FILE2_putc (&I->multi_fp[core_id], '=');
449
+ FILE2_putc (&I->multi_fp[core_id], '>');
450
+ }
451
+ }
452
+ if ( !(I->flag&ITEMSET_PRE_FREQ) ) ITEMSET_output_frequency ( I, core_id);
453
+ FILE2_putc (&I->multi_fp[core_id], ' ');
454
+ FILE2_print_real (&I->multi_fp[core_id], I->frq/I->set_weight[i], 4, '(');
455
+ FILE2_putc (&I->multi_fp[core_id], ')');
456
+ #ifdef _FILE2_LOAD_FROM_MEMORY_
457
+ FILE2_WRITE_MEMORY (QUEUE_INT, FILE2_LOAD_FROM_MEMORY_END);
458
+ #else
459
+ FILE2_putc (&I->multi_fp[core_id], '\n');
460
+ #endif
461
+ #ifdef _trsact_h_
462
+ if ( I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT) ){
463
+ ITEMSET_output_occ (I, I->set_occ[i], core_id);
464
+ }
465
+ #endif
466
+ ITEMSET_flush (I, &I->multi_fp[core_id]);
467
+ }
468
+ }
469
+ }
470
+ // constraint of relational frequency
471
+ if ( ((I->flag&ITEMSET_RFRQ)==0 || d >= I->prob_lb * I->prob )
472
+ && ((I->flag&ITEMSET_RINFRQ)==0 || d <= I->prob * I->prob_ub) ){
473
+ if ( I->flag&ITEMSET_RULE ){ // rule mining routines
474
+ if ( I->itemset.t == 0 ) return;
475
+ if ( I->target < I->item_max ){
476
+ MQUE_FLOOP (*jump, x){
477
+ if ( *x == I->target ){
478
+ ITEMSET_check_rule (I, w, occ, *x, core_id); if (ERROR_MES) return;
479
+ }
480
+ }
481
+ // ITEMSET_check_rule (I, w, occ, I->target, core_id); if (ERROR_MES) return;
482
+ } else {
483
+ if ( I->flag & (ITEMSET_RULE_FRQ + ITEMSET_RULE_RFRQ) ){
484
+ if ( I->add.t>0 ){
485
+ // if ( I->itemflag[I->add.v[0]] ) // for POSI_EQUISUPP (occ_w[e] may not be 100%, in the case)
486
+ f = I->add.v[I->add.t-1]; t = I->add.t; I->add.t--;
487
+ FLOOP (i, 0, t){
488
+ e = I->add.v[i];
489
+ I->add.v[i] = f;
490
+ ITEMSET_check_rule (I, w, occ, e, core_id); if (ERROR_MES) return;
491
+ I->add.v[i] = e;
492
+ }
493
+ I->add.t++;
494
+ }
495
+ MQUE_FLOOP (*jump, x)
496
+ ITEMSET_check_rule (I, w, occ, *x, core_id); if (ERROR_MES) return;
497
+ } else {
498
+ if ( I->flag & (ITEMSET_RULE_INFRQ + ITEMSET_RULE_RINFRQ) ){
499
+ // ARY_FLOOP ( *jump, i, e ) I->itemflag[e]--;
500
+ FLOOP (i, 0, I->item_max){
501
+ if ( I->itemflag[i] != 1 ){
502
+ ITEMSET_check_rule (I, w, occ, i, core_id); if (ERROR_MES) return;
503
+ }
504
+ }
505
+ // ARY_FLOOP ( *jump, i, e ) I->itemflag[e]++;
506
+ // }
507
+ // ARY_FLOOP ( *jump, i, e ) ITEMSET_check_rule (I, w, occ, e);
508
+ }
509
+ }
510
+ }
511
+ } else { // usual mining (not rule mining)
512
+ if ( I->fp && (I->flag&(ITEMSET_RFRQ+ITEMSET_RINFRQ))){
513
+ FILE2_print_real (&I->multi_fp[core_id], d, 4, '[');
514
+ FILE2_print_real (&I->multi_fp[core_id], I->prob, 4, ',');
515
+ FILE2_putc (&I->multi_fp[core_id], ']');
516
+ }
517
+ ITEMSET_solution (I, occ, core_id);
518
+ }
519
+ }
520
+ }
521
+
522
+ #endif