nysol-take 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (161) hide show
  1. checksums.yaml +7 -0
  2. data/bin/mbiclique.rb +317 -0
  3. data/bin/mbipolish.rb +362 -0
  4. data/bin/mccomp.rb +235 -0
  5. data/bin/mclique.rb +295 -0
  6. data/bin/mclique2g.rb +105 -0
  7. data/bin/mcliqueInfo.rb +203 -0
  8. data/bin/mfriends.rb +202 -0
  9. data/bin/mgdiff.rb +252 -0
  10. data/bin/mhifriend.rb +456 -0
  11. data/bin/mhipolish.rb +465 -0
  12. data/bin/mitemset.rb +168 -0
  13. data/bin/mpal.rb +410 -0
  14. data/bin/mpolishing.rb +399 -0
  15. data/bin/msequence.rb +165 -0
  16. data/bin/mtra2g.rb +476 -0
  17. data/bin/mtra2gc.rb +360 -0
  18. data/ext/grhfilrun/extconf.rb +12 -0
  19. data/ext/grhfilrun/grhfilrun.c +85 -0
  20. data/ext/grhfilrun/src/_sspc.c +358 -0
  21. data/ext/grhfilrun/src/aheap.c +545 -0
  22. data/ext/grhfilrun/src/aheap.h +251 -0
  23. data/ext/grhfilrun/src/base.c +92 -0
  24. data/ext/grhfilrun/src/base.h +59 -0
  25. data/ext/grhfilrun/src/fstar.c +497 -0
  26. data/ext/grhfilrun/src/fstar.h +80 -0
  27. data/ext/grhfilrun/src/grhfil.c +214 -0
  28. data/ext/grhfilrun/src/itemset.c +713 -0
  29. data/ext/grhfilrun/src/itemset.h +170 -0
  30. data/ext/grhfilrun/src/problem.c +415 -0
  31. data/ext/grhfilrun/src/problem.h +179 -0
  32. data/ext/grhfilrun/src/queue.c +533 -0
  33. data/ext/grhfilrun/src/queue.h +182 -0
  34. data/ext/grhfilrun/src/sample.c +19 -0
  35. data/ext/grhfilrun/src/sspc.c +597 -0
  36. data/ext/grhfilrun/src/sspc2.c +491 -0
  37. data/ext/grhfilrun/src/stdlib2.c +1482 -0
  38. data/ext/grhfilrun/src/stdlib2.h +892 -0
  39. data/ext/grhfilrun/src/trsact.c +817 -0
  40. data/ext/grhfilrun/src/trsact.h +160 -0
  41. data/ext/grhfilrun/src/vec.c +745 -0
  42. data/ext/grhfilrun/src/vec.h +172 -0
  43. data/ext/lcmrun/extconf.rb +20 -0
  44. data/ext/lcmrun/lcmrun.cpp +99 -0
  45. data/ext/lcmrun/src/aheap.c +216 -0
  46. data/ext/lcmrun/src/aheap.h +111 -0
  47. data/ext/lcmrun/src/base.c +92 -0
  48. data/ext/lcmrun/src/base.h +59 -0
  49. data/ext/lcmrun/src/itemset.c +496 -0
  50. data/ext/lcmrun/src/itemset.h +157 -0
  51. data/ext/lcmrun/src/lcm.c +427 -0
  52. data/ext/lcmrun/src/problem.c +349 -0
  53. data/ext/lcmrun/src/problem.h +177 -0
  54. data/ext/lcmrun/src/queue.c +528 -0
  55. data/ext/lcmrun/src/queue.h +176 -0
  56. data/ext/lcmrun/src/sgraph.c +359 -0
  57. data/ext/lcmrun/src/sgraph.h +173 -0
  58. data/ext/lcmrun/src/stdlib2.c +1282 -0
  59. data/ext/lcmrun/src/stdlib2.h +823 -0
  60. data/ext/lcmrun/src/trsact.c +747 -0
  61. data/ext/lcmrun/src/trsact.h +159 -0
  62. data/ext/lcmrun/src/vec.c +731 -0
  63. data/ext/lcmrun/src/vec.h +171 -0
  64. data/ext/lcmseq0run/extconf.rb +20 -0
  65. data/ext/lcmseq0run/lcmseq0run.cpp +59 -0
  66. data/ext/lcmseq0run/src/aheap.c +216 -0
  67. data/ext/lcmseq0run/src/aheap.h +111 -0
  68. data/ext/lcmseq0run/src/base.c +92 -0
  69. data/ext/lcmseq0run/src/base.h +59 -0
  70. data/ext/lcmseq0run/src/itemset.c +518 -0
  71. data/ext/lcmseq0run/src/itemset.h +157 -0
  72. data/ext/lcmseq0run/src/itemset_zero.c +522 -0
  73. data/ext/lcmseq0run/src/lcm_seq.c +446 -0
  74. data/ext/lcmseq0run/src/lcm_seq_zero.c +446 -0
  75. data/ext/lcmseq0run/src/problem.c +439 -0
  76. data/ext/lcmseq0run/src/problem.h +179 -0
  77. data/ext/lcmseq0run/src/problem_zero.c +439 -0
  78. data/ext/lcmseq0run/src/queue.c +533 -0
  79. data/ext/lcmseq0run/src/queue.h +182 -0
  80. data/ext/lcmseq0run/src/stdlib2.c +1350 -0
  81. data/ext/lcmseq0run/src/stdlib2.h +864 -0
  82. data/ext/lcmseq0run/src/trsact.c +747 -0
  83. data/ext/lcmseq0run/src/trsact.h +159 -0
  84. data/ext/lcmseq0run/src/vec.c +779 -0
  85. data/ext/lcmseq0run/src/vec.h +172 -0
  86. data/ext/lcmseqrun/extconf.rb +20 -0
  87. data/ext/lcmseqrun/lcmseqrun.cpp +101 -0
  88. data/ext/lcmseqrun/src/aheap.c +216 -0
  89. data/ext/lcmseqrun/src/aheap.h +111 -0
  90. data/ext/lcmseqrun/src/base.c +92 -0
  91. data/ext/lcmseqrun/src/base.h +59 -0
  92. data/ext/lcmseqrun/src/itemset.c +518 -0
  93. data/ext/lcmseqrun/src/itemset.h +157 -0
  94. data/ext/lcmseqrun/src/itemset_zero.c +522 -0
  95. data/ext/lcmseqrun/src/lcm_seq.c +447 -0
  96. data/ext/lcmseqrun/src/lcm_seq_zero.c +446 -0
  97. data/ext/lcmseqrun/src/problem.c +439 -0
  98. data/ext/lcmseqrun/src/problem.h +179 -0
  99. data/ext/lcmseqrun/src/problem_zero.c +439 -0
  100. data/ext/lcmseqrun/src/queue.c +533 -0
  101. data/ext/lcmseqrun/src/queue.h +182 -0
  102. data/ext/lcmseqrun/src/stdlib2.c +1350 -0
  103. data/ext/lcmseqrun/src/stdlib2.h +864 -0
  104. data/ext/lcmseqrun/src/trsact.c +747 -0
  105. data/ext/lcmseqrun/src/trsact.h +159 -0
  106. data/ext/lcmseqrun/src/vec.c +779 -0
  107. data/ext/lcmseqrun/src/vec.h +172 -0
  108. data/ext/lcmtransrun/extconf.rb +18 -0
  109. data/ext/lcmtransrun/lcmtransrun.cpp +264 -0
  110. data/ext/macerun/extconf.rb +20 -0
  111. data/ext/macerun/macerun.cpp +57 -0
  112. data/ext/macerun/src/aheap.c +217 -0
  113. data/ext/macerun/src/aheap.h +112 -0
  114. data/ext/macerun/src/itemset.c +491 -0
  115. data/ext/macerun/src/itemset.h +158 -0
  116. data/ext/macerun/src/mace.c +503 -0
  117. data/ext/macerun/src/problem.c +346 -0
  118. data/ext/macerun/src/problem.h +174 -0
  119. data/ext/macerun/src/queue.c +529 -0
  120. data/ext/macerun/src/queue.h +177 -0
  121. data/ext/macerun/src/sgraph.c +360 -0
  122. data/ext/macerun/src/sgraph.h +174 -0
  123. data/ext/macerun/src/stdlib2.c +993 -0
  124. data/ext/macerun/src/stdlib2.h +811 -0
  125. data/ext/macerun/src/vec.c +634 -0
  126. data/ext/macerun/src/vec.h +170 -0
  127. data/ext/sspcrun/extconf.rb +20 -0
  128. data/ext/sspcrun/src/_sspc.c +358 -0
  129. data/ext/sspcrun/src/aheap.c +545 -0
  130. data/ext/sspcrun/src/aheap.h +251 -0
  131. data/ext/sspcrun/src/base.c +92 -0
  132. data/ext/sspcrun/src/base.h +59 -0
  133. data/ext/sspcrun/src/fstar.c +496 -0
  134. data/ext/sspcrun/src/fstar.h +80 -0
  135. data/ext/sspcrun/src/grhfil.c +213 -0
  136. data/ext/sspcrun/src/itemset.c +713 -0
  137. data/ext/sspcrun/src/itemset.h +170 -0
  138. data/ext/sspcrun/src/problem.c +415 -0
  139. data/ext/sspcrun/src/problem.h +179 -0
  140. data/ext/sspcrun/src/queue.c +533 -0
  141. data/ext/sspcrun/src/queue.h +182 -0
  142. data/ext/sspcrun/src/sample.c +19 -0
  143. data/ext/sspcrun/src/sspc.c +598 -0
  144. data/ext/sspcrun/src/sspc2.c +491 -0
  145. data/ext/sspcrun/src/stdlib2.c +1482 -0
  146. data/ext/sspcrun/src/stdlib2.h +892 -0
  147. data/ext/sspcrun/src/trsact.c +817 -0
  148. data/ext/sspcrun/src/trsact.h +160 -0
  149. data/ext/sspcrun/src/vec.c +745 -0
  150. data/ext/sspcrun/src/vec.h +172 -0
  151. data/ext/sspcrun/sspcrun.cpp +54 -0
  152. data/lib/nysol/enumLcmEp.rb +338 -0
  153. data/lib/nysol/enumLcmEsp.rb +284 -0
  154. data/lib/nysol/enumLcmIs.rb +275 -0
  155. data/lib/nysol/enumLcmSeq.rb +143 -0
  156. data/lib/nysol/items.rb +201 -0
  157. data/lib/nysol/seqDB.rb +256 -0
  158. data/lib/nysol/take.rb +39 -0
  159. data/lib/nysol/taxonomy.rb +113 -0
  160. data/lib/nysol/traDB.rb +257 -0
  161. metadata +239 -0
@@ -0,0 +1,157 @@
1
+ /* itemset search input/output common routines
2
+ 25/Nov/2007 by Takeaki Uno e-mail:uno@nii.jp,
3
+ homepage: http://research.nii.ac.jp/~uno/index.html */
4
+ /* This program is available for only academic use, basically.
5
+ Anyone can modify this program, but he/she has to write down
6
+ the change of the modification on the top of the source code.
7
+ Neither contact nor appointment to Takeaki Uno is needed.
8
+ If one wants to re-distribute this code, please
9
+ refer the newest code, and show the link to homepage of
10
+ Takeaki Uno, to notify the news about the codes for the users. */
11
+
12
+ /* routines for itemset mining */
13
+
14
+ #ifndef _itemset_h_
15
+ #define _itemset_h_
16
+
17
+ #include"stdlib2.h"
18
+ #include"queue.h"
19
+ #define AHEAP_KEY_WEIGHT
20
+ #include"aheap.h"
21
+
22
+
23
+ typedef struct { // parameters, counters and output state shared by the itemset mining/output routines declared in this header
24
+ int a; // NOTE(review): purpose is not evident from this header -- confirm against itemset.c
25
+ QUEUE itemset; // current operating itemset
26
+ QUEUE add; // for equisupport (hypercube decomposition)
27
+ int ub, lb; // upper/lower bounds for the itemset size
28
+ WEIGHT frq, pfrq, frq_ub, frq_lb; // frq, pfrq: values for the current itemset (pfrq is presumably the positive-weight part -- confirm); frq_ub, frq_lb: upper/lower bounds for the frequency
29
+ WEIGHT rposi_lb, rposi_ub, posi_lb, posi_ub, nega_ub, nega_lb; // upper/lower bounds for the sum of positive/negative weights
30
+ WEIGHT setrule_lb; // frequency lower bound for set rule
31
+ double ratio, prob; // confidence and independent probability of the current pattern
32
+ double ratio_ub, ratio_lb, prob_ub, prob_lb; // upper/lower bounds for confidence and independent probability
33
+ QUEUE_INT target; // target item for rule mining
34
+ char *itemflag; // 1 if the item is included in the pattern (and 2 if included in add)
35
+ WEIGHT *item_frq; // frequency of each item
36
+ WEIGHT total_weight; // total weight of the input database
37
+ int len_ub, len_lb; // upper/lower bounds for the length of the pattern
38
+ int gap_ub, gap_lb; // upper/lower bounds for the gaps in the pattern
39
+ LONG *sc; // #itemsets classified by the sizes
40
+ QUEUE_INT item_max, item_max_org; // (original) maximum item
41
+ AHEAP topk; // heap for topk mining. valid if topk->h is not NULL
42
+ int flag; // flag for various functions
43
+ PERM *perm; // permutation array for output itemset: item => original item
44
+ FILE *fp; // file pointer to the output file
45
+ char separator; // separator of items output
46
+ int progress; // NOTE(review): undocumented; presumably related to progress display -- confirm
47
+ LONG iters, iters2, iters3; //iterations
48
+ LONG solutions, solutions2; // number of solutions output
49
+ LONG outputs, outputs2; // #calls of ITEMSET_output_itemset or ITEMSET_solution
50
+ LONG max_solutions; // maximum solutions to be output
51
+ void *X; // pointer to the original data
52
+ int dir; // direction flag for AGRAPH & SGRAPH
53
+
54
+ int multi_core; // number of processors
55
+ LONG *multi_iters, *multi_iters2, *multi_iters3; //iterations
56
+ LONG *multi_solutions, *multi_solutions2; // number of solutions output
57
+ LONG *multi_outputs, *multi_outputs2; // #calls of ITEMSET_output_itemset or ITEMSET_solution
58
+ FILE2 *multi_fp; // output file2 pointer for multi-core mode
59
+ WEIGHT *set_weight; // the frequency of each prefix of current itemset
60
+ QUEUE **set_occ; // the occurrence of each prefix of current itemset
61
+
62
+ #ifdef MULTI_CORE
63
+ pthread_spinlock_t lock_counter; // counter lock for jump counter
64
+ pthread_spinlock_t lock_sc; // counter lock for score counter
65
+ pthread_spinlock_t lock_output; // counter lock for #output
66
+ #endif
67
+ } ITEMSET;
68
+
69
+ /* parameters for ITEMSET.flag */
70
+
71
+ #define ITEMSET_ITERS2 4 // output #iters2
72
+ #define ITEMSET_PRE_FREQ 8 // output frequency preceding to each itemset
73
+ #define ITEMSET_FREQ 16 // output frequency following to each itemset
74
+ #define ITEMSET_ALL 32 // concat all combinations of "add" to each itemset
75
+
76
+ #define ITEMSET_TRSACT_ID 64 // output transaction ID's in occurrences
77
+ #define ITEMSET_OUTPUT_EDGE 128 // output itemset as edge set (refer AGRAPH)
78
+ #define ITEMSET_IGNORE_BOUND 256 // ignore constraint for frequency
79
+ #define ITEMSET_RM_DUP_TRSACT 512 // remove duplicated transaction ID's
80
+ #define ITEMSET_MULTI_OCC_PRINT 1024 //print each component of occ
81
+ // TRSACT_ID+MULTI_OCC_PRINT means print first two components of occ
82
+ #define ITEMSET_NOT_ITEMSET 2048 // do not print itemset to the output file
83
+ #define ITEMSET_RULE_SUPP 4096 // output confidence and item frequency by absolute value
84
+ #define ITEMSET_OUTPUT_POSINEGA 8192 // output negative/positive frequencies
85
+ #define ITEMSET_MULTI_OUTPUT 16384 // for multi-core mode
86
+ #define ITEMSET_USE_ORG 32768 // use item_max_org to the size of use
87
+ #define ITEMSET_ITEMFRQ 65536 // allocate item_frq
88
+ #define ITEMSET_ADD 131072 // allocate add
89
+
90
+ #define ITEMSET_RULE_FRQ 262144 // rule mining with a confidence lower bound (ratio_lb); set by the LCM option -a
91
+ #define ITEMSET_RULE_INFRQ 524288 // rule mining with a confidence upper bound (ratio_ub); set by the LCM option -A
92
+ #define ITEMSET_RULE_RFRQ 1048576 // rule mining with a relational-confidence lower bound (ratio_lb); set by the LCM option -r
93
+ #define ITEMSET_RULE_RINFRQ 2097152 // rule mining with a relational-confidence upper bound (ratio_ub); set by the LCM option -R
94
+ #define ITEMSET_RFRQ 4194304 // lower bound prob_lb is given; set by the LCM option -f
95
+ #define ITEMSET_RINFRQ 8388608 // upper bound prob_ub is given; set by the LCM option -F
96
+ #define ITEMSET_POSI_RATIO 16777216 // bounds rposi_lb/rposi_ub are given; set by the LCM options -p/-P
97
+ #define ITEMSET_SET_RULE 134217728 // NOTE(review): presumably enables set-rule output (see setrule_lb) -- confirm against itemset.c
98
+
99
+ #define ITEMSET_APPEND 268435456 // append the output to the file
100
+ #define ITEMSET_RULE_ADD 536870912 // append items in add to the solution, for rule output
101
+
102
+ //#define ITEMSET_RULE (ITEMSET_RULE_FRQ + ITEMSET_RULE_INFRQ + ITEMSET_RULE_RFRQ + ITEMSET_RULE_RINFRQ + ITEMSET_RFRQ + ITEMSET_RINFRQ + ITEMSET_SET_RULE) // for check any rule is true
103
+ #define ITEMSET_RULE (ITEMSET_RULE_FRQ + ITEMSET_RULE_INFRQ + ITEMSET_RULE_RFRQ + ITEMSET_RULE_RINFRQ + ITEMSET_SET_RULE) // for check any rule is true
104
+
105
+ #ifndef ITEMSET_INTERVAL
106
+ #define ITEMSET_INTERVAL 500000
107
+ #endif
108
+
109
+ /* Output information about ITEMSET structure. flag&1: print frequency constraint */
110
+ void ITEMSET_print (ITEMSET *II, int flag);
111
+
112
+ /* topk.end>0 => initialize heap for topk mining */
113
+ /* all pointers will be set to 0, but not for */
114
+ /* if topK mining, set topk.end to "K" */
115
+ void ITEMSET_init (ITEMSET *I);
116
+ void ITEMSET_alloc (ITEMSET *I, char *fname, PERM *perm, QUEUE_INT item_max, size_t item_max_org);
117
+ void ITEMSET_end (ITEMSET *I);
118
+
119
+ /* sum the counters computed by each thread */
120
+ void ITEMSET_merge_counters (ITEMSET *I);
121
+
122
+ /*******************************************************************/
123
+ /* output at the termination of the algorithm */
124
+ /* print #of itemsets of size k, for each k */
125
+ /*******************************************************************/
126
+ void ITEMSET_last_output (ITEMSET *I);
127
+
128
+ /* output frequency, coverage */
129
+ void ITEMSET_output_frequency (ITEMSET *I, int core_id);
130
+
131
+ /* output an itemset to the output file */
132
+ void ITEMSET_output_itemset (ITEMSET *I, QUEUE *occ, int core_id);
133
+
134
+ /* output itemsets with adding all combination of "add"
135
+ at the first call, i has to be "add->t" */
136
+ void ITEMSET_solution (ITEMSET *I, QUEUE *occ, int core_id);
137
+
138
+ /*************************************************************************/
139
+ /* output a rule */
140
+ /*************************************************************************/
141
+ void ITEMSET_output_rule (ITEMSET *I, QUEUE *occ, double p1, double p2, size_t item, int core_id);
142
+
143
+ /*************************************************************************/
144
+ /* check all rules for a pair of itemset and item */
145
+ /*************************************************************************/
146
+ void ITEMSET_check_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, size_t item, int core_id);
147
+
148
+ /*************************************************************************/
149
+ /* check all rules for an itemset and all items */
150
+ /*************************************************************************/
151
+ void ITEMSET_check_all_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, QUEUE *jump, WEIGHT total, int core_id);
152
+
153
+ #endif
154
+
155
+
156
+
157
+
@@ -0,0 +1,427 @@
1
+ /* Linear time Closed itemset Miner for Frequent Itemset Mining problems */
2
+ /* 2004/4/10 Takeaki Uno, e-mail:uno@nii.jp,
3
+ homepage: http://research.nii.ac.jp/~uno/index.html */
4
+ /* This program is available for only academic use, basically.
5
+ Anyone can modify this program, but he/she has to write down
6
+ the change of the modification on the top of the source code.
7
+ Neither contact nor appointment to Takeaki Uno is needed.
8
+ If one wants to re-distribute this code, do not forget to
9
+ refer the newest code, and show the link to homepage of
10
+ Takeaki Uno, to notify the news about LCM for the users.
11
+ For the commercial use, please make a contact to Takeaki Uno. */
12
+
13
+
14
+ #ifndef _lcm_c_
15
+ #define _lcm_c_
16
+
17
+ #define WEIGHT_DOUBLE
18
+ #define TRSACT_DEFAULT_WEIGHT 1
19
+
20
+ #define LCM_UNCONST 16777216 // use the complement graph of the constraint graph
21
+ #define LCM_POSI_EQUISUPP 33554432 // an item will be dealt as "equisupp" when "positive"-frequency is equal to the positive-frequency of the current itemset
22
+
23
+ #define ERROR_RET
24
+
25
+ #include"trsact.c"
26
+ #include"sgraph.c"
27
+ #include"problem.c"
28
+
29
+ void LCM_error (){ // set ERROR_MES and print the command-line usage text through print_err
30
+ ERROR_MES = "command explanation"; // ERROR_MES is what LCM() tests to stop its recursion
31
+ print_err ("LCM: [FCMfQIq] [options] input-filename support [output-filename]\n\
32
+ %%:show progress, _:no message, +:write solutions in append mode\n\
33
+ F:frequent itemset mining, C:closed frequent itemset mining\n\
34
+ M:maximal frequent itemset mining, P:positive-closed itemset mining\n\
35
+ f:output frequency following to each output itemset\n\
36
+ A:output positive/negative frequency, and their ratio\n\
37
+ Q:output frequency and coverages preceding to itemsets\n\
38
+ R:output redundant items for rule mining (usually, redundant items are removed, to be minimal, in the case of rule mining)\n\
39
+ I:output ID's of transactions including each pattern\n\
40
+ i:do not output itemset to the output file (only rules)\n\
41
+ s:output confidence and item frequency by absolute values\n\
42
+ t:transpose the input database (item i will be i-th transaction, and i-th transaction will be item i)\n\
43
+ [options]\n\
44
+ -K [num]:output [num] most frequent itemsets\n\
45
+ -l,-u [num]:output itemsets with size at least/most [num]\n\
46
+ -U [num]:upper bound for support(maximum support)\n\
47
+ -w [filename]:read weights of transactions from the file\n\
48
+ -c,-C [filename]:read item constraint/un-constraint file\n\
49
+ -i [num]: find association rule for item [num]\n\
50
+ -a,-A [ratio]: find association rules of confidence at least/most [ratio]\n\
51
+ -r,-R [ratio]: find association rules of relational confidence at least/most [ratio]\n\
52
+ -f,F [ratio]: output itemsets with frequency no less/greater than [ratio] times the frequency given by product of the probability of each item appearance\n\
53
+ -p,-P [num]: output itemset only if (frequency)/(abusolute frequency) is no less/no greater than [num]\n\
54
+ -n,-N [num]: output itemset only if its negative frequency is no less/no greater than [num] (negative frequency is the sum of weights of transactions having negative weights)\n\
55
+ -o,-O [num]: output itemset only if its positive frequency is no less/no greater than [num] (positive frequency is the sum of weights of transactions having positive weights)\n\
56
+ -m,-M [filename]:read/write item permutation from/to file [filename]\n\
57
+ -# [num]:stop after outputting [num] solutions\n\
58
+ -, [char]:give the separator of the numbers in the output\n\
59
+ -Q [filename]:replace the output numbers according to the permutation table given by [filename]\n\
60
+ # the 1st letter of input-filename cannot be '-'.\n\
61
+ # if the output file name is -, the solutions will be output to standard output.\n");
62
+ EXIT; // EXIT is a macro defined outside this view; its expansion is not visible here
63
+ }
64
+
65
+ /***********************************************************************/
66
+ /* read parameters given by command line */
67
+ /***********************************************************************/
68
+ void LCM_read_param (int argc, char *argv[], PROBLEM *PP){ // fill PP (problem type, ITEMSET flags/bounds, file names) from the command line; calls LCM_error and returns when too few arguments are given
69
+ ITEMSET *II = &PP->II;
70
+ int c=1, f=0; // c: index of the argv entry being parsed; f: records which ratio-type options were seen (checked after NEXT)
71
+ if ( argc < c+3 ){ LCM_error (); return; } // need at least: command letters, input-filename, support
72
+
73
+ if ( !strchr (argv[c], '_') ){ II->flag |= SHOW_MESSAGE; PP->TT.flag |= SHOW_MESSAGE; } // messages are on unless '_' is given
74
+ if ( strchr (argv[c], '%') ) II->flag |= SHOW_PROGRESS;
75
+ if ( strchr (argv[c], '+') ) II->flag |= ITEMSET_APPEND;
76
+ if ( strchr (argv[c], 'f') ) II->flag |= ITEMSET_FREQ;
77
+ if ( strchr (argv[c], 'Q') ) II->flag |= ITEMSET_PRE_FREQ;
78
+ if ( strchr (argv[c], 'R') ) II->flag |= ITEMSET_RULE_ADD;
79
+ if ( strchr (argv[c], 'A') ) II->flag |= ITEMSET_OUTPUT_POSINEGA;
80
+ if ( strchr (argv[c], 'C') ){ PP->problem |= PROBLEM_CLOSED; PP->TT.flag |= TRSACT_INTSEC; } // problem type: the first match in the order C, F, M wins
81
+ else if ( strchr (argv[c], 'F') ){ PP->problem |= PROBLEM_FREQSET; II->flag |= ITEMSET_ALL; }
82
+ else if ( strchr (argv[c], 'M') ){ PP->problem |= PROBLEM_MAXIMAL; PP->TT.flag |= TRSACT_UNION; }
83
+ else error ("one of F, C, M has to be given", EXIT);
84
+ if ( strchr (argv[c], 'P') ) PP->problem |= LCM_POSI_EQUISUPP;
85
+ if ( strchr (argv[c], 'I') ) II->flag |= ITEMSET_TRSACT_ID;
86
+ if ( strchr (argv[c], 'i') ) II->flag |= ITEMSET_NOT_ITEMSET;
87
+ if ( strchr (argv[c], 's') ) II->flag |= ITEMSET_RULE_SUPP;
88
+ if ( strchr (argv[c], 't') ) PP->TT.flag |= LOAD_TPOSE;
89
+ c++; // move on from the command letters to the options
90
+
91
+ while ( argv[c][0] == '-' ){ // every recognized option uses two argv entries: "-X" and its value argv[c+1]
92
+ switch (argv[c][1]){
93
+ case 'K': if ( PP->problem & PROBLEM_MAXIMAL )
94
+ error ("M command and -K option can not be given simltaneously", EXIT);
95
+ II->topk.end = atoi (argv[c+1]);
96
+ break; case 'm': PP->TT.pfname = argv[c+1];
97
+ break; case 'M': PP->TT.pfname = argv[c+1]; PP->TT.flag |= TRSACT_WRITE_PERM;
98
+ break; case 'l': II->lb = atoi (argv[c+1]);
99
+ break; case 'u': II->ub = atoi(argv[c+1]);
100
+ break; case 'U': II->frq_ub = (WEIGHT)atof(argv[c+1]);
101
+ break; case 'w': PP->TT.wfname = argv[c+1];
102
+ break; case 'c': PP->SG.fname = argv[c+1];
103
+ break; case 'C': PP->SG.fname = argv[c+1]; PP->problem |= LCM_UNCONST;
104
+ break; case 'f': II->prob_lb = atof(argv[c+1]); II->flag |= ITEMSET_RFRQ; f++; // NOTE(review): -f/-F increment f while the options below OR a bit into it -- confirm this mix is intended
105
+ break; case 'F': II->prob_ub = atof(argv[c+1]); II->flag |= ITEMSET_RINFRQ; f++;
106
+ break; case 'i': II->target = atoi(argv[c+1]);
107
+ break; case 'a': II->ratio_lb = atof(argv[c+1]); II->flag |= ITEMSET_RULE_FRQ; f|=1; // bit 1 of f: -a/-A
108
+ break; case 'A': II->ratio_ub = atof(argv[c+1]); II->flag |= ITEMSET_RULE_INFRQ; f|=1;
109
+ break; case 'r': II->ratio_lb = atof(argv[c+1]); II->flag |= ITEMSET_RULE_RFRQ; f|=2; // bit 2 of f: -r/-R
110
+ break; case 'R': II->ratio_ub = atof(argv[c+1]); II->flag |= ITEMSET_RULE_RINFRQ; f|=2;
111
+ break; case 'P': II->flag |= ITEMSET_POSI_RATIO; II->flag |= ITEMSET_IGNORE_BOUND; II->rposi_ub = atof(argv[c+1]); f|=4; // bit 4 of f: -p/-P
112
+ break; case 'p': II->flag |= ITEMSET_POSI_RATIO; II->flag |= ITEMSET_IGNORE_BOUND; II->rposi_lb = atof(argv[c+1]); f|=4;
113
+ break; case 'n': II->nega_lb = atof(argv[c+1]);
114
+ break; case 'N': II->nega_ub = atof(argv[c+1]);
115
+ break; case 'o': II->posi_lb = atof(argv[c+1]);
116
+ break; case 'O': II->posi_ub = atof(argv[c+1]);
117
+ break; case '#': II->max_solutions = atoi(argv[c+1]);
118
+ break; case ',': II->separator = argv[c+1][0];
119
+ break; case 'Q': PP->outperm_fname = argv[c+1];
120
+ break; default: goto NEXT; // an unrecognized option letter ends option parsing
121
+ }
122
+ c += 2;
123
+ if ( argc < c+2 ){ LCM_error (); return; } // input-filename and support must still follow
124
+ }
125
+
126
+ NEXT:;
127
+ if ( (f&3)==3 || (f&5)==5 || (f&6)==6 ) error ("-f, -F, -a, -A, -p, -P, -r and -R can not specified simultaneously", EXIT); // true when at least two of the bits 1, 2, 4 are set in f
128
+ if ( f && (II->flag & ITEMSET_PRE_FREQ) ) BITRM (II->flag, ITEMSET_PRE_FREQ); // 'Q' is dropped whenever f is non-zero
129
+
130
+ if ( ( PP->problem & PROBLEM_CLOSED ) && PP->SG.fname )
131
+ error ("closed itemset mining does not work with item constraints", EXIT);
132
+
133
+ if ( (PP->problem & PROBLEM_FREQSET) && (II->flag & (ITEMSET_RULE + ITEMSET_RFRQ + ITEMSET_RINFRQ)) ){ // 'F' combined with any rule/probability flag is switched to closed mode
134
+ PP->problem |= PROBLEM_CLOSED; BITRM (PP->problem, PROBLEM_FREQSET);
135
+ BITRM (II->flag, ITEMSET_ALL);
136
+ }
137
+ PP->TT.fname = argv[c]; // input-filename
138
+ if ( II->topk.end==0 ) II->frq_lb = (WEIGHT)atof(argv[c+1]); // the support argument is not read into frq_lb when topk.end is non-zero (-K)
139
+ if ( argc>c+2 ) PP->output_fname = argv[c+2]; // optional output-filename
140
+ }
141
+
142
+ /*********************************************************************/
143
+ /* add an item to itemset, and update data */
144
+ /*********************************************************************/
145
+ void LCM_add_item (PROBLEM *PP, QUEUE *Q, QUEUE_INT item){ // append item to Q, mark it in II.itemflag, and update itemary when a constraint graph is in use
146
+ QUEUE_INT *x;
147
+ QUE_INS (*Q, item);
148
+ PP->II.itemflag[item] = 1; // flag value 1 is used for both the itemset queue and the add queue (callers pass either as Q)
149
+ if ( PP->SG.fname ) // only when a constraint graph file was given (-c/-C)
150
+ MQUE_MLOOP (PP->SG.edge.v[item], x, item) PP->itemary[*x]++; // presumably visits the entries of item's edge list that are smaller than item -- MQUE_MLOOP is defined outside this view
151
+ }
152
+
153
+ /*********************************************************************/
154
+ /* delete an item from itemset, and update data */
155
+ /*********************************************************************/
156
+ void LCM_del_item (PROBLEM *PP, QUEUE *Q){ // undo LCM_add_item for the last element of Q
157
+ QUEUE_INT *x, item = Q->v[--Q->t]; // pop: shrink Q by one and take the removed element
158
+ PP->II.itemflag[item] = 0;
159
+ if ( PP->SG.fname ) // only when a constraint graph file was given (-c/-C)
160
+ MQUE_MLOOP (PP->SG.edge.v[item], x, item) PP->itemary[*x]--; // mirrors the increments done in LCM_add_item
161
+ }
162
+
163
+ /* remove unnecessary transactions which do not include all posi_closed items */
164
+ /* scan of each transaction is up to item */
165
+ void LCM_reduce_occ_by_posi_equisupp (PROBLEM *PP, QUEUE *occ, QUEUE_INT item, QUEUE_INT full){ // compact occ in place; full is the number of items currently flagged 2 (counted by LCM_maximality_check)
166
+ QUEUE_ID ii=0; // write position for the entries kept in occ->v
167
+ TRSACT *TT = &PP->TT;
168
+ ITEMSET *II = &PP->II;
169
+ QUEUE_INT *x, *y, *z, cnt;
170
+
171
+ MQUE_FLOOP (*occ, x){
172
+ if ( TT->w[*x]>= 0 ) continue; // NOTE(review): entries skipped here are not copied to occ->v[ii], so they drop out of occ once occ->t is set to ii below -- confirm this is intended
173
+ cnt = 0;
174
+ MQUE_MLOOP (TT->T.v[*x], y, item) if ( II->itemflag[*y] == 2 ) cnt++; // count the flag-2 items found in this transaction
175
+ if ( cnt==full ) occ->v[ii++] = *x; // the transaction has all flag-2 items: keep it
176
+ else {
177
+ II->frq -= TT->w[*x]; // otherwise take its weight out of the current frequency ...
178
+ MQUE_MLOOP (TT->T.v[*x], z, item) PP->occ_w[*z] -= TT->w[*x]; // ... and out of the occ_w of the items it is scanned for
179
+ }
180
+ }
181
+ occ->t = ii; // truncate occ to the kept entries
182
+ MQUE_FLOOP (PP->itemcand, x){
183
+ if ( II->itemflag[*x] == 2 ) II->itemflag[*x] = 1; // reset flag 2 to 1 for the items listed in itemcand
184
+ }
185
+ }
186
+
187
+ /*************************************************************************/
188
+ /* ppc check and maximality check */
189
+ /* INPUT: O:occurrence, jump:items, th:support, frq:frequency, add:itemset
190
+ OUTPUT: maximum item i s.t. frq(i)=frq
191
+ OPERATION: remove infrequent items from jump, and
192
+ insert items i to "add" s.t. frq(i)=frq */
193
+ /*************************************************************************/
194
+ /* functions
195
+ 1. when closed itemset mining or maximal frequent itemset mining, find all items
196
+ included in all transactions in occ (checked by pfrq, occ_w)
197
+ if there is such an item with index>item, ppc condition is violated, and return non-negative value
198
+ 2. when constraint graph is given, set the frequency (occ_w) of the items which can
199
+ not be added to itemset to infrequent number.
200
+ 3. count the size of reduced database
201
+ 4. call LCM_reduce_occ_by_posi_equisupp
202
+ */
203
+ QUEUE_INT LCM_maximality_check (PROBLEM *PP, QUEUE *occ, QUEUE_INT item, QUEUE_INT *fmax, QUEUE_INT *cnt){ // classify every item of TT->jump through II->itemflag; returns m (TT->T.clms unless an equisupport item >= item is found); *fmax and *cnt are outputs
204
+ ITEMSET *II = &PP->II;
205
+ TRSACT *TT = &PP->TT;
206
+ QUEUE_INT m = TT->T.clms, full=0, *x; // full: number of items flagged 2 in the loop below
207
+ WEIGHT w=-WEIGHTHUGE; // largest occ_w seen so far among the non-equisupport items
208
+ *fmax = TT->T.clms; *cnt=0; // *fmax keeps TT->T.clms when the last branch of the loop never raises w
209
+
210
+ MQUE_FLOOP (TT->jump, x){
211
+ if ( II->itemflag[*x] == 1) continue; // flag 1 is set by LCM_add_item: the item is already in itemset or add
212
+ //QUEUE_perm_print (&II->itemset, II->perm);
213
+ if ( PP->SG.fname && ( (((PP->problem & LCM_UNCONST)==0) && (PP->itemary[*x]>0) ) ||
214
+ ((PP->problem & LCM_UNCONST) && (PP->itemary[*x]<II->itemset.t ))) ){
215
+ // e can not be added by item constraint
216
+ // PP->occ_pw[e] = PP->occ_w[e] = II->frq_lb -1;
217
+ II->itemflag[*x] = 3; // flag 3: the item is excluded from the candidates (LCM() only takes items flagged 0)
218
+ } else if ( ISEQUAL(PP->occ_pw[*x],II->pfrq) && ( ISEQUAL(PP->occ_w[*x],II->frq) || (PP->problem & LCM_POSI_EQUISUPP) ) ){ // check e is included in all transactions in occ
219
+ if ( *x<item ){
220
+ if ( !PP->SG.fname ){ // add item as "equisupport"
221
+ LCM_add_item (PP, &II->add, *x);
222
+ if ( (PP->problem&LCM_POSI_EQUISUPP) && (II->flag&ITEMSET_RULE) ) II->itemflag[*x] = 0; // in POSI_EQUISUPP, occ_w[*x] is not equal to II->frq, thus we have to deal it in the rule mining
223
+ }
224
+ if ( !ISEQUAL(PP->occ_w[*x],II->frq) ){ full++; II->itemflag[*x] = 2; } // flag 2: only reachable through the LCM_POSI_EQUISUPP alternative of the condition above
225
+ } else m = *x; // an item in prefix can be added without going to another closed itemset
226
+ } else {
227
+ if ( *x<item ) (*cnt)++; // *cnt: number of non-equisupport items below item (LCM() uses it to decide on database reduction)
228
+ II->itemflag[*x] = PP->occ_pw[*x] < PP->th? 3: 0; // mark item by freq/infreq
229
+ if ( PP->occ_w[*x] > w ){ // remember in *fmax the item with the largest occ_w
230
+ *fmax = *x;
231
+ w = PP->occ_w[*x];
232
+ }
233
+ }
234
+ }
235
+ if ( full && (PP->problem & LCM_POSI_EQUISUPP) && m<item ) // m<item always holds in frequent itemset mining. NOTE(review): m starts at TT->T.clms and is only reassigned to values >= item, so m<item needs item > TT->T.clms -- confirm the intended condition
236
+ LCM_reduce_occ_by_posi_equisupp (PP, occ, item, full);
237
+ return (m);
238
+ }
239
+
240
+ /***************************************************************/
241
+ /* iteration of LCM ver. 5 */
242
+ /* INPUT: item:tail of the current solution, t_new,buf:head of the list of
243
+ ID and buffer memory of new transactions */
244
+ /*************************************************************************/
245
+ void LCM (PROBLEM *PP, int item, QUEUE *occ, WEIGHT frq, WEIGHT pfrq){ // one recursive step: check the current itemset (II->itemset), pass it to ITEMSET_check_all_rule, then recurse on each candidate item smaller than item
246
+ ITEMSET *II = &PP->II;
247
+ TRSACT *TT = &PP->TT;
248
+ int bnum = TT->buf.num, bblock = TT->buf.block_num; // saved positions of TT->buf, restored after the recursive calls
249
+ int wnum = TT->wbuf.num, wblock = TT->wbuf.block_num; // same for TT->wbuf
250
+ VEC_ID new_t = TT->new_t; // saved TT->new_t, restored after the recursive calls
251
+ QUEUE_INT cnt, f, *x, m, e, imax = PP->clms? item: TT->T.clms; // imax: item bound handed to TRSACT_delivery
252
+ QUEUE_ID js = PP->itemcand.s, qt = II->add.t, i; // js/qt: state of itemcand and add on entry, restored at END
253
+ WEIGHT rposi=0.0;
254
+
255
+ //TRSACT_print (TT, occ, NULL);
256
+ //printf ("itemset: %f ::::", II->frq); QUEUE_perm_print (&II->itemset, II->perm);
257
+ //QUEUE_print__ ( occ );
258
+ //printf ("itemset: %f ::::", II->frq); QUEUE_perm_print (&II->itemset, II->perm);
259
+ //printf ("itemset: %f ::::", II->frq); QUEUE_print__ (&II->itemset);
260
+ //FLOOP (i, 0, item) printf ("%1.2f, ", PP->occ_w[i]); printf ("\n ### ");
261
+ //FLOOP (i, item, II->item_max) printf ("%1.2f, ", PP->occ_w[i]); printf ("\n");
262
+
263
+ //printf ("add:"); QUEUE_perm_print (&II->add, II->perm);
264
+ //for (i=0 ; i<II->imax ; i++ ) printf ("%d(%d) ", II->perm[i], II->itemflag[i]); printf ("\n");
265
+
266
+ II->iters++;
267
+ PP->itemcand.s = PP->itemcand.t; // open a fresh, empty segment on the shared candidate queue for this call
268
+ // if ( II->flag&ITEMSET_POSI_RATIO && pfrq!=0 ) II->frq /= (pfrq+pfrq-II->frq);
269
+ if ( II->flag&ITEMSET_POSI_RATIO && pfrq!=0 ) rposi = pfrq / (pfrq+pfrq-II->frq); // NOTE(review): II->frq is read here before it is assigned from the frq parameter below, so it still holds the value left by earlier processing -- confirm frq was not intended
270
+ TRSACT_delivery (TT, &TT->jump, PP->occ_w, PP->occ_pw, occ, imax);
271
+ // if the itemset is empty, set frq to the original #trsactions, and compute item_frq's
272
+ if ( II->itemset.t == 0 ){
273
+ if ( TT->total_w_org != 0.0 ) // guards the division below
274
+ FLOOP (i, 0, TT->T.clms) II->item_frq[i] = PP->occ_w[i]/TT->total_w_org;
275
+ }
276
+
277
+ II->frq = frq; II->pfrq = pfrq;
278
+ m = LCM_maximality_check (PP, occ, item, &f, &cnt); // f receives *fmax, cnt receives *cnt
279
+ // printf ("add: "); QUEUE_print__ ( &II->add);
280
+
281
+ if ( !(PP->problem & PROBLEM_FREQSET) && m<TT->T.clms ){ // ppc check
282
+ MQUE_FLOOP (TT->jump, x) TT->OQ[*x].end = 0;
283
+ goto END;
284
+ }
285
+ if ( !(PP->problem&PROBLEM_MAXIMAL) || f>=TT->T.clms || PP->occ_w[f]<II->frq_lb ){ // in maximal mode, proceed only when no item was reported in f or that item's occ_w is below frq_lb
286
+ if ( !(II->flag & ITEMSET_POSI_RATIO) || (rposi<=II->rposi_ub && rposi>=II->rposi_lb) ){
287
+ II->prob = 1.0; // product of item_frq over the items of itemset and add
288
+ MQUE_FLOOP (II->itemset, x) II->prob *= II->item_frq[*x];
289
+ MQUE_FLOOP (II->add, x) II->prob *= II->item_frq[*x];
290
+ ITEMSET_check_all_rule (II, PP->occ_w, occ, &TT->jump, TT->total_pw_org, 0); // if (ERROR_MES) return;
291
+ }
292
+ }
293
+ // select frequent (and addible) items with smaller indices
294
+ MQUE_FLOOP (TT->jump, x){
295
+ TT->OQ[*x].end = 0; // in the case of freqset mining, automatically done by rightmost sweep;
296
+ if ( *x<item && II->itemflag[*x] == 0 ){ // flag 0 is assigned in LCM_maximality_check
297
+ QUE_INS (PP->itemcand, *x);
298
+ PP->occ_w2[*x] = PP->occ_w[*x]; // keep a copy of the weight for use in the recursion loop below
299
+ if ( TT->flag & TRSACT_NEGATIVE ) PP->occ_pw2[*x] = PP->occ_pw[*x];
300
+ }
301
+ }
302
+
303
+ if ( QUEUE_LENGTH_(PP->itemcand)==0 || II->itemset.t >= II->ub ) goto END; // no candidate, or the itemset size bound ub is reached
304
+ qsort_QUEUE_INT (PP->itemcand.v+PP->itemcand.s, PP->itemcand.t-PP->itemcand.s, -1); // sorts only this call's segment of itemcand
305
+ //QUEUE_print__ (&PP->itemcand);
306
+ qsort_QUEUE_INT (II->add.v+qt, II->add.t-qt, -1); // sorts only the items appended to add during this call
307
+
308
+ // database reduction
309
+ if ( cnt>2 && (II->flag & ITEMSET_TRSACT_ID)==0 && II->itemset.t >0){
310
+ TRSACT_find_same (TT, occ, item);
311
+ TRSACT_merge_trsact (TT, &TT->OQ[TT->T.clms], item);
312
+ TRSACT_reduce_occ (TT, occ);
313
+ }
314
+ // occurrence deliver
315
+ TRSACT_deliv (TT, occ, item);
316
+
317
+ // loop for recursive calls
318
+ cnt = QUEUE_LENGTH_ (PP->itemcand); f=0; // for showing progress
319
+ while ( QUEUE_LENGTH_ (PP->itemcand) > 0 ){
320
+ e = QUEUE_ext_tail_ (&PP->itemcand);
321
+ if ( PP->occ_pw2[e] >= MAX(II->frq_lb, II->posi_lb) ){ // if the item is frequent. NOTE(review): occ_pw2 is read here although it is copied above only under TRSACT_NEGATIVE -- presumably it aliases occ_w2 otherwise; verify in PROBLEM_alloc
322
+ LCM_add_item (PP, &II->itemset, e);
323
+ LCM (PP, e, &TT->OQ[e], PP->occ_w2[e], PP->occ_pw2[e]); // recursive call
324
+ if ( ERROR_MES ) return; // on error, return at once without running the restore code below
325
+ LCM_del_item (PP, &II->itemset);
326
+ }
327
+ TT->OQ[e].end = TT->OQ[e].t = 0; // clear the occurrences, for the further delivery
328
+ PP->occ_w[e] = PP->occ_pw[e] = -WEIGHTHUGE; // unnecessary?
329
+
330
+ if ( (II->flag & SHOW_PROGRESS) && (II->itemset.t == 0 ) ){ // progress is printed only when the current itemset is empty
331
+ f++; print_err ("%d/%d (" LONGF " iterations)\n", f, cnt, II->iters);
332
+ }
333
+ }
334
+
335
+ TT->new_t = new_t; // restore the values saved on entry
336
+ TT->buf.num = bnum, TT->buf.block_num = bblock;
337
+ TT->wbuf.num = wnum, TT->wbuf.block_num = wblock;
338
+
339
+ END:;
340
+ while ( II->add.t > qt ) LCM_del_item (PP, &II->add); // remove the items added to add during this call
341
+ PP->itemcand.t = PP->itemcand.s; // drop this call's candidate segment ...
342
+ PP->itemcand.s = js; // ... and restore the caller's segment start
343
+ }
344
+
345
+ /*************************************************************************/
346
+ /* initialization of LCM main routine */
347
+ /*************************************************************************/
348
+ void LCM_init (PROBLEM *PP){
349
+ ITEMSET *II = &PP->II;
350
+ TRSACT *TT = &PP->TT;
351
+ SGRAPH *SG = &PP->SG;
352
+ PERM *sperm = NULL, *tmp=NULL;
353
+ QUEUE_INT i;
354
+
355
+ II->X = TT;
356
+ II->flag |= ITEMSET_ITEMFRQ + ITEMSET_ADD;
357
+ PP->clms = ((PP->problem&PROBLEM_FREQSET)&&(II->flag&ITEMSET_RULE)==0);
358
+ PROBLEM_alloc (PP, TT->T.clms, TT->T.t, 0, TT->perm, PROBLEM_ITEMCAND +(PP->SG.fname?PROBLEM_ITEMARY:0) +((TT->flag&TRSACT_NEGATIVE)?PROBLEM_OCC_PW: PROBLEM_OCC_W) +((PP->problem&PROBLEM_FREQSET)?0:PROBLEM_OCC_W2));
359
+ PP->th = (II->flag&ITEMSET_RULE)? ((II->flag&ITEMSET_RULE_INFRQ)? -WEIGHTHUGE: II->frq_lb * II->ratio_lb ): II->frq_lb; // threshold for database reduction
360
+ if ( TT->flag&TRSACT_SHRINK ) PP->oo = QUEUE_dup_ (&TT->OQ[TT->T.clms]); // preserve occ
361
+ else { QUEUE_alloc (&PP->oo, TT->T.t); ARY_INIT_PERM(PP->oo.v, TT->T.t); PP->oo.t = TT->T.t; }
362
+ TT->perm = NULL;
363
+ TT->OQ[TT->T.clms].t = 0;
364
+ print_mes (&PP->TT, "separated at %d\n", PP->TT.sep);
365
+
366
+ if ( !(TT->sc) ) calloc2 (TT->sc, TT->T.clms+2, return);
367
+ free2 (II->itemflag); II->itemflag = TT->sc; // II->itemflag and TT->sc shares the same memory
368
+ II->frq = TT->total_w_org; II->pfrq = TT->total_pw_org;
369
+
370
+ if ( PP->SG.fname ){
371
+ if ( SG->edge.t < TT->T.clms )
372
+ print_mes (&PP->TT, "#nodes in constraint graph is smaller than #items\n");
373
+ if ( TT->perm ){
374
+ malloc2 (sperm, SG->edge.t, EXIT);
375
+ ARY_INIT_PERM (sperm, SG->edge.t);
376
+ FLOOP (i, 0, MIN(TT->T.t, SG->edge.t)) sperm[i] = TT->perm[i];
377
+ ARY_INV_PERM (tmp, sperm, SG->edge.t, {free(sperm);EXIT;});
378
+ SGRAPH_replace_index (SG, sperm, tmp);
379
+ mfree (tmp, sperm);
380
+ SG->perm = NULL;
381
+ }
382
+
383
+ SG->edge.flag |= LOAD_INCSORT +LOAD_RM_DUP;
384
+ SETFAMILY_sort (&SG->edge);
385
+ }
386
+ II->total_weight = TT->total_w;
387
+ }
388
+
389
+ /*************************************************************************/
390
+ /* main of LCM ver. 5 */
391
+ /*************************************************************************/
392
+ int LCM_main (int argc, char *argv[]){
393
+ PROBLEM PP;
394
+ ITEMSET *II = &PP.II;
395
+ TRSACT *TT = &PP.TT;
396
+ SGRAPH *SG = &PP.SG;
397
+ ERROR_MES = NULL;
398
+ PROBLEM_init (&PP);
399
+ LCM_read_param (argc, argv, &PP);
400
+ if ( ERROR_MES ) return (1);
401
+ TT->flag |= LOAD_PERM +TRSACT_FRQSORT +LOAD_DECSORT +LOAD_RM_DUP +TRSACT_MAKE_NEW +TRSACT_DELIV_SC +TRSACT_ALLOC_OCC + ((II->flag & ITEMSET_TRSACT_ID)?0: (TRSACT_SHRINK+TRSACT_1ST_SHRINK)) ;
402
+ if ( II->flag&ITEMSET_RULE ) TT->w_lb = -WEIGHTHUGE; else TT->w_lb = II->frq_lb;
403
+ SG->flag = LOAD_EDGE;
404
+ PROBLEM_load (&PP);
405
+ if ( !ERROR_MES ){
406
+ LCM_init (&PP);
407
+ if ( !ERROR_MES ) LCM (&PP, TT->T.clms, &PP.oo, TT->total_w_org, TT->total_pw_org);
408
+ ITEMSET_last_output (II);
409
+ }
410
+
411
+ TT->sc = NULL;
412
+ PROBLEM_end (&PP);
413
+ return (ERROR_MES?1:0);
414
+ }
415
+
416
+ /*******************************************************************************/
417
+ #ifndef _NO_MAIN_
418
+ #define _NO_MAIN_
419
/* Program entry point: hands the command line to LCM_main and returns its status. */
int main (int argc, char *argv[]){
  int status = LCM_main (argc, argv);
  return status;
}
422
+ #endif
423
+ /*******************************************************************************/
424
+
425
+ #endif
426
+
427
+