nysol-zdd 3.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/ext/zdd_so/BDD.cc +495 -0
  3. data/ext/zdd_so/BDD.h +356 -0
  4. data/ext/zdd_so/BDDDG.cc +1818 -0
  5. data/ext/zdd_so/BDDDG.h +107 -0
  6. data/ext/zdd_so/BDDHASH.cc +91 -0
  7. data/ext/zdd_so/BtoI.cc +503 -0
  8. data/ext/zdd_so/BtoI.h +144 -0
  9. data/ext/zdd_so/CtoI.cc +1072 -0
  10. data/ext/zdd_so/CtoI.h +186 -0
  11. data/ext/zdd_so/MLZBDDV.cc +153 -0
  12. data/ext/zdd_so/MLZBDDV.h +42 -0
  13. data/ext/zdd_so/SOP.cc +608 -0
  14. data/ext/zdd_so/SOP.h +199 -0
  15. data/ext/zdd_so/ZBDD.cc +1035 -0
  16. data/ext/zdd_so/ZBDD.h +243 -0
  17. data/ext/zdd_so/ZBDDDG.cc +1834 -0
  18. data/ext/zdd_so/ZBDDDG.h +105 -0
  19. data/ext/zdd_so/ZBDDHASH.cc +91 -0
  20. data/ext/zdd_so/bddc.c +2816 -0
  21. data/ext/zdd_so/bddc.h +132 -0
  22. data/ext/zdd_so/extconf.rb +25 -0
  23. data/ext/zdd_so/include/aheap.c +211 -0
  24. data/ext/zdd_so/include/aheap.h +111 -0
  25. data/ext/zdd_so/include/base.c +93 -0
  26. data/ext/zdd_so/include/base.h +60 -0
  27. data/ext/zdd_so/include/itemset.c +473 -0
  28. data/ext/zdd_so/include/itemset.h +153 -0
  29. data/ext/zdd_so/include/problem.c +371 -0
  30. data/ext/zdd_so/include/problem.h +160 -0
  31. data/ext/zdd_so/include/queue.c +518 -0
  32. data/ext/zdd_so/include/queue.h +177 -0
  33. data/ext/zdd_so/include/sgraph.c +331 -0
  34. data/ext/zdd_so/include/sgraph.h +170 -0
  35. data/ext/zdd_so/include/stdlib2.c +832 -0
  36. data/ext/zdd_so/include/stdlib2.h +746 -0
  37. data/ext/zdd_so/include/trsact.c +723 -0
  38. data/ext/zdd_so/include/trsact.h +167 -0
  39. data/ext/zdd_so/include/vec.c +583 -0
  40. data/ext/zdd_so/include/vec.h +159 -0
  41. data/ext/zdd_so/lcm-vsop.cc +596 -0
  42. data/ext/zdd_so/print.cc +683 -0
  43. data/ext/zdd_so/table.cc +330 -0
  44. data/ext/zdd_so/vsop.h +88 -0
  45. data/ext/zdd_so/zdd_so.cpp +3277 -0
  46. data/lib/nysol/zdd.rb +31 -0
  47. metadata +131 -0
@@ -0,0 +1,159 @@
1
+ /* library for sparse vector */
2
+ /* Takeaki Uno 27/Dec/2008 */
3
+
4
+ #ifndef _vec_h_
5
+ #define _vec_h_
6
+
7
+ #define STDLIB2_USE_MATH
8
+
9
+ #include"math.h"
10
+ #include"queue.h"
11
+
12
+ #ifndef SVEC_VAL
13
+ #ifdef SVEC_VAL_INT
14
+ #define SVEC_VAL int
15
+ #define SVEC_VAL2 LONG
16
+ #define SVEC_VAL_END INTHUGE
17
+ #define SVEC_VAL2_END LONGHUGE
18
+ #define SVEC_VALF "%d"
19
+ #else
20
+ #define SVEC_VAL double
21
+ #define SVEC_VAL2 double
22
+ #define SVEC_VAL_END DOUBLEHUGE
23
+ #define SVEC_VAL2_END DOUBLEHUGE
24
+ #define SVEC_VALF "%f"
25
+ #endif
26
+ #endif
27
+
28
+ #define VEC_LOAD_BIN 16777216 // (1<<24) read binary file
29
+ #define VEC_LOAD_BIN2 33554432 // (1<<25) read binary file with 2 bytes for each number
30
+ #define VEC_LOAD_BIN4 67108864 // (1<<26) read binary file with 4 bytes for each number
31
+ #define VEC_LOAD_CENTERIZE 134217728 // (1<<27) read binary file, and subtract the half (128) from each number
32
+ #define VEC_NORMALIZE 268435456 // (1<<28) NOTE(review): the comment here used to be a verbatim copy of VEC_LOAD_CENTERIZE's; presumably this bit requests normalization of the loaded vectors -- confirm in vec.c
33
+
34
+ /* matrix: an array of VEC rows. NOTE(review): VEC, VEC_ID and VEC_VAL are not declared in this header (presumably they come from queue.h / stdlib2.h); the field roles marked "presumably" are inferred from names only -- confirm in vec.c */
35
+ typedef struct {
36
+ unsigned char type; // mark to identify type of the structure
37
+ VEC *v; // the row vectors
38
+ VEC_ID end; // presumably the allocated number of rows -- TODO confirm
39
+ VEC_ID t; // presumably the number of rows in use -- TODO confirm
40
+ VEC_VAL *buf, *buf2; // value buffers; presumably the storage the rows point into -- TODO confirm
41
+ int flag; // option bits; presumably the VEC_LOAD_* / VEC_NORMALIZE values defined above -- TODO confirm
42
+ VEC_ID clms; // presumably the number of columns -- TODO confirm
43
+ size_t eles; // presumably the total number of elements -- TODO confirm
44
+ } MAT;
45
+
46
+ /* sparse vector, element: one (index, value) pair */
47
+ typedef struct {
48
+ QUEUE_ID i; // index of the element (ARY_SVEC_INPRO uses it as the subscript into the dense array)
49
+ SVEC_VAL a; // value of the element; SVEC_VAL is double unless SVEC_VAL_INT (or a custom SVEC_VAL) is defined
50
+ } SVEC_ELE;
51
+
52
+ /* sparse vector, vector: an array of SVEC_ELE (index, value) pairs */
53
+ typedef struct {
54
+ unsigned char type; // mark to identify type of the structure
55
+ SVEC_ELE *v; // the elements
56
+ VEC_ID end; // presumably the allocated number of elements -- TODO confirm
57
+ VEC_ID t; // number of elements in use (ARY_SVEC_INPRO loops from 0 up to t)
58
+ } SVEC;
59
+
60
+ /* sparse vector, matrix: an array of SVEC rows. Field roles marked "presumably" are inferred from names only -- confirm in vec.c */
61
+ typedef struct {
62
+ unsigned char type; // mark to identify type of the structure
63
+ SVEC *v; // the sparse row vectors
64
+ VEC_ID end; // presumably the allocated number of rows -- TODO confirm
65
+ VEC_ID t; // presumably the number of rows in use -- TODO confirm
66
+ SVEC_ELE *buf, *buf2; // element buffers; presumably the storage the rows point into -- TODO confirm
67
+ int flag; // option bits -- TODO confirm which constants apply
68
+ VEC_ID clms; // presumably the number of columns -- TODO confirm
69
+ size_t eles, ele_end; // presumably the number of stored elements and the buffer capacity -- TODO confirm
70
+ } SMAT;
71
+
72
+ /* set family: an array of QUEUEs (one per set) with optional weights. The previous header comment was a copy of SMAT's. Field roles marked "presumably" are inferred from names only -- confirm in vec.c */
73
+ typedef struct {
74
+ unsigned char type; // mark to identify type of the structure
75
+ QUEUE *v; // the member sets (lcm-vsop.cc reads TT->T.v[t] and SG.edge.v[item] through this field)
76
+ VEC_ID end; // presumably the allocated number of sets -- TODO confirm
77
+ VEC_ID t; // number of sets in use (lcm-vsop.cc passes T.t as the transaction count)
78
+ QUEUE_INT *buf, *buf2; // item buffers; presumably the storage the sets point into -- TODO confirm
79
+ int flag; // option bits (lcm-vsop.cc ORs LOAD_INCSORT + LOAD_RM_DUP into it)
80
+ VEC_ID clms; // number of columns/items (lcm-vsop.cc uses T.clms as the item count)
81
+ size_t eles, ele_end; // presumably the number of stored elements and the buffer capacity -- TODO confirm
82
+ WEIGHT **w, *wbuf; // presumably per-set weight arrays and their backing buffer -- TODO confirm
83
+ int unit; // INIT_SETFAMILY_ initialises this to sizeof(QUEUE_INT)
84
+ } SETFAMILY;
85
+
86
/* Brace initializer for a SETFAMILY, in field order: type, v, end, t, buf, buf2, flag, clms, eles, ele_end, w, wbuf, unit.
   Everything is zero/NULL except type (TYPE_SETFAMILY) and unit (sizeof(QUEUE_INT)). Must be kept in sync with the struct layout. */
+ #define INIT_SETFAMILY_ {TYPE_SETFAMILY,NULL,0,0,NULL,NULL,0,0,0,0,NULL,NULL,sizeof(QUEUE_INT)}
87
+
88
+ extern MAT INIT_MAT;
89
+ extern SVEC INIT_SVEC;
90
+ extern SMAT INIT_SMAT;
91
+ extern SETFAMILY INIT_SETFAMILY;
92
+
93
+ QSORT_TYPE_HEADER (SVEC_VAL, SVEC_VAL)
94
+ QSORT_TYPE_HEADER (SVEC_VAL2, SVEC_VAL2)
95
+
96
/* ARY_QUEUE_INPRO(f,U,V): f = sum of V[U.v[k]] over the first U.t entries of U, i.e. the inner product of the
   0/1 vector represented by the item list U with the dense array V.
   ARY_SVEC_INPRO(f,U,V): f = sum of (double)U.v[k].a * V[U.v[k].i] over the first U.t elements of the sparse vector U.
   Both rely on loop variables (common_QUEUE_ID / common_VEC_ID) and on FLOOP, which are declared outside this header;
   FLOOP(i,a,b) is presumably a for-loop over a <= i < b -- confirm in stdlib2.h. */
+ #define ARY_QUEUE_INPRO(f,U,V) do{(f)=0;FLOOP(common_QUEUE_ID, 0, (QUEUE_ID)(U).t)(f)+=(V)[(U).v[common_QUEUE_ID]];}while(0)
97
+ #define ARY_SVEC_INPRO(f,U,V) do{(f)=0;FLOOP(common_VEC_ID, 0, (VEC_ID)(U).t)(f)+=((double)(U).v[common_VEC_ID].a)*(V)[(U).v[common_VEC_ID].i];}while(0)
98
+
99
+ /* terminate routine for VEC */
100
+ void VEC_end (VEC *V);
101
+ void MAT_end (MAT *M);
102
+ void SVEC_end (SVEC *V);
103
+ void SMAT_end (SMAT *M);
104
+ void SETFAMILY_end (SETFAMILY *M);
105
+
106
+ /* allocate memory according to rows and rowt */
107
+ void VEC_alloc (VEC *V, VEC_ID clms);
108
+ void MAT_alloc (MAT *M, VEC_ID rows, VEC_ID clms);
109
+ void SVEC_alloc (SVEC *V, VEC_ID end);
110
+ void SMAT_alloc (SMAT *M, VEC_ID rows, VEC_ID *rowt, VEC_ID clms, size_t eles);
111
+ void SETFAMILY_alloc (SETFAMILY *M, VEC_ID rows, VEC_ID *rowt, VEC_ID clms, size_t eles);
112
+ void SETFAMILY_alloc_weight (SETFAMILY *M);
113
+
114
+ /* count/read the number in file for MAT */
115
+ /* if *rows>0, only read count the numbers in a row, for the first scan. */
116
+ void MAT_load_bin (MAT *M, FILE2 *fp, int unit);
117
+ void MAT_file_load (MAT *M, FILE2 *fp);
118
+ void MAT_load (MAT *M, char *fname);
119
+ void SMAT_load (SMAT *M, char *fname);
120
+ void SETFAMILY_load (SETFAMILY *M, char *fname, char *wfname);
121
+ void SETFAMILY_load_weight (SETFAMILY *M, char *fname);
122
+
123
+ void MAT_print (FILE *fp, MAT *M);
124
+ void SVEC_print (FILE *fp, SVEC *M);
125
+ void SMAT_print (FILE *fp, SMAT *M);
126
+ void SETFAMILY_print (FILE *fp, SETFAMILY *M);
127
+ void SETFAMILY_print_weight (FILE *fp, SETFAMILY *M);
128
+
129
+
130
+ /* norm, normalization **************************/
131
+ double SVEC_norm (SVEC *V);
132
+ void SVEC_normalize (SVEC *V);
133
+
134
+ /* inner product **************************/
135
+ SVEC_VAL2 SVEC_inpro (SVEC *V1, SVEC *V2);
136
+
137
+ /** Euclidean distance routines *********************************/
138
+ double VEC_eucdist (VEC *V1, VEC *V2);
139
+ double SVEC_eucdist (SVEC *V1, SVEC *V2);
140
+ double VEC_SVEC_eucdist (VEC *V1, SVEC *V2);
141
+ double QUEUE_eucdist (QUEUE *Q1, QUEUE *Q2);
142
+ double VEC_QUEUE_eucdist (VEC *V, QUEUE *Q);
143
+
144
+ void VEC_rand_gaussian (VEC *V);
145
+
146
+ /* compute the inner product, Euclidean distance for multi vector */
147
+ double MVEC_norm (void *V);
148
+ double MVEC_inpro (void *V, void *U);
149
+ double MVEC_double_inpro (void *V, double *p);
150
+ double MVEC_eucdist (void *V, void *U);
151
+
152
+ /* compute the inner product, euclidean distance for i,jth vector */
153
+ double MMAT_inpro_ij (void *M, int i, int j);
154
+ double MMAT_double_inpro_i (void *M, int i, double *p);
155
+ double MMAT_eucdist_ij (void *M, int i, int j);
156
+ double MMAT_norm_i (void *M, int i);
157
+
158
+
159
+ #endif
@@ -0,0 +1,596 @@
1
+ /* Linear time Closed itemset Miner for Frequent Itemset Mining problems */
2
+ /* 2004/4/10 Takeaki Uno, e-mail:uno@nii.jp,
3
+ homepage: http://research.nii.ac.jp/~uno/index.html */
4
+ /* This program is available for only academic use, basically.
5
+ Anyone can modify this program, but he/she has to write down
6
+ the change of the modification on the top of the source code.
7
+ Neither contact nor appointment to Takeaki Uno is needed.
8
+ If one wants to re-distribute this code, do not forget to
9
+ refer the newest code, and show the link to homepage of
10
+ Takeaki Uno, to notify the news about LCM for the users.
11
+ For the commercial use, please make a contact to Takeaki Uno. */
12
+
13
+ #ifndef _lcm_c_
14
+ #define _lcm_c_
15
+ #include "CtoI.h" //ZDD
16
+
17
+ #define WEIGHT_DOUBLE
18
+ #define TRSACT_DEFAULT_WEIGHT 1
19
+
20
+ #define LCM_UNCONST 16777216 // use the complement graph of the constraint graph
21
+ #define LCM_POSI_EQUISUPP 33554432 // an item will be dealt as "equisupp" when "positive"-frequency is equal to the positive-frequency of the current itemset
22
+
23
+ #define ERROR_RET
24
+
25
+ #include"trsact.c"
26
+ #include"sgraph.c"
27
+ #include"problem.c"
28
+
29
/* Print the command-line usage text through print_err().
   ERROR_MES is set to "command explanation" first, then EXIT is expanded.
   NOTE(review): ERROR_RET is defined near the top of this file, so EXIT presumably expands to a plain
   return rather than a process exit -- confirm in stdlib2.h. */
+ void LCM_error (){
30
+ ERROR_MES = "command explanation";
31
+ print_err ("LCM: [FCMfQIq] [options] input-filename support [output-filename]\n\
32
+ F:frequent itemset mining, C:closed frequent itemset mining\n\
33
+ M:maximal frequent itemset mining, P:positive-closed itemset mining\n\
34
+ f:output frequency following to each output itemset\n\
35
+ A:output positive/negative frequency, and their ratio\n\
36
+ Q:output frequency and coverages preceding to itemsets\n\
37
+ q:no output to standard output, V:show progress of computation\n\
38
+ I:output ID's of transactions including each pattern\n\
39
+ i:do not output itemset to the output file (only rules)\n\
40
+ s:output confidence and item frequency by absolute values\n\
41
+ t:transpose the input database (item i will be i-th transaction, and i-th transaction will be item i)\n\
42
+ [options]\n\
43
+ -K [num]:output [num] most frequent itemsets\n\
44
+ -l,-u [num]:output itemsets with size at least/most [num]\n\
45
+ -U [num]:upper bound for support(maximum support)\n\
46
+ -w [filename]:read weights of transactions from the file\n\
47
+ -c,-C [filename]:read item constraint/un-constraint file\n\
48
+ -S [num]:stop aftre outputting [num] solutions\n\
49
+ -i [num]: find association rule for item [num]\n\
50
+ -a,-A [ratio]: find association rules of confidence at least/most [ratio]\n\
51
+ -r,-R [ratio]: find association rules of relational confidence at least/most [ratio]\n\
52
+ -f,F [ratio]: output itemsets with frequency no less/greater than [ratio] times the frequency given by product of the probability of each item appearance\n\
53
+ -p,-P [num]: output itemset only if (frequency)/(abusolute frequency) is no less/no greater than [num]\n\
54
+ -n,-N [num]: output itemset only if its negative frequency is no less/no greater than [num] (negative frequency is the sum of weights of transactions having negative weights)\n\
55
+ -o,-O [num]: output itemset only if its positive frequency is no less/no greater than [num] (positive frequency is the sum of weights of transactions having positive weights)\n\
56
+ -m,-M [filename]:read/write item permutation from/to file [filename]\n\
57
+ if the 1st letter of input-filename is '-', be considered as 'parameter list'\n");
58
+ EXIT;
59
+ }
60
+
61
+ /***********************************************************************/
62
+ /* read parameters given by command line */
63
+ /***********************************************************************/
64
/* Parse the command line into PP and its embedded PP->II (ITEMSET) and PP->TT (TRSACT).
   Expected layout: argv[1] = command letters, then zero or more "-X value" pairs, then
   input-filename, support and an optional output-filename.
   argc/argv: the program arguments; PP: problem description that receives flags, bounds and file names.
   When too few arguments are given LCM_error() is called and the function returns; other
   inconsistencies are reported through error(..., EXIT). */
+ void LCM_read_param (int argc, char *argv[], PROBLEM *PP){
65
+ ITEMSET *II = &PP->II;
66
+ int c=1, f=0; // c: index of the argv entry being parsed; f: records which ratio-type options were seen
67
+ if ( argc < c+3 ){ LCM_error (); return; } // need at least command letters, input-filename and support
68
+
69
+ if ( !strchr (argv[c], 'q') ){ II->flag |= SHOW_MESSAGE; PP->TT.flag |= SHOW_MESSAGE; }
70
+ if ( strchr (argv[c], 'f') ) II->flag |= ITEMSET_FREQ;
71
+ if ( strchr (argv[c], 'Q') ) II->flag |= ITEMSET_PRE_FREQ;
72
+ if ( strchr (argv[c], 'A') ) II->flag |= ITEMSET_OUTPUT_POSINEGA;
73
+ if ( strchr (argv[c], 'C') ){ PP->problem |= PROBLEM_CLOSED; PP->TT.flag |= TRSACT_INTSEC; } // C takes precedence over F, and F over M
74
+ else if ( strchr (argv[c], 'F') ){ PP->problem |= PROBLEM_FREQSET; II->flag |= ITEMSET_ALL; }
75
+ else if ( strchr (argv[c], 'M') ){ PP->problem |= PROBLEM_MAXIMAL; PP->TT.flag |= TRSACT_UNION; }
76
+ else error ("one of F, C, M has to be given", EXIT);
77
+ if ( strchr (argv[c], 'P') ) PP->problem |= LCM_POSI_EQUISUPP;
78
+ if ( strchr (argv[c], 'V') ) II->flag |= SHOW_PROGRESS;
79
+ if ( strchr (argv[c], 'I') ) II->flag |= ITEMSET_TRSACT_ID;
80
+ if ( strchr (argv[c], 'i') ) II->flag |= ITEMSET_NOT_ITEMSET;
81
+ if ( strchr (argv[c], 's') ) II->flag |= ITEMSET_RULE_SUPP;
82
+ if ( strchr (argv[c], 't') ) PP->TT.flag |= LOAD_TPOSE;
83
+ c++;
84
+
85
+ while ( argv[c][0] == '-' ){ // every recognised option consumes argv[c] and its value argv[c+1]
86
+ switch (argv[c][1]){
87
+ case 'K': if ( PP->problem & PROBLEM_MAXIMAL )
88
+ error ("M command and -K option can not be given simltaneously", EXIT);
89
+ II->topk.end = atoi (argv[c+1]);
90
+ break; case 'm': PP->trsact_pfname = argv[c+1];
91
+ break; case 'M': PP->trsact_pfname = argv[c+1]; PP->TT.flag |= TRSACT_WRITE_PERM;
92
+ break; case 'l': II->lb = atoi (argv[c+1]);
93
+ break; case 'u': II->ub = atoi(argv[c+1]);
94
+ break; case 'U': II->frq_ub = (WEIGHT)atof(argv[c+1]);
95
+ break; case 'w': PP->trsact_wfname = argv[c+1];
96
+ break; case 'c': PP->sgraph_fname = argv[c+1];
97
+ break; case 'C': PP->sgraph_fname = argv[c+1]; PP->problem |= LCM_UNCONST;
98
+ break; case 'S': II->max_solutions = atoi(argv[c+1]);
99
+ break; case 'f': II->prob_lb = atof(argv[c+1]); II->flag |= ITEMSET_RFRQ; f++; // NOTE(review): -f/-F increment f while -a/-A/-r/-R/-p/-P OR bits 1/2/4 into it; the mask test after NEXT can therefore misread the combination -- confirm intent
100
+ break; case 'F': II->prob_ub = atof(argv[c+1]); II->flag |= ITEMSET_RINFRQ; f++;
101
+ break; case 'i': II->target = atoi(argv[c+1]);
102
+ break; case 'a': II->ratio_lb = atof(argv[c+1]); II->flag |= ITEMSET_RULE_FRQ; f|=1;
103
+ break; case 'A': II->ratio_ub = atof(argv[c+1]); II->flag |= ITEMSET_RULE_INFRQ; f|=1;
104
+ break; case 'r': II->ratio_lb = atof(argv[c+1]); II->flag |= ITEMSET_RULE_RFRQ; f|=2;
105
+ break; case 'R': II->ratio_ub = atof(argv[c+1]); II->flag |= ITEMSET_RULE_RINFRQ; f|=2;
106
+ break; case 'P': II->flag |= ITEMSET_POSI_RATIO; II->flag |= ITEMSET_IGNORE_BOUND; II->rposi_ub = atof(argv[c+1]); f|=4;
107
+ break; case 'p': II->flag |= ITEMSET_POSI_RATIO; II->flag |= ITEMSET_IGNORE_BOUND; II->rposi_lb = atof(argv[c+1]); f|=4;
108
+ break; case 'n': II->nega_lb = atof(argv[c+1]);
109
+ break; case 'N': II->nega_ub = atof(argv[c+1]);
110
+ break; case 'o': II->posi_lb = atof(argv[c+1]);
111
+ break; case 'O': II->posi_ub = atof(argv[c+1]);
112
+ break; default: goto NEXT; // unknown letter after '-': stop option parsing; argv[c] is then taken as the input file name
113
+ }
114
+ c += 2;
115
+ if ( argc < c+2 ){ LCM_error (); return; } // input-filename and support must still follow
116
+ }
117
+
118
+ NEXT:;
119
+ if ( (f&3)==3 || (f&5)==5 || (f&6)==6 ) error ("-f, -F, -a, -A, -p, -P, -r and -R can not specified simultaneously", EXIT); // any two of the bits 1, 2, 4 set together
120
+ if ( f && (II->flag & ITEMSET_PRE_FREQ) ) BITRM (II->flag, ITEMSET_PRE_FREQ); // the Q command is dropped when any of those options was given
121
+
122
+ if ( ( PP->problem & PROBLEM_CLOSED ) && PP->sgraph_fname )
123
+ error ("closed itemset mining does not work with item constraints", EXIT);
124
+
125
+ if ( (PP->problem & PROBLEM_FREQSET) && (II->flag & (ITEMSET_RULE + ITEMSET_RFRQ + ITEMSET_RINFRQ)) ){ // with rule/ratio output, frequent-set mining is switched to closed-set mining
126
+ PP->problem |= PROBLEM_CLOSED; BITRM (PP->problem, PROBLEM_FREQSET);
127
+ BITRM (II->flag, ITEMSET_ALL);
128
+ }
129
+ PP->trsact_fname = argv[c];
130
+ if ( II->topk.end==0 ) II->frq_lb = (WEIGHT)atof(argv[c+1]); // the support argument is read only when -K was not given
131
+ if ( argc>c+2 ) PP->output_fname = argv[c+2];
132
+ }
133
+
134
+ /*********************************************************************/
135
+ /* add an item to itemset, and update data */
136
+ /*********************************************************************/
137
+ void LCM_add_item (PROBLEM *PP, QUEUE *Q, QUEUE_INT item){
138
+ QUEUE_INT *x;
139
+ ARY_INS (*Q, item);
140
+ PP->II.itemflag[item] = 1;
141
+ if ( PP->sgraph_fname )
142
+ MQUE_MLOOP (PP->SG.edge.v[item], x, item) PP->itemary[*x]++;
143
+ }
144
+
145
+ /*********************************************************************/
146
+ /* delete an item from itemset, and update data */
147
+ /*********************************************************************/
148
+ void LCM_del_item (PROBLEM *PP, QUEUE *Q){
149
+ QUEUE_INT *x, item = Q->v[--Q->t];
150
+ PP->II.itemflag[item] = 0;
151
+ if ( PP->sgraph_fname )
152
+ MQUE_MLOOP (PP->SG.edge.v[item], x, item) PP->itemary[*x]--;
153
+ }
154
+
155
+ /* remove unnecessary transactions which do not include all posi_closed items */
156
+ /* scan of each transaction is up to item */
157
/* Filter the occurrence list occ in place.
   A visited transaction is kept only when it contains `full` items whose itemflag is 2; for every
   transaction that is not kept this way, its weight is subtracted from II->frq and from PP->occ_w[]
   of the items scanned in it. Afterwards every item of PP->itemcand flagged 2 is reset to 1.
   PP: problem state; occ: list of transaction IDs, shrunk in place; item: scan bound passed to
   MQUE_MLOOP (the comment above says each transaction is scanned "up to item"); full: number of
   flag-2 items a transaction must contain. */
+ void LCM_reduce_occ_by_posi_equisupp (PROBLEM *PP, QUEUE *occ, QUEUE_INT item, QUEUE_INT full){
158
+ QUEUE_ID ii=0; // write position of the next kept transaction in occ->v
159
+ TRSACT *TT = &PP->TT;
160
+ ITEMSET *II = &PP->II;
161
+ QUEUE_INT *x, *y, *z, cnt;
162
+
163
+ MQUE_FLOOP (*occ, x){
164
+ if ( TT->w[*x]>= 0 ) continue; // NOTE(review): a transaction with non-negative weight is skipped without being copied to occ->v[ii], so it disappears from occ once occ->t is set to ii below, while II->frq is left untouched -- confirm this is intended
165
+ cnt = 0;
166
+ MQUE_MLOOP (TT->T.v[*x], y, item) if ( II->itemflag[*y] == 2 ) cnt++; // count the flag-2 items of this transaction
167
+ if ( cnt==full ) occ->v[ii++] = *x; // contains all of them: keep
168
+ else {
169
+ II->frq -= TT->w[*x];
170
+ MQUE_MLOOP (TT->T.v[*x], z, item) PP->occ_w[*z] -= TT->w[*x];
171
+ }
172
+ }
173
+ occ->t = ii; // truncate occ to the kept transactions
174
+ MQUE_FLOOP (PP->itemcand, x){
175
+ if ( II->itemflag[*x] == 2 ) II->itemflag[*x] = 1;
176
+ }
177
+ }
178
+
179
+ /*************************************************************************/
180
+ /* ppc check and maximality check */
181
+ /* INPUT: O:occurrence, jump:items, th:support, frq:frequency, add:itemset
182
+ OUTPUT: maximum item i s.t. frq(i)=frq
183
+ OPERATION: remove infrequent items from jump, and
184
+ insert items i to "add" s.t. frq(i)=frq */
185
+ /*************************************************************************/
186
+ /* functions
187
+ 1. when closed itemset mining or maximal frequent itemset mining, find all items
188
+ included in all transactions in occ (checked by pfrq, occ_w);
189
+ if there is such an item with index>item, ppc condition is violated, and return non-negative value
190
+ 2. when constraint graph is given, set the frequency (occ_w) of the items which can
191
+ not be added to itemset to infrequent number.
192
+ 3. count the size of reduced database
193
+ 4. call LCM_reduce_occ_by_posi_equisupp
194
+ */
195
/* Classify every item of TT->jump against the current itemset and perform the ppc / maximality test.
   PP: problem state (occ_w/occ_pw must already hold the per-item weights for occ);
   occ: occurrences of the current itemset; item: tail item of the current itemset.
   *fmax (out): the item with the largest occ_w among the items that fall into the last else-branch,
                or TT->T.clms if there is none.
   *cnt (out): number of such items that are smaller than `item`.
   Returns TT->T.clms when no item >= `item` passes the equal-support test; otherwise the last such
   item met while scanning TT->jump (the caller treats a value < TT->T.clms as a ppc violation).
   Side effects: sets II->itemflag[] (0 = candidate, 2 = equal positive support only, 3 = excluded),
   appends equal-support items < `item` to II->add when no constraint graph is used, and may shrink occ
   through LCM_reduce_occ_by_posi_equisupp. */
+ QUEUE_INT LCM_maximality_check (PROBLEM *PP, QUEUE *occ, QUEUE_INT item, QUEUE_INT *fmax, QUEUE_INT *cnt){
196
+ ITEMSET *II = &PP->II;
197
+ TRSACT *TT = &PP->TT;
198
+ QUEUE_INT m = TT->T.clms, full=0, *x; // m: return value; full: number of items flagged 2 below
199
+ WEIGHT w=-WEIGHTHUGE; // largest occ_w seen so far (for *fmax)
200
+ *fmax = TT->T.clms; *cnt=0;
201
+
202
+ MQUE_FLOOP (TT->jump, x){
203
+ if ( II->itemflag[*x] == 1) continue; // flag 1 is what LCM_add_item sets: item already in the itemset or in add
204
+ //QUEUE_perm_print (&II->itemset, II->perm);
205
+ if ( PP->sgraph_fname && ( (((PP->problem & LCM_UNCONST)==0) && (PP->itemary[*x]>0) ) ||
206
+ ((PP->problem & LCM_UNCONST) && (PP->itemary[*x]<II->itemset.t ))) ){
207
+ // e can not be added by item constraint
208
+ // PP->occ_pw[e] = PP->occ_w[e] = II->frq_lb -1;
209
+ II->itemflag[*x] = 3;
210
+ } else if ( ISEQUAL(PP->occ_pw[*x],II->pfrq) && ( ISEQUAL(PP->occ_w[*x],II->frq) || (PP->problem & LCM_POSI_EQUISUPP) ) ){ // check e is included in all transactions in occ
211
+ if ( *x<item ){
212
+ if ( !PP->sgraph_fname ){ // add item as "equisupport"
213
+ LCM_add_item (PP, &II->add, *x);
214
+ if ( (PP->problem&LCM_POSI_EQUISUPP) && (II->flag&ITEMSET_RULE) ) II->itemflag[*x] = 0; // in POSI_EQUISUPP, occ_w[*x] is not equal to II->frq, thus we have to deal it in the rule mining
215
+ }
216
+ if ( !ISEQUAL(PP->occ_w[*x],II->frq) ){ full++; II->itemflag[*x] = 2; } // equal only in positive frequency: remember it for the occ reduction below
217
+ } else m = *x; // an item in prefix can be added without going to another closed itemset
218
+ } else {
219
+ if ( *x<item ) (*cnt)++;
220
+ II->itemflag[*x] = PP->occ_pw[*x] < PP->th? 3: 0; // mark item by freq/infreq
221
+ if ( PP->occ_w[*x] > w ){
222
+ *fmax = *x;
223
+ w = PP->occ_w[*x];
224
+ }
225
+ }
226
+ }
227
+ if ( full && (PP->problem & LCM_POSI_EQUISUPP) && m<item ) // m<item always holds in frequent itemset mining
228
+ LCM_reduce_occ_by_posi_equisupp (PP, occ, item, full);
229
+ return (m);
230
+ }
231
+
232
+ /***************************************************************/
233
+ /* iteration of LCM ver. 5 */
234
+ /* INPUT: item:tail of the current solution, t_new,buf:head of the list of
235
+ ID and buffer memory of new transactions */
236
+ /*************************************************************************/
237
/* One recursive step of LCM, ZDD variant: instead of printing itemsets, the itemsets found in this
   call and in all recursive calls are collected in a CtoI, which is returned.
   PP: problem state; item: tail item of the current itemset (CtoI_Lcm2 passes TT.T.clms for the
   empty itemset); occ: occurrences of the current itemset; frq / pfrq: stored into II->frq / II->pfrq.
   Returns the collected CtoI, or CtoI(-1) as soon as ERROR_MES is found set after a recursive call. */
+ CtoI LCM (PROBLEM *PP, int item, QUEUE *occ, WEIGHT frq, WEIGHT pfrq){ //ZDD
238
+ int ii, xx,vv,iii,ix; //ZDD; NOTE(review): vv and ix are never used in this function
239
+ ZBDDV vz; //ZDD; NOTE(review): vz is never used in this function
240
+ CtoI F, G,H,E; //ZDD
241
+ F = CtoI(0); //ZDD; F accumulates the result
242
+ ITEMSET *II = &PP->II;
243
+ TRSACT *TT = &PP->TT;
244
+ int bnum = TT->buf.num, bblock = TT->buf.block_num; // snapshots of the TRSACT buffers, restored after the recursive calls
245
+ int wnum = TT->wbuf.num, wblock = TT->wbuf.block_num;
246
+ VEC_ID new_t = TT->new_t;
247
+ QUEUE_INT cnt, f, *x, m, e, imax = PP->clms? item: TT->T.clms;
248
+ QUEUE_ID js = PP->itemcand.s, qt = II->add.t, i; // js / qt: values of itemcand.s and add.t on entry, restored at END
249
+ WEIGHT rposi=0.0;
250
+
251
+ //TRSACT_print (TT, occ, NULL);
252
+ //printf ("itemset: %f ::::", II->frq); QUEUE_print__ ( &II->itemset);
253
+ //QUEUE_print__ ( occ );
254
+ //printf ("itemset: %f ::::", II->frq); QUEUE_perm_print ( &II->itemset, II->perm);
255
+ //printf ("add:"); QUEUE_perm_print ( &II->add, II->perm);
256
+ //for (i=0 ; i<II->imax ; i++ ) printf ("%d(%d) ", II->perm[i], II->itemflag[i]); printf ("\n");
257
+
258
+ II->iters++;
259
+ PP->itemcand.s = PP->itemcand.t; // this call's candidates occupy itemcand from the current tail on
260
+ // if ( II->flag&ITEMSET_POSI_RATIO && pfrq!=0 ) II->frq /= (pfrq+pfrq-II->frq);
261
+ if ( II->flag&ITEMSET_POSI_RATIO && pfrq!=0 ) rposi = pfrq / (pfrq+pfrq-II->frq);
262
+ TRSACT_delivery (TT, &TT->jump, PP->occ_w, PP->occ_pw, occ, imax);
263
+ // if the itemset is empty, set frq to the original #trsactions, and compute item_frq's
264
+ if ( II->itemset.t == 0 ){
265
+ if ( TT->total_w_org != 0.0 )
266
+ FLOOP (i, 0, TT->T.clms) II->item_frq[i] = PP->occ_w[i]/TT->total_w_org;
267
+ }
268
+
269
+ II->frq = frq; II->pfrq = pfrq;
270
+ m = LCM_maximality_check (PP, occ, item, &f, &cnt);
271
+ // printf ("add: "); QUEUE_print__ ( &II->add);
272
+ if ( !(PP->problem & PROBLEM_FREQSET) && m<TT->T.clms ){ // ppc check
273
+ MQUE_FLOOP (TT->jump, x) TT->OQ[*x].end = 0;
274
+ goto END;
275
+ }
276
+ if ( !(PP->problem&PROBLEM_MAXIMAL) || f>=TT->T.clms || PP->occ_w[f]<II->frq_lb ){ // for maximal mining: only when no remaining item is frequent
277
+ if ( !(II->flag & ITEMSET_POSI_RATIO) || (rposi<=II->rposi_ub && rposi>=II->rposi_lb) ){
278
+ II->prob = 1.0;
279
+ MQUE_FLOOP (II->itemset, x) II->prob *= II->item_frq[*x];
280
+ MQUE_FLOOP (II->add, x) II->prob *= II->item_frq[*x];
281
+ ITEMSET_check_all_rule (II, PP->occ_w, occ, &TT->jump, TT->total_pw_org, 0); // if (ERROR_MES) return;
282
+ if ( II->itemset.t>II->ub || (!(II->flag&ITEMSET_ALL) && II->itemset.t+II->add.t>II->ub)) goto UBSKIP; //ZDD; size bound exceeded: F stays CtoI(0)
283
+ F = (II->flag & ITEMSET_PRE_FREQ)? CtoI((int)II->frq): CtoI(1); //ZDD; value attached to the itemset: its frequency (truncated to int) or 1
284
+ if(II->flag&ITEMSET_ALL && II->itemset.t+II->add.t>II->ub ){//ZDD-nain; all-itemsets output with more "add" items than the size bound allows
285
+ H = F; //ZDD-nain
286
+ for(ii=0; ii<II->add.t; ii++) { //ZDD-nain
287
+ xx = II->add.v[ii]; //ZDD-nain
288
+ H = CtoI_Union(H, F.AffixVar(BDD_VarOfLev(xx+1))); //ZDD-nain
289
+ }//ZDD-nain
290
+ E = F; //ZDD-nain
291
+ for(ii=1; ii<II->ub-II->itemset.t; ii++) { //ZDD-nain
292
+ for(iii=0; iii<II->add.t; iii++) { //ZDD-nain
293
+ xx = II->add.v[iii]; //ZDD-nain
294
+ E = CtoI_Union(E,H.AffixVar(BDD_VarOfLev(xx+1))); //ZDD-nain
295
+ } //ZDD-nain
296
+ H=E; //ZDD-nain
297
+ } //ZDD-nain
298
+ F = H; //ZDD-nain
299
+ }else{ //ZDD-nain
300
+ for(ii=0; ii<II->add.t; ii++) { //ZDD
301
+ xx = II->add.v[ii]; //ZDD
302
+ G = F.AffixVar(BDD_VarOfLev(xx+1)); //ZDD; item xx is represented by the BDD variable at level xx+1
303
+ if(PP->problem & PROBLEM_FREQSET) { //ZDD
304
+ F = CtoI_Union(F, G); //ZDD; frequent-set mining: keep the sets both with and without the item
305
+ } //ZDD
306
+ else F = G; //ZDD; otherwise the item is always part of the set
307
+ } //ZDD
308
+ } //ZDD-nain
309
+ UBSKIP:;
310
+ }
311
+ }
312
+ // select frequent (and addible) items with smaller indices
313
+ MQUE_FLOOP (TT->jump, x){
314
+ TT->OQ[*x].end = 0; // in the case of freqset mining, automatically done by rightmost sweep;
315
+ if ( *x<item && II->itemflag[*x] == 0 ){
316
+ ARY_INS (PP->itemcand, *x);
317
+ PP->occ_w2[*x] = PP->occ_w[*x];
318
+ if ( TT->flag & TRSACT_NEGATIVE ) PP->occ_pw2[*x] = PP->occ_pw[*x];
319
+ }
320
+ }
321
+
322
+ if ( QUEUE_LENGTH_(PP->itemcand)==0 || II->itemset.t >= II->ub ) goto END; // nothing to extend with, or the size bound is reached
323
+ qsort_QUEUE_INT (PP->itemcand.v+PP->itemcand.s, PP->itemcand.t-PP->itemcand.s, -1);
324
+ //QUEUE_print__ (&PP->itemcand);
325
+ qsort_QUEUE_INT (II->add.v+qt, II->add.t-qt, -1);
326
+
327
+ // database reduction
328
+ if ( cnt>2 && (II->flag & ITEMSET_TRSACT_ID)==0 && II->itemset.t >0){
329
+ TRSACT_find_same (TT, occ, item);
330
+ TRSACT_merge_trsact (TT, &TT->OQ[TT->T.clms], item);
331
+ TRSACT_reduce_occ (TT, occ);
332
+ }
333
+ // occurrence deliver
334
+ TRSACT_deliv (TT, occ, item);
335
+
336
+ // loop for recursive calls
337
+ cnt = QUEUE_LENGTH_ (PP->itemcand); f=0; // for showing progress
338
+ while ( QUEUE_LENGTH_ (PP->itemcand) > 0 ){
339
+ e = QUEUE_ext_tail_ (&PP->itemcand);
340
+ if ( PP->occ_pw2[e] >= MAX(II->frq_lb, II->posi_lb) ){ // if the item is frequent
341
+ LCM_add_item (PP, &II->itemset, e);
342
+ G = LCM (PP, e, &TT->OQ[e], PP->occ_w2[e], PP->occ_pw2[e]); // recursive call //ZDD
343
+ F = CtoI_Union(F, G); //ZDD
344
+ if ( ERROR_MES ) return CtoI(-1); //ZDD; note: returns without the restoring steps below
345
+ LCM_del_item (PP, &II->itemset);
346
+ }
347
+ TT->OQ[e].end = TT->OQ[e].t = 0; // clear the occurrences, for the further delivery
348
+ PP->occ_w[e] = PP->occ_pw[e] = -WEIGHTHUGE; // unnecessary?
349
+
350
+ if ( (II->flag & SHOW_PROGRESS) && (II->itemset.t == 0 ) ){
351
+ f++; print_err ("%d/%d (%lld iterations)\n", f, cnt, II->iters);
352
+ }
353
+ }
354
+
355
+ TT->new_t = new_t; // restore the TRSACT state saved on entry
356
+ TT->buf.num = bnum, TT->buf.block_num = bblock;
357
+ TT->wbuf.num = wnum, TT->wbuf.block_num = wblock;
358
+
359
+ END:;
360
+ if(item < II->item_max) { //ZDD; attach the tail item itself (skipped for the top-level call, whose item is not a real item)
361
+ xx = item; //ZDD
362
+ F = F.AffixVar(BDD_VarOfLev(xx+1)); //ZDD
363
+ } //ZDD
364
+ while ( II->add.t > qt ) LCM_del_item (PP, &II->add); // undo the items LCM_maximality_check appended to add
365
+ PP->itemcand.t = PP->itemcand.s; // drop this call's candidate segment and restore the caller's
366
+ PP->itemcand.s = js;
367
+ return F; //ZDD
368
+ }
369
+
370
+ /*************************************************************************/
371
+ /* initialization of LCM main routine */
372
+ /*************************************************************************/
373
/* Prepare PP for mining after the transaction database (and, optionally, the constraint graph) has
   been loaded: allocate the working arrays through PROBLEM_alloc, compute the reduction threshold
   PP->th, build the initial occurrence list PP->oo, make II->itemflag share TT->sc, and sort the
   constraint graph when one is used. */
+ void LCM_init (PROBLEM *PP){
374
+ ITEMSET *II = &PP->II;
375
+ TRSACT *TT = &PP->TT;
376
+ SGRAPH *SG = &PP->SG;
377
+ PERM *sperm = NULL, *tmp=NULL;
378
+ QUEUE_INT i;
379
+
380
+ II->X = TT;
381
+ II->flag |= ITEMSET_ITEMFRQ + ITEMSET_ADD;
382
+ PP->clms = ((PP->problem&PROBLEM_FREQSET)&&(II->flag&ITEMSET_RULE)==0); // 1 only for plain frequent-set mining without rule output; LCM() uses it to choose imax
383
+ PROBLEM_alloc (PP, TT->T.clms, TT->T.t, 0, TT->perm, PROBLEM_ITEMCAND +(PP->sgraph_fname?PROBLEM_ITEMARY:0) +((TT->flag&TRSACT_NEGATIVE)?PROBLEM_OCC_PW: PROBLEM_OCC_W) +((PP->problem&PROBLEM_FREQSET)?0:PROBLEM_OCC_W2));
384
+ PP->th = (II->flag&ITEMSET_RULE)? ((II->flag&ITEMSET_RULE_INFRQ)? -WEIGHTHUGE: II->frq_lb * II->ratio_lb ): II->frq_lb; // threshold for database reduction
385
+ if ( TT->flag&TRSACT_SHRINK ) PP->oo = QUEUE_dup_ (&TT->OQ[TT->T.clms]); // preserve occ
386
+ else { QUEUE_alloc (&PP->oo, TT->T.t); ARY_INIT_PERM(PP->oo.v, TT->T.t); PP->oo.t = TT->T.t; } // otherwise occ is simply every transaction ID
387
+ TT->perm = NULL; // the permutation was handed to PROBLEM_alloc above; presumably cleared so it is not released twice -- confirm
388
+ TT->OQ[TT->T.clms].t = 0;
389
+ print_mes (PP->TT.flag, "separated at %d\n", PP->TT.sep);
390
+ if ( !(TT->sc) ) calloc2 (TT->sc, TT->T.clms+2, "LCM_init: item_flag", return);
391
+ free2 (II->itemflag); II->itemflag = TT->sc; // II->itemflag and TT->sc shares the same memory
392
+ II->frq = TT->total_w_org; II->pfrq = TT->total_pw_org;
393
+
394
+ if ( PP->sgraph_fname ){
395
+ if ( SG->edge.t < TT->T.clms )
396
+ print_mes (PP->problem, "#nodes in constraint graph is smaller than #items\n"); // NOTE(review): the other print_mes call passes a flag word (PP->TT.flag); PP->problem here looks unintended -- confirm
397
+ if ( TT->perm ){ // NOTE(review): TT->perm was set to NULL a few lines above, so this branch never runs and the graph is never re-indexed by the item permutation -- confirm whether II->perm was meant
398
+ malloc2 (sperm, SG->edge.t, "LCM_init: sperm", EXIT);
399
+ ARY_INIT_PERM (sperm, SG->edge.t);
400
+ FLOOP (i, 0, MIN(TT->T.t, SG->edge.t)) sperm[i] = TT->perm[i];
401
+ ARY_INV_PERM (tmp, sperm, SG->edge.t, "LCM_init:INV_PERM", {free(sperm);EXIT;});
402
+ SGRAPH_replace_index (SG, sperm, tmp);
403
+ mfree (tmp, sperm);
404
+ SG->perm = NULL;
405
+ }
406
+
407
+ SG->edge.flag |= LOAD_INCSORT +LOAD_RM_DUP;
408
+ SETFAMILY_sort (&SG->edge);
409
+ }
410
+ II->total_weight = TT->total_w;
411
+ }
412
+
413
+ /*************************************************************************/
414
+ /* main of LCM ver. 5 */
415
+ /*************************************************************************/
416
+ extern PROBLEM LCM_PP; //ZDD
417
+ PROBLEM LCM_PP; //ZDD
418
+ extern CtoI CtoI_Lcm2(); //ZDD
419
+
420
+ int CtoI_LcmItems() { //ZDD
421
+ if( ERROR_MES ) return -1; //ZDD
422
+ return LCM_PP.II.item_max; //ZDD
423
+ } //ZDD
424
+
425
+ int CtoI_LcmPerm(int k) { //ZDD
426
+ if( ERROR_MES ) return -1; //ZDD
427
+ return LCM_PP.II.perm[k]; //ZDD
428
+ } //ZDD
429
+
430
+ CtoI CtoI_LcmA ( char *fname1, char *fname2, int th){ //ZDD
431
+ CtoI_Lcm1(fname1, fname2, th, 0); //ZDD
432
+ if( !ERROR_MES ) while(CtoI_LcmItems() > BDD_VarUsed()) BDD_NewVar(); //ZDD
433
+ return CtoI_Lcm2(); //ZDD
434
+ } //ZDD
435
+
436
+ CtoI CtoI_LcmC ( char *fname1, char *fname2, int th){ //ZDD
437
+ CtoI_Lcm1(fname1, fname2, th, 1); //ZDD
438
+ if( !ERROR_MES ) while(CtoI_LcmItems() > BDD_VarUsed()) BDD_NewVar(); //ZDD
439
+ return CtoI_Lcm2(); //ZDD
440
+ } //ZDD
441
+
442
+ CtoI CtoI_LcmM ( char *fname1, char *fname2, int th){ //ZDD
443
+ CtoI_Lcm1(fname1, fname2, th, 2); //ZDD
444
+ if( !ERROR_MES ) while(CtoI_LcmItems() > BDD_VarUsed()) BDD_NewVar(); //ZDD
445
+ return CtoI_Lcm2(); //ZDD
446
+ } //ZDD
447
+
448
+ CtoI CtoI_LcmAV ( char *fname1, char *fname2, int th){ //ZDD
449
+ CtoI_Lcm1(fname1, fname2, th, 10); //ZDD
450
+ if( !ERROR_MES ) while(CtoI_LcmItems() > BDD_VarUsed()) BDD_NewVar(); //ZDD
451
+ return CtoI_Lcm2(); //ZDD
452
+ } //ZDD
453
+
454
+ CtoI CtoI_LcmCV ( char *fname1, char *fname2, int th){ //ZDD
455
+ CtoI_Lcm1(fname1, fname2, th, 11); //ZDD
456
+ if( !ERROR_MES ) while(CtoI_LcmItems() > BDD_VarUsed()) BDD_NewVar(); //ZDD
457
+ return CtoI_Lcm2(); //ZDD
458
+ } //ZDD
459
+
460
+ CtoI CtoI_LcmMV ( char *fname1, char *fname2, int th){ //ZDD
461
+ CtoI_Lcm1(fname1, fname2, th, 12); //ZDD
462
+ if( !ERROR_MES ) while(CtoI_LcmItems() > BDD_VarUsed()) BDD_NewVar(); //ZDD
463
+ return CtoI_Lcm2(); //ZDD
464
+ } //ZDD
465
+
466
+ int CtoI_Lcm1 ( char *fname1, char *fname2, int th, int param ) { //ZDD
467
+ ITEMSET *II = &LCM_PP.II; //ZDD
468
+ TRSACT *TT = &LCM_PP.TT; //ZDD
469
+ SGRAPH *SG = &LCM_PP.SG; //ZDD
470
+
471
+ ERROR_MES = 0; //ZDD
472
+ PROBLEM_init ( &LCM_PP ); //ZDD
473
+ switch(param) //ZDD
474
+ { //ZDD
475
+ case 12: //ZDD
476
+ LCM_PP.problem |= PROBLEM_MAXIMAL; //ZDD
477
+ II->flag |= ITEMSET_PRE_FREQ; //ZDD
478
+ break; //ZDD
479
+ case 11: //ZDD
480
+ LCM_PP.problem |= PROBLEM_CLOSED; //ZDD
481
+ II->flag |= ITEMSET_PRE_FREQ; //ZDD
482
+ break; //ZDD
483
+ case 10: //ZDD
484
+ LCM_PP.problem |= PROBLEM_FREQSET; //ZDD
485
+ II->flag |= ITEMSET_PRE_FREQ; //ZDD
486
+ II->flag |= ITEMSET_ALL; //ZDD
487
+ break; //ZDD
488
+ case 2: //ZDD
489
+ LCM_PP.problem |= PROBLEM_MAXIMAL; //ZDD
490
+ break; //ZDD
491
+ case 1: //ZDD
492
+ LCM_PP.problem |= PROBLEM_CLOSED; //ZDD
493
+ break; //ZDD
494
+ case 0: //ZDD
495
+ default: //ZDD
496
+ LCM_PP.problem |= PROBLEM_FREQSET; //ZDD
497
+ II->flag |= ITEMSET_ALL; //ZDD
498
+ } //ZDD
499
+ LCM_PP.trsact_fname = fname1; //ZDD
500
+ LCM_PP.trsact_pfname = fname2; //ZDD
501
+ II->frq_lb = (WEIGHT)th; //ZDD
502
+ if ( ERROR_MES ) return (1);
503
+ TT->flag |= LOAD_PERM +TRSACT_FRQSORT +LOAD_DECSORT +LOAD_RM_DUP +TRSACT_MAKE_NEW +TRSACT_DELIV_SC +TRSACT_ALLOC_OCC + ((II->flag & ITEMSET_TRSACT_ID)?0: TRSACT_SHRINK) ;
504
+ if ( II->flag&ITEMSET_RULE ) TT->w_lb = -WEIGHTHUGE; else TT->w_lb = II->frq_lb;
505
+ SG->flag = LOAD_EDGE;
506
+ PROBLEM_init2 ( &LCM_PP, PROBLEM_PRINT_SHRINK + PROBLEM_PRINT_FRQ ); //ZDD
507
+ if ( !ERROR_MES ){
508
+ LCM_init(&LCM_PP); //ZDD
509
+ } //ZDD
510
+ return 0; //ZDD
511
+ } //ZDD
512
+
513
+ CtoI CtoI_Lcm2 () { //ZDD
514
+ CtoI F, G; //ZDD
515
+ if ( ERROR_MES ) { //ZDD
516
+ PROBLEM_end( &LCM_PP ); //ZDD
517
+ return CtoI(-1); //ZDD
518
+ } //ZDD
519
+ else //ZDD
520
+ { //ZDD
521
+ F = CtoI(0); //ZDD
522
+ if ( !ERROR_MES ) G = LCM (&LCM_PP, LCM_PP.TT.T.clms, &LCM_PP.oo, LCM_PP.TT.total_w_org, LCM_PP.TT.total_pw_org); //ZDD
523
+ else G = CtoI(0); //ZDD
524
+ F = CtoI_Union(F, G); //ZDD
525
+ ITEMSET_last_output (&LCM_PP.II); //ZDD
526
+ }
527
+
528
+ LCM_PP.TT.sc = NULL; //ZDD
529
+ PROBLEM_end (&LCM_PP); //ZDD
530
+ return F; //ZDD
531
+ }
532
+
533
+ /*******************************************************************************/
534
+ #ifndef _NO_MAIN_
535
+ #define _NO_MAIN_
536
+ int main (int argc, char *argv[]){
537
+ return (LCM_main (argc, argv));
538
+ }
539
+ #endif
540
+ /*******************************************************************************/
541
+
542
+ #endif
543
+
544
+
545
+
546
+ int CtoI_Lcm1_ub ( char *fname1, char *fname2, int th, int param, int len_ub ) { //ZDD
547
+ ITEMSET *II = &LCM_PP.II; //ZDD
548
+ TRSACT *TT = &LCM_PP.TT; //ZDD
549
+ SGRAPH *SG = &LCM_PP.SG; //ZDD
550
+
551
+ ERROR_MES = 0; //ZDD
552
+ PROBLEM_init ( &LCM_PP ); //ZDD
553
+ switch(param) //ZDD
554
+ { //ZDD
555
+ case 12: //ZDD
556
+ LCM_PP.problem |= PROBLEM_MAXIMAL; //ZDD
557
+ II->flag |= ITEMSET_PRE_FREQ; //ZDD
558
+ break; //ZDD
559
+ case 11: //ZDD
560
+ LCM_PP.problem |= PROBLEM_CLOSED; //ZDD
561
+ II->flag |= ITEMSET_PRE_FREQ; //ZDD
562
+ break; //ZDD
563
+ case 10: //ZDD
564
+ LCM_PP.problem |= PROBLEM_FREQSET; //ZDD
565
+ II->flag |= ITEMSET_PRE_FREQ; //ZDD
566
+ II->flag |= ITEMSET_ALL; //ZDD
567
+ break; //ZDD
568
+ case 2: //ZDD
569
+ LCM_PP.problem |= PROBLEM_MAXIMAL; //ZDD
570
+ break; //ZDD
571
+ case 1: //ZDD
572
+ LCM_PP.problem |= PROBLEM_CLOSED; //ZDD
573
+ break; //ZDD
574
+ case 0: //ZDD
575
+ default: //ZDD
576
+ LCM_PP.problem |= PROBLEM_FREQSET; //ZDD
577
+ II->flag |= ITEMSET_ALL; //ZDD
578
+ } //ZDD
579
+ LCM_PP.trsact_fname = fname1; //ZDD
580
+ LCM_PP.trsact_pfname = fname2; //ZDD
581
+ II->frq_lb = (WEIGHT)th; //ZDD
582
+
583
+ II->ub = len_ub; // ham
584
+
585
+ if ( ERROR_MES ) return (1);
586
+ TT->flag |= LOAD_PERM +TRSACT_FRQSORT +LOAD_DECSORT +LOAD_RM_DUP +TRSACT_MAKE_NEW +TRSACT_DELIV_SC +TRSACT_ALLOC_OCC + ((II->flag & ITEMSET_TRSACT_ID)?0: TRSACT_SHRINK) ;
587
+ if ( II->flag&ITEMSET_RULE ) TT->w_lb = -WEIGHTHUGE; else TT->w_lb = II->frq_lb;
588
+ SG->flag = LOAD_EDGE;
589
+ PROBLEM_init2 ( &LCM_PP, PROBLEM_PRINT_SHRINK + PROBLEM_PRINT_FRQ ); //ZDD
590
+
591
+ if ( !ERROR_MES ){
592
+ LCM_init(&LCM_PP); //ZDD
593
+ } //ZDD
594
+ return 0; //ZDD
595
+ } //ZDD
596
+