nysol-zdd 3.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/ext/zdd_so/BDD.cc +495 -0
- data/ext/zdd_so/BDD.h +356 -0
- data/ext/zdd_so/BDDDG.cc +1818 -0
- data/ext/zdd_so/BDDDG.h +107 -0
- data/ext/zdd_so/BDDHASH.cc +91 -0
- data/ext/zdd_so/BtoI.cc +503 -0
- data/ext/zdd_so/BtoI.h +144 -0
- data/ext/zdd_so/CtoI.cc +1072 -0
- data/ext/zdd_so/CtoI.h +186 -0
- data/ext/zdd_so/MLZBDDV.cc +153 -0
- data/ext/zdd_so/MLZBDDV.h +42 -0
- data/ext/zdd_so/SOP.cc +608 -0
- data/ext/zdd_so/SOP.h +199 -0
- data/ext/zdd_so/ZBDD.cc +1035 -0
- data/ext/zdd_so/ZBDD.h +243 -0
- data/ext/zdd_so/ZBDDDG.cc +1834 -0
- data/ext/zdd_so/ZBDDDG.h +105 -0
- data/ext/zdd_so/ZBDDHASH.cc +91 -0
- data/ext/zdd_so/bddc.c +2816 -0
- data/ext/zdd_so/bddc.h +132 -0
- data/ext/zdd_so/extconf.rb +25 -0
- data/ext/zdd_so/include/aheap.c +211 -0
- data/ext/zdd_so/include/aheap.h +111 -0
- data/ext/zdd_so/include/base.c +93 -0
- data/ext/zdd_so/include/base.h +60 -0
- data/ext/zdd_so/include/itemset.c +473 -0
- data/ext/zdd_so/include/itemset.h +153 -0
- data/ext/zdd_so/include/problem.c +371 -0
- data/ext/zdd_so/include/problem.h +160 -0
- data/ext/zdd_so/include/queue.c +518 -0
- data/ext/zdd_so/include/queue.h +177 -0
- data/ext/zdd_so/include/sgraph.c +331 -0
- data/ext/zdd_so/include/sgraph.h +170 -0
- data/ext/zdd_so/include/stdlib2.c +832 -0
- data/ext/zdd_so/include/stdlib2.h +746 -0
- data/ext/zdd_so/include/trsact.c +723 -0
- data/ext/zdd_so/include/trsact.h +167 -0
- data/ext/zdd_so/include/vec.c +583 -0
- data/ext/zdd_so/include/vec.h +159 -0
- data/ext/zdd_so/lcm-vsop.cc +596 -0
- data/ext/zdd_so/print.cc +683 -0
- data/ext/zdd_so/table.cc +330 -0
- data/ext/zdd_so/vsop.h +88 -0
- data/ext/zdd_so/zdd_so.cpp +3277 -0
- data/lib/nysol/zdd.rb +31 -0
- metadata +131 -0
@@ -0,0 +1,93 @@
|
|
1
|
+
/*
|
2
|
+
blocked memory allocation library
|
3
|
+
12/Mar/2002 by Takeaki Uno e-mail:uno@nii.jp,
|
4
|
+
homepage: http://research.nii.ac.jp/~uno/index.html */
|
5
|
+
/* This program is available for only academic use, basically.
|
6
|
+
Anyone can modify this program, but he/she has to write down
|
7
|
+
the change of the modification on the top of the source code.
|
8
|
+
Neither contact nor appointment to Takeaki Uno is needed.
|
9
|
+
If one wants to re-distribute this code, do not forget to
|
10
|
+
refer the newest code, and show the link to homepage of
|
11
|
+
Takeaki Uno, to notify the news about the codes for the users.
|
12
|
+
For the commercial use, please make a contact to Takeaki Uno. */
|
13
|
+
|
14
|
+
#ifndef _base_c_
|
15
|
+
#define _base_c_
|
16
|
+
|
17
|
+
#include"base.h"
|
18
|
+
|
19
|
+
/* Template value for an unused BASE; BASE_alloc() and BASE_end() copy it into
   the structure. The initializer is positional and follows the field order of
   the BASE struct in base.h: type, base, block_siz, block_num, unit, num,
   block_end (-1), dellist. */
BASE INIT_BASE = {TYPE_BASE,NULL,0,0,0,0,-1,NULL};
|
20
|
+
|
21
|
+
/* initialization, and allocate memory for header */
|
22
|
+
/* Initialize *B as an empty allocator.
   unit      : size in bytes of one cell
   block_siz : number of cells held by one block
   Only the array of block pointers is allocated here; the first block itself
   is obtained lazily by BASE_get_memory(). */
void BASE_alloc (BASE *B, int unit, int block_siz){
  const int header_slots = 20;   /* initial length of the block-pointer array */

  *B = INIT_BASE;
  B->unit      = unit;
  B->block_siz = block_siz;
  B->block_num = -1;             /* no block is in use yet */
  B->num       = block_siz;      /* "current block is full": forces a new block on first request */
  B->dellist   = B;              /* the BASE address itself marks an empty deleted-cell list */
  calloc2 (B->base, header_slots, "BASE_alloc: B->base", EXIT);
  B->block_end = header_slots;   /* set only after the allocation succeeded */
}
|
32
|
+
|
33
|
+
/* termination */
|
34
|
+
/* Release every block, then the block-pointer array, and reset *B to INIT_BASE. */
void BASE_end (BASE *B){
  int blk;

  FLOOP (blk, 0, B->block_end){
    free2 (B->base[blk]);
  }
  free2 (B->base);
  *B = INIT_BASE;
}
|
40
|
+
|
41
|
+
/* return pointer to the cell corresponding to the given index */
|
42
|
+
/* Translate a cell index into the address of that cell.
   The index is split with BASE_BLOCK: the quotient selects the block, the
   remainder selects the cell inside it (scaled by the cell size B->unit). */
void *BASE_pnt (BASE *B, size_t i){
  size_t blk  = i / BASE_BLOCK;
  size_t cell = i % BASE_BLOCK;

  return (B->base[blk] + cell * B->unit);
}
|
45
|
+
/* return index corresponding to the given pointer */
|
46
|
+
/* Return the index of the cell pointed to by x (the inverse of BASE_pnt()).
   Every slot of the block-pointer array is examined; the first block whose
   address range [base[i], base[i] + unit*BASE_BLOCK) contains x determines
   the result i*BASE_BLOCK + (byte offset / unit).
   Returns 0 when x does not fall inside any block (indistinguishable from
   the index of the very first cell).

   Differences from the previous version:
    - the scan stops at block_end, the length of the pointer array, instead
      of reading one slot past it;
    - slots that hold no block (NULL) are skipped rather than used in
      pointer arithmetic;
    - the upper bound is exclusive, so a pointer one past a block's range is
      no longer attributed to that block;
    - the range size is computed in size_t to avoid int overflow. */
size_t BASE_index (BASE *B, void *x){
  size_t i;
  char *p = (char *)x;

  if ( B->base == NULL || B->block_end <= 0 ) return (0);
  FLOOP (i, 0, (size_t)B->block_end){
    char *blk = B->base[i];
    if ( blk == NULL ) continue;
    if ( p >= blk && p < blk + (size_t)B->unit * BASE_BLOCK )
        return ( i*BASE_BLOCK + ((size_t)(p - blk)) / B->unit);
  }
  return (0);
}
|
54
|
+
|
55
|
+
/* increment the current memory block pointer and (re)allocate memory if necessary */
|
56
|
+
/* Reserve i consecutive cells and return the address of the first one.
   The cells are taken from the current block when the advanced position
   stays below block_siz; otherwise the next block is started (the pointer
   array is grown and the block allocated if needed) and the cells are taken
   from its beginning. */
void *BASE_get_memory (BASE *B, int i){
  B->num += i;
  if ( B->num < B->block_siz ){
      /* still inside the current block: hand out the cells just skipped over */
    return (B->base[B->block_num] + (B->num-i)*B->unit);
  }

    /* reached the end of the current block: move on to the next one */
  B->num = i;
  B->block_num++;
  reallocx(B->base, B->block_end, B->block_num, NULL, "BASE:block", EXIT0);
  if ( B->base[B->block_num] == NULL ){
    malloc2 (B->base[B->block_num], B->block_siz*B->unit, "BASE_new: base", EXIT0);
  }
  return (B->base[B->block_num]);
}
|
68
|
+
|
69
|
+
|
70
|
+
/* allocate new cell */
|
71
|
+
/* Allocate one cell: reuse the most recently deleted cell when the deleted
   list is non-empty, otherwise take a fresh cell from the block storage. */
void *BASE_new (BASE *B){
  char *cell;

    /* dellist pointing at the BASE itself means "no deleted cell" */
  if ( B->dellist == ((void *)B) ){
    cell = (char *)BASE_get_memory (B, 1);
    return (cell);
  }

    /* pop the head of the deleted list; its first bytes store the next link */
  cell = (char *)B->dellist;
  B->dellist = (void *)(*((char **)cell));
  return (cell);
}
|
84
|
+
|
85
|
+
/* delete one cell. (add to the deleted list) */
|
86
|
+
/* Delete one cell by pushing it onto the head of the deleted list.
   The previous list head is written into the first bytes of the cell. */
void BASE_del (BASE *B, void *x){
  void **link = (void **)x;

  *link = B->dellist;
  B->dellist = x;
}
|
90
|
+
|
91
|
+
#endif
|
92
|
+
|
93
|
+
|
@@ -0,0 +1,60 @@
|
|
1
|
+
/*
|
2
|
+
blocked memory allocation library
|
3
|
+
12/Mar/2002 by Takeaki Uno e-mail:uno@nii.jp,
|
4
|
+
homepage: http://research.nii.ac.jp/~uno/index.html */
|
5
|
+
/* This program is available for only academic use, basically.
|
6
|
+
Anyone can modify this program, but he/she has to write down
|
7
|
+
the change of the modification on the top of the source code.
|
8
|
+
Neither contact nor appointment to Takeaki Uno is needed.
|
9
|
+
If one wants to re-distribute this code, do not forget to
|
10
|
+
refer the newest code, and show the link to homepage of
|
11
|
+
Takeaki Uno, to notify the news about the codes for the users.
|
12
|
+
For the commercial use, please make a contact to Takeaki Uno. */
|
13
|
+
|
14
|
+
|
15
|
+
#ifndef _base_h_
|
16
|
+
#define _base_h_
|
17
|
+
|
18
|
+
#include"stdlib2.h"
|
19
|
+
|
20
|
+
/* structure for base array */
|
21
|
+
#define BASE_UNIT 16
|
22
|
+
#define BASE_BLOCK 65536
|
23
|
+
|
24
|
+
/* Blocked allocator state. The field order must stay in sync with the
   positional initializer of INIT_BASE in base.c. */
typedef struct {
|
25
|
+
unsigned char type; /* structure tag; INIT_BASE sets it to TYPE_BASE */
|
26
|
+
char **base; /* array of block pointers; block k is base[k] */
|
27
|
+
int block_siz; // number of cells in one block (a block is block_siz*unit bytes)
|
28
|
+
int block_num; // index of the block currently being filled (-1 right after BASE_alloc)
|
29
|
+
int unit; // size in bytes of one cell
|
30
|
+
int num; // number of cells already handed out from the current block
|
31
|
+
int block_end; // number of slots allocated in the base[] pointer array
|
32
|
+
void *dellist; /* head of the deleted-cell list; equals the BASE address itself when empty */
|
33
|
+
} BASE;
|
34
|
+
|
35
|
+
extern BASE INIT_BASE;
|
36
|
+
|
37
|
+
/* initialization, and allocate memory for header */
|
38
|
+
void BASE_alloc (BASE *B, int unit, int block_siz);
|
39
|
+
|
40
|
+
/* termination */
|
41
|
+
void BASE_end (BASE *B);
|
42
|
+
|
43
|
+
/* return pointer to the cell corresponding to the given index */
|
44
|
+
void *BASE_pnt (BASE *B, size_t i);
|
45
|
+
|
46
|
+
/* return index corresponding to the given pointer */
|
47
|
+
size_t BASE_index (BASE *B, void *x);
|
48
|
+
|
49
|
+
/* increment the current memory block pointer and (re)allocate memory if necessary */
|
50
|
+
void *BASE_get_memory (BASE *B, int i);
|
51
|
+
|
52
|
+
/* allocate new cell */
|
53
|
+
void *BASE_new (BASE *B);
|
54
|
+
|
55
|
+
/* delete one cell. (add to the deleted list) */
|
56
|
+
void BASE_del (BASE *B, void *x);
|
57
|
+
|
58
|
+
|
59
|
+
#endif
|
60
|
+
|
@@ -0,0 +1,473 @@
|
|
1
|
+
/* itemset search input/output common routines
|
2
|
+
25/Nov/2007 by Takeaki Uno e-mail:uno@nii.jp,
|
3
|
+
homepage: http://research.nii.ac.jp/~uno/index.html */
|
4
|
+
/* This program is available for only academic use, basically.
|
5
|
+
Anyone can modify this program, but he/she has to write down
|
6
|
+
the change of the modification on the top of the source code.
|
7
|
+
Neither contact nor appointment to Takeaki Uno is needed.
|
8
|
+
If one wants to re-distribute this code, do not forget to
|
9
|
+
refer the newest code, and show the link to homepage of
|
10
|
+
Takeaki Uno, to notify the news about the codes for the users.
|
11
|
+
For the commercial use, please make a contact to Takeaki Uno. */
|
12
|
+
|
13
|
+
/* routines for itemset mining */
|
14
|
+
|
15
|
+
#ifndef _itemset_c_
|
16
|
+
#define _itemset_c_
|
17
|
+
|
18
|
+
#include"itemset.h"
|
19
|
+
#include"queue.c"
|
20
|
+
#include"aheap.c"
|
21
|
+
|
22
|
+
/* flush the write buffer, available for multi-core mode */
|
23
|
+
/* Flush the write buffer fp under the output lock.
   When ITEMSET_MULTI_OUTPUT is set the flush is skipped until more than
   half of the buffer (FILE2_BUFSIZ/2) is in use; otherwise it is always done. */
void ITEMSET_flush (ITEMSET *I, FILE2 *fp){
  if ( (I->flag&ITEMSET_MULTI_OUTPUT) && (fp->buf-fp->buf_org) <= FILE2_BUFSIZ/2 ) return;

  SPIN_LOCK(I->multi_core, I->lock_output);
  FILE2_flush (fp);
  SPIN_UNLOCK(I->multi_core, I->lock_output);
}
|
30
|
+
|
31
|
+
/* Output information about ITEMSET structure. flag&1: print frequency constraint */
|
32
|
+
/* Report the constraints stored in I through print_err().
   - the itemset-size line is printed when a lower bound (lb > 0) or an upper
     bound (ub < INTHUGE) is set;
   - flag&1: also print the frequency constraint line.
   Each line ends with exactly one newline; the upper-bound format used to
   carry its own "\n" in addition to the unconditional one, which produced a
   blank line. */
void ITEMSET_print (ITEMSET *I, int flag){
  if ( I->lb>0 || I->ub<INTHUGE ){
    if ( I->lb > 0 ) print_err ("%d <= ", I->lb);
    print_err ("itemsets ");
    if ( I->ub < INTHUGE ) print_err (" <= %d", I->ub);
    print_err ("\n");
  }
  if ( flag&1 ){
    if ( I->frq_lb > -WEIGHTHUGE ) print_err (WEIGHTF" <=", I->frq_lb);
    print_err (" frequency ");
    if ( I->frq_ub < WEIGHTHUGE ) print_err (" <="WEIGHTF, I->frq_ub);
    print_err ("\n");
  }
}
|
46
|
+
|
47
|
+
/* ITEMSET initialization */
|
48
|
+
/* Put every member of *I into its default state: counters zeroed, bounds
   opened up to their extreme values, pointers cleared, and the embedded
   heap/queues set to their INIT_* templates. No memory is allocated. */
void ITEMSET_init (ITEMSET *I){
    /* mode flags and scalar settings */
  I->flag = 0;
  I->dir = 0;
  I->multi_core = 0;
  I->target = INTHUGE;

    /* counters */
  I->iters = I->iters2 = I->iters3 = 0;
  I->solutions = I->solutions2 = I->max_solutions = I->outputs = I->outputs2 = 0;
  I->item_max = I->item_max_org = 0;

    /* current weights / ratios */
  I->frq = I->pfrq = I->total_weight = 0;
  I->ratio = I->prob = 0.0;

    /* size, length and gap bounds */
  I->lb = I->len_lb = I->gap_lb = 0;
  I->ub = I->len_ub = I->gap_ub = INTHUGE;

    /* weight bounds */
  I->posi_lb = I->nega_lb = I->frq_lb = I->setrule_lb = -WEIGHTHUGE;
  I->posi_ub = I->nega_ub = I->frq_ub = WEIGHTHUGE;

    /* probability / ratio bounds */
  I->prob_lb = I->ratio_lb = I->rposi_lb = 0;
  I->prob_ub = I->ratio_ub = I->rposi_ub = 1;

    /* embedded containers */
  I->topk.end = 0;
  I->topk = INIT_AHEAP;
  I->itemset = I->add = INIT_QUEUE;

    /* owned arrays and handles */
  I->itemflag = NULL;
  I->perm = NULL;
  I->item_frq = NULL;
  I->sc = NULL;
  I->X = NULL;
  I->fp = NULL;
  I->set_weight = NULL;
  I->set_occ = NULL;

    /* per-core arrays */
  I->multi_iters = I->multi_iters2 = I->multi_iters3 = NULL;
  I->multi_outputs = I->multi_outputs2 = NULL;
  I->multi_solutions = I->multi_solutions2 = NULL;
  I->multi_fp = NULL;
}
|
81
|
+
|
82
|
+
|
83
|
+
/* second initialization
|
84
|
+
topk.end>0 => initialize heap for topk mining */
|
85
|
+
/* all pointers will be set to 0, but not for */
|
86
|
+
/* if topK mining, set topk.end to "K" */
|
87
|
+
/* Second-stage initialization: allocate the working storage of *I.
   I            : structure already prepared by ITEMSET_init() with its flags
                  (and topk.end / multi_core, if wanted) set by the caller
   fname        : output file name, or NULL for no output file
   perm         : item permutation table stored in I->perm (may be NULL)
   item_max     : number of items
   item_max_org : original number of items; used for the buffer size when
                  ITEMSET_USE_ORG is set
   If I->topk.end > 0 the top-k heap is allocated with that many entries, all
   set to -WEIGHTHUGE.
   On any allocation/open failure control jumps to ERR, which calls
   ITEMSET_end(I); note that ITEMSET_end() also frees I->perm, i.e. the table
   passed in by the caller.
   Change: the failure message of the set_occ allocation now names set_occ
   (it used to repeat "set_weight"). */
void ITEMSET_init2 (ITEMSET *I, char *fname, PERM *perm, QUEUE_INT item_max, size_t item_max_org){
  LONG i;
  size_t siz = (I->flag&ITEMSET_USE_ORG)?item_max_org+2: item_max+2;
  I->prob = I->ratio = 1.0;
  I->frq = 0;
  I->perm = perm;
  if ( I->topk.end>0 ){
    AHEAP_alloc (&I->topk, I->topk.end);
    FLOOP (i, 0, I->topk.end) AHEAP_chg (&I->topk, (AHEAP_ID)i, -WEIGHTHUGE);
    I->frq_lb = -WEIGHTHUGE;
  } else I->topk.v = NULL;
  QUEUE_alloc (&I->itemset, (QUEUE_ID)siz); I->itemset.end = (QUEUE_ID)siz;
  if ( I->flag&ITEMSET_ADD ) QUEUE_alloc (&I->add, (QUEUE_ID)siz);
    /* sc[k] counts the itemsets of size k */
  calloc2 (I->sc, siz+2, "ITEMSET_init2: sc", goto ERR);
  if ( I->flag&ITEMSET_SET_RULE ){
    calloc2 (I->set_weight, siz, "ITEMSET_init2: set_weight", goto ERR);
    if ( I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT) )
        calloc2 (I->set_occ, siz, "ITEMSET_init2: set_occ", goto ERR);
  }
  I->iters = I->iters2 = I->solutions = 0;
  I->item_max = item_max;
  I->item_max_org = (QUEUE_INT)item_max_org;
  if ( fname ){ fopen2 (I->fp, fname, "w", "ITEMSET_init2", goto ERR);}
  else I->fp = 0;
  if ( I->flag&ITEMSET_ITEMFRQ )
      malloc2 (I->item_frq, item_max+2, "ITEMSET_init2: item_frqs", goto ERR);
  if ( I->flag&ITEMSET_RULE ){
    calloc2 (I->itemflag, item_max+2, "ITEMSET_init2: item_flag", goto ERR);
  }
  I->total_weight = 1;

    /* per-core counters and output buffers (multi_core+1 entries each) */
  calloc2 (I->multi_iters, I->multi_core+1, "ITEMSET_init2: multi_iters", goto ERR);
  calloc2 (I->multi_iters2, I->multi_core+1, "ITEMSET_init2: multi_iters2", goto ERR);
  calloc2 (I->multi_iters3, I->multi_core+1, "ITEMSET_init2: multi_iters3", goto ERR);
  calloc2 (I->multi_outputs, I->multi_core+1, "ITEMSET_init2: multi_outputs", goto ERR);
  calloc2 (I->multi_outputs2, I->multi_core+1, "ITEMSET_init2: multi_outputs2", goto ERR);
  calloc2 (I->multi_solutions, I->multi_core+1, "ITEMSET_init2: multi_solutions", goto ERR);
  calloc2 (I->multi_solutions2, I->multi_core+1, "ITEMSET_init2: multi_solutions2", goto ERR);
  calloc2 (I->multi_fp, I->multi_core+1, "ITEMSET_init2: multi_fp", goto ERR);

  FLOOP (i, 0, MAX(I->multi_core,1))
      FILE2_open_ (I->multi_fp[i], I->fp, "ITEMSET_init2: multi_fp[i]", goto ERR);
#ifdef MULTI_CORE
  if ( I->multi_core > 0 ){
    pthread_spin_init (&I->lock_counter, PTHREAD_PROCESS_PRIVATE);
    pthread_spin_init (&I->lock_sc, PTHREAD_PROCESS_PRIVATE);
    pthread_spin_init (&I->lock_output, PTHREAD_PROCESS_PRIVATE);
  }
#endif
  return;
  ERR:;
  ITEMSET_end (I);
  EXIT;
}
|
141
|
+
|
142
|
+
/* sum the counters computed by each thread */
|
143
|
+
/* Add the per-core counters into the global counters of *I and flush each
   per-core output buffer. Entries 0 .. MAX(multi_core,1)-1 are processed. */
void ITEMSET_merge_counters (ITEMSET *I){
  int core;

  FLOOP (core, 0, MAX(I->multi_core,1)){
    I->solutions  += I->multi_solutions[core];
    I->solutions2 += I->multi_solutions2[core];
    I->outputs    += I->multi_outputs[core];
    I->outputs2   += I->multi_outputs2[core];
    I->iters      += I->multi_iters[core];
    I->iters2     += I->multi_iters2[core];
    I->iters3     += I->multi_iters3[core];
    FILE2_flush ( &I->multi_fp[core]);
  }
}
|
156
|
+
|
157
|
+
/*******************************************************************/
|
158
|
+
/* termination of ITEMSET */
|
159
|
+
/*******************************************************************/
|
160
|
+
/* Release everything owned by *I (queues, heap, output file, work arrays,
   per-core buffers and, in multi-core builds, the spin locks) and bring the
   structure back to its ITEMSET_init() state. I->perm is freed here as well. */
void ITEMSET_end (ITEMSET *I){
  int core;

    /* containers and output file */
  QUEUE_end (&I->itemset);
  QUEUE_end (&I->add);
  AHEAP_end (&I->topk);
  fclose2 ( I->fp);

    /* work arrays */
  mfree (I->sc, I->item_frq, I->itemflag);
  mfree (I->perm, I->set_weight, I->set_occ);

    /* per-core write buffers first, then the arrays that hold them */
  if ( I->multi_fp ){
    FLOOP (core, 0, MAX(I->multi_core,1)){
      free2 (I->multi_fp[core].buf);
    }
  }
  mfree (I->multi_fp, I->multi_iters, I->multi_iters2, I->multi_iters3);
  mfree (I->multi_outputs, I->multi_outputs2, I->multi_solutions, I->multi_solutions2);
#ifdef MULTI_CORE
  if ( I->multi_core>0 ){
    pthread_spin_destroy(&I->lock_counter);
    pthread_spin_destroy(&I->lock_sc);
    pthread_spin_destroy(&I->lock_output);
  }
#endif
  ITEMSET_init (I);
}
|
181
|
+
|
182
|
+
/*******************************************************************/
|
183
|
+
/* output at the termination of the algorithm */
|
184
|
+
/* print #of itemsets of size k, for each k */
|
185
|
+
/*******************************************************************/
|
186
|
+
/* Final report at the end of the algorithm.
   The per-core counters are merged first. In top-k mode (topk.end > 0) only
   the minimum weight held in the heap is printed to stdout. Otherwise, when
   SHOW_MESSAGE is set, the total number of itemsets and the number of
   itemsets of each size k (up to the largest size with a non-zero count) go
   to stdout, and the iteration counters go through print_err(). */
void ITEMSET_last_output (ITEMSET *I){
  QUEUE_ID k;
  unsigned long long total=0, last_size=0;

  ITEMSET_merge_counters (I);

  if ( I->topk.end > 0 ){
    k = AHEAP_findmin_head (&I->topk);
    fprint_WEIGHT (stdout, AHEAP_H (I->topk, k));
    printf ("\n");
    return;
  }

    /* sum the size histogram and remember the largest size that occurred */
  FLOOP (k, 0, I->itemset.end+1){
    total += I->sc[k];
    if ( I->sc[k] != 0 ) last_size = k;
  }

  if ( !(I->flag&SHOW_MESSAGE) ) return;

  if ( total!=0 ){
    printf ("%llu\n", total);
    FLOOP (k, 0, last_size+1){
      printf ("%llu\n", I->sc[k]);
    }
  }
  print_err ("iters=%lld", I->iters);
  if ( I->flag&ITEMSET_ITERS2 ){
    print_err (", iters2=%lld", I->iters2);
  }
  print_err ("\n");
}
|
210
|
+
|
211
|
+
/* output frequency, coverage */
|
212
|
+
/* Write the frequency information of the current itemset to the output
   buffer of core core_id.
   - ITEMSET_FREQ / ITEMSET_PRE_FREQ: "(frq)", with a leading blank for
     ITEMSET_FREQ and a trailing blank for ITEMSET_PRE_FREQ;
   - ITEMSET_OUTPUT_POSINEGA: " (pfrq,pfrq-frq,pfrq/(2*pfrq-frq))".
   NOTE(review): the last ratio divides by 2*pfrq-frq without checking for
   zero -- confirm that callers exclude that case. */
void ITEMSET_output_frequency (ITEMSET *I, int core_id){
  FILE2 *out = &I->multi_fp[core_id];

  if ( I->flag&(ITEMSET_FREQ+ITEMSET_PRE_FREQ) ){
    if ( I->flag&ITEMSET_FREQ ){
      FILE2_putc (out, ' ');
    }
    FILE2_print_WEIGHT (out, I->frq, 4, '(');
    FILE2_putc (out, ')');
    if ( I->flag&ITEMSET_PRE_FREQ ){
      FILE2_putc (out, ' ');
    }
  }

  if ( !(I->flag&ITEMSET_OUTPUT_POSINEGA) ) return;

    /* positive sum, negative sum and their ratio in the occurrence */
  FILE2_putc (out, ' ');
  FILE2_print_WEIGHT (out, I->pfrq, 4, '(');
  FILE2_print_WEIGHT (out, I->pfrq-I->frq, 4, ',');
  FILE2_print_WEIGHT (out, I->pfrq/(2*I->pfrq-I->frq), 4, ',');
  FILE2_putc (out, ')');
}
|
228
|
+
|
229
|
+
#ifdef _trsact_h_
|
230
|
+
/* Write the occurrence list occ (one line, blank separated) to the output
   buffer of core core_id. I->X is used as a TRSACT.
   - each entry is mapped through TT->trperm when that table exists;
   - with ITEMSET_RM_DUP_TRSACT an entry equal to the previous one is skipped
     (the comparison starts from TT->rows_org);
   - ITEMSET_MULTI_OCC_PRINT alone prints all following words of the
     occurrence unit; together with ITEMSET_TRSACT_ID only the next word;
   - the buffer is offered for flushing every 256 entries. */
void ITEMSET_output_occ (ITEMSET *I, QUEUE *occ, int core_id){
  QUEUE_ID cnt;
  QUEUE_INT *cur;
  FILE2 *out = &I->multi_fp[core_id];
  TRSACT *TT = (TRSACT *)(I->X);
  VEC_ID w, prev = TT->rows_org;
  int mode = I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT);

  cnt=0;
  MQUE_FLOOP_ (*occ, cur, TT->occ_unit){
    if ( (I->flag&ITEMSET_RM_DUP_TRSACT)==0 || *cur != prev ){
      FILE2_print_int (out, TT->trperm? TT->trperm[*cur]: *cur,' ');
      if ( mode == ITEMSET_MULTI_OCC_PRINT ){
        FLOOP (w, 1, (VEC_ID)(TT->occ_unit/sizeof(QUEUE_INT))){
          FILE2_print_int (out, cur[w], ' ');
        }
      } else if ( mode == (ITEMSET_MULTI_OCC_PRINT+ITEMSET_TRSACT_ID) ){
        FILE2_print_int (out, cur[1], ' ');
      }
    }
    prev = *cur;
    cnt++;
    if ( cnt%256==0 ) ITEMSET_flush (I, out);
  }
  FILE2_putc (out, '\n');
}
|
253
|
+
#endif
|
254
|
+
|
255
|
+
/* output an itemset to the output file */
|
256
|
+
/* Output the current itemset I->itemset on behalf of core core_id.
   Steps, in order:
    1. count the candidate and optionally print a progress message;
    2. return if the size or (unless ITEMSET_IGNORE_BOUND) the frequency /
       positive / negative weights are outside their bounds;
    3. count the solution; if max_solutions is exceeded, print the final
       report, set ERROR_MES and return;
    4. top-k mode: replace the heap minimum when the frequency is larger and
       raise frq_lb to the new minimum;
       file mode: write frequency, items (or edges) and occurrences;
    5. increment the size histogram and flush the buffer. */
void ITEMSET_output_itemset (ITEMSET *I, QUEUE *occ, int core_id){
  QUEUE_ID pos;
  QUEUE_INT elem;
  FILE2 *out = &I->multi_fp[core_id];

  I->multi_outputs[core_id]++;
  if ( (I->flag&SHOW_PROGRESS ) && (I->iters%(ITEMSET_INTERVAL) == 0) ){
    print_err ("---- %lld solutions in %lld candidates\n", I->solutions, I->outputs);
  }

    /* bound checks */
  if ( I->itemset.t < I->lb || I->itemset.t > I->ub ) return;
  if ( (I->flag&ITEMSET_IGNORE_BOUND)==0 ){
    if ( I->frq < I->frq_lb || I->frq > I->frq_ub ) return;
    if ( I->pfrq < I->posi_lb || I->pfrq > I->posi_ub || (I->frq - I->pfrq) > I->nega_ub || (I->frq - I->pfrq) < I->nega_lb ) return;
  }

  I->multi_solutions[core_id]++;
  if ( I->max_solutions>0 && I->solutions > I->max_solutions ){
    ITEMSET_last_output (I);
    ERROR_MES = "reached to maximum number of solutions";
    EXIT;
  }

  if ( I->topk.v ){
      /* top-k mining: keep the k largest frequencies in the heap */
    elem = AHEAP_findmin_head (&(I->topk));
    if ( I->frq > AHEAP_H (I->topk, elem) ){
      AHEAP_chg (&(I->topk), elem, I->frq);
      elem = AHEAP_findmin_head (&(I->topk));
      I->frq_lb = AHEAP_H (I->topk, elem);
    }
  } else if ( I->fp ){
    if ( I->flag&ITEMSET_PRE_FREQ ) ITEMSET_output_frequency (I, core_id);
    if ( (I->flag & ITEMSET_NOT_ITEMSET) == 0 ){
#ifdef _agraph_h_
      if ( I->flag&ITEMSET_OUTPUT_EDGE ){
          /* print each element as an edge "(from,to)" */
        ARY_FLOOP (I->itemset, pos, elem){
          FILE2_print_int (out, AGRAPH_INC_FROM(*((AGRAPH *)(I->X)),elem,I->dir), '(' );
          FILE2_print_int (out, AGRAPH_INC_TO(*((AGRAPH *)(I->X)),elem,I->dir), ',');
          FILE2_putc (out, ')');
          if ( pos<I->itemset.t-1 ) FILE2_putc (out, ' ');
          if ( (pos+1)%256==0 ) ITEMSET_flush (I, out);
        }
      } else
#endif
      {
          /* print the items, mapped through perm when it exists */
        ARY_FLOOP (I->itemset, pos, elem){
          FILE2_print_int (out, I->perm? I->perm[elem]: elem, pos==0? 0: ' ');
          if ( (pos+1)%256==0 ) ITEMSET_flush (I, out);
        }
      }
    }
    if ( !(I->flag&ITEMSET_PRE_FREQ) ) ITEMSET_output_frequency (I, core_id);
    if ( ((I->flag & ITEMSET_NOT_ITEMSET) == 0) || (I->flag&ITEMSET_FREQ) || (I->flag&ITEMSET_PRE_FREQ) ){
      FILE2_putc (out, '\n');
    }

#ifdef _trsact_h_
    if (I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT)) ITEMSET_output_occ (I, occ, core_id);
#endif
  }

  I->sc[I->itemset.t]++;
  ITEMSET_flush (I, out);
}
|
314
|
+
|
315
|
+
/* output itemsets with adding all combination of "add"
|
316
|
+
at the first call, i has to be "add->t" */
|
317
|
+
/* Output the current itemset and, recursively, every itemset obtained by
   appending a combination of the items in I->add.
   I->add.t is used as the loop cursor and restored before a normal return;
   when ERROR_MES is set the function returns immediately without restoring. */
void ITEMSET_solution_iter (ITEMSET *I, QUEUE *occ, int core_id){
  QUEUE_ID saved_add_t = I->add.t;

  if ( I->itemset.t > I->ub ) return;

  ITEMSET_output_itemset (I, occ, core_id);
  if ( ERROR_MES ) return;

  BLOOP (I->add.t, I->add.t, 0){
      /* append add.v[add.t], enumerate below it, then take it off again */
    ARY_INS (I->itemset, I->add.v[I->add.t]);
    ITEMSET_solution_iter (I, occ, core_id);
    if ( ERROR_MES ) return;
    I->itemset.t--;
  }
  I->add.t = saved_add_t;
}
|
330
|
+
|
331
|
+
/* Report the solution made of I->itemset and the additional items I->add.
   - without ITEMSET_ALL: output the single itemset "itemset + add";
   - with ITEMSET_ALL and an output target (file or top-k heap): output every
     combination through ITEMSET_solution_iter();
   - with ITEMSET_ALL and no output target: only update the size histogram,
     adding the number of combinations of each size. */
void ITEMSET_solution (ITEMSET *I, QUEUE *occ, int core_id){
  QUEUE_ID i;
  LONG comb;

  if ( I->itemset.t > I->ub ) return;

  if ( !(I->flag & ITEMSET_ALL) ){
    FLOOP (i, 0, I->add.t){
      ARY_INS (I->itemset, I->add.v[i]);
    }
    ITEMSET_output_itemset (I, occ, core_id);
    I->itemset.t -= I->add.t;
    return;
  }

  if ( I->fp || I->topk.v ){
    ITEMSET_solution_iter (I, occ, core_id);
    return;
  }

    /* comb runs through the binomial coefficients C(add.t, i) */
  comb = 1;
  FLOOP (i, 0, I->add.t+1){
    I->sc[I->itemset.t+i] += comb;
    comb = comb*(I->add.t-i)/(i+1);
  }
}
|
349
|
+
|
350
|
+
/*************************************************************************/
|
351
|
+
/* output a rule */
|
352
|
+
/*************************************************************************/
|
353
|
+
/* Output one rule "(p1,p2) item <= itemset".
   p1, p2  : the two values printed in the parentheses
   item    : head item of the rule; printed through I->perm when that table
             exists, otherwise as is
   The "(p1,p2) item <= " prefix is written only when the core's buffer has
   an output file and top-k mode is off; the itemset part is then produced by
   ITEMSET_output_itemset() (ITEMSET_RULE set) or ITEMSET_solution().
   Change: I->perm is no longer dereferenced when it is NULL, matching
   ITEMSET_output_itemset(), which treats the table as optional. */
void ITEMSET_output_rule (ITEMSET *I, QUEUE *occ, double p1, double p2, size_t item, int core_id){
  FILE2 *fp = &I->multi_fp[core_id];
  if ( fp->fp && !(I->topk.v) ){
    FILE2_print_real (fp, p1, 4, '(');
    FILE2_print_real (fp, p2, 4, ',');
    FILE2_putc (fp, ')');
    FILE2_print_int (fp, I->perm? I->perm[item]: (QUEUE_INT)item, ' ');
    FILE2_puts (fp, " <= ");
  }
  if ( I->flag & ITEMSET_RULE ) ITEMSET_output_itemset (I, occ, core_id);
  else ITEMSET_solution (I, occ, core_id);
}
|
365
|
+
/*************************************************************************/
|
366
|
+
/* check all rules for a pair of itemset and item */
|
367
|
+
/*************************************************************************/
|
368
|
+
/* Check the rule "item <= current itemset" and output it when it passes.
   w[item] is the weight of the itemset extended by item; conf = w[item]/frq
   (forced to 0 when w[item] <= -WEIGHTHUGE).
   Items whose itemflag is 1 are skipped.
   - ITEMSET_RULE_FRQ / ITEMSET_RULE_INFRQ compare conf with ratio_lb / ratio_ub;
   - ITEMSET_RULE_RFRQ / ITEMSET_RULE_RINFRQ compare it relative to item_frq[item];
   - if none of these flags is set nothing is output.
   With ITEMSET_RULE_SUPP the printed values are w[item] and
   item_frq[item]*total_weight instead of conf and item_frq[item]. */
void ITEMSET_check_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, size_t item, int core_id){
  double conf = w[item]/I->frq, out1, out2;

  if ( I->itemflag[item]==1 ) return;
  if ( w[item] <= -WEIGHTHUGE ) conf = 0;

  out1 = conf;
  out2 = I->item_frq[item];
  if ( I->flag & ITEMSET_RULE_SUPP ){
    out1 = w[item];
    out2 *= I->total_weight;
  }

  if ( I->flag & (ITEMSET_RULE_FRQ+ITEMSET_RULE_INFRQ)){
    if ( (I->flag & ITEMSET_RULE_FRQ) && conf < I->ratio_lb ) return;
    if ( (I->flag & ITEMSET_RULE_INFRQ) && conf > I->ratio_ub ) return;
  } else if ( I->flag & (ITEMSET_RULE_RFRQ+ITEMSET_RULE_RINFRQ) ){
    if ( (I->flag & ITEMSET_RULE_RFRQ) && (1-conf) > I->ratio_lb * (1-I->item_frq[item]) ) return;
    if ( (I->flag & ITEMSET_RULE_RINFRQ) && conf > I->ratio_ub * I->item_frq[item] ) return;
  } else {
    return;   /* no rule criterion selected */
  }
  ITEMSET_output_rule (I, occ, out1, out2, item, core_id);
}
|
386
|
+
|
387
|
+
/*************************************************************************/
|
388
|
+
/* check all rules for an itemset and all items */
|
389
|
+
/*************************************************************************/
|
390
|
+
/* Check the current itemset against all constraints and output it, or the
   rules derived from it.
   w     : per-item weights handed on to ITEMSET_check_rule()
   occ   : occurrences of the current itemset
   jump  : items examined as rule heads in the RULE_FRQ/RULE_RFRQ case
   total : divisor for the relative frequency d = frq/total
   Flow:
    1. return when size or (unless ITEMSET_IGNORE_BOUND) weight bounds fail;
    2. ITEMSET_SET_RULE: for each split position i print
       "prefix => suffix (frq/set_weight[i])" when the ratio reaches setrule_lb;
    3. if the relative-frequency constraints (ITEMSET_RFRQ / ITEMSET_RINFRQ)
       hold: rule mining (ITEMSET_RULE) or plain output via ITEMSET_solution().
   Change: the loop over jump now stops at the first error (ERROR_MES), like
   the other rule loops; previously the error test sat outside the unbraced
   loop body and ran only after every jump item had been processed.
   Note: on an error return inside the I->add loop, I->add is left in its
   temporarily modified state (unchanged from the original behavior). */
void ITEMSET_check_all_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, QUEUE *jump, WEIGHT total, int core_id){
  QUEUE_ID i, t;
  QUEUE_INT e, f=0, *x;
  WEIGHT d = I->frq/total;

    /* checking out of range for itemset size and (posi/nega) frequency */
  if ( I->itemset.t+I->add.t < I->lb || I->itemset.t>I->ub || (!(I->flag&ITEMSET_ALL) && I->itemset.t+I->add.t>I->ub)) return;
  if ( !(I->flag&ITEMSET_IGNORE_BOUND) && (I->frq < I->frq_lb || I->frq > I->frq_ub) ) return;
  if ( !(I->flag&ITEMSET_IGNORE_BOUND) && (I->pfrq < I->posi_lb || I->pfrq > I->posi_ub || (I->frq - I->pfrq) > I->nega_ub || (I->frq - I->pfrq) < I->nega_lb) ) return;

  if ( I->flag&ITEMSET_SET_RULE ){  /* itemset->itemset rule for sequence mining */
    FLOOP (i, 0, I->itemset.t-1){
      if ( I->frq/I->set_weight[i] >= I->setrule_lb && I->fp ){
        I->sc[i]++;
        if ( I->flag&ITEMSET_PRE_FREQ ) ITEMSET_output_frequency (I, core_id);
        FLOOP (t, 0, I->itemset.t){
          FILE2_print_int (&I->multi_fp[core_id], I->itemset.v[t], t?' ':0);
          if ( t == i ){   /* the arrow goes after the i-th element */
            FILE2_putc (&I->multi_fp[core_id], ' ');
            FILE2_putc (&I->multi_fp[core_id], '=');
            FILE2_putc (&I->multi_fp[core_id], '>');
          }
        }
        if ( !(I->flag&ITEMSET_PRE_FREQ) ) ITEMSET_output_frequency ( I, core_id);
        FILE2_putc (&I->multi_fp[core_id], ' ');
        FILE2_print_real (&I->multi_fp[core_id], I->frq/I->set_weight[i], 4, '(');
        FILE2_putc (&I->multi_fp[core_id], ')');
        FILE2_putc (&I->multi_fp[core_id], '\n');
#ifdef _trsact_h_
        if ( I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT) )
            ITEMSET_output_occ (I, I->set_occ[i], core_id);
#endif
        ITEMSET_flush (I, &I->multi_fp[core_id]);
      }
    }
  }

    /* constraint of relational frequency */
  if ( ((I->flag&ITEMSET_RFRQ)==0 || d >= I->prob_lb * I->prob )
      && ((I->flag&ITEMSET_RINFRQ)==0 || d <= I->prob * I->prob_ub) ){
    if ( I->flag&ITEMSET_RULE ){  /* rule mining routines */
      if ( I->itemset.t == 0 ) return;
      if ( I->target < I->item_max ){
          /* a single target item is given: check only that rule */
        ITEMSET_check_rule (I, w, occ, I->target, core_id); if (ERROR_MES) return;
      } else {
        if ( I->flag & (ITEMSET_RULE_FRQ + ITEMSET_RULE_RFRQ) ){
          if ( I->add.t>0 ){
              /* take each item of add out in turn (its slot is filled with
                 the last item) and check it as a rule head */
            f = I->add.v[I->add.t-1]; t = I->add.t; I->add.t--;
            FLOOP (i, 0, t){
              e = I->add.v[i];
              I->add.v[i] = f;
              ITEMSET_check_rule (I, w, occ, e, core_id); if (ERROR_MES) return;
              I->add.v[i] = e;
            }
            I->add.t++;
          }
          MQUE_FLOOP (*jump, x){
            ITEMSET_check_rule (I, w, occ, *x, core_id);
            if (ERROR_MES) return;
          }
        } else {
          if ( I->flag & (ITEMSET_RULE_INFRQ + ITEMSET_RULE_RINFRQ) ){
            FLOOP (i, 0, I->item_max){
              if ( I->itemflag[i] != 1 ){
                ITEMSET_check_rule (I, w, occ, i, core_id); if (ERROR_MES) return;
              }
            }
          }
        }
      }
    } else {  /* usual mining (not rule mining) */
      if ( I->fp && (I->flag&(ITEMSET_RFRQ+ITEMSET_RINFRQ))){
        FILE2_print_real (&I->multi_fp[core_id], d, 4, '[');
        FILE2_print_real (&I->multi_fp[core_id], I->prob, 4, ',');
        FILE2_putc (&I->multi_fp[core_id], ']');
      }
      ITEMSET_solution (I, occ, core_id);
    }
  }
}
|
472
|
+
|
473
|
+
#endif
|