nysol-zdd 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ext/zdd_so/BDD.cc +495 -0
- data/ext/zdd_so/BDD.h +356 -0
- data/ext/zdd_so/BDDDG.cc +1818 -0
- data/ext/zdd_so/BDDDG.h +107 -0
- data/ext/zdd_so/BDDHASH.cc +91 -0
- data/ext/zdd_so/BtoI.cc +503 -0
- data/ext/zdd_so/BtoI.h +144 -0
- data/ext/zdd_so/CtoI.cc +1072 -0
- data/ext/zdd_so/CtoI.h +186 -0
- data/ext/zdd_so/MLZBDDV.cc +153 -0
- data/ext/zdd_so/MLZBDDV.h +42 -0
- data/ext/zdd_so/SOP.cc +608 -0
- data/ext/zdd_so/SOP.h +199 -0
- data/ext/zdd_so/ZBDD.cc +1035 -0
- data/ext/zdd_so/ZBDD.h +243 -0
- data/ext/zdd_so/ZBDDDG.cc +1834 -0
- data/ext/zdd_so/ZBDDDG.h +105 -0
- data/ext/zdd_so/ZBDDHASH.cc +91 -0
- data/ext/zdd_so/bddc.c +2816 -0
- data/ext/zdd_so/bddc.h +132 -0
- data/ext/zdd_so/extconf.rb +25 -0
- data/ext/zdd_so/include/aheap.c +211 -0
- data/ext/zdd_so/include/aheap.h +111 -0
- data/ext/zdd_so/include/base.c +93 -0
- data/ext/zdd_so/include/base.h +60 -0
- data/ext/zdd_so/include/itemset.c +473 -0
- data/ext/zdd_so/include/itemset.h +153 -0
- data/ext/zdd_so/include/problem.c +371 -0
- data/ext/zdd_so/include/problem.h +160 -0
- data/ext/zdd_so/include/queue.c +518 -0
- data/ext/zdd_so/include/queue.h +177 -0
- data/ext/zdd_so/include/sgraph.c +331 -0
- data/ext/zdd_so/include/sgraph.h +170 -0
- data/ext/zdd_so/include/stdlib2.c +832 -0
- data/ext/zdd_so/include/stdlib2.h +746 -0
- data/ext/zdd_so/include/trsact.c +723 -0
- data/ext/zdd_so/include/trsact.h +167 -0
- data/ext/zdd_so/include/vec.c +583 -0
- data/ext/zdd_so/include/vec.h +159 -0
- data/ext/zdd_so/lcm-vsop.cc +596 -0
- data/ext/zdd_so/print.cc +683 -0
- data/ext/zdd_so/table.cc +330 -0
- data/ext/zdd_so/vsop.h +88 -0
- data/ext/zdd_so/zdd_so.cpp +3277 -0
- data/lib/nysol/zdd.rb +31 -0
- metadata +131 -0
@@ -0,0 +1,93 @@
|
|
1
|
+
/*
|
2
|
+
blocked memory allocation library
|
3
|
+
12/Mar/2002 by Takeaki Uno e-mail:uno@nii.jp,
|
4
|
+
homepage: http://research.nii.ac.jp/~uno/index.html */
|
5
|
+
/* This program is available for only academic use, basically.
|
6
|
+
Anyone can modify this program, but he/she has to write down
|
7
|
+
the change of the modification on the top of the source code.
|
8
|
+
Neither contact nor appointment to Takeaki Uno is needed.
|
9
|
+
If one wants to re-distribute this code, do not forget to
|
10
|
+
refer the newest code, and show the link to homepage of
|
11
|
+
Takeaki Uno, to notify the news about the codes for the users.
|
12
|
+
For the commercial use, please make a contact to Takeaki Uno. */
|
13
|
+
|
14
|
+
#ifndef _base_c_
|
15
|
+
#define _base_c_
|
16
|
+
|
17
|
+
#include"base.h"
|
18
|
+
|
19
|
+
/* Template value of a cleared BASE, in the field order of the struct declared in base.h:
   type=TYPE_BASE, base=NULL, block_siz=0, block_num=0, unit=0, num=0, block_end=-1, dellist=NULL.
   BASE_alloc() and BASE_end() copy it into the structure they are given. */
BASE INIT_BASE = {TYPE_BASE,NULL,0,0,0,0,-1,NULL};
|
20
|
+
|
21
|
+
/* initialization, and allocate memory for header */
|
22
|
+
/* Set up B as an empty pool of cells.
     unit      : size of one cell (the byte count of a block is block_siz*unit)
     block_siz : number of cells per block
   No cell block is allocated here; only the block table B->base is created.
   If the table cannot be allocated, the EXIT action of calloc2 is taken and
   B->block_end keeps the value copied from INIT_BASE. */
void BASE_alloc (BASE *B, int unit, int block_siz){
  *B = INIT_BASE;                 /* start from the cleared template */

  B->unit      = unit;
  B->block_siz = block_siz;
  B->block_num = -1;              /* no block in use yet */
  B->num       = block_siz;       /* "current block is full": the first BASE_get_memory() moves to block 0 */
  B->dellist   = B;               /* B itself marks the end of the free list (tested in BASE_new) */

    /* block table with 20 slots to begin with */
  calloc2 (B->base, 20, "BASE_alloc: B->base", EXIT);
  B->block_end = 20;
}
|
32
|
+
|
33
|
+
/* termination */
|
34
|
+
/* Release every block registered in the block table, then the table itself,
   and put B back into the cleared INIT_BASE state. */
void BASE_end (BASE *B){
  int blk;
  FLOOP (blk, 0, B->block_end){
    free2 (B->base[blk]);
  }
  free2 (B->base);
  *B = INIT_BASE;
}
|
40
|
+
|
41
|
+
/* return pointer to the cell corresponding to the given index */
|
42
|
+
/* Translate a cell index into the address of the cell.
   The index is decoded with stride BASE_BLOCK: the quotient selects the block,
   the remainder is the cell position inside that block (same encoding as BASE_index). */
void *BASE_pnt (BASE *B, size_t i){
  size_t blk = i / BASE_BLOCK;
  size_t pos = i % BASE_BLOCK;
  return ( B->base[blk] + pos*B->unit);
}
|
45
|
+
/* return index corresponding to the given pointer */
|
46
|
+
/* Translate a cell address into its index (inverse of BASE_pnt).
     x : address inside one of the blocks of B
   Returns  blk*BASE_BLOCK + (offset of x in block blk)/unit,  or 0 when x lies in
   no block of B (note that 0 is also the index of the first cell of block 0).

   Only the blocks that are actually in use (0..block_num, and inside the table)
   are examined; empty table slots are skipped, and the address range of a block
   is the half-open interval [head, head + block_siz*unit), i.e. exactly the bytes
   obtained for it in BASE_get_memory. */
size_t BASE_index (BASE *B, void *x){
  char *p = (char *)x;
  int blk;

  if ( B->base == NULL ) return (0);   /* cleared or never allocated: nothing to search */

  for (blk=0 ; blk<=B->block_num && blk<B->block_end ; blk++){
    char *head = B->base[blk];
    if ( head == NULL ) continue;      /* slot exists but holds no block */
    if ( p >= head && p < head + (size_t)B->block_siz*(size_t)B->unit )
        return ( (size_t)blk*BASE_BLOCK + ((size_t)(p - head)) / B->unit);
  }
  return (0);
}
|
54
|
+
|
55
|
+
/* increment the current memory block pointer and (re)allocate memory if necessary */
|
56
|
+
/* Reserve i consecutive cells and return the address of the first one.
   The cells are taken from the current block when the new fill position stays
   strictly below block_siz; otherwise the next block becomes current (allocated
   on demand) and the cells are taken from its head.
   On allocation failure the EXIT0 action of reallocx/malloc2 is taken. */
void *BASE_get_memory (BASE *B, int i){
  B->num += i;

    /* usual case: still room in the current block */
  if ( B->num < B->block_siz )
      return (B->base[B->block_num] + (B->num-i)*B->unit);

    /* current block exhausted: advance to the next one */
  B->num = i;
  B->block_num++;
    /* grow the block table if block_num is beyond its end (NULL is passed as the fill value) */
  reallocx(B->base, B->block_end, B->block_num, NULL, "BASE:block", EXIT0);
  if ( B->base[B->block_num] == NULL ){
    malloc2 (B->base[B->block_num], B->block_siz*B->unit, "BASE_new: base", EXIT0);
  }
  return (B->base[B->block_num]);
}
|
68
|
+
|
69
|
+
|
70
|
+
/* allocate new cell */
|
71
|
+
/* Hand out one cell: the head of the free list if a deleted cell is
   available, otherwise a fresh cell obtained through BASE_get_memory(). */
void *BASE_new (BASE *B){
  char *cell;

    /* free list is empty when its head is B itself */
  if ( B->dellist == ((void *)B) )
      return (BASE_get_memory (B, 1));

    /* pop the head; the first bytes of a deleted cell store the next list entry */
  cell = (char *)B->dellist;
  B->dellist = (void *)(*((char **)cell));
  return (cell);
}
|
84
|
+
|
85
|
+
/* delete one cell. (add to the deleted list) */
|
86
|
+
/* Give cell x back to the pool by pushing it on the free list.
   The previous list head is written into the beginning of the cell itself. */
void BASE_del (BASE *B, void *x){
  void **link = (void **)x;
  *link = B->dellist;
  B->dellist = x;
}
|
90
|
+
|
91
|
+
#endif
|
92
|
+
|
93
|
+
|
@@ -0,0 +1,60 @@
|
|
1
|
+
/*
|
2
|
+
blocked memory allocation library
|
3
|
+
12/Mar/2002 by Takeaki Uno e-mail:uno@nii.jp,
|
4
|
+
homepage: http://research.nii.ac.jp/~uno/index.html */
|
5
|
+
/* This program is available for only academic use, basically.
|
6
|
+
Anyone can modify this program, but he/she has to write down
|
7
|
+
the change of the modification on the top of the source code.
|
8
|
+
Neither contact nor appointment to Takeaki Uno is needed.
|
9
|
+
If one wants to re-distribute this code, do not forget to
|
10
|
+
refer the newest code, and show the link to homepage of
|
11
|
+
Takeaki Uno, to notify the news about the codes for the users.
|
12
|
+
For the commercial use, please make a contact to Takeaki Uno. */
|
13
|
+
|
14
|
+
|
15
|
+
#ifndef _base_h_
#define _base_h_

#include"stdlib2.h"

/* constants for the base array */
#define BASE_UNIT 16       /* not referenced by the routines declared below */
#define BASE_BLOCK 65536   /* stride of the cell index: index = block*BASE_BLOCK + position */

/* pool of fixed-size cells kept in a table of blocks
   (INIT_BASE is initialized positionally, so the field order must not change) */
typedef struct {
  unsigned char type;   /* structure type tag (TYPE_BASE in INIT_BASE) */
  char **base;          /* table of blocks */
  int block_siz;        /* size of one block of memory */
  int block_num;        /* currently using block */
  int unit;             /* size of one unit memory */
  int num;              /* current position in a block */
  int block_end;        /* current end of the block */
  void *dellist;        /* head of the list of deleted cells */
} BASE;

/* cleared template value of BASE */
extern BASE INIT_BASE;

/* initialization, and allocate memory for header */
void BASE_alloc (BASE *B, int unit, int block_siz);

/* termination */
void BASE_end (BASE *B);

/* return pointer to the cell corresponding to the given index */
void *BASE_pnt (BASE *B, size_t i);

/* return index corresponding to the given pointer */
size_t BASE_index (BASE *B, void *x);

/* increment the current memory block pointer and (re)allocate memory if necessary */
void *BASE_get_memory (BASE *B, int i);

/* allocate new cell */
void *BASE_new (BASE *B);

/* delete one cell. (add to the deleted list) */
void BASE_del (BASE *B, void *x);


#endif
|
60
|
+
|
@@ -0,0 +1,473 @@
|
|
1
|
+
/* itemset search input/output common routines
|
2
|
+
25/Nov/2007 by Takeaki Uno e-mail:uno@nii.jp,
|
3
|
+
homepage: http://research.nii.ac.jp/~uno/index.html */
|
4
|
+
/* This program is available for only academic use, basically.
|
5
|
+
Anyone can modify this program, but he/she has to write down
|
6
|
+
the change of the modification on the top of the source code.
|
7
|
+
Neither contact nor appointment to Takeaki Uno is needed.
|
8
|
+
If one wants to re-distribute this code, do not forget to
|
9
|
+
refer the newest code, and show the link to homepage of
|
10
|
+
Takeaki Uno, to notify the news about the codes for the users.
|
11
|
+
For the commercial use, please make a contact to Takeaki Uno. */
|
12
|
+
|
13
|
+
/* routines for itemset mining */
|
14
|
+
|
15
|
+
#ifndef _itemset_c_
|
16
|
+
#define _itemset_c_
|
17
|
+
|
18
|
+
#include"itemset.h"
|
19
|
+
#include"queue.c"
|
20
|
+
#include"aheap.c"
|
21
|
+
|
22
|
+
/* flush the write buffer, available for multi-core mode */
|
23
|
+
/* Flush the write buffer fp under the output lock.
   When ITEMSET_MULTI_OUTPUT is set, the flush is skipped as long as the buffer
   holds no more than FILE2_BUFSIZ/2 bytes; without that flag it always flushes. */
void ITEMSET_flush (ITEMSET *I, FILE2 *fp){
  if ( (I->flag&ITEMSET_MULTI_OUTPUT) && (fp->buf-fp->buf_org) <= FILE2_BUFSIZ/2 ) return;

  SPIN_LOCK(I->multi_core, I->lock_output);
  FILE2_flush (fp);
  SPIN_UNLOCK(I->multi_core, I->lock_output);
}
|
30
|
+
|
31
|
+
/* Output information about ITEMSET structure. flag&1: print frequency constraint */
|
32
|
+
/* Print the constraints held in I through print_err.
     - size constraint  "lb <= itemsets <= ub"  (only when lb>0 or ub<INTHUGE)
     - flag&1: frequency constraint  "frq_lb <= frequency <= frq_ub"
   Each constraint is written on exactly one line; the upper-bound part no longer
   carries its own newline, which used to produce an extra blank line. */
void ITEMSET_print (ITEMSET *I, int flag){
  if ( I->lb>0 || I->ub<INTHUGE ){
    if ( I->lb > 0 ) print_err ("%d <= ", I->lb);
    print_err ("itemsets ");
    if ( I->ub < INTHUGE ) print_err (" <= %d", I->ub);
    print_err ("\n");
  }
  if ( flag&1 ){
    if ( I->frq_lb > -WEIGHTHUGE ) print_err (WEIGHTF" <=", I->frq_lb);
    print_err (" frequency ");
    if ( I->frq_ub < WEIGHTHUGE ) print_err (" <="WEIGHTF, I->frq_ub);
    print_err ("\n");
  }
}
|
46
|
+
|
47
|
+
/* ITEMSET initialization */
|
48
|
+
/* First-stage initialization of ITEMSET: clear flags and counters, set every
   bound to its widest value, and put all pointers/containers in the empty state.
   Nothing is allocated here. */
void ITEMSET_init (ITEMSET *I){
    /* flags, counters and current values */
  I->flag = 0;
  I->dir = 0;
  I->multi_core = 0;
  I->iters = I->iters2 = I->iters3 = 0;
  I->solutions = I->solutions2 = I->max_solutions = I->outputs = I->outputs2 = 0;
  I->item_max = I->item_max_org = 0;
  I->frq = I->pfrq = I->total_weight = 0;
  I->ratio = I->prob = 0.0;

    /* bounds: lower bounds at the minimum, upper bounds at the maximum */
  I->lb = I->len_lb = I->gap_lb = 0;
  I->ub = I->len_ub = I->gap_ub = INTHUGE;
  I->posi_lb = I->nega_lb = I->frq_lb = I->setrule_lb = -WEIGHTHUGE;
  I->posi_ub = I->nega_ub = I->frq_ub = WEIGHTHUGE;
  I->prob_lb = I->ratio_lb = I->rposi_lb = 0;
  I->prob_ub = I->ratio_ub = I->rposi_ub = 1;
  I->target = INTHUGE;

    /* containers in their empty state */
  I->topk = INIT_AHEAP;
  I->itemset = I->add = INIT_QUEUE;

    /* arrays and handles: nothing allocated or opened yet */
  I->itemflag = NULL;
  I->perm = NULL;
  I->item_frq = NULL;
  I->sc = NULL;
  I->X = NULL;
  I->fp = NULL;
  I->set_weight = NULL;
  I->set_occ = NULL;

    /* per-core counters and output buffers */
  I->multi_iters = I->multi_iters2 = I->multi_iters3 = NULL;
  I->multi_outputs = I->multi_outputs2 = NULL;
  I->multi_solutions = I->multi_solutions2 = NULL;
  I->multi_fp = NULL;
}
|
81
|
+
|
82
|
+
|
83
|
+
/* second initialization
|
84
|
+
topk.end>0 => initialize heap for topk mining */
|
85
|
+
/* all pointers will be set to 0, but not for */
|
86
|
+
/* if topK mining, set topk.end to "K" */
|
87
|
+
/* Second-stage initialization: allocate the working arrays and open the output.
     fname        : output file name; NULL means no output file (I->fp = NULL)
     perm         : item permutation, stored in I->perm as given
     item_max     : number of items; item_max_org: number of original items
                    (item_max_org is used for the array sizes when ITEMSET_USE_ORG is set)
   If I->topk.end > 0 on entry, a heap of that size is prepared for top-k mining.
   On any allocation/open failure, everything is released through ITEMSET_end()
   and the EXIT action is taken.
   NOTE(review): ITEMSET_end() also frees I->perm, i.e. the caller's perm array,
   on the error path — confirm that callers expect this. */
void ITEMSET_init2 (ITEMSET *I, char *fname, PERM *perm, QUEUE_INT item_max, size_t item_max_org){
  LONG i;
  size_t siz = (I->flag&ITEMSET_USE_ORG)?item_max_org+2: item_max+2;
  I->prob = I->ratio = 1.0;
  I->frq = 0;
  I->perm = perm;

    /* heap for top-k mining: all keys start at the smallest weight */
  if ( I->topk.end>0 ){
    AHEAP_alloc (&I->topk, I->topk.end);
    FLOOP (i, 0, I->topk.end) AHEAP_chg (&I->topk, (AHEAP_ID)i, -WEIGHTHUGE);
    I->frq_lb = -WEIGHTHUGE;
  } else I->topk.v = NULL;

    /* current itemset, additional items, and the per-size solution counters */
  QUEUE_alloc (&I->itemset, (QUEUE_ID)siz); I->itemset.end = (QUEUE_ID)siz;
  if ( I->flag&ITEMSET_ADD ) QUEUE_alloc (&I->add, (QUEUE_ID)siz);
  calloc2 (I->sc, siz+2, "ITEMSET_init2: sc", goto ERR);
  if ( I->flag&ITEMSET_SET_RULE ){
    calloc2 (I->set_weight, siz, "ITEMSET_init2: set_weight", goto ERR);
    if ( I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT) )
        calloc2 (I->set_occ, siz, "ITEMSET_init2: set_occ", goto ERR);   /* label used to say "set_weight" */
  }
  I->iters = I->iters2 = I->solutions = 0;
  I->item_max = item_max;
  I->item_max_org = (QUEUE_INT)item_max_org;

    /* output file */
  if ( fname ){ fopen2 (I->fp, fname, "w", "ITEMSET_init2", goto ERR);}
  else I->fp = NULL;

  if ( I->flag&ITEMSET_ITEMFRQ )
      malloc2 (I->item_frq, item_max+2, "ITEMSET_init2: item_frqs", goto ERR);
  if ( I->flag&ITEMSET_RULE ){
    calloc2 (I->itemflag, item_max+2, "ITEMSET_init2: item_flag", goto ERR);
  }
  I->total_weight = 1;

    /* per-core counters and output buffers (multi_core+1 entries each) */
  calloc2 (I->multi_iters, I->multi_core+1, "ITEMSET_init2: multi_iters", goto ERR);
  calloc2 (I->multi_iters2, I->multi_core+1, "ITEMSET_init2: multi_iters2", goto ERR);
  calloc2 (I->multi_iters3, I->multi_core+1, "ITEMSET_init2: multi_iters3", goto ERR);
  calloc2 (I->multi_outputs, I->multi_core+1, "ITEMSET_init2: multi_outputs", goto ERR);
  calloc2 (I->multi_outputs2, I->multi_core+1, "ITEMSET_init2: multi_outputs2", goto ERR);
  calloc2 (I->multi_solutions, I->multi_core+1, "ITEMSET_init2: multi_solutions", goto ERR);
  calloc2 (I->multi_solutions2, I->multi_core+1, "ITEMSET_init2: multi_solutions2", goto ERR);
  calloc2 (I->multi_fp, I->multi_core+1, "ITEMSET_init2: multi_fp", goto ERR);

  FLOOP (i, 0, MAX(I->multi_core,1))
      FILE2_open_ (I->multi_fp[i], I->fp, "ITEMSET_init2: multi_fp[i]", goto ERR);
#ifdef MULTI_CORE
  if ( I->multi_core > 0 ){
    pthread_spin_init (&I->lock_counter, PTHREAD_PROCESS_PRIVATE);
    pthread_spin_init (&I->lock_sc, PTHREAD_PROCESS_PRIVATE);
    pthread_spin_init (&I->lock_output, PTHREAD_PROCESS_PRIVATE);
  }
#endif
  return;
  ERR:;
  ITEMSET_end (I);
  EXIT;
}
|
141
|
+
|
142
|
+
/* sum the counters computed by each thread */
|
143
|
+
/* Add the counters collected per core to the global counters of I and flush
   each per-core output buffer. Entries 0 .. MAX(multi_core,1)-1 are processed. */
void ITEMSET_merge_counters (ITEMSET *I){
  int core;
  FLOOP (core, 0, MAX(I->multi_core,1)){
      /* iteration counters */
    I->iters += I->multi_iters[core];
    I->iters2 += I->multi_iters2[core];
    I->iters3 += I->multi_iters3[core];
      /* output / solution counters */
    I->outputs += I->multi_outputs[core];
    I->outputs2 += I->multi_outputs2[core];
    I->solutions += I->multi_solutions[core];
    I->solutions2 += I->multi_solutions2[core];
      /* pending output of this core */
    FILE2_flush ( &I->multi_fp[core]);
  }
}
|
156
|
+
|
157
|
+
/*******************************************************************/
|
158
|
+
/* termination of ITEMSET */
|
159
|
+
/*******************************************************************/
|
160
|
+
/* Release everything held by I (queues, heap, output file, arrays, per-core
   buffers and counters, spin locks in MULTI_CORE builds) and finish by
   re-running ITEMSET_init() so that I is back in its initial state.
   I->perm is freed here together with the other arrays. */
void ITEMSET_end (ITEMSET *I){
  int core;

    /* containers and output file */
  QUEUE_end (&I->itemset);
  QUEUE_end (&I->add);
  AHEAP_end (&I->topk);
  fclose2 ( I->fp);
  mfree (I->sc, I->item_frq, I->itemflag, I->perm, I->set_weight, I->set_occ);

    /* per-core output buffers, then the per-core arrays themselves */
  if ( I->multi_fp ){
    FLOOP (core, 0, MAX(I->multi_core,1)){
      free2 (I->multi_fp[core].buf);
    }
  }
  mfree (I->multi_fp, I->multi_iters, I->multi_iters2, I->multi_iters3);
  mfree (I->multi_outputs, I->multi_outputs2, I->multi_solutions, I->multi_solutions2);
#ifdef MULTI_CORE
  if ( I->multi_core>0 ){
    pthread_spin_destroy(&I->lock_counter);
    pthread_spin_destroy(&I->lock_sc);
    pthread_spin_destroy(&I->lock_output);
  }
#endif
  ITEMSET_init (I);
}
|
181
|
+
|
182
|
+
/*******************************************************************/
|
183
|
+
/* output at the termination of the algorithm */
|
184
|
+
/* print #of itemsets of size k, for each k */
|
185
|
+
/*******************************************************************/
|
186
|
+
/* Final report of a run.
   The per-core counters are merged first. In top-k mode (topk.end > 0) only the
   key at the head of the heap is printed to stdout. Otherwise, when SHOW_MESSAGE
   is set, the total number of itemsets and the number of itemsets of each size
   (up to the largest size with a non-zero count) go to stdout, and the iteration
   counters go to print_err. */
void ITEMSET_last_output (ITEMSET *I){
  QUEUE_ID i;
  unsigned long long total=0, last=0;

  ITEMSET_merge_counters (I);

    /* top-k mode: print the heap minimum and stop */
  if ( I->topk.end > 0 ){
    i = AHEAP_findmin_head (&I->topk);
    fprint_WEIGHT (stdout, AHEAP_H (I->topk, i));
    printf ("\n");
    return;
  }

  if ( !(I->flag&SHOW_MESSAGE) ) return;

    /* total count, and the largest size having at least one itemset */
  FLOOP (i, 0, I->itemset.end+1){
    total += I->sc[i];
    if ( I->sc[i] != 0 ) last = i;
  }
  if ( total!=0 ){
    printf ("%llu\n", total);
    FLOOP (i, 0, last+1) printf ("%llu\n", I->sc[i]);
  }

  print_err ("iters=%lld", I->iters);
  if ( I->flag&ITEMSET_ITERS2 ) print_err (", iters2=%lld", I->iters2);
  print_err ("\n");
}
|
210
|
+
|
211
|
+
/* output frequency, coverage */
|
212
|
+
/* Write the frequency of the current itemset to the output buffer of core_id.
     ITEMSET_FREQ / ITEMSET_PRE_FREQ : "(frq)", with a blank before it (FREQ)
                                       and/or after it (PRE_FREQ)
     ITEMSET_OUTPUT_POSINEGA         : " (pfrq,pfrq-frq,pfrq/(2*pfrq-frq))"
   NOTE(review): the last value divides by 2*pfrq-frq without a zero check. */
void ITEMSET_output_frequency (ITEMSET *I, int core_id){
  FILE2 *fp = &I->multi_fp[core_id];

  if ( I->flag&(ITEMSET_FREQ+ITEMSET_PRE_FREQ) ){
    if ( I->flag&ITEMSET_FREQ ){
      FILE2_putc (fp, ' ');
    }
    FILE2_print_WEIGHT (fp, I->frq, 4, '(');
    FILE2_putc (fp, ')');
    if ( I->flag&ITEMSET_PRE_FREQ ){
      FILE2_putc (fp, ' ');
    }
  }

    /* positive sum, negative sum in the occurrence, and their ratio */
  if ( I->flag&ITEMSET_OUTPUT_POSINEGA ){
    FILE2_putc (fp, ' ');
    FILE2_print_WEIGHT (fp, I->pfrq, 4, '(');
    FILE2_print_WEIGHT (fp, I->pfrq-I->frq, 4, ',');
    FILE2_print_WEIGHT (fp, I->pfrq/(2*I->pfrq-I->frq), 4, ',');
    FILE2_putc (fp, ')');
  }
}
|
228
|
+
|
229
|
+
#ifdef _trsact_h_
/* Write the occurrence list occ on one line of the output buffer of core_id.
   Each entry is printed as a transaction ID (mapped through TT->trperm when that
   table exists). With ITEMSET_RM_DUP_TRSACT, an entry equal to the previous one
   is skipped. Depending on which of ITEMSET_TRSACT_ID / ITEMSET_MULTI_OCC_PRINT
   are set, the extra values stored after the ID in each entry are printed too.
   The buffer is flushed (ITEMSET_flush) every 256 entries. */
void ITEMSET_output_occ (ITEMSET *I, QUEUE *occ, int core_id){
  QUEUE_ID cnt = 0;
  QUEUE_INT *x;
  FILE2 *fp = &I->multi_fp[core_id];
  TRSACT *TT = (TRSACT *)(I->X);
  VEC_ID j, prev = TT->rows_org;     /* previous entry; starts at rows_org */
  int mode = I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT);

  MQUE_FLOOP_ (*occ, x, TT->occ_unit){
    if ( (I->flag&ITEMSET_RM_DUP_TRSACT)==0 || *x != prev ){
      FILE2_print_int (fp, TT->trperm? TT->trperm[*x]: *x,' ');
      if ( mode == ITEMSET_MULTI_OCC_PRINT ){
          /* all remaining values of the entry */
        FLOOP (j, 1, (VEC_ID)(TT->occ_unit/sizeof(QUEUE_INT))){
          FILE2_print_int (fp, *(x+j), ' ');
        }
      } else if ( mode == (ITEMSET_MULTI_OCC_PRINT+ITEMSET_TRSACT_ID) ){
          /* only the value next to the ID */
        FILE2_print_int (fp, *(x+1), ' ');
      }
    }
    prev = *x;
    if ( (++cnt)%256==0 ) ITEMSET_flush (I, fp);
  }
  FILE2_putc (fp, '\n');
}
#endif
|
254
|
+
|
255
|
+
/* output an itemset to the output file */
|
256
|
+
/* Output the current itemset (I->itemset) as one solution.
   Steps, in order:
     1. count the call in multi_outputs[core_id]; optional progress message
     2. drop the itemset if its size or (unless ITEMSET_IGNORE_BOUND) its
        frequency / positive / negative sums are out of bounds
     3. count the solution; stop with ERROR_MES when max_solutions is exceeded
     4. top-k mode: update the heap and frq_lb;
        file mode: print frequency, items (or edges), newline, occurrences
     5. count the itemset in sc[size] and flush the buffer */
void ITEMSET_output_itemset (ITEMSET *I, QUEUE *occ, int core_id){
  QUEUE_ID i;
  QUEUE_INT e;
  FILE2 *fp = &I->multi_fp[core_id];

  I->multi_outputs[core_id]++;
  if ( (I->flag&SHOW_PROGRESS ) && (I->iters%(ITEMSET_INTERVAL) == 0) )
      print_err ("---- %lld solutions in %lld candidates\n", I->solutions, I->outputs);

    /* bound checks */
  if ( I->itemset.t < I->lb || I->itemset.t > I->ub ) return;
  if ( (I->flag&ITEMSET_IGNORE_BOUND)==0 ){
    if ( I->frq < I->frq_lb || I->frq > I->frq_ub ) return;
    if ( I->pfrq < I->posi_lb || I->pfrq > I->posi_ub || (I->frq - I->pfrq) > I->nega_ub || (I->frq - I->pfrq) < I->nega_lb ) return;
  }

  I->multi_solutions[core_id]++;
  if ( I->max_solutions>0 && I->solutions > I->max_solutions ){
    ITEMSET_last_output (I);
    ERROR_MES = "reached to maximum number of solutions";
    EXIT;
  }

  if ( I->topk.v ){
      /* top-k: replace the heap minimum when the new frequency is larger,
         and raise the frequency lower bound to the new minimum */
    e = AHEAP_findmin_head (&(I->topk));
    if ( I->frq > AHEAP_H (I->topk, e) ){
      AHEAP_chg (&(I->topk), e, I->frq);
      e = AHEAP_findmin_head (&(I->topk));
      I->frq_lb = AHEAP_H (I->topk, e);
    }
  } else if ( I->fp ){
    if ( I->flag&ITEMSET_PRE_FREQ ) ITEMSET_output_frequency (I, core_id);
    if ( (I->flag & ITEMSET_NOT_ITEMSET) == 0 ){
#ifdef _agraph_h_
      if ( I->flag&ITEMSET_OUTPUT_EDGE ){
          /* items are edges: print each as "(from,to)" */
        ARY_FLOOP (I->itemset, i, e){
          FILE2_print_int (fp, AGRAPH_INC_FROM(*((AGRAPH *)(I->X)),e,I->dir), '(' );
          FILE2_print_int (fp, AGRAPH_INC_TO(*((AGRAPH *)(I->X)),e,I->dir), ',');
          FILE2_putc (fp, ')');
          if ( i<I->itemset.t-1 ) FILE2_putc (fp, ' ');
          if ( (i+1)%256==0 ) ITEMSET_flush (I, fp);
        }
        goto NEXT;
      }
#endif
        /* items separated by blanks, mapped through perm when it exists */
      ARY_FLOOP (I->itemset, i, e){
        FILE2_print_int (fp, I->perm? I->perm[e]: e, i==0? 0: ' ');
        if ( (i+1)%256==0 ) ITEMSET_flush (I, fp);
      }
#ifdef _agraph_h_
      NEXT:;
#endif
    }
    if ( !(I->flag&ITEMSET_PRE_FREQ) ) ITEMSET_output_frequency (I, core_id);
    if ( ((I->flag & ITEMSET_NOT_ITEMSET) == 0) || (I->flag&ITEMSET_FREQ) || (I->flag&ITEMSET_PRE_FREQ) ) FILE2_putc (fp, '\n');

#ifdef _trsact_h_
    if (I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT)) ITEMSET_output_occ (I, occ, core_id);
#endif
  }

  I->sc[I->itemset.t]++;
  ITEMSET_flush (I, fp);
}
|
314
|
+
|
315
|
+
/* output itemsets with adding all combination of "add"
|
316
|
+
at the first call, i has to be "add->t" */
|
317
|
+
/* Output the current itemset and, recursively, every itemset obtained by
   adding a combination of the items in I->add.
   I->add.t is used as the loop counter and is restored on normal return;
   when ERROR_MES is set the function returns at once, without restoring
   I->add.t or I->itemset.t. */
void ITEMSET_solution_iter (ITEMSET *I, QUEUE *occ, int core_id){
  QUEUE_ID saved_t = I->add.t;

  if ( I->itemset.t > I->ub ) return;
  ITEMSET_output_itemset (I, occ, core_id);
  if ( ERROR_MES ) return;

  BLOOP (I->add.t, I->add.t, 0){
      /* extend by add.v[add.t], recurse, then take the item off again */
    ARY_INS (I->itemset, I->add.v[I->add.t]);
    ITEMSET_solution_iter (I, occ, core_id);
    if ( ERROR_MES ) return;
    I->itemset.t--;
  }
  I->add.t = saved_t;
}
|
330
|
+
|
331
|
+
/* Report the solution(s) given by the current itemset and the items in I->add.
     without ITEMSET_ALL : all items of add are appended and the single
                           resulting itemset is output
     with ITEMSET_ALL    : every combination of add items is a solution; they are
                           enumerated when there is an output file or a top-k heap,
                           and only counted into sc[] otherwise */
void ITEMSET_solution (ITEMSET *I, QUEUE *occ, int core_id){
  QUEUE_ID i;
  LONG s;

  if ( I->itemset.t > I->ub ) return;

  if ( !(I->flag & ITEMSET_ALL) ){
    FLOOP (i, 0, I->add.t){
      ARY_INS (I->itemset, I->add.v[i]);
    }
    ITEMSET_output_itemset (I, occ, core_id);
    I->itemset.t -= I->add.t;
    return;
  }

  if ( I->fp || I->topk.v ){
    ITEMSET_solution_iter (I, occ, core_id);
    return;
  }

    /* counting only: s runs through the binomial coefficients C(add.t, i) */
  s=1;
  FLOOP (i, 0, I->add.t+1){
    I->sc[I->itemset.t+i] += s;
    s = s*(I->add.t-i)/(i+1);
  }
}
|
349
|
+
|
350
|
+
/*************************************************************************/
|
351
|
+
/* output a rule */
|
352
|
+
/*************************************************************************/
|
353
|
+
/* Output one rule "item <= itemset".
     p1, p2 : the two values printed as "(p1,p2)" in front of the rule
     item   : the item at the head of the rule
   The head "(p1,p2) item <= " is written only when the per-core buffer has an
   output file and no top-k heap is in use. The item is mapped through I->perm
   when that table exists and printed as is otherwise (same convention as
   ITEMSET_output_itemset), so a missing perm no longer causes a NULL dereference.
   The itemset part is then produced by ITEMSET_output_itemset (ITEMSET_RULE set)
   or ITEMSET_solution. */
void ITEMSET_output_rule (ITEMSET *I, QUEUE *occ, double p1, double p2, size_t item, int core_id){
  FILE2 *fp = &I->multi_fp[core_id];
  if ( fp->fp && !(I->topk.v) ){
    FILE2_print_real (fp, p1, 4, '(');
    FILE2_print_real (fp, p2, 4, ',');
    FILE2_putc (fp, ')');
    FILE2_print_int (fp, I->perm? (LONG)I->perm[item]: (LONG)item, ' ');
    FILE2_puts (fp, " <= ");
  }
  if ( I->flag & ITEMSET_RULE ) ITEMSET_output_itemset (I, occ, core_id);
  else ITEMSET_solution (I, occ, core_id);
}
|
365
|
+
/*************************************************************************/
|
366
|
+
/* check all rules for a pair of itemset and item */
|
367
|
+
/*************************************************************************/
|
368
|
+
/* Check the rule "item <= current itemset" and output it when it passes.
     w    : per-item weights; w[item]/I->frq is the value tested against the bounds
     item : candidate head of the rule (skipped when itemflag[item]==1)
   The values passed on to ITEMSET_output_rule are (ratio, item_frq[item]), or
   (w[item], item_frq[item]*total_weight) when ITEMSET_RULE_SUPP is set.
   Nothing is output when none of the four ITEMSET_RULE_* test flags is set. */
void ITEMSET_check_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, size_t item, int core_id){
  double ratio = w[item]/I->frq, out1, out2;

  if ( I->itemflag[item]==1 ) return;
  if ( w[item] <= -WEIGHTHUGE ) ratio = 0;

  out1 = ratio;
  out2 = I->item_frq[item];
  if ( I->flag & ITEMSET_RULE_SUPP ){
    out1 = w[item];
    out2 *= I->total_weight;
  }

  if ( I->flag & (ITEMSET_RULE_FRQ+ITEMSET_RULE_INFRQ)){
      /* absolute bounds on the ratio */
    if ( (I->flag & ITEMSET_RULE_FRQ) && ratio < I->ratio_lb ) return;
    if ( (I->flag & ITEMSET_RULE_INFRQ) && ratio > I->ratio_ub ) return;
  } else if ( I->flag & (ITEMSET_RULE_RFRQ+ITEMSET_RULE_RINFRQ) ){
      /* bounds relative to the frequency of the item */
    if ( (I->flag & ITEMSET_RULE_RFRQ) && (1-ratio) > I->ratio_lb * (1-I->item_frq[item]) ) return;
    if ( (I->flag & ITEMSET_RULE_RINFRQ) && ratio > I->ratio_ub * I->item_frq[item] ) return;
  } else return;

  ITEMSET_output_rule (I, occ, out1, out2, item, core_id);
}
|
386
|
+
|
387
|
+
/*************************************************************************/
|
388
|
+
/* check all rules for an itemset and all items */
|
389
|
+
/*************************************************************************/
|
390
|
+
/* Check the current itemset against all constraints and output it, or the
   rules derived from it.
     w     : per-item weights handed to ITEMSET_check_rule
     occ   : occurrences of the current itemset
     jump  : further candidate items for the head of a rule
     total : divisor of I->frq for the relational frequency test
   Flow:
     1. return if the size or (unless ITEMSET_IGNORE_BOUND) the frequency /
        positive / negative sums are out of bounds
     2. ITEMSET_SET_RULE: output the "prefix => rest" rules of the itemset
     3. if the relational frequency test passes:
          ITEMSET_RULE : check rules for the target item, or for the items of
                         add and jump, or for all items without itemflag==1
          otherwise    : output the itemset through ITEMSET_solution
   Every call that may set ERROR_MES is followed by an immediate return on error;
   this now also holds inside the loop over jump, where the check used to sit
   outside the (unbraced) loop body and therefore never stopped the loop. */
void ITEMSET_check_all_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, QUEUE *jump, WEIGHT total, int core_id){
  QUEUE_ID i, t;
  QUEUE_INT e, f=0, *x;
  WEIGHT d = I->frq/total;

    // checking out of range for itemset size and (posi/nega) frequency
  if ( I->itemset.t+I->add.t < I->lb || I->itemset.t>I->ub || (!(I->flag&ITEMSET_ALL) && I->itemset.t+I->add.t>I->ub)) return;
  if ( !(I->flag&ITEMSET_IGNORE_BOUND) && (I->frq < I->frq_lb || I->frq > I->frq_ub) ) return;
  if ( !(I->flag&ITEMSET_IGNORE_BOUND) && (I->pfrq < I->posi_lb || I->pfrq > I->posi_ub || (I->frq - I->pfrq) > I->nega_ub || (I->frq - I->pfrq) < I->nega_lb) ) return;

  if ( I->flag&ITEMSET_SET_RULE ){  // itemset->itemset rule for sequence mining
    FLOOP (i, 0, I->itemset.t-1){
      if ( I->frq/I->set_weight[i] >= I->setrule_lb && I->fp ){
        I->sc[i]++;
        if ( I->flag&ITEMSET_PRE_FREQ ) ITEMSET_output_frequency (I, core_id);
          // items of the itemset, with " =>" inserted after position i
        FLOOP (t, 0, I->itemset.t){
          FILE2_print_int (&I->multi_fp[core_id], I->itemset.v[t], t?' ':0);
          if ( t == i ){
            FILE2_putc (&I->multi_fp[core_id], ' ');
            FILE2_putc (&I->multi_fp[core_id], '=');
            FILE2_putc (&I->multi_fp[core_id], '>');
          }
        }
        if ( !(I->flag&ITEMSET_PRE_FREQ) ) ITEMSET_output_frequency ( I, core_id);
        FILE2_putc (&I->multi_fp[core_id], ' ');
        FILE2_print_real (&I->multi_fp[core_id], I->frq/I->set_weight[i], 4, '(');
        FILE2_putc (&I->multi_fp[core_id], ')');
        FILE2_putc (&I->multi_fp[core_id], '\n');
#ifdef _trsact_h_
        if ( I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT) )
            ITEMSET_output_occ (I, I->set_occ[i], core_id);
#endif
        ITEMSET_flush (I, &I->multi_fp[core_id]);
      }
    }
  }

    // constraint of relational frequency
  if ( ((I->flag&ITEMSET_RFRQ)==0 || d >= I->prob_lb * I->prob )
      && ((I->flag&ITEMSET_RINFRQ)==0 || d <= I->prob * I->prob_ub) ){
    if ( I->flag&ITEMSET_RULE ){  //  rule mining routines
      if ( I->itemset.t == 0 ) return;
      if ( I->target < I->item_max ){
        ITEMSET_check_rule (I, w, occ, I->target, core_id);
        if (ERROR_MES) return;
      } else {
        if ( I->flag & (ITEMSET_RULE_FRQ + ITEMSET_RULE_RFRQ) ){
          if ( I->add.t>0 ){
              // take each item of add out in turn (its slot is filled with the
              // last item while add is shortened by one) and check it as a head
            f = I->add.v[I->add.t-1]; t = I->add.t; I->add.t--;
            FLOOP (i, 0, t){
              e = I->add.v[i];
              I->add.v[i] = f;
              ITEMSET_check_rule (I, w, occ, e, core_id);
              if (ERROR_MES) return;
              I->add.v[i] = e;
            }
            I->add.t++;
          }
          MQUE_FLOOP (*jump, x){
            ITEMSET_check_rule (I, w, occ, *x, core_id);
            if (ERROR_MES) return;
          }
        } else {
          if ( I->flag & (ITEMSET_RULE_INFRQ + ITEMSET_RULE_RINFRQ) ){
            FLOOP (i, 0, I->item_max){
              if ( I->itemflag[i] != 1 ){
                ITEMSET_check_rule (I, w, occ, i, core_id);
                if (ERROR_MES) return;
              }
            }
          }
        }
      }
    } else {  // usual mining (not rule mining)
      if ( I->fp && (I->flag&(ITEMSET_RFRQ+ITEMSET_RINFRQ))){
        FILE2_print_real (&I->multi_fp[core_id], d, 4, '[');
        FILE2_print_real (&I->multi_fp[core_id], I->prob, 4, ',');
        FILE2_putc (&I->multi_fp[core_id], ']');
      }
      ITEMSET_solution (I, occ, core_id);
    }
  }
}
|
472
|
+
|
473
|
+
#endif
|