nysol-take 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/mbiclique.rb +317 -0
- data/bin/mbipolish.rb +362 -0
- data/bin/mccomp.rb +235 -0
- data/bin/mclique.rb +295 -0
- data/bin/mclique2g.rb +105 -0
- data/bin/mcliqueInfo.rb +203 -0
- data/bin/mfriends.rb +202 -0
- data/bin/mgdiff.rb +252 -0
- data/bin/mhifriend.rb +456 -0
- data/bin/mhipolish.rb +465 -0
- data/bin/mitemset.rb +168 -0
- data/bin/mpal.rb +410 -0
- data/bin/mpolishing.rb +399 -0
- data/bin/msequence.rb +165 -0
- data/bin/mtra2g.rb +476 -0
- data/bin/mtra2gc.rb +360 -0
- data/ext/grhfilrun/extconf.rb +12 -0
- data/ext/grhfilrun/grhfilrun.c +85 -0
- data/ext/grhfilrun/src/_sspc.c +358 -0
- data/ext/grhfilrun/src/aheap.c +545 -0
- data/ext/grhfilrun/src/aheap.h +251 -0
- data/ext/grhfilrun/src/base.c +92 -0
- data/ext/grhfilrun/src/base.h +59 -0
- data/ext/grhfilrun/src/fstar.c +497 -0
- data/ext/grhfilrun/src/fstar.h +80 -0
- data/ext/grhfilrun/src/grhfil.c +214 -0
- data/ext/grhfilrun/src/itemset.c +713 -0
- data/ext/grhfilrun/src/itemset.h +170 -0
- data/ext/grhfilrun/src/problem.c +415 -0
- data/ext/grhfilrun/src/problem.h +179 -0
- data/ext/grhfilrun/src/queue.c +533 -0
- data/ext/grhfilrun/src/queue.h +182 -0
- data/ext/grhfilrun/src/sample.c +19 -0
- data/ext/grhfilrun/src/sspc.c +597 -0
- data/ext/grhfilrun/src/sspc2.c +491 -0
- data/ext/grhfilrun/src/stdlib2.c +1482 -0
- data/ext/grhfilrun/src/stdlib2.h +892 -0
- data/ext/grhfilrun/src/trsact.c +817 -0
- data/ext/grhfilrun/src/trsact.h +160 -0
- data/ext/grhfilrun/src/vec.c +745 -0
- data/ext/grhfilrun/src/vec.h +172 -0
- data/ext/lcmrun/extconf.rb +20 -0
- data/ext/lcmrun/lcmrun.cpp +99 -0
- data/ext/lcmrun/src/aheap.c +216 -0
- data/ext/lcmrun/src/aheap.h +111 -0
- data/ext/lcmrun/src/base.c +92 -0
- data/ext/lcmrun/src/base.h +59 -0
- data/ext/lcmrun/src/itemset.c +496 -0
- data/ext/lcmrun/src/itemset.h +157 -0
- data/ext/lcmrun/src/lcm.c +427 -0
- data/ext/lcmrun/src/problem.c +349 -0
- data/ext/lcmrun/src/problem.h +177 -0
- data/ext/lcmrun/src/queue.c +528 -0
- data/ext/lcmrun/src/queue.h +176 -0
- data/ext/lcmrun/src/sgraph.c +359 -0
- data/ext/lcmrun/src/sgraph.h +173 -0
- data/ext/lcmrun/src/stdlib2.c +1282 -0
- data/ext/lcmrun/src/stdlib2.h +823 -0
- data/ext/lcmrun/src/trsact.c +747 -0
- data/ext/lcmrun/src/trsact.h +159 -0
- data/ext/lcmrun/src/vec.c +731 -0
- data/ext/lcmrun/src/vec.h +171 -0
- data/ext/lcmseq0run/extconf.rb +20 -0
- data/ext/lcmseq0run/lcmseq0run.cpp +59 -0
- data/ext/lcmseq0run/src/aheap.c +216 -0
- data/ext/lcmseq0run/src/aheap.h +111 -0
- data/ext/lcmseq0run/src/base.c +92 -0
- data/ext/lcmseq0run/src/base.h +59 -0
- data/ext/lcmseq0run/src/itemset.c +518 -0
- data/ext/lcmseq0run/src/itemset.h +157 -0
- data/ext/lcmseq0run/src/itemset_zero.c +522 -0
- data/ext/lcmseq0run/src/lcm_seq.c +446 -0
- data/ext/lcmseq0run/src/lcm_seq_zero.c +446 -0
- data/ext/lcmseq0run/src/problem.c +439 -0
- data/ext/lcmseq0run/src/problem.h +179 -0
- data/ext/lcmseq0run/src/problem_zero.c +439 -0
- data/ext/lcmseq0run/src/queue.c +533 -0
- data/ext/lcmseq0run/src/queue.h +182 -0
- data/ext/lcmseq0run/src/stdlib2.c +1350 -0
- data/ext/lcmseq0run/src/stdlib2.h +864 -0
- data/ext/lcmseq0run/src/trsact.c +747 -0
- data/ext/lcmseq0run/src/trsact.h +159 -0
- data/ext/lcmseq0run/src/vec.c +779 -0
- data/ext/lcmseq0run/src/vec.h +172 -0
- data/ext/lcmseqrun/extconf.rb +20 -0
- data/ext/lcmseqrun/lcmseqrun.cpp +101 -0
- data/ext/lcmseqrun/src/aheap.c +216 -0
- data/ext/lcmseqrun/src/aheap.h +111 -0
- data/ext/lcmseqrun/src/base.c +92 -0
- data/ext/lcmseqrun/src/base.h +59 -0
- data/ext/lcmseqrun/src/itemset.c +518 -0
- data/ext/lcmseqrun/src/itemset.h +157 -0
- data/ext/lcmseqrun/src/itemset_zero.c +522 -0
- data/ext/lcmseqrun/src/lcm_seq.c +447 -0
- data/ext/lcmseqrun/src/lcm_seq_zero.c +446 -0
- data/ext/lcmseqrun/src/problem.c +439 -0
- data/ext/lcmseqrun/src/problem.h +179 -0
- data/ext/lcmseqrun/src/problem_zero.c +439 -0
- data/ext/lcmseqrun/src/queue.c +533 -0
- data/ext/lcmseqrun/src/queue.h +182 -0
- data/ext/lcmseqrun/src/stdlib2.c +1350 -0
- data/ext/lcmseqrun/src/stdlib2.h +864 -0
- data/ext/lcmseqrun/src/trsact.c +747 -0
- data/ext/lcmseqrun/src/trsact.h +159 -0
- data/ext/lcmseqrun/src/vec.c +779 -0
- data/ext/lcmseqrun/src/vec.h +172 -0
- data/ext/lcmtransrun/extconf.rb +18 -0
- data/ext/lcmtransrun/lcmtransrun.cpp +264 -0
- data/ext/macerun/extconf.rb +20 -0
- data/ext/macerun/macerun.cpp +57 -0
- data/ext/macerun/src/aheap.c +217 -0
- data/ext/macerun/src/aheap.h +112 -0
- data/ext/macerun/src/itemset.c +491 -0
- data/ext/macerun/src/itemset.h +158 -0
- data/ext/macerun/src/mace.c +503 -0
- data/ext/macerun/src/problem.c +346 -0
- data/ext/macerun/src/problem.h +174 -0
- data/ext/macerun/src/queue.c +529 -0
- data/ext/macerun/src/queue.h +177 -0
- data/ext/macerun/src/sgraph.c +360 -0
- data/ext/macerun/src/sgraph.h +174 -0
- data/ext/macerun/src/stdlib2.c +993 -0
- data/ext/macerun/src/stdlib2.h +811 -0
- data/ext/macerun/src/vec.c +634 -0
- data/ext/macerun/src/vec.h +170 -0
- data/ext/sspcrun/extconf.rb +20 -0
- data/ext/sspcrun/src/_sspc.c +358 -0
- data/ext/sspcrun/src/aheap.c +545 -0
- data/ext/sspcrun/src/aheap.h +251 -0
- data/ext/sspcrun/src/base.c +92 -0
- data/ext/sspcrun/src/base.h +59 -0
- data/ext/sspcrun/src/fstar.c +496 -0
- data/ext/sspcrun/src/fstar.h +80 -0
- data/ext/sspcrun/src/grhfil.c +213 -0
- data/ext/sspcrun/src/itemset.c +713 -0
- data/ext/sspcrun/src/itemset.h +170 -0
- data/ext/sspcrun/src/problem.c +415 -0
- data/ext/sspcrun/src/problem.h +179 -0
- data/ext/sspcrun/src/queue.c +533 -0
- data/ext/sspcrun/src/queue.h +182 -0
- data/ext/sspcrun/src/sample.c +19 -0
- data/ext/sspcrun/src/sspc.c +598 -0
- data/ext/sspcrun/src/sspc2.c +491 -0
- data/ext/sspcrun/src/stdlib2.c +1482 -0
- data/ext/sspcrun/src/stdlib2.h +892 -0
- data/ext/sspcrun/src/trsact.c +817 -0
- data/ext/sspcrun/src/trsact.h +160 -0
- data/ext/sspcrun/src/vec.c +745 -0
- data/ext/sspcrun/src/vec.h +172 -0
- data/ext/sspcrun/sspcrun.cpp +54 -0
- data/lib/nysol/enumLcmEp.rb +338 -0
- data/lib/nysol/enumLcmEsp.rb +284 -0
- data/lib/nysol/enumLcmIs.rb +275 -0
- data/lib/nysol/enumLcmSeq.rb +143 -0
- data/lib/nysol/items.rb +201 -0
- data/lib/nysol/seqDB.rb +256 -0
- data/lib/nysol/take.rb +39 -0
- data/lib/nysol/taxonomy.rb +113 -0
- data/lib/nysol/traDB.rb +257 -0
- metadata +239 -0
@@ -0,0 +1,157 @@
|
|
1
|
+
/* itemset search input/output common routines
|
2
|
+
25/Nov/2007 by Takeaki Uno e-mail:uno@nii.jp,
|
3
|
+
homepage: http://research.nii.ac.jp/~uno/index.html */
|
4
|
+
/* This program is available for only academic use, basically.
|
5
|
+
Anyone can modify this program, but he/she has to write down
|
6
|
+
the change of the modification on the top of the source code.
|
7
|
+
Neither contact nor appointment to Takeaki Uno is needed.
|
8
|
+
If one wants to re-distribute this code, please
|
9
|
+
refer the newest code, and show the link to homepage of
|
10
|
+
Takeaki Uno, to notify the news about the codes for the users. */
|
11
|
+
|
12
|
+
/* routines for itemset mining */
|
13
|
+
|
14
|
+
#ifndef _itemset_h_
|
15
|
+
#define _itemset_h_
|
16
|
+
|
17
|
+
#include"stdlib2.h"
|
18
|
+
#include"queue.h"
|
19
|
+
#define AHEAP_KEY_WEIGHT
|
20
|
+
#include"aheap.h"
|
21
|
+
|
22
|
+
|
23
|
+
typedef struct {
|
24
|
+
int a;
|
25
|
+
QUEUE itemset; // current operating itemset
|
26
|
+
QUEUE add; // for equisupport (hypercube decomposition)
|
27
|
+
int ub, lb; // upper/lower bounds for the itemset size
|
28
|
+
WEIGHT frq, pfrq, frq_ub, frq_lb; // upper/lower bounds for the frequency
|
29
|
+
WEIGHT rposi_lb, rposi_ub, posi_lb, posi_ub, nega_ub, nega_lb; // upper/lower bounds for the sum of positive/negative weights
|
30
|
+
WEIGHT setrule_lb; // frequency lower bound for set rule
|
31
|
+
double ratio, prob; // confidence and independent probability of the current pattern
|
32
|
+
double ratio_ub, ratio_lb, prob_ub, prob_lb; // upper/lower bounds for confidence and independent probability
|
33
|
+
QUEUE_INT target; // target item for rule mining
|
34
|
+
char *itemflag; // 1 if it is include in the pattern (and 2 if included in add)
|
35
|
+
WEIGHT *item_frq; // frequency of each item
|
36
|
+
WEIGHT total_weight; // total weight of the input database
|
37
|
+
int len_ub, len_lb; // upper/lower bounds for the length of the pattern
|
38
|
+
int gap_ub, gap_lb; // upper/lower bounds for the gaps in the pattern
|
39
|
+
LONG *sc; // #itemsets classified by the sizes
|
40
|
+
QUEUE_INT item_max, item_max_org; // (original) maximum item
|
41
|
+
AHEAP topk; // heap for topk mining. valid if topk->h is not NULL
|
42
|
+
int flag; // flag for various functions
|
43
|
+
PERM *perm; // permutation array for output itemset: item => original item
|
44
|
+
FILE *fp; // file pointer to the output file
|
45
|
+
char separator; // separator of items output
|
46
|
+
int progress;
|
47
|
+
LONG iters, iters2, iters3; //iterations
|
48
|
+
LONG solutions, solutions2; // number of solutions output
|
49
|
+
LONG outputs, outputs2; // #calls of ITEMSET_output_itemset or ITEMSET_solusion
|
50
|
+
LONG max_solutions; // maximum solutions to be output
|
51
|
+
void *X; // pointer to the original data
|
52
|
+
int dir; // direction flag for AGRAPH & SGRAPH
|
53
|
+
|
54
|
+
int multi_core; // number of processors
|
55
|
+
LONG *multi_iters, *multi_iters2, *multi_iters3; //iterations
|
56
|
+
LONG *multi_solutions, *multi_solutions2; // number of solutions output
|
57
|
+
LONG *multi_outputs, *multi_outputs2; // #calls of ITEMSET_output_itemset or ITEMSET_solusion
|
58
|
+
FILE2 *multi_fp; // output file2 pointer for multi-core mode
|
59
|
+
WEIGHT *set_weight; // the frequency of each prefix of current itemset
|
60
|
+
QUEUE **set_occ; // the occurrence of each prefix of current itemset
|
61
|
+
|
62
|
+
#ifdef MULTI_CORE
|
63
|
+
pthread_spinlock_t lock_counter; // couneter locker for jump counter
|
64
|
+
pthread_spinlock_t lock_sc; // couneter locker for score counter
|
65
|
+
pthread_spinlock_t lock_output; // couneter locker for #output
|
66
|
+
#endif
|
67
|
+
} ITEMSET;
|
68
|
+
|
69
|
+
/* Bit flags stored in ITEMSET.flag (one bit each, written as shifts) */

#define ITEMSET_ITERS2           (1<<2)   // output #iters2
#define ITEMSET_PRE_FREQ         (1<<3)   // output frequency preceding each itemset
#define ITEMSET_FREQ             (1<<4)   // output frequency following each itemset
#define ITEMSET_ALL              (1<<5)   // concat all combinations of "add" to each itemset

#define ITEMSET_TRSACT_ID        (1<<6)   // output transaction IDs in occurrences
#define ITEMSET_OUTPUT_EDGE      (1<<7)   // output itemset as edge set (refer AGRAPH)
#define ITEMSET_IGNORE_BOUND     (1<<8)   // ignore constraint for frequency
#define ITEMSET_RM_DUP_TRSACT    (1<<9)   // remove duplicated transaction IDs
#define ITEMSET_MULTI_OCC_PRINT  (1<<10)  // print each component of occ
  // TRSACT_ID+MULTI_OCC_PRINT means print first two components of occ
#define ITEMSET_NOT_ITEMSET      (1<<11)  // do not print itemset to the output file
#define ITEMSET_RULE_SUPP        (1<<12)  // output confidence and item frequency by absolute value
#define ITEMSET_OUTPUT_POSINEGA  (1<<13)  // output negative/positive frequencies
#define ITEMSET_MULTI_OUTPUT     (1<<14)  // for multi-core mode
#define ITEMSET_USE_ORG          (1<<15)  // use item_max_org for the allocation size
#define ITEMSET_ITEMFRQ          (1<<16)  // allocate item_frq
#define ITEMSET_ADD              (1<<17)  // allocate add

#define ITEMSET_RULE_FRQ         (1<<18)
#define ITEMSET_RULE_INFRQ       (1<<19)
#define ITEMSET_RULE_RFRQ        (1<<20)
#define ITEMSET_RULE_RINFRQ      (1<<21)
#define ITEMSET_RFRQ             (1<<22)
#define ITEMSET_RINFRQ           (1<<23)
#define ITEMSET_POSI_RATIO       (1<<24)
#define ITEMSET_SET_RULE         (1<<27)

#define ITEMSET_APPEND           (1<<28)  // append the output to the file
#define ITEMSET_RULE_ADD         (1<<29)  // append items in add to the solution, for rule output

/* Mask for "any rule flag is set". ITEMSET_RFRQ and ITEMSET_RINFRQ are not
   part of this mask (a variant that included them was commented out). */
#define ITEMSET_RULE (ITEMSET_RULE_FRQ | ITEMSET_RULE_INFRQ | ITEMSET_RULE_RFRQ | ITEMSET_RULE_RINFRQ | ITEMSET_SET_RULE)

/* Progress-report period used by ITEMSET_output_itemset; may be predefined by the includer */
#ifndef ITEMSET_INTERVAL
#define ITEMSET_INTERVAL 500000
#endif
|
108
|
+
|
109
|
+
/* Output information about ITEMSET structure. flag&1: print frequency constraint */
|
110
|
+
void ITEMSET_print (ITEMSET *II, int flag);
|
111
|
+
|
112
|
+
/* topk.end>0 => initialize heap for topk mining */
|
113
|
+
/* all pointers will be set to 0, but not for */
|
114
|
+
/* if topK mining, set topk.end to "K" */
|
115
|
+
void ITEMSET_init (ITEMSET *I);
|
116
|
+
void ITEMSET_alloc (ITEMSET *I, char *fname, PERM *perm, QUEUE_INT item_max, size_t item_max_org);
|
117
|
+
void ITEMSET_end (ITEMSET *I);
|
118
|
+
|
119
|
+
/* sum the counters computed by each thread */
|
120
|
+
void ITEMSET_merge_counters (ITEMSET *I);
|
121
|
+
|
122
|
+
/*******************************************************************/
|
123
|
+
/* output at the termination of the algorithm */
|
124
|
+
/* print #of itemsets of size k, for each k */
|
125
|
+
/*******************************************************************/
|
126
|
+
void ITEMSET_last_output (ITEMSET *I);
|
127
|
+
|
128
|
+
/* output frequency, coverage */
|
129
|
+
void ITEMSET_output_frequency (ITEMSET *I, int core_id);
|
130
|
+
|
131
|
+
/* output an itemset to the output file */
|
132
|
+
void ITEMSET_output_itemset (ITEMSET *I, QUEUE *occ, int core_id);
|
133
|
+
|
134
|
+
/* output itemsets with adding all combination of "add"
|
135
|
+
at the first call, i has to be "add->t" */
|
136
|
+
void ITEMSET_solution (ITEMSET *I, QUEUE *occ, int core_id);
|
137
|
+
|
138
|
+
/*************************************************************************/
|
139
|
+
/* ourput a rule */
|
140
|
+
/*************************************************************************/
|
141
|
+
void ITEMSET_output_rule (ITEMSET *I, QUEUE *occ, double p1, double p2, size_t item, int core_id);
|
142
|
+
|
143
|
+
/*************************************************************************/
|
144
|
+
/* check all rules for a pair of itemset and item */
|
145
|
+
/*************************************************************************/
|
146
|
+
void ITEMSET_check_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, size_t item, int core_id);
|
147
|
+
|
148
|
+
/*************************************************************************/
|
149
|
+
/* check all rules for an itemset and all items */
|
150
|
+
/*************************************************************************/
|
151
|
+
void ITEMSET_check_all_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, QUEUE *jump, WEIGHT total, int core_id);
|
152
|
+
|
153
|
+
#endif
|
154
|
+
|
155
|
+
|
156
|
+
|
157
|
+
|
@@ -0,0 +1,522 @@
|
|
1
|
+
/* itemset search input/output common routines
|
2
|
+
25/Nov/2007 by Takeaki Uno e-mail:uno@nii.jp,
|
3
|
+
homepage: http://research.nii.ac.jp/~uno/index.html */
|
4
|
+
/* This program is available for only academic use, basically.
|
5
|
+
Anyone can modify this program, but he/she has to write down
|
6
|
+
the change of the modification on the top of the source code.
|
7
|
+
Neither contact nor appointment to Takeaki Uno is needed.
|
8
|
+
If one wants to re-distribute this code, please
|
9
|
+
refer the newest code, and show the link to homepage of
|
10
|
+
Takeaki Uno, to notify the news about the codes for the users. */
|
11
|
+
|
12
|
+
/* routines for itemset mining */
|
13
|
+
|
14
|
+
#ifndef _itemset_c_
|
15
|
+
#define _itemset_c_
|
16
|
+
|
17
|
+
#include"itemset.h"
|
18
|
+
#include"queue.c"
|
19
|
+
#include"aheap.c"
|
20
|
+
|
21
|
+
/* flush the write buffer, available for multi-core mode */
|
22
|
+
void ITEMSET_flush (ITEMSET *I, FILE2 *fp){
|
23
|
+
if ( !(I->flag&ITEMSET_MULTI_OUTPUT) || (fp->buf-fp->buf_org) > FILE2_BUFSIZ/2 ){
|
24
|
+
SPIN_LOCK(I->multi_core, I->lock_output);
|
25
|
+
FILE2_flush (fp);
|
26
|
+
SPIN_UNLOCK(I->multi_core, I->lock_output);
|
27
|
+
}
|
28
|
+
}
|
29
|
+
|
30
|
+
/* Output information about ITEMSET structure. flag&1: print frequency constraint */
|
31
|
+
void ITEMSET_print (ITEMSET *I, int flag){
|
32
|
+
if ( I->lb>0 || I->ub<INTHUGE ){
|
33
|
+
if ( I->lb > 0 ) print_err ("%d <= ", I->lb);
|
34
|
+
print_err ("itemsets ");
|
35
|
+
if ( I->ub < INTHUGE ) print_err (" <= %d\n", I->ub);
|
36
|
+
print_err ("\n");
|
37
|
+
}
|
38
|
+
if ( flag&1 ){
|
39
|
+
if ( I->frq_lb > -WEIGHTHUGE ) print_err (WEIGHTF" <=", I->frq_lb);
|
40
|
+
print_err (" frequency ");
|
41
|
+
if ( I->frq_ub < WEIGHTHUGE ) print_err (" <="WEIGHTF, I->frq_ub);
|
42
|
+
print_err ("\n");
|
43
|
+
}
|
44
|
+
}
|
45
|
+
|
46
|
+
/* ITEMSET initialization */
|
47
|
+
void ITEMSET_init (ITEMSET *I){
|
48
|
+
I->flag = 0;
|
49
|
+
I->progress = 0;
|
50
|
+
I->iters = I->iters2 = I->iters3 = 0;
|
51
|
+
I->solutions = I->solutions2 = I->max_solutions = I->outputs = I->outputs2 = 0;
|
52
|
+
I->topk.end = 0;
|
53
|
+
I->item_max = I->item_max_org = 0;
|
54
|
+
I->ub = I->len_ub = I->gap_ub = INTHUGE;
|
55
|
+
I->lb = I->len_lb = I->gap_lb = 0;
|
56
|
+
I->frq = I->pfrq = I->total_weight = 0;
|
57
|
+
I->ratio = I->prob = 0.0;
|
58
|
+
I->posi_ub = I->nega_ub = I->frq_ub = WEIGHTHUGE;
|
59
|
+
I->posi_lb = I->nega_lb = I->frq_lb = I->setrule_lb = -WEIGHTHUGE;
|
60
|
+
I->dir = 0;
|
61
|
+
I->target = INTHUGE;
|
62
|
+
I->prob_ub = I->ratio_ub = I->rposi_ub = 1;
|
63
|
+
I->prob_lb = I->ratio_lb = I->rposi_lb = 0;
|
64
|
+
I->itemflag = NULL;
|
65
|
+
I->perm = NULL;
|
66
|
+
I->item_frq = NULL;
|
67
|
+
I->sc = NULL;
|
68
|
+
I->X = NULL;
|
69
|
+
I->fp = NULL;
|
70
|
+
I->separator = ' ';
|
71
|
+
I->topk = INIT_AHEAP;
|
72
|
+
I->itemset = I->add = INIT_QUEUE;
|
73
|
+
I->set_weight = NULL;
|
74
|
+
I->set_occ = NULL;
|
75
|
+
|
76
|
+
I->multi_iters = I->multi_iters2 = I->multi_iters3 = NULL;
|
77
|
+
I->multi_outputs = I->multi_outputs2 = NULL;
|
78
|
+
I->multi_solutions = I->multi_solutions2 = NULL;
|
79
|
+
I->multi_fp = NULL;
|
80
|
+
|
81
|
+
I->multi_core = 0;
|
82
|
+
}
|
83
|
+
|
84
|
+
|
85
|
+
/* second initialization
|
86
|
+
topk.end>0 => initialize heap for topk mining */
|
87
|
+
/* all pointers will be set to 0, but not for */
|
88
|
+
/* if topK mining, set topk.end to "K" */
|
89
|
+
void ITEMSET_alloc (ITEMSET *I, char *fname, PERM *perm, QUEUE_INT item_max, size_t item_max_org){
|
90
|
+
LONG i;
|
91
|
+
size_t siz = (I->flag&ITEMSET_USE_ORG)?item_max_org+2: item_max+2;
|
92
|
+
int j;
|
93
|
+
|
94
|
+
I->prob = I->ratio = 1.0;
|
95
|
+
I->frq = 0;
|
96
|
+
I->perm = perm;
|
97
|
+
if ( I->topk.end>0 ){
|
98
|
+
AHEAP_alloc (&I->topk, I->topk.end);
|
99
|
+
FLOOP (i, 0, I->topk.end) AHEAP_chg (&I->topk, (AHEAP_ID)i, -WEIGHTHUGE);
|
100
|
+
I->frq_lb = -WEIGHTHUGE;
|
101
|
+
} else I->topk.v = NULL;
|
102
|
+
QUEUE_alloc (&I->itemset, (QUEUE_ID)siz); I->itemset.end = (QUEUE_ID)siz;
|
103
|
+
if ( I->flag&ITEMSET_ADD ) QUEUE_alloc (&I->add, (QUEUE_ID)siz);
|
104
|
+
calloc2 (I->sc, siz+2, goto ERR);
|
105
|
+
if ( I->flag&ITEMSET_SET_RULE ){
|
106
|
+
calloc2 (I->set_weight, siz, goto ERR);
|
107
|
+
if ( I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT) )
|
108
|
+
calloc2 (I->set_occ, siz, goto ERR);
|
109
|
+
}
|
110
|
+
I->iters = I->iters2 = I->solutions = 0;
|
111
|
+
I->item_max = item_max;
|
112
|
+
I->item_max_org = (QUEUE_INT)item_max_org;
|
113
|
+
if ( fname ){
|
114
|
+
#ifdef _FILE2_LOAD_FROM_MEMORY_
|
115
|
+
I->fp = (FILE *)1;
|
116
|
+
#else
|
117
|
+
if ( strcmp (fname, "-") == 0 ) I->fp = stdout;
|
118
|
+
else fopen2 (I->fp, fname, (I->flag&ITEMSET_APPEND)?"a":"w", goto ERR);
|
119
|
+
#endif
|
120
|
+
} else I->fp = 0;
|
121
|
+
if ( I->flag&ITEMSET_ITEMFRQ )
|
122
|
+
malloc2 (I->item_frq, item_max+2, goto ERR);
|
123
|
+
if ( I->flag&ITEMSET_RULE ){
|
124
|
+
calloc2 (I->itemflag, item_max+2, goto ERR);
|
125
|
+
}
|
126
|
+
I->total_weight = 1;
|
127
|
+
j = MAX(I->multi_core,1);
|
128
|
+
calloc2 (I->multi_iters, j*7, goto ERR);
|
129
|
+
I->multi_iters2 = I->multi_iters + j;
|
130
|
+
I->multi_iters3 = I->multi_iters2 + j;
|
131
|
+
I->multi_outputs = I->multi_iters3 + j;
|
132
|
+
I->multi_outputs2 = I->multi_outputs + j;
|
133
|
+
I->multi_solutions = I->multi_outputs2 + j;
|
134
|
+
I->multi_solutions2 = I->multi_solutions + j;
|
135
|
+
|
136
|
+
calloc2 (I->multi_fp, j, goto ERR);
|
137
|
+
FLOOP (i, 0, j)
|
138
|
+
FILE2_open_ (I->multi_fp[i], I->fp, goto ERR);
|
139
|
+
#ifdef MULTI_CORE
|
140
|
+
if ( I->multi_core > 0 ){
|
141
|
+
pthread_spin_init (&I->lock_counter, PTHREAD_PROCESS_PRIVATE);
|
142
|
+
pthread_spin_init (&I->lock_sc, PTHREAD_PROCESS_PRIVATE);
|
143
|
+
pthread_spin_init (&I->lock_output, PTHREAD_PROCESS_PRIVATE);
|
144
|
+
}
|
145
|
+
#endif
|
146
|
+
return;
|
147
|
+
ERR:;
|
148
|
+
ITEMSET_end (I);
|
149
|
+
EXIT;
|
150
|
+
}
|
151
|
+
|
152
|
+
/* sum the counters computed by each thread */
|
153
|
+
void ITEMSET_merge_counters (ITEMSET *I){
|
154
|
+
int i;
|
155
|
+
FLOOP (i, 0, MAX(I->multi_core,1)){
|
156
|
+
I->iters += I->multi_iters[i];
|
157
|
+
I->iters2 += I->multi_iters2[i];
|
158
|
+
I->iters3 += I->multi_iters3[i];
|
159
|
+
I->outputs += I->multi_outputs[i];
|
160
|
+
I->outputs2 += I->multi_outputs2[i];
|
161
|
+
I->solutions += I->multi_solutions[i];
|
162
|
+
I->solutions2 += I->multi_solutions2[i];
|
163
|
+
if ( I->multi_fp[i].buf ) FILE2_flush_last (&I->multi_fp[i]);
|
164
|
+
}
|
165
|
+
ARY_FILL (I->multi_iters, 0, MAX(I->multi_core,1)*7, 0);
|
166
|
+
}
|
167
|
+
|
168
|
+
/*******************************************************************/
|
169
|
+
/* termination of ITEMSET */
|
170
|
+
/*******************************************************************/
|
171
|
+
void ITEMSET_end (ITEMSET *I){
|
172
|
+
int i;
|
173
|
+
QUEUE_end (&I->itemset);
|
174
|
+
QUEUE_end (&I->add);
|
175
|
+
AHEAP_end (&I->topk);
|
176
|
+
#ifndef _FILE2_LOAD_FROM_MEMORY_
|
177
|
+
fclose2 (I->fp);
|
178
|
+
#endif
|
179
|
+
mfree (I->sc, I->item_frq, I->itemflag, I->perm, I->set_weight, I->set_occ);
|
180
|
+
|
181
|
+
if ( I->multi_fp )
|
182
|
+
FLOOP (i, 0, MAX(I->multi_core,1)) free2 (I->multi_fp[i].buf_org);
|
183
|
+
mfree (I->multi_iters, I->multi_fp);
|
184
|
+
#ifdef MULTI_CORE
|
185
|
+
if ( I->multi_core>0 ){
|
186
|
+
pthread_spin_destroy(&I->lock_counter);
|
187
|
+
pthread_spin_destroy(&I->lock_sc);
|
188
|
+
pthread_spin_destroy(&I->lock_output);
|
189
|
+
}
|
190
|
+
#endif
|
191
|
+
ITEMSET_init (I);
|
192
|
+
}
|
193
|
+
|
194
|
+
/*******************************************************************/
|
195
|
+
/* output at the termination of the algorithm */
|
196
|
+
/* print #of itemsets of size k, for each k */
|
197
|
+
/*******************************************************************/
|
198
|
+
void ITEMSET_last_output (ITEMSET *I){
|
199
|
+
QUEUE_ID i;
|
200
|
+
LONG n=0, nn=0;
|
201
|
+
|
202
|
+
ITEMSET_merge_counters (I);
|
203
|
+
if ( !(I->flag&SHOW_MESSAGE) ) return; // "no message" is specified
|
204
|
+
if ( I->topk.end > 0 ){
|
205
|
+
i = AHEAP_findmin_head (&I->topk);
|
206
|
+
fprint_WEIGHT (stdout, AHEAP_H (I->topk, i));
|
207
|
+
printf ("\n");
|
208
|
+
return;
|
209
|
+
}
|
210
|
+
FLOOP (i, 0, I->itemset.end+1){
|
211
|
+
n += I->sc[i];
|
212
|
+
if ( I->sc[i] != 0 ) nn = i;
|
213
|
+
}
|
214
|
+
if ( n!=0 ){
|
215
|
+
printf (LONGF "\n", n);
|
216
|
+
FLOOP (i, 0, nn+1) printf (LONGF "\n", I->sc[i]);
|
217
|
+
}
|
218
|
+
print_err ("iters=" LONGF, I->iters);
|
219
|
+
if ( I->flag&ITEMSET_ITERS2 ) print_err (", iters2=" LONGF, I->iters2);
|
220
|
+
print_err ("\n");
|
221
|
+
}
|
222
|
+
|
223
|
+
/* output frequency, coverage */
|
224
|
+
void ITEMSET_output_frequency (ITEMSET *I, int core_id){
|
225
|
+
FILE2 *fp = &I->multi_fp[core_id];
|
226
|
+
if ( I->flag&(ITEMSET_FREQ+ITEMSET_PRE_FREQ) ){
|
227
|
+
if ( I->flag&ITEMSET_FREQ ) FILE2_putc (fp, ' ');
|
228
|
+
FILE2_print_WEIGHT (fp, I->frq, 4, '(');
|
229
|
+
FILE2_putc (fp, ')');
|
230
|
+
if ( I->flag&ITEMSET_PRE_FREQ ) FILE2_putc (fp, ' ');
|
231
|
+
}
|
232
|
+
if ( I->flag&ITEMSET_OUTPUT_POSINEGA ){ // output positive sum, negative sum in the occurrence
|
233
|
+
FILE2_putc (fp, ' ');
|
234
|
+
FILE2_print_WEIGHT (fp, I->pfrq, 4, '(');
|
235
|
+
FILE2_print_WEIGHT (fp, I->pfrq-I->frq, 4, ',');
|
236
|
+
FILE2_print_WEIGHT (fp, I->pfrq/(2*I->pfrq-I->frq), 4, ',');
|
237
|
+
FILE2_putc (fp, ')');
|
238
|
+
}
|
239
|
+
}
|
240
|
+
|
241
|
+
#ifdef _trsact_h_
|
242
|
+
/* Output the occurrence list of the current itemset.
   I       : itemset structure; I->X is used as a TRSACT
   occ     : occurrences, traversed in steps of TT->occ_unit
   core_id : index of the per-core output stream in I->multi_fp
   With ITEMSET_RM_DUP_TRSACT an entry whose first component equals that of
   the previous entry is skipped. The first component is mapped through
   TT->trperm when that array exists. ITEMSET_MULTI_OCC_PRINT alone prints
   the remaining components of each entry as well; together with
   ITEMSET_TRSACT_ID only the second component is added. */
void ITEMSET_output_occ (ITEMSET *I, QUEUE *occ, int core_id){
  QUEUE_ID seen;
  QUEUE_INT *cur;
  FILE2 *fp = &I->multi_fp[core_id];
  TRSACT *TT = (TRSACT *)(I->X);
  VEC_ID k, prev = TT->rows_org;
  int mode = I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT);

  seen = 0;
  MQUE_FLOOP_ (*occ, cur, TT->occ_unit){
    if ( (I->flag&ITEMSET_RM_DUP_TRSACT)==0 || *cur != prev ){
      FILE2_print_int (fp, TT->trperm? TT->trperm[*cur]: *cur, I->separator);
      if ( mode == ITEMSET_MULTI_OCC_PRINT ){
        FLOOP (k, 1, (VEC_ID)(TT->occ_unit/sizeof(QUEUE_INT)))
          FILE2_print_int (fp, cur[k], I->separator);
      } else if ( mode == (ITEMSET_MULTI_OCC_PRINT+ITEMSET_TRSACT_ID) ){
        FILE2_print_int (fp, cur[1], I->separator);
      }
    }
    prev = *cur;
    seen++;
    if ( seen%256==0 ) ITEMSET_flush (I, fp);   // periodic flush, every 256 entries
  }
#ifdef _FILE2_LOAD_FROM_MEMORY_
  *((int *)__write_to_memory__) = INTHUGE;
  __write_to_memory__ = (char *)(((int *)__write_to_memory__) + 1);
#else
  FILE2_putc (fp, '\n');
#endif
}
|
270
|
+
#endif
|
271
|
+
|
272
|
+
/* output an itemset to the output file */
|
273
|
+
void ITEMSET_output_itemset (ITEMSET *I, QUEUE *occ, int core_id){
|
274
|
+
QUEUE_ID i;
|
275
|
+
QUEUE_INT e;
|
276
|
+
#ifdef _agraph_h_
|
277
|
+
QUEUE_INT ee;
|
278
|
+
#endif
|
279
|
+
|
280
|
+
FLOOP (i, 0, I->itemset.t){ // add by NYSOL
|
281
|
+
if( I->itemset.v[i]==0) return; // add by NYSOL
|
282
|
+
} // add by NYSOL
|
283
|
+
|
284
|
+
FILE2 *fp = &I->multi_fp[core_id];
|
285
|
+
|
286
|
+
I->multi_outputs[core_id]++;
|
287
|
+
if ( (I->flag&SHOW_PROGRESS ) && (I->multi_outputs[core_id]%(ITEMSET_INTERVAL) == 0) )
|
288
|
+
print_err ("---- " LONGF " solutions in " LONGF " candidates\n",
|
289
|
+
I->multi_solutions[core_id], I->multi_outputs[core_id]);
|
290
|
+
if ( I->itemset.t < I->lb || I->itemset.t > I->ub ) return;
|
291
|
+
if ( (I->flag&ITEMSET_IGNORE_BOUND)==0 && (I->frq < I->frq_lb || I->frq > I->frq_ub) ) return;
|
292
|
+
if ( (I->flag&ITEMSET_IGNORE_BOUND)==0 && (I->pfrq < I->posi_lb || I->pfrq > I->posi_ub || (I->frq - I->pfrq) > I->nega_ub || (I->frq - I->pfrq) < I->nega_lb) ) return;
|
293
|
+
|
294
|
+
I->multi_solutions[core_id]++;
|
295
|
+
if ( I->max_solutions>0 && I->multi_solutions[core_id] > I->max_solutions ){
|
296
|
+
ITEMSET_last_output (I);
|
297
|
+
ERROR_MES = "reached to maximum number of solutions";
|
298
|
+
EXIT;
|
299
|
+
}
|
300
|
+
if ( I->topk.v ){
|
301
|
+
e = AHEAP_findmin_head (&(I->topk));
|
302
|
+
if ( I->frq > AHEAP_H (I->topk, e) ){
|
303
|
+
AHEAP_chg (&(I->topk), e, I->frq);
|
304
|
+
e = AHEAP_findmin_head (&(I->topk));
|
305
|
+
I->frq_lb = AHEAP_H (I->topk, e);
|
306
|
+
}
|
307
|
+
} else if ( I->fp ){
|
308
|
+
if ( I->flag&ITEMSET_PRE_FREQ ) ITEMSET_output_frequency (I, core_id);
|
309
|
+
if ( (I->flag & ITEMSET_NOT_ITEMSET) == 0 ){
|
310
|
+
#ifdef _agraph_h_
|
311
|
+
if ( I->flag&ITEMSET_OUTPUT_EDGE ){
|
312
|
+
FLOOP (i, 0, I->itemset.t){
|
313
|
+
e = I->itemset.v[i];
|
314
|
+
ee = AGRAPH_INC_FROM(*((AGRAPH *)(I->X)), e, I->dir);
|
315
|
+
FILE2_print_int (fp, I->perm? I->perm[ee]: ee, '(' );
|
316
|
+
ee = AGRAPH_INC_TO(*((AGRAPH *)(I->X)), e, I->dir);
|
317
|
+
FILE2_print_int (fp, I->perm? I->perm[ee]: ee, I->separator);
|
318
|
+
#ifdef _FILE2_LOAD_FROM_MEMORY_
|
319
|
+
FILE2_putc (fp, ')');
|
320
|
+
#endif
|
321
|
+
if ( i<I->itemset.t-1 ) FILE2_putc (fp, I->separator);
|
322
|
+
if ( (i+1)%256==0 ) ITEMSET_flush (I, fp);
|
323
|
+
}
|
324
|
+
goto NEXT;
|
325
|
+
}
|
326
|
+
#endif
|
327
|
+
FLOOP (i, 0, I->itemset.t){
|
328
|
+
e = I->itemset.v[i];
|
329
|
+
FILE2_print_int (fp, I->perm? I->perm[e]: e, i==0? 0: I->separator);
|
330
|
+
if ( (i+1)%256==0 ) ITEMSET_flush (I, fp);
|
331
|
+
}
|
332
|
+
#ifdef _agraph_h_
|
333
|
+
NEXT:;
|
334
|
+
#endif
|
335
|
+
}
|
336
|
+
if ( !(I->flag&ITEMSET_PRE_FREQ) ) ITEMSET_output_frequency (I, core_id);
|
337
|
+
if ( ((I->flag & ITEMSET_NOT_ITEMSET) == 0) || (I->flag&ITEMSET_FREQ) || (I->flag&ITEMSET_PRE_FREQ) ){
|
338
|
+
#ifdef _FILE2_LOAD_FROM_MEMORY_
|
339
|
+
FILE2_WRITE_MEMORY (QUEUE_INT, FILE2_LOAD_FROM_MEMORY_END);
|
340
|
+
#else
|
341
|
+
FILE2_putc (fp, '\n');
|
342
|
+
#endif
|
343
|
+
}
|
344
|
+
#ifdef _trsact_h_
|
345
|
+
if (I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT)) ITEMSET_output_occ (I, occ, core_id);
|
346
|
+
#endif
|
347
|
+
}
|
348
|
+
I->sc[I->itemset.t]++;
|
349
|
+
ITEMSET_flush (I, fp);
|
350
|
+
}
|
351
|
+
|
352
|
+
/* output itemsets with adding all combination of "add"
|
353
|
+
at the first call, i has to be "add->t" */
|
354
|
+
void ITEMSET_solution_iter (ITEMSET *I, QUEUE *occ, int core_id){
|
355
|
+
QUEUE_ID t=I->add.t;
|
356
|
+
if ( I->itemset.t > I->ub ) return;
|
357
|
+
ITEMSET_output_itemset (I, occ, core_id);
|
358
|
+
if ( ERROR_MES ) return;
|
359
|
+
BLOOP (I->add.t, I->add.t, 0){
|
360
|
+
QUE_INS (I->itemset, I->add.v[I->add.t]);
|
361
|
+
ITEMSET_solution_iter (I, occ, core_id);
|
362
|
+
if ( ERROR_MES ) return;
|
363
|
+
I->itemset.t--;
|
364
|
+
}
|
365
|
+
I->add.t = t;
|
366
|
+
}
|
367
|
+
|
368
|
+
/* Output the current itemset together with the items of "add".
   I       : itemset structure
   occ     : occurrences handed on to the output routines
   core_id : index of the per-core counters and output stream
   Nothing is done when the itemset is already larger than I->ub.
   Without ITEMSET_ALL the whole of "add" is appended, the result is output
   once, and the itemset is shrunk back. With ITEMSET_ALL every combination
   is output through ITEMSET_solution_iter when an output file or the top-k
   heap exists; otherwise only the size counters sc[] are increased, by the
   number of combinations of each size. */
void ITEMSET_solution (ITEMSET *I, QUEUE *occ, int core_id){
  QUEUE_ID k;
  LONG comb;

  if ( I->itemset.t > I->ub ) return;

  if ( !(I->flag & ITEMSET_ALL) ){
    FLOOP (k, 0, I->add.t) QUE_INS (I->itemset, I->add.v[k]);
    ITEMSET_output_itemset (I, occ, core_id);
    I->itemset.t -= I->add.t;
    return;
  }

  if ( I->fp || I->topk.v ){
    ITEMSET_solution_iter (I, occ, core_id);
    return;
  }

  // counting only: comb runs through C(add.t, k) for k = 0 .. add.t
  comb = 1;
  FLOOP (k, 0, I->add.t+1){
    I->sc[I->itemset.t+k] += comb;
    comb = comb*(I->add.t-k)/(k+1);
  }
}
|
386
|
+
|
387
|
+
/*************************************************************************/
|
388
|
+
/* ourput a rule */
|
389
|
+
/*************************************************************************/
|
390
|
+
void ITEMSET_output_rule (ITEMSET *I, QUEUE *occ, double p1, double p2, size_t item, int core_id){
|
391
|
+
FILE2 *fp = &I->multi_fp[core_id];
|
392
|
+
if ( fp->fp && !(I->topk.v) ){
|
393
|
+
FILE2_print_real (fp, p1, 4, '(');
|
394
|
+
FILE2_print_real (fp, p2, 4, ',');
|
395
|
+
FILE2_putc (fp, ')');
|
396
|
+
FILE2_print_int (fp, I->perm[item], I->separator);
|
397
|
+
FILE2_puts (fp, " <= ");
|
398
|
+
}
|
399
|
+
if ( I->flag & ITEMSET_RULE ){
|
400
|
+
if ( I->flag & ITEMSET_RULE_ADD ) ITEMSET_solution (I, occ, core_id);
|
401
|
+
else ITEMSET_output_itemset (I, occ, core_id);
|
402
|
+
} else ITEMSET_solution (I, occ, core_id);
|
403
|
+
}
|
404
|
+
/*************************************************************************/
|
405
|
+
/* check all rules for a pair of itemset and item */
|
406
|
+
/*************************************************************************/
|
407
|
+
/*
 * Decide whether the rule "item <= current itemset" satisfies the ratio
 * thresholds and, if so, output it.
 *
 *   I       : itemset state (flags, thresholds, I->frq, I->item_frq, ...)
 *   w       : per-item weights; w[item]/I->frq is the tested ratio
 *   occ     : occurrence queue, passed through to ITEMSET_output_rule
 *   item    : internal item index of the rule head
 *   core_id : index of the calling core, passed through
 *
 * Items with I->itemflag[item]==1 are skipped. A weight at or below
 * -WEIGHTHUGE is treated as ratio 0.
 */
void ITEMSET_check_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, size_t item, int core_id){
  double ratio = w[item]/I->frq;
  double shown_ratio, shown_frq;

  if ( I->itemflag[item]==1 ) return;
  if ( w[item] <= -WEIGHTHUGE ) ratio = 0;

    // values to print: with ITEMSET_RULE_SUPP the raw weight replaces the
    // ratio and the item frequency is scaled by the total weight
  shown_ratio = ratio;
  shown_frq = I->item_frq[item];
  if ( I->flag & ITEMSET_RULE_SUPP ){
    shown_ratio = w[item];
    shown_frq *= I->total_weight;
  }

    // absolute thresholds on the ratio
  if ( I->flag & (ITEMSET_RULE_FRQ+ITEMSET_RULE_INFRQ) ){
    if ( ((I->flag & ITEMSET_RULE_FRQ) && ratio < I->ratio_lb)
      || ((I->flag & ITEMSET_RULE_INFRQ) && ratio > I->ratio_ub) ) return;
      // NOTE(review): this branch prints the plain ratio, whereas the relative
      // branch below prints shown_ratio -- confirm the difference is intended
    ITEMSET_output_rule (I, occ, ratio, shown_frq, item, core_id);
    return;
  }

    // thresholds relative to the frequency of the item itself
  if ( I->flag & (ITEMSET_RULE_RFRQ+ITEMSET_RULE_RINFRQ) ){
    if ( ((I->flag & ITEMSET_RULE_RFRQ) && (1-ratio) > I->ratio_lb * (1-I->item_frq[item]))
      || ((I->flag & ITEMSET_RULE_RINFRQ) && ratio > I->ratio_ub * I->item_frq[item]) ) return;
    ITEMSET_output_rule (I, occ, shown_ratio, shown_frq, item, core_id);
  }
}
|
425
|
+
|
426
|
+
/*************************************************************************/
|
427
|
+
/* check all rules for an itemset and all items */
|
428
|
+
/*************************************************************************/
|
429
|
+
/*
 * Check the current itemset against the size and frequency bounds and, when
 * it passes, output it: as itemset=>itemset rules (ITEMSET_SET_RULE), as
 * item rules (ITEMSET_RULE), or as an ordinary solution.
 *
 *   I       : itemset state (flags, bounds, counters, output streams)
 *   w       : per-item weights, handed to ITEMSET_check_rule
 *   occ     : occurrence queue, passed through to the output routines
 *   jump    : queue of candidate items examined as rule heads
 *   total   : weight by which I->frq is divided for the relational test
 *   core_id : selects the per-core output stream I->multi_fp[core_id]
 *
 * Returns early, without output, when a bound is violated or as soon as
 * ERROR_MES is set by a callee.
 */
void ITEMSET_check_all_rule (ITEMSET *I, WEIGHT *w, QUEUE *occ, QUEUE *jump, WEIGHT total, int core_id){
  QUEUE_ID i, t;
  QUEUE_INT e, f=0, *x;
  WEIGHT d = I->frq/total;

    // checking out of range for itemset size and (posi/nega) frequency
  if ( I->itemset.t+I->add.t < I->lb || I->itemset.t>I->ub || (!(I->flag&ITEMSET_ALL) && I->itemset.t+I->add.t>I->ub)) return;
  if ( !(I->flag&ITEMSET_IGNORE_BOUND) && (I->frq < I->frq_lb || I->frq > I->frq_ub) ) return;
  if ( !(I->flag&ITEMSET_IGNORE_BOUND) && (I->pfrq < I->posi_lb || I->pfrq > I->posi_ub || (I->frq - I->pfrq) > I->nega_ub || (I->frq - I->pfrq) < I->nega_lb) ) return;

  if ( I->flag&ITEMSET_SET_RULE ){  // itemset->itemset rule for sequence mining
    FLOOP (i, 0, I->itemset.t-1){
      if ( I->frq/I->set_weight[i] >= I->setrule_lb && I->fp ){
        I->sc[i]++;
        if ( I->flag&ITEMSET_PRE_FREQ ) ITEMSET_output_frequency (I, core_id);
          // print the items, inserting " =>" right after position i
        FLOOP (t, 0, I->itemset.t){
          FILE2_print_int (&I->multi_fp[core_id], I->itemset.v[t], t?I->separator:0);
          if ( t == i ){
            FILE2_putc (&I->multi_fp[core_id], ' ');
            FILE2_putc (&I->multi_fp[core_id], '=');
            FILE2_putc (&I->multi_fp[core_id], '>');
          }
        }
        if ( !(I->flag&ITEMSET_PRE_FREQ) ) ITEMSET_output_frequency ( I, core_id);
        FILE2_putc (&I->multi_fp[core_id], ' ');
        FILE2_print_real (&I->multi_fp[core_id], I->frq/I->set_weight[i], 4, '(');
        FILE2_putc (&I->multi_fp[core_id], ')');
#ifdef _FILE2_LOAD_FROM_MEMORY_
        FILE2_WRITE_MEMORY (QUEUE_INT, FILE2_LOAD_FROM_MEMORY_END);
#else
        FILE2_putc (&I->multi_fp[core_id], '\n');
#endif
#ifdef _trsact_h_
        if ( I->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT) ){
          ITEMSET_output_occ (I, I->set_occ[i], core_id);
        }
#endif
        ITEMSET_flush (I, &I->multi_fp[core_id]);
      }
    }
  }

    // constraint of relational frequency
  if ( ((I->flag&ITEMSET_RFRQ)==0 || d >= I->prob_lb * I->prob )
      && ((I->flag&ITEMSET_RINFRQ)==0 || d <= I->prob * I->prob_ub) ){
    if ( I->flag&ITEMSET_RULE ){  //  rule mining routines
      if ( I->itemset.t == 0 ) return;
      if ( I->target < I->item_max ){
          // a target item is given: only that item, if present in jump,
          // is tried as a rule head
        MQUE_FLOOP (*jump, x){
          if ( *x == I->target ){
            ITEMSET_check_rule (I, w, occ, *x, core_id); if (ERROR_MES) return;
          }
        }
      } else if ( I->flag & (ITEMSET_RULE_FRQ + ITEMSET_RULE_RFRQ) ){
        if ( I->add.t>0 ){
            // try each item e of I->add as a rule head; while e is checked it
            // is taken out of add by overwriting its slot with the last item f
            // and shortening add by one
            // NOTE: on the error return below, add is left in that
            // temporarily modified state
          f = I->add.v[I->add.t-1]; t = I->add.t; I->add.t--;
          FLOOP (i, 0, t){
            e = I->add.v[i];
            I->add.v[i] = f;
            ITEMSET_check_rule (I, w, occ, e, core_id); if (ERROR_MES) return;
            I->add.v[i] = e;
          }
          I->add.t++;
        }
          // the error test belongs inside the loop body so that the scan
          // stops at the first failure, like the other loops of this function
        MQUE_FLOOP (*jump, x){
          ITEMSET_check_rule (I, w, occ, *x, core_id);
          if ( ERROR_MES ) return;
        }
      } else if ( I->flag & (ITEMSET_RULE_INFRQ + ITEMSET_RULE_RINFRQ) ){
          // infrequent rules: every item not flagged 1 is tried as a head
        FLOOP (i, 0, I->item_max){
          if ( I->itemflag[i] != 1 ){
            ITEMSET_check_rule (I, w, occ, i, core_id); if (ERROR_MES) return;
          }
        }
      }
    } else {  // usual mining (not rule mining)
      if ( I->fp && (I->flag&(ITEMSET_RFRQ+ITEMSET_RINFRQ))){
        FILE2_print_real (&I->multi_fp[core_id], d, 4, '[');
        FILE2_print_real (&I->multi_fp[core_id], I->prob, 4, ',');
        FILE2_putc (&I->multi_fp[core_id], ']');
      }
      ITEMSET_solution (I, occ, core_id);
    }
  }
}
|
521
|
+
|
522
|
+
#endif
|