nysol-take 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/mbiclique.rb +317 -0
- data/bin/mbipolish.rb +362 -0
- data/bin/mccomp.rb +235 -0
- data/bin/mclique.rb +295 -0
- data/bin/mclique2g.rb +105 -0
- data/bin/mcliqueInfo.rb +203 -0
- data/bin/mfriends.rb +202 -0
- data/bin/mgdiff.rb +252 -0
- data/bin/mhifriend.rb +456 -0
- data/bin/mhipolish.rb +465 -0
- data/bin/mitemset.rb +168 -0
- data/bin/mpal.rb +410 -0
- data/bin/mpolishing.rb +399 -0
- data/bin/msequence.rb +165 -0
- data/bin/mtra2g.rb +476 -0
- data/bin/mtra2gc.rb +360 -0
- data/ext/grhfilrun/extconf.rb +12 -0
- data/ext/grhfilrun/grhfilrun.c +85 -0
- data/ext/grhfilrun/src/_sspc.c +358 -0
- data/ext/grhfilrun/src/aheap.c +545 -0
- data/ext/grhfilrun/src/aheap.h +251 -0
- data/ext/grhfilrun/src/base.c +92 -0
- data/ext/grhfilrun/src/base.h +59 -0
- data/ext/grhfilrun/src/fstar.c +497 -0
- data/ext/grhfilrun/src/fstar.h +80 -0
- data/ext/grhfilrun/src/grhfil.c +214 -0
- data/ext/grhfilrun/src/itemset.c +713 -0
- data/ext/grhfilrun/src/itemset.h +170 -0
- data/ext/grhfilrun/src/problem.c +415 -0
- data/ext/grhfilrun/src/problem.h +179 -0
- data/ext/grhfilrun/src/queue.c +533 -0
- data/ext/grhfilrun/src/queue.h +182 -0
- data/ext/grhfilrun/src/sample.c +19 -0
- data/ext/grhfilrun/src/sspc.c +597 -0
- data/ext/grhfilrun/src/sspc2.c +491 -0
- data/ext/grhfilrun/src/stdlib2.c +1482 -0
- data/ext/grhfilrun/src/stdlib2.h +892 -0
- data/ext/grhfilrun/src/trsact.c +817 -0
- data/ext/grhfilrun/src/trsact.h +160 -0
- data/ext/grhfilrun/src/vec.c +745 -0
- data/ext/grhfilrun/src/vec.h +172 -0
- data/ext/lcmrun/extconf.rb +20 -0
- data/ext/lcmrun/lcmrun.cpp +99 -0
- data/ext/lcmrun/src/aheap.c +216 -0
- data/ext/lcmrun/src/aheap.h +111 -0
- data/ext/lcmrun/src/base.c +92 -0
- data/ext/lcmrun/src/base.h +59 -0
- data/ext/lcmrun/src/itemset.c +496 -0
- data/ext/lcmrun/src/itemset.h +157 -0
- data/ext/lcmrun/src/lcm.c +427 -0
- data/ext/lcmrun/src/problem.c +349 -0
- data/ext/lcmrun/src/problem.h +177 -0
- data/ext/lcmrun/src/queue.c +528 -0
- data/ext/lcmrun/src/queue.h +176 -0
- data/ext/lcmrun/src/sgraph.c +359 -0
- data/ext/lcmrun/src/sgraph.h +173 -0
- data/ext/lcmrun/src/stdlib2.c +1282 -0
- data/ext/lcmrun/src/stdlib2.h +823 -0
- data/ext/lcmrun/src/trsact.c +747 -0
- data/ext/lcmrun/src/trsact.h +159 -0
- data/ext/lcmrun/src/vec.c +731 -0
- data/ext/lcmrun/src/vec.h +171 -0
- data/ext/lcmseq0run/extconf.rb +20 -0
- data/ext/lcmseq0run/lcmseq0run.cpp +59 -0
- data/ext/lcmseq0run/src/aheap.c +216 -0
- data/ext/lcmseq0run/src/aheap.h +111 -0
- data/ext/lcmseq0run/src/base.c +92 -0
- data/ext/lcmseq0run/src/base.h +59 -0
- data/ext/lcmseq0run/src/itemset.c +518 -0
- data/ext/lcmseq0run/src/itemset.h +157 -0
- data/ext/lcmseq0run/src/itemset_zero.c +522 -0
- data/ext/lcmseq0run/src/lcm_seq.c +446 -0
- data/ext/lcmseq0run/src/lcm_seq_zero.c +446 -0
- data/ext/lcmseq0run/src/problem.c +439 -0
- data/ext/lcmseq0run/src/problem.h +179 -0
- data/ext/lcmseq0run/src/problem_zero.c +439 -0
- data/ext/lcmseq0run/src/queue.c +533 -0
- data/ext/lcmseq0run/src/queue.h +182 -0
- data/ext/lcmseq0run/src/stdlib2.c +1350 -0
- data/ext/lcmseq0run/src/stdlib2.h +864 -0
- data/ext/lcmseq0run/src/trsact.c +747 -0
- data/ext/lcmseq0run/src/trsact.h +159 -0
- data/ext/lcmseq0run/src/vec.c +779 -0
- data/ext/lcmseq0run/src/vec.h +172 -0
- data/ext/lcmseqrun/extconf.rb +20 -0
- data/ext/lcmseqrun/lcmseqrun.cpp +101 -0
- data/ext/lcmseqrun/src/aheap.c +216 -0
- data/ext/lcmseqrun/src/aheap.h +111 -0
- data/ext/lcmseqrun/src/base.c +92 -0
- data/ext/lcmseqrun/src/base.h +59 -0
- data/ext/lcmseqrun/src/itemset.c +518 -0
- data/ext/lcmseqrun/src/itemset.h +157 -0
- data/ext/lcmseqrun/src/itemset_zero.c +522 -0
- data/ext/lcmseqrun/src/lcm_seq.c +447 -0
- data/ext/lcmseqrun/src/lcm_seq_zero.c +446 -0
- data/ext/lcmseqrun/src/problem.c +439 -0
- data/ext/lcmseqrun/src/problem.h +179 -0
- data/ext/lcmseqrun/src/problem_zero.c +439 -0
- data/ext/lcmseqrun/src/queue.c +533 -0
- data/ext/lcmseqrun/src/queue.h +182 -0
- data/ext/lcmseqrun/src/stdlib2.c +1350 -0
- data/ext/lcmseqrun/src/stdlib2.h +864 -0
- data/ext/lcmseqrun/src/trsact.c +747 -0
- data/ext/lcmseqrun/src/trsact.h +159 -0
- data/ext/lcmseqrun/src/vec.c +779 -0
- data/ext/lcmseqrun/src/vec.h +172 -0
- data/ext/lcmtransrun/extconf.rb +18 -0
- data/ext/lcmtransrun/lcmtransrun.cpp +264 -0
- data/ext/macerun/extconf.rb +20 -0
- data/ext/macerun/macerun.cpp +57 -0
- data/ext/macerun/src/aheap.c +217 -0
- data/ext/macerun/src/aheap.h +112 -0
- data/ext/macerun/src/itemset.c +491 -0
- data/ext/macerun/src/itemset.h +158 -0
- data/ext/macerun/src/mace.c +503 -0
- data/ext/macerun/src/problem.c +346 -0
- data/ext/macerun/src/problem.h +174 -0
- data/ext/macerun/src/queue.c +529 -0
- data/ext/macerun/src/queue.h +177 -0
- data/ext/macerun/src/sgraph.c +360 -0
- data/ext/macerun/src/sgraph.h +174 -0
- data/ext/macerun/src/stdlib2.c +993 -0
- data/ext/macerun/src/stdlib2.h +811 -0
- data/ext/macerun/src/vec.c +634 -0
- data/ext/macerun/src/vec.h +170 -0
- data/ext/sspcrun/extconf.rb +20 -0
- data/ext/sspcrun/src/_sspc.c +358 -0
- data/ext/sspcrun/src/aheap.c +545 -0
- data/ext/sspcrun/src/aheap.h +251 -0
- data/ext/sspcrun/src/base.c +92 -0
- data/ext/sspcrun/src/base.h +59 -0
- data/ext/sspcrun/src/fstar.c +496 -0
- data/ext/sspcrun/src/fstar.h +80 -0
- data/ext/sspcrun/src/grhfil.c +213 -0
- data/ext/sspcrun/src/itemset.c +713 -0
- data/ext/sspcrun/src/itemset.h +170 -0
- data/ext/sspcrun/src/problem.c +415 -0
- data/ext/sspcrun/src/problem.h +179 -0
- data/ext/sspcrun/src/queue.c +533 -0
- data/ext/sspcrun/src/queue.h +182 -0
- data/ext/sspcrun/src/sample.c +19 -0
- data/ext/sspcrun/src/sspc.c +598 -0
- data/ext/sspcrun/src/sspc2.c +491 -0
- data/ext/sspcrun/src/stdlib2.c +1482 -0
- data/ext/sspcrun/src/stdlib2.h +892 -0
- data/ext/sspcrun/src/trsact.c +817 -0
- data/ext/sspcrun/src/trsact.h +160 -0
- data/ext/sspcrun/src/vec.c +745 -0
- data/ext/sspcrun/src/vec.h +172 -0
- data/ext/sspcrun/sspcrun.cpp +54 -0
- data/lib/nysol/enumLcmEp.rb +338 -0
- data/lib/nysol/enumLcmEsp.rb +284 -0
- data/lib/nysol/enumLcmIs.rb +275 -0
- data/lib/nysol/enumLcmSeq.rb +143 -0
- data/lib/nysol/items.rb +201 -0
- data/lib/nysol/seqDB.rb +256 -0
- data/lib/nysol/take.rb +39 -0
- data/lib/nysol/taxonomy.rb +113 -0
- data/lib/nysol/traDB.rb +257 -0
- metadata +239 -0
@@ -0,0 +1,446 @@
|
|
1
|
+
/* frequent appearing item sequence enumeration algorithm based on LCM */
|
2
|
+
/* 2004/4/10 Takeaki Uno e-mail:uno@nii.jp,
|
3
|
+
homepage: http://research.nii.ac.jp/~uno/index.html */
|
4
|
+
/* This program is available for only academic use, basically.
|
5
|
+
Anyone can modify this program, but he/she has to write down
|
6
|
+
the change of the modification on the top of the source code.
|
7
|
+
Neither contact nor appointment to Takeaki Uno is needed.
|
8
|
+
If one wants to re-distribute this code, do not forget to
|
9
|
+
refer the newest code, and show the link to homepage of
|
10
|
+
Takeaki Uno, to notify the news about the codes for the users.
|
11
|
+
For the commercial use, please make a contact to Takeaki Uno. */
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
#ifndef _lcm_seq_c_
|
16
|
+
#define _lcm_seq_c_
|
17
|
+
|
18
|
+
#define WEIGHT_DOUBLE
|
19
|
+
|
20
|
+
#include"trsact.c"
|
21
|
+
#include"problem_zero.c"
|
22
|
+
|
23
|
+
/* Problem-type bits private to LCMseq, OR'ed into PP->problem.
   LCMSEQ_LEFTMOST (2^27) is set together with PROBLEM_CLOSED by the 'C'
   command and cleared again when a gap or window bound is given.
   LCMSEQ_SET_RULE (2^28) is defined here but not referenced in this file. */
#define LCMSEQ_LEFTMOST (1<<27)
#define LCMSEQ_SET_RULE (1<<28)
|
25
|
+
|
26
|
+
typedef struct {
|
27
|
+
QUEUE_INT t; // transaction ID
|
28
|
+
QUEUE_INT s; // previous position
|
29
|
+
QUEUE_INT org; // original position
|
30
|
+
} LCMSEQ_ELM;
|
31
|
+
|
32
|
+
typedef struct {
|
33
|
+
unsigned char type; // type of the structure
|
34
|
+
LCMSEQ_ELM *v; // pointer to the array
|
35
|
+
QUEUE_ID end; // the length of the array
|
36
|
+
QUEUE_ID t; // end position+1
|
37
|
+
QUEUE_ID s; // start position
|
38
|
+
} LCMSEQ_QUE;
|
39
|
+
|
40
|
+
|
41
|
+
void LCMseq_error (){
|
42
|
+
ERROR_MES = "command explanation";
|
43
|
+
print_err ("LCMseq: [FCfQIq] [options] input-filename support [output-filename]\n\
|
44
|
+
%%:show progress, _:no message, +:write solutions in append mode\n\
|
45
|
+
F:position occurrence, C:document occurrence\n\
|
46
|
+
m:output extension maximal patterns only, c:output extension closed patterns only\n \
|
47
|
+
f,Q:output frequency following/preceding to each output sequence\n\
|
48
|
+
A:output coverages for positive/negative transactions\n\
|
49
|
+
I(J):output ID's of transactions including each pattern, if J is given, an occurrence is written in a complete stype; transaction ID, starting position and ending position\n\
|
50
|
+
i:do not output itemset to the output file (only rules)\n\
|
51
|
+
s:output confidence and item frequency by absolute values\n\
|
52
|
+
t:transpose the input database (item i will be i-th transaction, and i-th transaction will be item i)\n\
|
53
|
+
[options]\n\
|
54
|
+
-K [num]: output [num] most frequent sequences\n\
|
55
|
+
-l,-u [num]: output sequences with size at least/most [num]\n\
|
56
|
+
-U [num]: upper bound for support(maximum support)\n\
|
57
|
+
-g [num]: restrict gap length of each consequtive items by [num]\n\
|
58
|
+
-G [num]: restrict window size of the occurrence by [num]\n\
|
59
|
+
-w [filename]:read weights of transactions from the file\n\
|
60
|
+
-i [num]: find association rule for item [num]\n\
|
61
|
+
-a,-A [ratio]: find association rules of confidence at least/most [ratio]\n\
|
62
|
+
-r,-R [ratio]: find association rules of relational confidence at least/most [ratio]\n\
|
63
|
+
-f,-F [ratio]: output sequences with frequency no less/greater than [ratio] times the frequency given by the product of appearance probability of each item\n\
|
64
|
+
-p,-P [num]: output sequence only if (frequency)/(abusolute frequency) is no less/no greater than [num]\n\
|
65
|
+
-n,-N [num]: output sequence only if its negative frequency is no less/no greater than [num] (negative frequency is the sum of weights of transactions having negative weights)\n\
|
66
|
+
-o,-O [num]: output sequence only if its positive frequency is no less/no greater than [num] (positive frequency is the sum of weights of transactions having positive weights)\n\
|
67
|
+
-s [num]: output itemset rule (of the form (a,b,c) => (d,e)) with confidence at least [num] (only those whose frequency of the result is no less than the support)\n\
|
68
|
+
-# [num]: stop after outputting [num] solutions\n\
|
69
|
+
-, [char]:give the separator of the numbers in the output\n\
|
70
|
+
-Q [filename]:replace the output numbers according to the permutation table given by [filename]\n\
|
71
|
+
# the 1st letter of input-filename cannot be '-'.\n\
|
72
|
+
# if the output file name is -, the solutions will be output to standard output.\n");
|
73
|
+
EXIT;
|
74
|
+
}
|
75
|
+
|
76
|
+
/***********************************************************************/
|
77
|
+
/* read parameters given by command line */
|
78
|
+
/***********************************************************************/
|
79
|
+
/* Parse the command line into the problem description.
   argv[1] is the command word (a string of single-letter switches), followed
   by "-x value" option pairs, then input-filename, support and an optional
   output-filename.  On a malformed command line LCMseq_error() is called
   (which sets ERROR_MES) and the function returns early.
     argc, argv : command line as given to main
     PP         : problem to fill in (PP->problem, PP->II, PP->TT, file names) */
void LCMseq_read_param (int argc, char *argv[], PROBLEM *PP){
  ITEMSET *II = &PP->II;
  char *cmd, *val;
  int c=1, f=0;

  if ( argc < c+3 ){ LCMseq_error (); return; }
  cmd = argv[c];

    // problem type: C (document occurrence) has priority over F (position occurrence)
  if ( strchr (cmd, 'C') ){
    PP->problem |= PROBLEM_CLOSED+LCMSEQ_LEFTMOST;
    II->flag |= ITEMSET_RM_DUP_TRSACT;
  } else if ( strchr (cmd, 'F') ){
    PP->problem |= PROBLEM_FREQSET;
  } else error ("F or C command has to be specified", EXIT);

    // message / output switches
  if ( !strchr (cmd, '_') ){
    II->flag |= SHOW_MESSAGE;
    PP->TT.flag |= SHOW_MESSAGE;
  }
  if ( strchr (cmd, '%') ) II->flag |= SHOW_PROGRESS;
  if ( strchr (cmd, '+') ) II->flag |= ITEMSET_APPEND;
  if ( strchr (cmd, 'f') ) II->flag |= ITEMSET_FREQ;
  if ( strchr (cmd, 'A') ) II->flag |= ITEMSET_OUTPUT_POSINEGA;
    // NOTE(review): 'R' ORs an ITEMSET_* constant into PP->problem, whereas -p/-P
    // below put ITEMSET_POSI_RATIO into II->flag -- confirm this is intended
  if ( strchr (cmd, 'R') ){
    PP->problem |= ITEMSET_POSI_RATIO;
    II->flag |= ITEMSET_IGNORE_BOUND;
  }
  if ( strchr (cmd, 'Q') ) II->flag |= ITEMSET_PRE_FREQ;

    // occurrence output: J (tuple form) overrides I (transaction IDs)
  if ( strchr (cmd, 'J') ){
    BITRM (II->flag, ITEMSET_TRSACT_ID);   // for outputting tuple
    II->flag |= ITEMSET_MULTI_OCC_PRINT;
  } else if ( strchr (cmd, 'I') ){
    II->flag |= ITEMSET_TRSACT_ID;   // single occurrence
    if ( PP->problem & PROBLEM_FREQSET ) II->flag |= ITEMSET_MULTI_OCC_PRINT;  // output pair
  }

  if ( strchr (cmd, 'i') ) II->flag |= ITEMSET_NOT_ITEMSET;
  if ( strchr (cmd, 's') ) II->flag |= ITEMSET_RULE_SUPP;
  if ( strchr (cmd, 't') ) PP->TT.flag |= LOAD_TPOSE;
  if ( strchr (cmd, 'm') ) PP->problem |= PROBLEM_EX_MAXIMAL;
  if ( strchr (cmd, 'c') ) PP->problem |= PROBLEM_EX_CLOSED;
  c++;

    // "-x value" pairs; an unknown option letter ends option parsing.
    // f records which ratio-type options were seen: -a/-A OR in 1, -r/-R OR in 2,
    // -p/-P OR in 4, while -f/-F increment it.
  while ( argv[c][0] == '-' ){
    val = argv[c+1];
    switch (argv[c][1]){
      case 'K':
        II->topk.end = atoi (val);
        break;
      case 'l':
        II->lb = atoi (val);
        break;
      case 'u':
        II->ub = atoi (val);
        break;
      case 'U':
        II->frq_ub = (WEIGHT)atof (val);
        break;
      case 'g':
        II->gap_ub = atoi (val);
        break;
      case 'G':
        II->len_ub = atoi (val);
        break;
      case 'w':
        PP->TT.wfname = val;
        break;
      case 'f':
        II->prob_lb = atof (val);
        II->flag |= ITEMSET_RFRQ;
        f++;
        break;
      case 'F':
        II->prob_ub = atof (val);
        II->flag |= ITEMSET_RINFRQ;
        f++;
        break;
      case 'i':
        II->target = atoi (val);
        break;
      case 'a':
        II->ratio_lb = atof (val);
        II->flag |= ITEMSET_RULE_FRQ;
        f|=1;
        break;
      case 'A':
        II->ratio_ub = atof (val);
        II->flag |= ITEMSET_RULE_INFRQ;
        f|=1;
        break;
      case 'r':
        II->ratio_lb = atof (val);
        II->flag |= ITEMSET_RULE_RFRQ;
        f|=2;
        break;
      case 'R':
        II->ratio_ub = atof (val);
        II->flag |= ITEMSET_RULE_RINFRQ;
        f|=2;
        break;
      case 'P':
        II->flag |= ITEMSET_POSI_RATIO;
        II->flag |= ITEMSET_IGNORE_BOUND;
        II->rposi_ub = atof (val);
        f|=4;
        break;
      case 'p':
        II->flag |= ITEMSET_POSI_RATIO;
        II->flag |= ITEMSET_IGNORE_BOUND;
        II->rposi_lb = atof (val);
        f|=4;
        break;
      case 'n':
        II->nega_lb = atof (val);
        break;
      case 'N':
        II->nega_ub = atof (val);
        break;
      case 'o':
        II->posi_lb = atof (val);
        break;
      case 'O':
        II->posi_ub = atof (val);
        break;
      case 's':
        II->setrule_lb = atof (val);
        II->flag |= ITEMSET_SET_RULE;
        break;
      case '#':
        II->max_solutions = atoi (val);
        break;
      case ',':
        II->separator = val[0];
        break;
      case 'Q':
        PP->outperm_fname = val;
        break;
      default:
        goto NEXT;
    }
    c += 2;
      // input-filename and support must still follow
    if ( argc < c+2 ){ LCMseq_error (); return; }
  }

  NEXT:;
  if ( (f&3)==3 || (f&5)==5 || (f&6)==6 )
      error ("-f, -F, -a, -A, -p, -P, -r and -R can not specified simultaneously", EXIT);
  if ( f ) BITRM (II->flag, ITEMSET_PRE_FREQ);

    // a gap or window bound switches off the leftmost-occurrence mode
  if ( II->len_ub<INTHUGE || II->gap_ub<INTHUGE ) BITRM (PP->problem, LCMSEQ_LEFTMOST);

  PP->TT.fname = argv[c];
    // the support argument is read only when -K did not set topk.end
  if ( II->topk.end==0 ) II->frq_lb = (WEIGHT)atof (argv[c+1]);
  if ( argc>c+2 ) PP->output_fname = argv[c+2];
}
|
152
|
+
|
153
|
+
|
154
|
+
|
155
|
+
/*******************************************************/
|
156
|
+
/* compute occurrences of all frequent items */
|
157
|
+
/* if flag!=0, append the occurrences to each OQ[i] (delivery); otherwise, only compute the frequency of each item i */
|
158
|
+
/*******************************************************/
|
159
|
+
/* Scan the part of each occurrence's transaction that may extend the current
   sequence and, for every letter found there, either
     flag != 0 : append a new occurrence to TT->OQ[letter], or
     flag == 0 : count it (TT->OQ[letter].end) and add the transaction weight to
                 PP->occ_w / PP->occ_pw, registering new letters in PP->itemcand.
   TT->sc[] is used as a per-letter mark so that, unless every position counts,
   only the first appearance of a letter in a scanned part is taken.
   The scan macros (MQUE_FLOOP, BLOOP, MAX, ENMAX) are defined outside this file. */
void LCMseq_occ_delivery (PROBLEM *PP, LCMSEQ_QUE *occ, int flag){
  ITEMSET *II = &PP->II;
  TRSACT *TT = &PP->TT;
  QUEUE_ID pos;
  QUEUE_INT letter, stop;
  WEIGHT tw;
  LCMSEQ_ELM *cur, *slot, *last = occ->v + (occ->t-1);
  QUEUE *oq;
  int has_negative = TT->flag&TRSACT_NEGATIVE;
  int every_pos;   // non-zero: take every position, not only first appearances

  if ( flag ) every_pos = !(PP->problem&LCMSEQ_LEFTMOST);
  else every_pos = !(PP->problem&PROBLEM_CLOSED);

  MQUE_FLOOP (*occ, cur){   // loop for occurrences
      // lower end of the scanned part, from the gap bound and the window bound
    stop = MAX (MAX(0, cur->s -II->gap_ub), cur->org -(II->len_ub-1));
      // do not scan past the next occurrence when it lies in the same transaction
    if ( cur < last && cur->t == (cur+1)->t ){ ENMAX (stop, (cur+1)->s); }
    if ( II->itemset.t == 0 ) stop = 0;   // empty sequence: scan the whole transaction
    tw = TT->w[cur->t];

    if ( !every_pos ){
        // clear marks of all letters in scanning part of the current transaction
      BLOOP (pos, cur->s, stop){ TT->sc[TT->T.v[cur->t].v[pos]] = 0; }
    }

    BLOOP (pos, cur->s, stop){
      letter = TT->T.v[cur->t].v[pos];
      if ( every_pos || TT->sc[letter] == 0 ){   // not leftmost, or the first appearance of the letter
        TT->sc[letter] = 1;   // mark the letter
        oq = &TT->OQ[letter];
        if ( flag ){
            // occurrence computing: insert new element to the letter's occurrence array
          slot = (LCMSEQ_ELM *)(oq->v) + oq->t;
          slot->t = cur->t;
          slot->s = pos;
          slot->org = II->itemset.t? cur->org: pos;
          oq->t++;
        } else {
            // just frequency counting
          if ( oq->end == 0 ){   // initialize weights if this is the first insertion
            QUE_INS (PP->itemcand, letter);
            PP->occ_pw[letter] = 0;
            PP->occ_w[letter] = 0;
          }
          oq->end++;
          PP->occ_w[letter] += tw;
          if ( has_negative && tw>0 ) PP->occ_pw[letter] += tw;
        }
      }
    }
  }
}
|
199
|
+
|
200
|
+
|
201
|
+
/* remove infrequent items from jump, and set active/in-active marks */
|
202
|
+
/* Compact PP->itemcand so that it keeps only the items whose positive
   frequency PP->occ_pw[] reaches II->frq_lb.  Kept items get mark 0 in
   TT->sc[]; dropped items have their OQ counters reset and, when PP->root
   is set, get mark 3. */
void LCMseq_rm_infreq (PROBLEM *PP){
  ITEMSET *II = &PP->II;
  TRSACT *TT = &PP->TT;
  QUEUE_ID kept = PP->itemcand.s;   // write position for surviving items
  QUEUE_INT *cur, item;

  MQUE_FLOOP (PP->itemcand, cur){
    item = *cur;
    if ( PP->occ_pw[item] >= II->frq_lb ){
      PP->itemcand.v[kept++] = item;
      TT->sc[item] = 0;
      continue;
    }
      // infrequent item: forget its occurrences
    TT->OQ[item].end = 0;
    TT->OQ[item].t = 0;
    if ( PP->root ) TT->sc[item] = 3;  // remove infrequent items only when gap constraint is not given
  }
  PP->itemcand.t = kept;
}
|
219
|
+
|
220
|
+
/* remove merged occurrences from occ, and re-set temporary end-marks marked in each occurrence */
|
221
|
+
/* Restore the letter `item` at the position of every occurrence (it had been
   overwritten by a temporary end-mark) and compact occ in place:
     TT->mark[t] == 0 : the occurrence is dropped,
     TT->mark[t] == 1 : the occurrence is kept unchanged,
     TT->mark[t] >  1 : the occurrence is moved to transaction mark[t]-2 and its
                        positions are re-based on that transaction's length. */
void LCMseq_reduce_occ (TRSACT *TT, LCMSEQ_QUE *occ, QUEUE_INT item){
  LCMSEQ_ELM *in, *out = occ->v;
  LCMSEQ_ELM src;   // snapshot of *in; out may point at the same element

  MQUE_FLOOP (*occ, in){
    src = *in;
    TT->T.v[src.t].v[src.s] = item;
    if ( TT->mark[src.t] == 0 ) continue;

    *out = src;
      // update positions in occ, for shrinked transactions
    if ( TT->mark[src.t] > 1 ){
      out->t = TT->mark[src.t] -2;
      out->org = TT->T.v[out->t].t + src.org - src.s;  // actually, org is not used when database is shrinked
      out->s = TT->T.v[out->t].t;
    }
    out++;
  }
  occ->t = (VEC_ID)(out - occ->v);
}
|
237
|
+
|
238
|
+
/***************************************************************/
|
239
|
+
/* iteration of LCMseq */
|
240
|
+
/* INPUT: occurrences of current sequence */
|
241
|
+
/*************************************************************************/
|
242
|
+
/* One iteration of LCMseq (recursive).
     PP   : problem; II->itemset holds the current sequence
     item : the letter that was added last (only used by the disabled reduction)
     occ  : occurrences of the current sequence
   Steps: (re)compute the frequency if needed, count the candidate letters,
   output the current sequence through ITEMSET_check_all_rule, deliver the
   occurrences to each frequent letter, and recurse on every such letter.
   The queue/buffer state that is changed on the way is restored before return. */
void LCMseq (PROBLEM *PP, QUEUE_INT item, LCMSEQ_QUE *occ){
  ITEMSET *II = &PP->II;
  TRSACT *TT = &PP->TT;
  QUEUE_ID cand_start = PP->itemcand.s, i, j;
  VEC_ID saved_new_t = TT->new_t;
  int saved_bnum = TT->buf.num, saved_bblock = TT->buf.block_num;
  int do_output = 1;
  QUEUE_INT *x, cnt = 0, last_t = TT->rows_org;
  WEIGHT *w = NULL, *pw = NULL;
  double saved_prob = II->prob;
  LCMSEQ_ELM *u, swap;
  QUEUE *Q = NULL;

    // re-computing frequency, for (document occurrence & non-leftmost):
    // each transaction is counted once for a run of occurrences with the same ID
  if ( (PP->problem & PROBLEM_CLOSED) && !(PP->problem & LCMSEQ_LEFTMOST) ){
    II->pfrq = 0;
    II->frq = 0;
    MQUE_FLOOP (*occ, u){
      if ( u->t != last_t ){
        II->frq += TT->w[u->t];
        if ( TT->w[u->t] > 0 ) II->pfrq += TT->w[u->t];
      }
      last_t = u->t;
    }
  }

  PP->itemcand.s = PP->itemcand.t;   // initialization for the re-use of queue
  II->iters++;
    // threshold value for ex_maximal/ex_closed check; in the case of maximal, it is always II->frq_lb
  if ( PP->problem & PROBLEM_EX_CLOSED ){ PP->th = II->frq; }
  if ( (II->flag & ITEMSET_POSI_RATIO) && II->pfrq != 0 ){
    II->frq /= (II->pfrq+II->pfrq-II->frq);
  }

    // count candidate letters; if the itemset is empty, set frq to the original
    // total weight, and compute item_frq's
  LCMseq_occ_delivery (PP, occ, 0);
  if ( II->itemset.t == 0 ){
    II->frq = TT->total_w_org;
    if ( II->frq != 0 ){
      FLOOP (i, 0, TT->T.clms){ II->item_frq[i] = PP->occ_w[i]/TT->total_w_org; }
    }
  }

  II->prob = 1.0;
  MQUE_FLOOP (II->itemset, x){ II->prob *= II->item_frq[*x]; }

    // extending maximality/closedness check
  if ( PP->problem & (PROBLEM_EX_MAXIMAL+PROBLEM_EX_CLOSED) ){
    MQUE_FLOOP (PP->itemcand, x){
      if ( PP->occ_w[*x] >= PP->th ) do_output = 0;
    }
  }

  if ( do_output ){
    ITEMSET_check_all_rule (II, PP->occ_w, (QUEUE *)occ, &PP->itemcand, TT->total_pw_org, 0);
  }
  MQUE_FLOOP (PP->itemcand, x){
    if ( PP->occ_pw[*x] >= II->frq_lb ) cnt++;
  }

  if ( cnt == 0 || II->itemset.t >= II->ub ) goto END;

    /////// database reduction ///////////
    // NOTE(review): switched off by the leading "0&&"; the body still contains
    // debug output (TRSACT_print / printf) and would need cleaning before re-enabling
  if ( 0&& PP->dir && cnt>10 && occ->t>2 && II->itemset.t>0 ){
      // put end-mark to each occurrence transaction
    Q = &TT->OQ[TT->T.clms];
    Q->s = Q->t = 0;
    MQUE_FLOOP (*occ, u){
      TT->T.v[u->t].v[u->s] = TT->T.clms;
      QUE_INS (*Q, u->t);
    }
    TRSACT_find_same (TT, Q, TT->T.clms);
    TRSACT_merge_trsact (TT, Q, TT->T.clms);
      // erase end-mark of each occurrence transaction, and remove unified occurrences
    LCMseq_reduce_occ (TT, occ, item);
    TRSACT_print (TT, Q, NULL);
    printf ("-------\n");
  }

    ///////////// delivery /////////////
  LCMseq_occ_delivery (PP, occ, 1);
  LCMseq_rm_infreq (PP);
  cnt = QUEUE_LENGTH_ (PP->itemcand);
  QUEUE_occ_dup (&PP->itemcand, &Q, TT->OQ, &w, PP->occ_w, &pw, PP->occ_pw, sizeof(LCMSEQ_ELM));
  if ( Q == NULL ) goto END;
  MQUE_FLOOP (PP->itemcand, x){
    TT->OQ[*x].t = 0;
    TT->OQ[*x].end = 0;
  }
  PP->itemcand.t = PP->itemcand.s;

    /************ recursive calls ***************/
    // Q[i].end is used as the letter of the i-th candidate, w[i]/pw[i] as its frequencies
  FLOOP (i, 0, cnt){
    II->frq = w[i];
    II->pfrq = pw[i];
    if ( II->flag & ITEMSET_SET_RULE ){
      II->set_weight[II->itemset.t] = II->frq;
      II->set_occ[II->itemset.t] = &Q[i];
    }
    QUE_INS (II->itemset, Q[i].end);
      // reverse occurrence order for the first iteration (for the process of overlapping occurrences)
      // NOTE(review): this test comes after QUE_INS; if QUE_INS increments
      // II->itemset.t (the "II->itemset.t--" below suggests so) the branch is never taken -- confirm
    if ( II->itemset.t == 0 ){
      u = (LCMSEQ_ELM *)TT->OQ[i].v;
      FLOOP (j, 0, TT->OQ[i].t/2){
        swap = u[j];
        u[j] = u[TT->OQ[i].t-j-1];
        u[TT->OQ[i].t-j-1] = swap;
      }
    }
    LCMseq (PP, Q[i].end, (LCMSEQ_QUE *)&Q[i]);
    II->itemset.t--;
  }
  free2 (Q);
  TT->new_t = saved_new_t;
  TT->buf.num = saved_bnum;
  TT->buf.block_num = saved_bblock;

  END:;
    // reset the counters of the remaining candidates and give the queue back to the caller
  MQUE_FLOOP (PP->itemcand, x){
    TT->OQ[*x].t = 0;
    TT->OQ[*x].end = 0;
  }
  PP->itemcand.t = PP->itemcand.s;
  PP->itemcand.s = cand_start;
  II->prob = saved_prob;
}
|
357
|
+
|
358
|
+
/*************************************************************************/
|
359
|
+
/* initialization of LCMseq ver. 2 */
|
360
|
+
/*************************************************************************/
|
361
|
+
/* Prepare PP and the root occurrence array after the database is loaded.
     PP  : loaded problem
     occ : receives one occurrence per transaction listed in TT->OQ[TT->T.clms];
           occ->v is allocated here and released by the caller
   Also reverses every transaction, replaces "unbounded" gap/window limits by
   TT->row_max, and sets the control values PP->dir, PP->root and PP->th. */
void LCMseq_init (PROBLEM *PP, LCMSEQ_QUE *occ){
  ITEMSET *II = &PP->II;
  TRSACT *TT = &PP->TT;
  VEC_ID i;
  QUEUE_ID j;
  QUEUE_INT *tid;
  LCMSEQ_ELM first;

  II->X = TT;
  II->frq = TT->total_w_org;
  II->pfrq = TT->total_pw_org;
  II->total_weight = TT->total_w_org;
  II->flag |= ITEMSET_USE_ORG +ITEMSET_ITEMFRQ;

  PROBLEM_alloc (PP, TT->T.clms, TT->T.t, TT->row_max, TT->perm, PROBLEM_ITEMCAND +((TT->flag&TRSACT_NEGATIVE)?PROBLEM_OCC_PW: PROBLEM_OCC_W));
  malloc2 (occ->v, TT->T.t, EXIT);
  occ->end = TT->clm_max;
  occ->s = 0;
  occ->t = 0;
  TT->perm = NULL;   // cleared after being handed to PROBLEM_alloc
  if ( II->perm && RANGE(0, II->target, II->item_max) ){
    II->target = II->perm[II->target];
  }

  if ( !(TT->sc) ) calloc2 (TT->sc, TT->T.clms+2, return);
    // II->itemflag and TT->sc share the same memory from here on
  free2 (II->itemflag);
  II->itemflag = TT->sc;

    // reverse each transaction
  FLOOP (i, 0, TT->T.t){
    FLOOP (j, 0, TT->T.v[i].t/2){
      SWAP_QUEUE_INT (TT->T.v[i].v[j], TT->T.v[i].v[TT->T.v[i].t-1-j]);
    }
  }

    // an unbounded window/gap is represented by the longest transaction length
  if ( II->len_ub >= INTHUGE ) II->len_ub = TT->row_max;
  if ( II->gap_ub >= INTHUGE ) II->gap_ub = TT->row_max;

    // root occurrences: one per transaction, positioned at the transaction length
  MQUE_FLOOP (TT->OQ[TT->T.clms], tid){
    first.t = *tid;
    first.org = TT->T.v[*tid].t;   // !! org is originally -1
    first.s = first.org;
    QUE_INS (*occ, first);
  }

    // flag for shrink or not
  PP->dir = 0;
  if ( (PP->problem&LCMSEQ_LEFTMOST) && II->len_ub>=TT->row_max && !(II->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT)) ){
    PP->dir = 1;
  }
    // flag for removing infrequent item or not
  PP->root = 0;
  if ( II->gap_ub>=TT->row_max && II->len_ub>=TT->row_max ){
    PP->root = 1;
  }
  PP->th = II->frq_lb;
}
|
399
|
+
|
400
|
+
/*************************************************************************/
|
401
|
+
/* main of LCMseq ver. 2 */
|
402
|
+
/*************************************************************************/
|
403
|
+
int LCMseq_main (int argc, char *argv[]){
|
404
|
+
PROBLEM PP;
|
405
|
+
ITEMSET *II = &PP.II;
|
406
|
+
TRSACT *TT = &PP.TT;
|
407
|
+
LCMSEQ_QUE occ;
|
408
|
+
occ.v = NULL;
|
409
|
+
|
410
|
+
PROBLEM_init (&PP);
|
411
|
+
LCMseq_read_param (argc, argv, &PP);
|
412
|
+
if ( ERROR_MES ) return (1);
|
413
|
+
|
414
|
+
TT->occ_unit = sizeof(LCMSEQ_ELM);
|
415
|
+
TT->flag |= TRSACT_MAKE_NEW +TRSACT_ALLOC_OCC + ((II->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT))?0: (TRSACT_SHRINK+TRSACT_1ST_SHRINK)) ;
|
416
|
+
TT->w_lb = (((II->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT)) && (PP.problem & PROBLEM_FREQSET)) || (II->flag&ITEMSET_RULE) || II->gap_ub<INTHUGE || II->len_ub<INTHUGE )? -WEIGHTHUGE: II->frq_lb;
|
417
|
+
PROBLEM_load (&PP);
|
418
|
+
|
419
|
+
if ( !ERROR_MES ){
|
420
|
+
LCMseq_init (&PP, &occ);
|
421
|
+
if ( !ERROR_MES ){
|
422
|
+
LCMseq (&PP, TT->T.clms, &occ);
|
423
|
+
ITEMSET_last_output (II);
|
424
|
+
}
|
425
|
+
}
|
426
|
+
|
427
|
+
free2 (occ.v);
|
428
|
+
TT->sc = NULL;
|
429
|
+
PROBLEM_end (&PP);
|
430
|
+
return (ERROR_MES?1:0);
|
431
|
+
}
|
432
|
+
|
433
|
+
/*******************************************************************************/
|
434
|
+
#ifndef _NO_MAIN_
|
435
|
+
#define _NO_MAIN_
|
436
|
+
/* Program entry: forwards the command line to LCMseq_main and returns its status. */
int main (int argc, char *argv[]){
  int status = LCMseq_main (argc, argv);
  return (status);
}
|
439
|
+
#endif
|
440
|
+
/*******************************************************************************/
|
441
|
+
|
442
|
+
#endif
|
443
|
+
|
444
|
+
|
445
|
+
|
446
|
+
|