nysol-take 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (161) hide show
  1. checksums.yaml +7 -0
  2. data/bin/mbiclique.rb +317 -0
  3. data/bin/mbipolish.rb +362 -0
  4. data/bin/mccomp.rb +235 -0
  5. data/bin/mclique.rb +295 -0
  6. data/bin/mclique2g.rb +105 -0
  7. data/bin/mcliqueInfo.rb +203 -0
  8. data/bin/mfriends.rb +202 -0
  9. data/bin/mgdiff.rb +252 -0
  10. data/bin/mhifriend.rb +456 -0
  11. data/bin/mhipolish.rb +465 -0
  12. data/bin/mitemset.rb +168 -0
  13. data/bin/mpal.rb +410 -0
  14. data/bin/mpolishing.rb +399 -0
  15. data/bin/msequence.rb +165 -0
  16. data/bin/mtra2g.rb +476 -0
  17. data/bin/mtra2gc.rb +360 -0
  18. data/ext/grhfilrun/extconf.rb +12 -0
  19. data/ext/grhfilrun/grhfilrun.c +85 -0
  20. data/ext/grhfilrun/src/_sspc.c +358 -0
  21. data/ext/grhfilrun/src/aheap.c +545 -0
  22. data/ext/grhfilrun/src/aheap.h +251 -0
  23. data/ext/grhfilrun/src/base.c +92 -0
  24. data/ext/grhfilrun/src/base.h +59 -0
  25. data/ext/grhfilrun/src/fstar.c +497 -0
  26. data/ext/grhfilrun/src/fstar.h +80 -0
  27. data/ext/grhfilrun/src/grhfil.c +214 -0
  28. data/ext/grhfilrun/src/itemset.c +713 -0
  29. data/ext/grhfilrun/src/itemset.h +170 -0
  30. data/ext/grhfilrun/src/problem.c +415 -0
  31. data/ext/grhfilrun/src/problem.h +179 -0
  32. data/ext/grhfilrun/src/queue.c +533 -0
  33. data/ext/grhfilrun/src/queue.h +182 -0
  34. data/ext/grhfilrun/src/sample.c +19 -0
  35. data/ext/grhfilrun/src/sspc.c +597 -0
  36. data/ext/grhfilrun/src/sspc2.c +491 -0
  37. data/ext/grhfilrun/src/stdlib2.c +1482 -0
  38. data/ext/grhfilrun/src/stdlib2.h +892 -0
  39. data/ext/grhfilrun/src/trsact.c +817 -0
  40. data/ext/grhfilrun/src/trsact.h +160 -0
  41. data/ext/grhfilrun/src/vec.c +745 -0
  42. data/ext/grhfilrun/src/vec.h +172 -0
  43. data/ext/lcmrun/extconf.rb +20 -0
  44. data/ext/lcmrun/lcmrun.cpp +99 -0
  45. data/ext/lcmrun/src/aheap.c +216 -0
  46. data/ext/lcmrun/src/aheap.h +111 -0
  47. data/ext/lcmrun/src/base.c +92 -0
  48. data/ext/lcmrun/src/base.h +59 -0
  49. data/ext/lcmrun/src/itemset.c +496 -0
  50. data/ext/lcmrun/src/itemset.h +157 -0
  51. data/ext/lcmrun/src/lcm.c +427 -0
  52. data/ext/lcmrun/src/problem.c +349 -0
  53. data/ext/lcmrun/src/problem.h +177 -0
  54. data/ext/lcmrun/src/queue.c +528 -0
  55. data/ext/lcmrun/src/queue.h +176 -0
  56. data/ext/lcmrun/src/sgraph.c +359 -0
  57. data/ext/lcmrun/src/sgraph.h +173 -0
  58. data/ext/lcmrun/src/stdlib2.c +1282 -0
  59. data/ext/lcmrun/src/stdlib2.h +823 -0
  60. data/ext/lcmrun/src/trsact.c +747 -0
  61. data/ext/lcmrun/src/trsact.h +159 -0
  62. data/ext/lcmrun/src/vec.c +731 -0
  63. data/ext/lcmrun/src/vec.h +171 -0
  64. data/ext/lcmseq0run/extconf.rb +20 -0
  65. data/ext/lcmseq0run/lcmseq0run.cpp +59 -0
  66. data/ext/lcmseq0run/src/aheap.c +216 -0
  67. data/ext/lcmseq0run/src/aheap.h +111 -0
  68. data/ext/lcmseq0run/src/base.c +92 -0
  69. data/ext/lcmseq0run/src/base.h +59 -0
  70. data/ext/lcmseq0run/src/itemset.c +518 -0
  71. data/ext/lcmseq0run/src/itemset.h +157 -0
  72. data/ext/lcmseq0run/src/itemset_zero.c +522 -0
  73. data/ext/lcmseq0run/src/lcm_seq.c +446 -0
  74. data/ext/lcmseq0run/src/lcm_seq_zero.c +446 -0
  75. data/ext/lcmseq0run/src/problem.c +439 -0
  76. data/ext/lcmseq0run/src/problem.h +179 -0
  77. data/ext/lcmseq0run/src/problem_zero.c +439 -0
  78. data/ext/lcmseq0run/src/queue.c +533 -0
  79. data/ext/lcmseq0run/src/queue.h +182 -0
  80. data/ext/lcmseq0run/src/stdlib2.c +1350 -0
  81. data/ext/lcmseq0run/src/stdlib2.h +864 -0
  82. data/ext/lcmseq0run/src/trsact.c +747 -0
  83. data/ext/lcmseq0run/src/trsact.h +159 -0
  84. data/ext/lcmseq0run/src/vec.c +779 -0
  85. data/ext/lcmseq0run/src/vec.h +172 -0
  86. data/ext/lcmseqrun/extconf.rb +20 -0
  87. data/ext/lcmseqrun/lcmseqrun.cpp +101 -0
  88. data/ext/lcmseqrun/src/aheap.c +216 -0
  89. data/ext/lcmseqrun/src/aheap.h +111 -0
  90. data/ext/lcmseqrun/src/base.c +92 -0
  91. data/ext/lcmseqrun/src/base.h +59 -0
  92. data/ext/lcmseqrun/src/itemset.c +518 -0
  93. data/ext/lcmseqrun/src/itemset.h +157 -0
  94. data/ext/lcmseqrun/src/itemset_zero.c +522 -0
  95. data/ext/lcmseqrun/src/lcm_seq.c +447 -0
  96. data/ext/lcmseqrun/src/lcm_seq_zero.c +446 -0
  97. data/ext/lcmseqrun/src/problem.c +439 -0
  98. data/ext/lcmseqrun/src/problem.h +179 -0
  99. data/ext/lcmseqrun/src/problem_zero.c +439 -0
  100. data/ext/lcmseqrun/src/queue.c +533 -0
  101. data/ext/lcmseqrun/src/queue.h +182 -0
  102. data/ext/lcmseqrun/src/stdlib2.c +1350 -0
  103. data/ext/lcmseqrun/src/stdlib2.h +864 -0
  104. data/ext/lcmseqrun/src/trsact.c +747 -0
  105. data/ext/lcmseqrun/src/trsact.h +159 -0
  106. data/ext/lcmseqrun/src/vec.c +779 -0
  107. data/ext/lcmseqrun/src/vec.h +172 -0
  108. data/ext/lcmtransrun/extconf.rb +18 -0
  109. data/ext/lcmtransrun/lcmtransrun.cpp +264 -0
  110. data/ext/macerun/extconf.rb +20 -0
  111. data/ext/macerun/macerun.cpp +57 -0
  112. data/ext/macerun/src/aheap.c +217 -0
  113. data/ext/macerun/src/aheap.h +112 -0
  114. data/ext/macerun/src/itemset.c +491 -0
  115. data/ext/macerun/src/itemset.h +158 -0
  116. data/ext/macerun/src/mace.c +503 -0
  117. data/ext/macerun/src/problem.c +346 -0
  118. data/ext/macerun/src/problem.h +174 -0
  119. data/ext/macerun/src/queue.c +529 -0
  120. data/ext/macerun/src/queue.h +177 -0
  121. data/ext/macerun/src/sgraph.c +360 -0
  122. data/ext/macerun/src/sgraph.h +174 -0
  123. data/ext/macerun/src/stdlib2.c +993 -0
  124. data/ext/macerun/src/stdlib2.h +811 -0
  125. data/ext/macerun/src/vec.c +634 -0
  126. data/ext/macerun/src/vec.h +170 -0
  127. data/ext/sspcrun/extconf.rb +20 -0
  128. data/ext/sspcrun/src/_sspc.c +358 -0
  129. data/ext/sspcrun/src/aheap.c +545 -0
  130. data/ext/sspcrun/src/aheap.h +251 -0
  131. data/ext/sspcrun/src/base.c +92 -0
  132. data/ext/sspcrun/src/base.h +59 -0
  133. data/ext/sspcrun/src/fstar.c +496 -0
  134. data/ext/sspcrun/src/fstar.h +80 -0
  135. data/ext/sspcrun/src/grhfil.c +213 -0
  136. data/ext/sspcrun/src/itemset.c +713 -0
  137. data/ext/sspcrun/src/itemset.h +170 -0
  138. data/ext/sspcrun/src/problem.c +415 -0
  139. data/ext/sspcrun/src/problem.h +179 -0
  140. data/ext/sspcrun/src/queue.c +533 -0
  141. data/ext/sspcrun/src/queue.h +182 -0
  142. data/ext/sspcrun/src/sample.c +19 -0
  143. data/ext/sspcrun/src/sspc.c +598 -0
  144. data/ext/sspcrun/src/sspc2.c +491 -0
  145. data/ext/sspcrun/src/stdlib2.c +1482 -0
  146. data/ext/sspcrun/src/stdlib2.h +892 -0
  147. data/ext/sspcrun/src/trsact.c +817 -0
  148. data/ext/sspcrun/src/trsact.h +160 -0
  149. data/ext/sspcrun/src/vec.c +745 -0
  150. data/ext/sspcrun/src/vec.h +172 -0
  151. data/ext/sspcrun/sspcrun.cpp +54 -0
  152. data/lib/nysol/enumLcmEp.rb +338 -0
  153. data/lib/nysol/enumLcmEsp.rb +284 -0
  154. data/lib/nysol/enumLcmIs.rb +275 -0
  155. data/lib/nysol/enumLcmSeq.rb +143 -0
  156. data/lib/nysol/items.rb +201 -0
  157. data/lib/nysol/seqDB.rb +256 -0
  158. data/lib/nysol/take.rb +39 -0
  159. data/lib/nysol/taxonomy.rb +113 -0
  160. data/lib/nysol/traDB.rb +257 -0
  161. metadata +239 -0
@@ -0,0 +1,446 @@
1
+ /* frequent appearing item sequence enumeration algorithm based on LCM */
2
+ /* 2004/4/10 Takeaki Uno e-mail:uno@nii.jp,
3
+ homepage: http://research.nii.ac.jp/~uno/index.html */
4
+ /* This program is available for only academic use, basically.
5
+ Anyone can modify this program, but he/she has to write down
6
+ the change of the modification on the top of the source code.
7
+ Neither contact nor appointment to Takeaki Uno is needed.
8
+ If one wants to re-distribute this code, do not forget to
9
+ refer the newest code, and show the link to homepage of
10
+ Takeaki Uno, to notify the news about the codes for the users.
11
+ For the commercial use, please make a contact to Takeaki Uno. */
12
+
13
+
14
+
15
+ #ifndef _lcm_seq_c_
16
+ #define _lcm_seq_c_
17
+
18
+ #define WEIGHT_DOUBLE
19
+
20
+ #include"trsact.c"
21
+ #include"problem.c"
22
+
23
+ #define LCMSEQ_LEFTMOST 134217728 // 2^27; bit of PP->problem: set together with PROBLEM_CLOSED by the 'C' command, cleared again when a gap or window bound is given
24
+ #define LCMSEQ_SET_RULE 268435456 // 2^28; not referenced in the visible part of this file
25
+
26
+ typedef struct { // one occurrence of the current sequence inside a transaction
27
+ QUEUE_INT t; // transaction ID
28
+ QUEUE_INT s; // previous position: index of the last matched letter in transaction t (LCMseq_occ_delivery stores its scan index j here)
29
+ QUEUE_INT org; // original position: set to j for the first letter of a sequence, inherited from the parent occurrence afterwards
30
+ } LCMSEQ_ELM;
31
+
32
+ typedef struct { // array of LCMSEQ_ELM; pointers are cast to and from QUEUE in LCMseq, so the layout presumably mirrors QUEUE -- TODO confirm against queue.h
33
+ unsigned char type; // type of the structure
34
+ LCMSEQ_ELM *v; // pointer to the array
35
+ QUEUE_ID end; // the length of the array
36
+ QUEUE_ID t; // end position+1
37
+ QUEUE_ID s; // start position
38
+ } LCMSEQ_QUE;
39
+
40
+
41
+ void LCMseq_error (){ // set the error state and print the command-line usage text
42
+ ERROR_MES = "command explanation"; // a non-NULL ERROR_MES makes LCMseq_main return 1 right after parameter reading
43
+ print_err ("LCMseq: [FCfQIq] [options] input-filename support [output-filename]\n\
44
+ %%:show progress, _:no message, +:write solutions in append mode\n\
45
+ F:position occurrence, C:document occurrence\n\
46
+ m:output extension maximal patterns only, c:output extension closed patterns only\n \
47
+ f,Q:output frequency following/preceding to each output sequence\n\
48
+ A:output coverages for positive/negative transactions\n\
49
+ I(J):output ID's of transactions including each pattern, if J is given, an occurrence is written in a complete stype; transaction ID, starting position and ending position\n\
50
+ i:do not output itemset to the output file (only rules)\n\
51
+ s:output confidence and item frequency by absolute values\n\
52
+ t:transpose the input database (item i will be i-th transaction, and i-th transaction will be item i)\n\
53
+ [options]\n\
54
+ -K [num]: output [num] most frequent sequences\n\
55
+ -l,-u [num]: output sequences with size at least/most [num]\n\
56
+ -U [num]: upper bound for support(maximum support)\n\
57
+ -g [num]: restrict gap length of each consequtive items by [num]\n\
58
+ -G [num]: restrict window size of the occurrence by [num]\n\
59
+ -w [filename]:read weights of transactions from the file\n\
60
+ -i [num]: find association rule for item [num]\n\
61
+ -a,-A [ratio]: find association rules of confidence at least/most [ratio]\n\
62
+ -r,-R [ratio]: find association rules of relational confidence at least/most [ratio]\n\
63
+ -f,-F [ratio]: output sequences with frequency no less/greater than [ratio] times the frequency given by the product of appearance probability of each item\n\
64
+ -p,-P [num]: output sequence only if (frequency)/(abusolute frequency) is no less/no greater than [num]\n\
65
+ -n,-N [num]: output sequence only if its negative frequency is no less/no greater than [num] (negative frequency is the sum of weights of transactions having negative weights)\n\
66
+ -o,-O [num]: output sequence only if its positive frequency is no less/no greater than [num] (positive frequency is the sum of weights of transactions having positive weights)\n\
67
+ -s [num]: output itemset rule (of the form (a,b,c) => (d,e)) with confidence at least [num] (only those whose frequency of the result is no less than the support)\n\
68
+ -# [num]: stop after outputting [num] solutions\n\
69
+ -, [char]:give the separator of the numbers in the output\n\
70
+ -Q [filename]:replace the output numbers according to the permutation table given by [filename]\n\
71
+ # the 1st letter of input-filename cannot be '-'.\n\
72
+ # if the output file name is -, the solutions will be output to standard output.\n");
73
+ EXIT; // project macro; its definition is not visible in this file
74
+ }
75
+
76
+ /***********************************************************************/
77
+ /* read parameters given by command line
   argv[1] is the command string (one letter per switch); it is followed by
   zero or more "-X value" option pairs, then the input filename, the support
   value and an optional output filename.
   Results are stored into PP->problem, PP->II (flags and bounds),
   PP->TT (flag, fname, wfname), PP->outperm_fname and PP->output_fname.
   When too few arguments are given, LCMseq_error() is called and the function
   returns; the caller (LCMseq_main) then tests ERROR_MES. */
78
+ /***********************************************************************/
79
+ void LCMseq_read_param (int argc, char *argv[], PROBLEM *PP){
80
+ ITEMSET *II = &PP->II;
81
+ int c=1, f=0; // c: index of the argv entry being read; f: records which ratio-type options were given (checked after NEXT)
82
+ if ( argc < c+3 ){ LCMseq_error (); return; }
83
+
84
+ if ( strchr (argv[c], 'C') ){ PP->problem |= PROBLEM_CLOSED+LCMSEQ_LEFTMOST; II->flag |= ITEMSET_RM_DUP_TRSACT; // 'C': document occurrence (per the usage text)
85
+ } else if (strchr( argv[c], 'F') ){ PP->problem |= PROBLEM_FREQSET; // 'F': position occurrence (per the usage text)
86
+ } else error ("F or C command has to be specified", EXIT);
87
+
88
+ if ( !strchr (argv[c], '_') ){ II->flag |= SHOW_MESSAGE; PP->TT.flag |= SHOW_MESSAGE; }
89
+ if ( strchr (argv[c], '%') ) II->flag |= SHOW_PROGRESS;
90
+ if ( strchr (argv[c], '+') ) II->flag |= ITEMSET_APPEND;
91
+ if ( strchr (argv[c], 'f') ) II->flag |= ITEMSET_FREQ;
92
+ if ( strchr (argv[c], 'A') ) II->flag |= ITEMSET_OUTPUT_POSINEGA;
93
+ if ( strchr (argv[c], 'R') ){ PP->problem |= ITEMSET_POSI_RATIO; II->flag |= ITEMSET_IGNORE_BOUND; } // NOTE(review): an ITEMSET_ constant is OR'ed into PP->problem here, whereas -p/-P below set it in II->flag -- confirm this is intended
94
+ if ( strchr (argv[c], 'Q') ) II->flag |= ITEMSET_PRE_FREQ;
95
+ if ( strchr (argv[c], 'I') || strchr (argv[c], 'J') ){
96
+ II->flag |= ITEMSET_TRSACT_ID; // single occurrence
97
+ if ( PP->problem & PROBLEM_FREQSET ) II->flag |= ITEMSET_MULTI_OCC_PRINT; // output pair
98
+ if ( strchr (argv[c], 'J') ){
99
+ II->flag -= ITEMSET_TRSACT_ID; // for outputting tuple (the flag was OR'ed in three lines above, and is taken out again by subtraction)
100
+ II->flag |= ITEMSET_MULTI_OCC_PRINT;
101
+ }
102
+ }
103
+ if ( strchr (argv[c], 'i') ) II->flag |= ITEMSET_NOT_ITEMSET;
104
+ if ( strchr (argv[c], 's') ) II->flag |= ITEMSET_RULE_SUPP;
105
+ if ( strchr (argv[c], 't') ) PP->TT.flag |= LOAD_TPOSE;
106
+ if ( strchr (argv[c], 'm') ) PP->problem |= PROBLEM_EX_MAXIMAL;
107
+ if ( strchr (argv[c], 'c') ) PP->problem |= PROBLEM_EX_CLOSED;
108
+ c++; // move on to the first option or the input filename
109
+
110
+ while ( argv[c][0] == '-' ){ // consume "-X value" pairs; the value is always argv[c+1]
111
+ switch (argv[c][1]){
112
+ case 'K': II->topk.end = atoi (argv[c+1]);
113
+ break; case 'l': II->lb = atoi (argv[c+1]);
114
+ break; case 'u': II->ub = atoi(argv[c+1]);
115
+ break; case 'U': II->frq_ub = (WEIGHT)atof(argv[c+1]);
116
+ break; case 'g': II->gap_ub = atoi(argv[c+1]);
117
+ break; case 'G': II->len_ub = atoi(argv[c+1]);
118
+ break; case 'w': PP->TT.wfname = argv[c+1];
119
+ break; case 'f': II->prob_lb = atof(argv[c+1]); II->flag |= ITEMSET_RFRQ; f++; // NOTE(review): f is incremented here and for -F, but used as a bit mask (f|=1,2,4) by the options below -- confirm
120
+ break; case 'F': II->prob_ub = atof(argv[c+1]); II->flag |= ITEMSET_RINFRQ; f++;
121
+ break; case 'i': II->target = atoi(argv[c+1]);
122
+ break; case 'a': II->ratio_lb = atof(argv[c+1]); II->flag |= ITEMSET_RULE_FRQ; f|=1;
123
+ break; case 'A': II->ratio_ub = atof(argv[c+1]); II->flag |= ITEMSET_RULE_INFRQ; f|=1;
124
+ break; case 'r': II->ratio_lb = atof(argv[c+1]); II->flag |= ITEMSET_RULE_RFRQ; f|=2;
125
+ break; case 'R': II->ratio_ub = atof(argv[c+1]); II->flag |= ITEMSET_RULE_RINFRQ; f|=2;
126
+ break; case 'P': II->flag |= ITEMSET_POSI_RATIO; II->flag |= ITEMSET_IGNORE_BOUND; II->rposi_ub = atof(argv[c+1]); f|=4;
127
+ break; case 'p': II->flag |= ITEMSET_POSI_RATIO; II->flag |= ITEMSET_IGNORE_BOUND; II->rposi_lb = atof(argv[c+1]); f|=4;
128
+ break; case 'n': II->nega_lb = atof(argv[c+1]);
129
+ break; case 'N': II->nega_ub = atof(argv[c+1]);
130
+ break; case 'o': II->posi_lb = atof(argv[c+1]);
131
+ break; case 'O': II->posi_ub = atof(argv[c+1]);
132
+ break; case 's': II->setrule_lb = atof(argv[c+1]); II->flag |= ITEMSET_SET_RULE;
133
+ break; case '#': II->max_solutions = atoi(argv[c+1]);
134
+ break; case ',': II->separator = argv[c+1][0];
135
+ break; case 'Q': PP->outperm_fname = argv[c+1];
136
+ break; default: goto NEXT; // unknown option letter: stop option parsing
137
+ }
138
+ c += 2;
139
+ if ( argc < c+2 ){ LCMseq_error (); return; } // argv[c] and argv[c+1] must both exist for the next round and for the filename/support reads below
140
+ }
141
+
142
+ NEXT:;
143
+ if ( (f&3)==3 || (f&5)==5 || (f&6)==6 )
144
+ error ("-f, -F, -a, -A, -p, -P, -r and -R can not specified simultaneously", EXIT);
145
+ if ( f ) BITRM (II->flag, ITEMSET_PRE_FREQ); // BITRM presumably clears the given bit -- macro not visible here
146
+
147
+ if ( II->len_ub<INTHUGE || II->gap_ub<INTHUGE ) BITRM (PP->problem, LCMSEQ_LEFTMOST);
148
+ PP->TT.fname = argv[c];
149
+ if ( II->topk.end==0 ) II->frq_lb = (WEIGHT)atof(argv[c+1]); // the support argument is stored only while topk.end (set by -K) is still 0
150
+ if ( argc>c+2 ) PP->output_fname = argv[c+2];
151
+ }
152
+
153
+
154
+
155
+ /*******************************************************/
156
+ /* compute occurrences of all frequent items */
157
+ /* if flag is non-zero, construct each OQ[i] (delivery), o.w., compute frequency of i
   For every occurrence u in occ, the letters of transaction u->t are scanned
   with BLOOP (j, u->s, m).
   flag == 0: TT->OQ[e].end is used as a counter, the weight of the transaction
              is added to PP->occ_w[e] (and to PP->occ_pw[e] when the
              TRSACT_NEGATIVE bit is set and the weight is positive), and e is
              inserted into PP->itemcand the first time it is seen.
   flag != 0: a new LCMSEQ_ELM is written at index TT->OQ[e].t of TT->OQ[e].v.
   The caller must have sized the OQ arrays; no bound is checked here. */
158
+ /*******************************************************/
159
+ void LCMseq_occ_delivery (PROBLEM *PP, LCMSEQ_QUE *occ, int flag){
160
+ ITEMSET *II = &PP->II;
161
+ TRSACT *TT = &PP->TT;
162
+ QUEUE_ID j;
163
+ QUEUE_INT e, m;
164
+ WEIGHT w;
165
+ LCMSEQ_ELM *u, *uu, *u_end = occ->v + (occ->t-1); // u_end: address of element occ->t-1 of occ
166
+ int f = TT->flag&TRSACT_NEGATIVE; // f: TRSACT_NEGATIVE bit of the transaction flags
167
+ int fl = (!(PP->problem&PROBLEM_CLOSED)&&!flag) || (!(PP->problem&LCMSEQ_LEFTMOST)&&flag); // fl: take every scanned position (no first-appearance filtering): counting pass without PROBLEM_CLOSED, or delivery pass without LCMSEQ_LEFTMOST
168
+
169
+ MQUE_FLOOP (*occ, u){ // loop for occurrences
170
+ m = MAX (MAX(0, u->s -II->gap_ub), u->org -(II->len_ub-1)); // m: scan bound handed to BLOOP, derived from the gap bound (relative to u->s) and the window bound (relative to u->org)
171
+ if ( u < u_end && u->t == (u+1)->t ) ENMAX (m, (u+1)->s); // next occurrence lies in the same transaction: its position also bounds the scan
172
+ w = TT->w[u->t];
173
+ if ( II->itemset.t == 0 ) m = 0; // empty current sequence: the bound is 0
174
+ if ( !fl ) BLOOP (j, u->s, m) TT->sc[TT->T.v[u->t].v[j]] = 0; // clear marks of all letters in scanning part of the current transaction
175
+ BLOOP (j, u->s, m){
176
+ e = TT->T.v[u->t].v[j]; // e:= letter
177
+ if ( fl || TT->sc[e] == 0 ){ // not leftmost, or the first appearance of the letter
178
+ TT->sc[e] = 1; // mark the letter
179
+ if ( flag ){ // if occurrence computing
180
+ uu = &((LCMSEQ_ELM *)(TT->OQ[e].v))[TT->OQ[e].t]; // insert new element to occurrence
181
+ uu->t = u->t;
182
+ uu->s = j;
183
+ uu->org = II->itemset.t? u->org: j; // first letter of a sequence: the occurrence starts at j
184
+ TT->OQ[e].t++;
185
+ } else { // for just frequency counting
186
+ if ( TT->OQ[e].end == 0 ){ // initialize weights if this is the first insertion
187
+ QUE_INS (PP->itemcand, e);
188
+ PP->occ_w[e] = PP->occ_pw[e] = 0;
189
+ }
190
+ TT->OQ[e].end++;
191
+ PP->occ_w[e] += w;
192
+ if ( f && w>0 ) PP->occ_pw[e] += w;
193
+ }
194
+ // TT->sc[e] = !fl;
195
+ }
196
+ }
197
+ }
198
+ }
199
+
200
+
201
+ /* remove infrequent items from jump, and set active/in-active marks
   Walks PP->itemcand and keeps, compacted in place from position .s, only the
   items whose PP->occ_pw is at least II->frq_lb; kept items get TT->sc = 0.
   Dropped items have TT->OQ[].t and .end reset to 0 and, when PP->root is set,
   TT->sc = 3. PP->itemcand.t is moved to the end of the kept items. */
202
+ void LCMseq_rm_infreq (PROBLEM *PP){
203
+ ITEMSET *II = &PP->II;
204
+ TRSACT *TT = &PP->TT;
205
+ QUEUE_ID ii=PP->itemcand.s; // ii: write position for the items that are kept
206
+ QUEUE_INT *e;
207
+
208
+ MQUE_FLOOP (PP->itemcand, e){
209
+ if ( PP->occ_pw[*e] >= II->frq_lb ){
210
+ PP->itemcand.v[ii++] = *e;
211
+ TT->sc[*e] = 0;
212
+ } else {
213
+ TT->OQ[*e].t = TT->OQ[*e].end = 0;
214
+ if ( PP->root ) TT->sc[*e] = 3; // remove infrequent items only when gap constraint is not given
215
+ }
216
+ }
217
+ PP->itemcand.t = ii;
218
+ }
219
+
220
+ /* remove merged occurrences from occ, and re-set temporary end-marks marked in each occurrence
   For every occurrence, the letter at position u->s of its transaction is
   overwritten with item. Occurrences whose transaction has TT->mark == 0 are
   dropped; the others are compacted to the front of occ, and occ->t is set to
   the number of occurrences kept. The only visible caller sits in the
   reduction branch of LCMseq, which is disabled by a constant 0. */
221
+ void LCMseq_reduce_occ (TRSACT *TT, LCMSEQ_QUE *occ, QUEUE_INT item){
222
+ LCMSEQ_ELM *u, *uu=occ->v; // uu: write position for the occurrences that are kept
223
+ MQUE_FLOOP (*occ, u){
224
+ TT->T.v[u->t].v[u->s] = item;
225
+ if ( TT->mark[u->t] == 0 ) continue;
226
+ *uu = *u;
227
+ // update positions in occ, for shrunk transactions
228
+ if ( TT->mark[u->t] > 1 ){
229
+ uu->t = TT->mark[u->t] -2; // a mark above 1 encodes the new transaction ID plus 2
230
+ uu->org = TT->T.v[uu->t].t + u->org - u->s; // actually, org is not used when database is shrunk
231
+ uu->s = TT->T.v[uu->t].t;
232
+ }
233
+ uu++;
234
+ }
235
+ occ->t = (VEC_ID)(uu - occ->v);
236
+ }
237
+
238
+ /***************************************************************/
239
+ /* iteration of LCMseq */
240
+ /* INPUT: occurrences of current sequence
   PP:   problem instance; the current sequence is held in PP->II.itemset
   item: passed down by the caller; in this function it is only handed to
         LCMseq_reduce_occ, inside the disabled reduction branch
   occ:  occurrences of the current sequence
   Steps: (1) counting pass (LCMseq_occ_delivery with flag 0), (2) output test
   and ITEMSET_check_all_rule, (3) delivery pass (flag 1) followed by
   LCMseq_rm_infreq, (4) one recursive call per remaining candidate item.
   II->prob and the PP->itemcand window are restored on every path; TT->new_t
   and the TT->buf counters are restored after the recursive calls. */
241
+ /*************************************************************************/
242
+ void LCMseq (PROBLEM *PP, QUEUE_INT item, LCMSEQ_QUE *occ){
243
+ ITEMSET *II = &PP->II;
244
+ TRSACT *TT = &PP->TT;
245
+ QUEUE_ID js=PP->itemcand.s, i, j; // js: saved start of the candidate queue, restored at END
246
+ VEC_ID new_t = TT->new_t; // saved, restored after the recursive calls
247
+ int bnum = TT->buf.num, bblock = TT->buf.block_num; // saved buffer counters, restored after the recursive calls
248
+ int output_flag = 1;
249
+ QUEUE_INT *x, cnt=0, tt=TT->rows_org; // tt: ID of the previously visited transaction; starts at rows_org, presumably an ID no occurrence carries -- TODO confirm
250
+ WEIGHT *w=NULL, *pw=NULL;
251
+ double prob = II->prob; // saved, restored at END
252
+ LCMSEQ_ELM *u, L;
253
+ QUEUE *Q = NULL;
254
+
255
+ //QUEUE_print__ (&II->itemset);
256
+ // MQUE_FLOOP (*occ, u){ printf ("(%d, %d, %d) ", u->t, u->s, u->org); } printf ("\n");
257
+
258
+ // re-computing frequency, for (document occurrence & non-leftmost)
259
+ if ( (PP->problem & PROBLEM_CLOSED) && !(PP->problem & LCMSEQ_LEFTMOST)){
260
+ II->frq = II->pfrq = 0;
261
+ MQUE_FLOOP (*occ, u){
262
+ if ( u->t != tt ){ // count a transaction once per run of consecutive occurrences with the same ID
263
+ II->frq += TT->w[u->t];
264
+ if ( TT->w[u->t] > 0 ) II->pfrq += TT->w[u->t];
265
+ }
266
+ tt = u->t;
267
+ }
268
+ }
269
+
270
+ PP->itemcand.s = PP->itemcand.t; // initialization for the re-use of queue
271
+ II->iters++;
272
+ if ( PP->problem & PROBLEM_EX_CLOSED ) PP->th = II->frq; // threshold value for ex_maximal/ex_closed check; in the case of maximal, it is always II->frq_lb
273
+ if ( II->flag&ITEMSET_POSI_RATIO && II->pfrq!=0 ) II->frq /= (II->pfrq+II->pfrq-II->frq);
274
+
275
+ // if the itemset is empty, set frq to the original #trsactions, and compute item_frq's
276
+ LCMseq_occ_delivery (PP, occ, 0);
277
+ if ( II->itemset.t == 0 ){
278
+ if ( (II->frq = TT->total_w_org) != 0 ) // the assignment is intended; the test guards the division below
279
+ FLOOP (i, 0, TT->T.clms) II->item_frq[i] = PP->occ_w[i]/TT->total_w_org;
280
+ }
281
+
282
+ II->prob = 1.0;
283
+ MQUE_FLOOP (II->itemset, x) II->prob *= II->item_frq[*x];
284
+
285
+ // extending maximality/closedness check
286
+ if ( PP->problem & (PROBLEM_EX_MAXIMAL+PROBLEM_EX_CLOSED) )
287
+ MQUE_FLOOP (PP->itemcand, x) if ( PP->occ_w[*x] >= PP->th ) output_flag = 0;
288
+
289
+ if ( output_flag )
290
+ ITEMSET_check_all_rule (II, PP->occ_w, (QUEUE *)occ, &PP->itemcand, TT->total_pw_org, 0);
291
+ MQUE_FLOOP (PP->itemcand, x) if ( PP->occ_pw[*x] >= II->frq_lb ) cnt++; // cnt: number of candidate items reaching the support
292
+
293
+ if ( cnt == 0 || II->itemset.t >= II->ub ) goto END;
294
+ /////// database reduction ///////////
295
+ if ( 0&& PP->dir && cnt>10 && occ->t>2 && II->itemset.t>0 ){ // NOTE: the leading 0&& makes this whole branch dead code
296
+ // put end-mark to each occurrence transaction
297
+ //printf ("AAA: "); MQUE_FLOOP (*occ, u){ printf ("(%d, %d, %d) ", u->t, u->s, u->org); } printf ("\n");
298
+ Q = &TT->OQ[TT->T.clms];
299
+ Q->s = Q->t = 0;
300
+ MQUE_FLOOP (*occ, u){
301
+ TT->T.v[u->t].v[u->s] = TT->T.clms;
302
+ QUE_INS (*Q, u->t);
303
+ }
304
+ //printf ("========\n");
305
+ //TRSACT_print (TT, Q, NULL);
306
+ //printf ("BBB: "); MQUE_FLOOP (*occ, u){ printf ("(%d, %d, %d) ", u->t, u->s, u->org); } printf ("\n");
307
+ TRSACT_find_same (TT, Q, TT->T.clms);
308
+ TRSACT_merge_trsact (TT, Q, TT->T.clms);
309
+ // erase end-mark of each occurrence transaction, and remove unified occurrences
310
+ //MQUE_FLOOP (*occ, u){ printf ("(%d, %d, %d) ", u->t, u->s, u->org); } printf ("\n");
311
+ LCMseq_reduce_occ (TT, occ, item);
312
+ TRSACT_print (TT, Q, NULL);
313
+ printf ("-------\n");
314
+ //MQUE_FLOOP (*occ, u){ printf ("(%d, %d, %d) ", u->t, u->s, u->org); } printf ("\n");
315
+ }
316
+
317
+ ///////////// delivery /////////////
318
+ LCMseq_occ_delivery (PP, occ, 1);
319
+ LCMseq_rm_infreq (PP);
320
+ cnt = QUEUE_LENGTH_ (PP->itemcand);
321
+ QUEUE_occ_dup (&PP->itemcand, &Q, TT->OQ, &w, PP->occ_w, &pw, PP->occ_pw, sizeof(LCMSEQ_ELM)); // presumably copies the occurrence arrays and weights of the candidates into one block at Q -- helper not visible here
322
+ if ( Q == NULL ) goto END;
323
+ MQUE_FLOOP (PP->itemcand, x) TT->OQ[*x].end = TT->OQ[*x].t = 0;
324
+ PP->itemcand.t = PP->itemcand.s;
325
+
326
+ /************ recursive calls ***************/
327
+ FLOOP (i, 0, cnt){
328
+ II->frq = w[i];
329
+ II->pfrq = pw[i];
330
+ if ( II->flag & ITEMSET_SET_RULE ){
331
+ II->set_weight[II->itemset.t] = II->frq;
332
+ II->set_occ[II->itemset.t] = &Q[i];
333
+ }
334
+ QUE_INS (II->itemset, Q[i].end); // Q[i].end is used as the item of the i-th candidate (it is also passed as item below)
335
+ // prob2 = II->prob;
336
+ // II->prob *= frqs[e+TT->item_max*2];
337
+ if ( II->itemset.t == 0 ){ // reverse occurrence order for the first iteration (for the process of overlapping occurrences); NOTE(review): tested right after QUE_INS on II->itemset, so presumably never true here -- confirm
338
+ u = (LCMSEQ_ELM *)TT->OQ[i].v;
339
+ FLOOP (j, 0, TT->OQ[i].t/2){
340
+ L = u[j]; u[j] = u[TT->OQ[i].t-j-1]; u[TT->OQ[i].t-j-1] = L;
341
+ }
342
+ }
343
+ LCMseq (PP, Q[i].end, (LCMSEQ_QUE *)&Q[i]);
344
+ // II->prob = prob2;
345
+ II->itemset.t--; // take the item appended above out of the current sequence again
346
+ }
347
+ free2 (Q);
348
+ TT->new_t = new_t;
349
+ TT->buf.num = bnum, TT->buf.block_num = bblock;
350
+
351
+ END:;
352
+ MQUE_FLOOP (PP->itemcand, x) TT->OQ[*x].end = TT->OQ[*x].t = 0;
353
+ PP->itemcand.t = PP->itemcand.s;
354
+ PP->itemcand.s = js;
355
+ II->prob = prob;
356
+ }
357
+
358
+ /*************************************************************************/
359
+ /* initialization of LCMseq ver. 2
   Allocates the problem buffers (PROBLEM_alloc) and the root occurrence array
   occ->v, makes II->itemflag share the memory of TT->sc, reverses every
   transaction in place, replaces gap/window bounds that are still >= INTHUGE
   by TT->row_max, fills occ with one element per entry of TT->OQ[TT->T.clms],
   and sets PP->dir, PP->root and PP->th. */
360
+ /*************************************************************************/
361
+ void LCMseq_init (PROBLEM *PP, LCMSEQ_QUE *occ){
362
+ ITEMSET *II = &PP->II;
363
+ TRSACT *TT = &PP->TT;
364
+ VEC_ID i;
365
+ QUEUE_ID j;
366
+ QUEUE_INT *x;
367
+ LCMSEQ_ELM L;
368
+
369
+ II->X = TT;
370
+ II->frq = TT->total_w_org; II->pfrq = TT->total_pw_org;
371
+ II->flag |= ITEMSET_USE_ORG +ITEMSET_ITEMFRQ;
372
+ PROBLEM_alloc (PP, TT->T.clms, TT->T.t, TT->row_max, TT->perm, PROBLEM_ITEMCAND +((TT->flag&TRSACT_NEGATIVE)?PROBLEM_OCC_PW: PROBLEM_OCC_W));
373
+ malloc2 (occ->v, TT->T.t, EXIT); // one LCMSEQ_ELM slot per transaction, presumably -- malloc2 macro not visible here
374
+ occ->end = TT->clm_max; occ->s = occ->t = 0;
375
+ TT->perm = NULL; // presumably ownership moved on via PROBLEM_alloc above -- TODO confirm
376
+ if ( II->perm && RANGE(0, II->target, II->item_max) ) II->target = II->perm[II->target];
377
+
378
+ if ( !(TT->sc) ) calloc2 (TT->sc, TT->T.clms+2, return);
379
+ free2 (II->itemflag); II->itemflag = TT->sc; // II->itemflag and TT->sc shares the same memory
380
+ // TT->occ_unit = sizeof(QUEUE_INT)*3; // one occ is composed of 3 integers
381
+
382
+ // make occurrence & reverse each transaction
383
+ FLOOP (i, 0, TT->T.t){
384
+ FLOOP (j, 0, TT->T.v[i].t/2)
385
+ SWAP_QUEUE_INT (TT->T.v[i].v[j], TT->T.v[i].v[TT->T.v[i].t-1-j]);
386
+ }
387
+ if ( II->len_ub >= INTHUGE ) II->len_ub = TT->row_max;
388
+ if ( II->gap_ub >= INTHUGE ) II->gap_ub = TT->row_max;
389
+ II->total_weight = TT->total_w_org;
390
+ i=0;MQUE_FLOOP (TT->OQ[TT->T.clms], x){ // i is not read again after this assignment within this function
391
+ L.t = *x;
392
+ L.s = L.org = TT->T.v[*x].t; // !! org is originally -1
393
+ QUE_INS (*occ, L);
394
+ }
395
+ PP->dir = (PP->problem&LCMSEQ_LEFTMOST) && II->len_ub>=TT->row_max && !(II->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT)); // flag for shrink or not
396
+ PP->root = II->gap_ub>=TT->row_max && II->len_ub>=TT->row_max; // flag for removing infrequent item or not
397
+ PP->th = II->frq_lb;
398
+ }
399
+
400
+ /*************************************************************************/
401
+ /* main of LCMseq ver. 2
   Reads the command line, loads the database (PROBLEM_load), initializes the
   search (LCMseq_init), runs LCMseq from the empty sequence and calls
   ITEMSET_last_output. Returns 1 when ERROR_MES is set, 0 otherwise. */
402
+ /*************************************************************************/
403
+ int LCMseq_main (int argc, char *argv[]){
404
+ PROBLEM PP;
405
+ ITEMSET *II = &PP.II;
406
+ TRSACT *TT = &PP.TT;
407
+ LCMSEQ_QUE occ;
408
+ occ.v = NULL; // so that free2 (occ.v) below is safe when initialization is skipped, presumably
409
+
410
+ PROBLEM_init (&PP);
411
+ LCMseq_read_param (argc, argv, &PP);
412
+ if ( ERROR_MES ) return (1);
413
+
414
+ TT->occ_unit = sizeof(LCMSEQ_ELM);
415
+ TT->flag |= TRSACT_MAKE_NEW +TRSACT_ALLOC_OCC + ((II->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT))?0: (TRSACT_SHRINK+TRSACT_1ST_SHRINK)) ; // the shrink flags are left out when transaction IDs / occurrences are to be printed
416
+ TT->w_lb = (((II->flag&(ITEMSET_TRSACT_ID+ITEMSET_MULTI_OCC_PRINT)) && (PP.problem & PROBLEM_FREQSET)) || (II->flag&ITEMSET_RULE) || II->gap_ub<INTHUGE || II->len_ub<INTHUGE )? -WEIGHTHUGE: II->frq_lb; // w_lb is -WEIGHTHUGE in the listed cases, otherwise the support
417
+ PROBLEM_load (&PP);
418
+
419
+ if ( !ERROR_MES ){
420
+ LCMseq_init (&PP, &occ);
421
+ if ( !ERROR_MES ){
422
+ LCMseq (&PP, TT->T.clms, &occ);
423
+ ITEMSET_last_output (II);
424
+ }
425
+ }
426
+
427
+ free2 (occ.v);
428
+ TT->sc = NULL; // II->itemflag shares this memory (see LCMseq_init); presumably cleared so PROBLEM_end does not free it twice -- TODO confirm
429
+ PROBLEM_end (&PP);
430
+ return (ERROR_MES?1:0);
431
+ }
432
+
433
+ /*******************************************************************************/
434
+ #ifndef _NO_MAIN_ // main() is compiled only when _NO_MAIN_ has not been defined before this point
435
+ #define _NO_MAIN_
436
+ int main (int argc, char *argv[]){ // program entry: forwards the arguments to LCMseq_main and returns its status
437
+ return (LCMseq_main (argc, argv));
438
+ }
439
+ #endif
440
+ /*******************************************************************************/
441
+
442
+ #endif
443
+
444
+
445
+
446
+