nysol-take 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (161) hide show
  1. checksums.yaml +7 -0
  2. data/bin/mbiclique.rb +317 -0
  3. data/bin/mbipolish.rb +362 -0
  4. data/bin/mccomp.rb +235 -0
  5. data/bin/mclique.rb +295 -0
  6. data/bin/mclique2g.rb +105 -0
  7. data/bin/mcliqueInfo.rb +203 -0
  8. data/bin/mfriends.rb +202 -0
  9. data/bin/mgdiff.rb +252 -0
  10. data/bin/mhifriend.rb +456 -0
  11. data/bin/mhipolish.rb +465 -0
  12. data/bin/mitemset.rb +168 -0
  13. data/bin/mpal.rb +410 -0
  14. data/bin/mpolishing.rb +399 -0
  15. data/bin/msequence.rb +165 -0
  16. data/bin/mtra2g.rb +476 -0
  17. data/bin/mtra2gc.rb +360 -0
  18. data/ext/grhfilrun/extconf.rb +12 -0
  19. data/ext/grhfilrun/grhfilrun.c +85 -0
  20. data/ext/grhfilrun/src/_sspc.c +358 -0
  21. data/ext/grhfilrun/src/aheap.c +545 -0
  22. data/ext/grhfilrun/src/aheap.h +251 -0
  23. data/ext/grhfilrun/src/base.c +92 -0
  24. data/ext/grhfilrun/src/base.h +59 -0
  25. data/ext/grhfilrun/src/fstar.c +497 -0
  26. data/ext/grhfilrun/src/fstar.h +80 -0
  27. data/ext/grhfilrun/src/grhfil.c +214 -0
  28. data/ext/grhfilrun/src/itemset.c +713 -0
  29. data/ext/grhfilrun/src/itemset.h +170 -0
  30. data/ext/grhfilrun/src/problem.c +415 -0
  31. data/ext/grhfilrun/src/problem.h +179 -0
  32. data/ext/grhfilrun/src/queue.c +533 -0
  33. data/ext/grhfilrun/src/queue.h +182 -0
  34. data/ext/grhfilrun/src/sample.c +19 -0
  35. data/ext/grhfilrun/src/sspc.c +597 -0
  36. data/ext/grhfilrun/src/sspc2.c +491 -0
  37. data/ext/grhfilrun/src/stdlib2.c +1482 -0
  38. data/ext/grhfilrun/src/stdlib2.h +892 -0
  39. data/ext/grhfilrun/src/trsact.c +817 -0
  40. data/ext/grhfilrun/src/trsact.h +160 -0
  41. data/ext/grhfilrun/src/vec.c +745 -0
  42. data/ext/grhfilrun/src/vec.h +172 -0
  43. data/ext/lcmrun/extconf.rb +20 -0
  44. data/ext/lcmrun/lcmrun.cpp +99 -0
  45. data/ext/lcmrun/src/aheap.c +216 -0
  46. data/ext/lcmrun/src/aheap.h +111 -0
  47. data/ext/lcmrun/src/base.c +92 -0
  48. data/ext/lcmrun/src/base.h +59 -0
  49. data/ext/lcmrun/src/itemset.c +496 -0
  50. data/ext/lcmrun/src/itemset.h +157 -0
  51. data/ext/lcmrun/src/lcm.c +427 -0
  52. data/ext/lcmrun/src/problem.c +349 -0
  53. data/ext/lcmrun/src/problem.h +177 -0
  54. data/ext/lcmrun/src/queue.c +528 -0
  55. data/ext/lcmrun/src/queue.h +176 -0
  56. data/ext/lcmrun/src/sgraph.c +359 -0
  57. data/ext/lcmrun/src/sgraph.h +173 -0
  58. data/ext/lcmrun/src/stdlib2.c +1282 -0
  59. data/ext/lcmrun/src/stdlib2.h +823 -0
  60. data/ext/lcmrun/src/trsact.c +747 -0
  61. data/ext/lcmrun/src/trsact.h +159 -0
  62. data/ext/lcmrun/src/vec.c +731 -0
  63. data/ext/lcmrun/src/vec.h +171 -0
  64. data/ext/lcmseq0run/extconf.rb +20 -0
  65. data/ext/lcmseq0run/lcmseq0run.cpp +59 -0
  66. data/ext/lcmseq0run/src/aheap.c +216 -0
  67. data/ext/lcmseq0run/src/aheap.h +111 -0
  68. data/ext/lcmseq0run/src/base.c +92 -0
  69. data/ext/lcmseq0run/src/base.h +59 -0
  70. data/ext/lcmseq0run/src/itemset.c +518 -0
  71. data/ext/lcmseq0run/src/itemset.h +157 -0
  72. data/ext/lcmseq0run/src/itemset_zero.c +522 -0
  73. data/ext/lcmseq0run/src/lcm_seq.c +446 -0
  74. data/ext/lcmseq0run/src/lcm_seq_zero.c +446 -0
  75. data/ext/lcmseq0run/src/problem.c +439 -0
  76. data/ext/lcmseq0run/src/problem.h +179 -0
  77. data/ext/lcmseq0run/src/problem_zero.c +439 -0
  78. data/ext/lcmseq0run/src/queue.c +533 -0
  79. data/ext/lcmseq0run/src/queue.h +182 -0
  80. data/ext/lcmseq0run/src/stdlib2.c +1350 -0
  81. data/ext/lcmseq0run/src/stdlib2.h +864 -0
  82. data/ext/lcmseq0run/src/trsact.c +747 -0
  83. data/ext/lcmseq0run/src/trsact.h +159 -0
  84. data/ext/lcmseq0run/src/vec.c +779 -0
  85. data/ext/lcmseq0run/src/vec.h +172 -0
  86. data/ext/lcmseqrun/extconf.rb +20 -0
  87. data/ext/lcmseqrun/lcmseqrun.cpp +101 -0
  88. data/ext/lcmseqrun/src/aheap.c +216 -0
  89. data/ext/lcmseqrun/src/aheap.h +111 -0
  90. data/ext/lcmseqrun/src/base.c +92 -0
  91. data/ext/lcmseqrun/src/base.h +59 -0
  92. data/ext/lcmseqrun/src/itemset.c +518 -0
  93. data/ext/lcmseqrun/src/itemset.h +157 -0
  94. data/ext/lcmseqrun/src/itemset_zero.c +522 -0
  95. data/ext/lcmseqrun/src/lcm_seq.c +447 -0
  96. data/ext/lcmseqrun/src/lcm_seq_zero.c +446 -0
  97. data/ext/lcmseqrun/src/problem.c +439 -0
  98. data/ext/lcmseqrun/src/problem.h +179 -0
  99. data/ext/lcmseqrun/src/problem_zero.c +439 -0
  100. data/ext/lcmseqrun/src/queue.c +533 -0
  101. data/ext/lcmseqrun/src/queue.h +182 -0
  102. data/ext/lcmseqrun/src/stdlib2.c +1350 -0
  103. data/ext/lcmseqrun/src/stdlib2.h +864 -0
  104. data/ext/lcmseqrun/src/trsact.c +747 -0
  105. data/ext/lcmseqrun/src/trsact.h +159 -0
  106. data/ext/lcmseqrun/src/vec.c +779 -0
  107. data/ext/lcmseqrun/src/vec.h +172 -0
  108. data/ext/lcmtransrun/extconf.rb +18 -0
  109. data/ext/lcmtransrun/lcmtransrun.cpp +264 -0
  110. data/ext/macerun/extconf.rb +20 -0
  111. data/ext/macerun/macerun.cpp +57 -0
  112. data/ext/macerun/src/aheap.c +217 -0
  113. data/ext/macerun/src/aheap.h +112 -0
  114. data/ext/macerun/src/itemset.c +491 -0
  115. data/ext/macerun/src/itemset.h +158 -0
  116. data/ext/macerun/src/mace.c +503 -0
  117. data/ext/macerun/src/problem.c +346 -0
  118. data/ext/macerun/src/problem.h +174 -0
  119. data/ext/macerun/src/queue.c +529 -0
  120. data/ext/macerun/src/queue.h +177 -0
  121. data/ext/macerun/src/sgraph.c +360 -0
  122. data/ext/macerun/src/sgraph.h +174 -0
  123. data/ext/macerun/src/stdlib2.c +993 -0
  124. data/ext/macerun/src/stdlib2.h +811 -0
  125. data/ext/macerun/src/vec.c +634 -0
  126. data/ext/macerun/src/vec.h +170 -0
  127. data/ext/sspcrun/extconf.rb +20 -0
  128. data/ext/sspcrun/src/_sspc.c +358 -0
  129. data/ext/sspcrun/src/aheap.c +545 -0
  130. data/ext/sspcrun/src/aheap.h +251 -0
  131. data/ext/sspcrun/src/base.c +92 -0
  132. data/ext/sspcrun/src/base.h +59 -0
  133. data/ext/sspcrun/src/fstar.c +496 -0
  134. data/ext/sspcrun/src/fstar.h +80 -0
  135. data/ext/sspcrun/src/grhfil.c +213 -0
  136. data/ext/sspcrun/src/itemset.c +713 -0
  137. data/ext/sspcrun/src/itemset.h +170 -0
  138. data/ext/sspcrun/src/problem.c +415 -0
  139. data/ext/sspcrun/src/problem.h +179 -0
  140. data/ext/sspcrun/src/queue.c +533 -0
  141. data/ext/sspcrun/src/queue.h +182 -0
  142. data/ext/sspcrun/src/sample.c +19 -0
  143. data/ext/sspcrun/src/sspc.c +598 -0
  144. data/ext/sspcrun/src/sspc2.c +491 -0
  145. data/ext/sspcrun/src/stdlib2.c +1482 -0
  146. data/ext/sspcrun/src/stdlib2.h +892 -0
  147. data/ext/sspcrun/src/trsact.c +817 -0
  148. data/ext/sspcrun/src/trsact.h +160 -0
  149. data/ext/sspcrun/src/vec.c +745 -0
  150. data/ext/sspcrun/src/vec.h +172 -0
  151. data/ext/sspcrun/sspcrun.cpp +54 -0
  152. data/lib/nysol/enumLcmEp.rb +338 -0
  153. data/lib/nysol/enumLcmEsp.rb +284 -0
  154. data/lib/nysol/enumLcmIs.rb +275 -0
  155. data/lib/nysol/enumLcmSeq.rb +143 -0
  156. data/lib/nysol/items.rb +201 -0
  157. data/lib/nysol/seqDB.rb +256 -0
  158. data/lib/nysol/take.rb +39 -0
  159. data/lib/nysol/taxonomy.rb +113 -0
  160. data/lib/nysol/traDB.rb +257 -0
  161. metadata +239 -0
@@ -0,0 +1,817 @@
1
+ /* QUEUE based Transaction library, including database reduction.
2
+ 25/Nov/2007 by Takeaki Uno e-mail:uno@nii.jp,
3
+ homepage: http://research.nii.ac.jp/~uno/index.html */
4
+ /* This program is available for only academic use, basically.
5
+ Anyone can modify this program, but he/she has to write down
6
+ the change of the modification on the top of the source code.
7
+ Neither contact nor appointment to Takeaki Uno is needed.
8
+ If one wants to re-distribute this code, please
9
+ refer the newest code, and show the link to homepage of
10
+ Takeaki Uno, to notify the news about the codes for the users. */
11
+
12
+ #ifndef _trsact_c_
13
+ #define _trsact_c_
14
+
15
+ #include"trsact.h"
16
+ #include"base.c"
17
+ #include"vec.c"
18
+
19
+ /***********************************/
20
+ /* print transactions */
21
+ /***********************************/
22
+ void TRSACT_print (TRSACT *T, QUEUE *occ, PERM *p){
23
+ VEC_ID i, t;
24
+ QUEUE_ID j;
25
+ QUEUE_INT e;
26
+ FLOOP (i, 0, occ? occ->t: T->T.t){
27
+ t = occ? *((QUEUE_INT *)(&(((char *)(occ->v))[i*T->occ_unit]))): i;
28
+ if ( occ ) printf (QUEUE_INTF "::: ", t);
29
+ for (j=0; j<T->T.v[t].t ; j++){
30
+ e = T->T.v[t].v[j];
31
+ printf (QUEUE_INTF, p? p[e]: e);
32
+ if ( T->T.w ) printf ("(" WEIGHTF ")", T->T.w[t][j]);
33
+ printf (",");
34
+ }
35
+ if ( T->w ) printf (" :" WEIGHTF " ", T->w[t]);
36
+ printf (" (" QUEUE_INTF ")\n", T->T.v[t].end);
37
+ }
38
+ }
39
+
40
+ /* print the profiles of the transaction database */
41
+ void TRSACT_prop_print (TRSACT *T){
42
+ if ( !(T->flag & SHOW_MESSAGE) ) return;
43
+ print_err ("trsact: %s", T->fname);
44
+ // print_fname (" ,2nd-trsact %s (from ID %d)", T->fname2, T->end1);
45
+ print_err (" ,#transactions %d ,#items %d ,size %zd", T->rows_org, T->clms_org, T->eles_org);
46
+ print_err (" extracted database: #transactions %d ,#items %d ,size %zd", T->T.t, T->T.clms, T->T.eles);
47
+ print_fname (" ,weightfile %s", T->wfname);
48
+ print_fname (" ,2nd-weightfile %s", T->wfname2);
49
+ print_fname (" ,itemweightfile %s", T->item_wfname);
50
+ print_fname (" ,2nd-itemweightfile %s", T->item_wfname2);
51
+ print_fname (" ,item-order-file %s", T->pfname);
52
+ print_err ("\n");
53
+ }
54
+
55
+
56
+ /* initialization of structure TRSACT */
57
+ void TRSACT_init (TRSACT *T){
58
+ T->type = TYPE_TRSACT;
59
+ T->fname = T->fname2 = T->wfname = T->wfname2 = T->item_wfname = T->item_wfname2 = T->pfname =NULL;
60
+ T->flag = T->flag2 = 0;
61
+ T->T = INIT_SETFAMILY;
62
+ T->clms_org = T->clm_max = T->clms_end = 0;
63
+ T->rows_org = T->row_max = T->end1 = T->sep = 0;
64
+ T->perm = NULL;
65
+ T->trperm = NULL;
66
+ T->w = T->pw = NULL;
67
+
68
+ T->clm_lb = 0;
69
+ T->clm_ub = VEC_ID_END;
70
+ T->row_lb = 0;
71
+ T->row_ub = QUEUE_IDHUGE;
72
+ T->w_lb = -WEIGHTHUGE; T->w_ub = WEIGHTHUGE;
73
+
74
+ T->eles_org = 0;
75
+ T->total_w = T->total_pw = T->total_w_org = T->total_pw_org =0;
76
+
77
+ T->jump = INIT_QUEUE;
78
+ T->str_num = 0;
79
+ T->head = T->strID = NULL;
80
+
81
+ T->th = 1;
82
+ T->mark = NULL;
83
+ T->shift = NULL;
84
+ T->occ_unit = sizeof(QUEUE_INT);
85
+ T->OQ = NULL;
86
+ T->sc = NULL;
87
+
88
+ T->new_t = 0;
89
+ T->buf = INIT_BASE;
90
+ T->wbuf = INIT_BASE;
91
+ }
92
+
93
+ /**************************************************************/
94
+ void TRSACT_end (TRSACT *T){
95
+ if ( T->OQ ){ free2 (T->OQ->v ); free2 (T->OQ[T->T.clms].v); }
96
+ free2 (T->T.w);
97
+ SETFAMILY_end (&T->T);
98
+ if ( T->w != T->pw ) free2 (T->pw);
99
+ mfree (T->w, T->perm, T->trperm);
100
+ mfree (T->mark, T->shift, T->sc, T->OQ, T->head, T->strID);
101
+ QUEUE_end (&T->jump);
102
+ BASE_end (&T->buf);
103
+ BASE_end (&T->wbuf);
104
+ TRSACT_init (T);
105
+ }
106
+
107
/* Exclusive upper bound on item IDs accepted from input files: the readers in this file skip any value that is negative or >= TRSACT_MAXNUM. May be overridden by defining it before this point. */
+ #ifndef TRSACT_MAXNUM
108
+ #define TRSACT_MAXNUM 20000000LL
109
+ #endif
110
+
111
+ /*****************************************/
112
+ /* scan file "fp" with weight file wfp and count #items, #transactions in the file. */
113
+ /* count weight only if wfp!=NULL */
114
+ /* T->rows_org, clms_org, eles_org := #transactions, #items, #all elements */
115
+ /* ignore the transactions of size not in range T->clm_lb - clm_ub */
116
+ /* T->total_w, total_pw := sum of (positive) weights of transactions */
117
+ /* C->clmt[i],C->cw[i] := the number/(sum of weights) of transactions including i */
118
+ /****************************************/
119
/* First pass over transaction file fp: gather sizes and weights into T and C.
   wf : name of a transaction-weight file, or NULL. When given, its values are read into C->rw,
        and TRSACT_NEGATIVE is set in T->flag2 if the minimum found by ARY_MIN is negative.
   For every line, integers in [0, TRSACT_MAXNUM) are accepted as items; T->clms_org, C->clmt[item]
   and C->cw are updated (C->cw per item normally, per row under LOAD_TPOSE), the row size is stored
   in C->rowt, and T->rows_org is incremented. An empty line increments T->str_num instead of T->eles_org.
   Rows that fail the size/weight range tests get their contribution to C->clmt taken back.
   On an open/allocation failure control jumps to ERR, which frees the C arrays and executes EXIT. */
+ void TRSACT_file_count (TRSACT *T, FILE_COUNT *C, FILE2 *fp, char *wf){
120
+ QUEUE_INT i, item, kk=0, k, jump_end=0;
121
+ WEIGHT w, s;
122
+ VEC_ID *jump=NULL;
123
+ FILE2 wfp = INIT_FILE2;
124
+ LONG jj;
125
+
126
+ if ( wf ){
127
+ FILE2_open (wfp, wf, "r", goto ERR);
128
+ ARY_SCAN (kk, WEIGHT, wfp, 1);
129
+ kk += T->rows_org; // rows already counted (e.g. from a previous file) precede the new weights
130
+ realloc2 (C->rw, kk+1, goto ERR);
131
+ FILE2_reset (&wfp);
132
+ ARY_READ (C->rw, double, kk, wfp);
133
+ ARY_MIN (w, i, C->rw, 0, kk);
134
+ if ( w<0 ) T->flag2 |= TRSACT_NEGATIVE;
135
+ FILE2_close (&wfp);
136
+ }
137
+ do {
138
+ s=0;
139
+ k=0;
140
+ w = wf? (T->rows_org<kk? C->rw[T->rows_org]: TRSACT_DEFAULT_WEIGHT): 1; // weight of the current row; rows beyond the weight file get the default
141
+ do {
142
+ jj = FILE2_read_int (fp);
143
+ item = (QUEUE_INT)jj;
144
+ if ( (FILE_err&4)==0 && jj<TRSACT_MAXNUM && jj>=0 ){
145
+ ENMAX (T->clms_org, item+1); // update #items
146
+ reallocx (jump, jump_end, k, 0, goto ERR);
147
+ jump[k] = item; // remember the items of this row so their counts can be undone below
148
+ k++;
149
+ s += wf? (item<kk? MAX(C->rw[item],0): TRSACT_DEFAULT_WEIGHT): 1;
150
+
151
+ // count/weight-sum for the transpose mode
152
+ reallocx (C->clmt, C->clm_end, item, 0, goto ERR);
153
+ C->clmt[item]++;
154
+ if ( !(T->flag&LOAD_TPOSE) ){
155
+ reallocx (C->cw, C->cw_end, item, 0, goto ERR);
156
+ C->cw[item] += MAX(w,0); // sum up positive weights
157
+ }
158
+ }
159
+ } while ( (FILE_err&3)==0);
160
+
161
+ // count/weight-sum for the transpose mode
162
+ reallocx (C->rowt, C->row_end, T->rows_org, 0, goto ERR);
163
+ C->rowt[T->rows_org] = k;
164
+ if ( T->flag&LOAD_TPOSE ){
165
+ reallocx (C->cw, C->cw_end, T->rows_org, 0, goto ERR);
166
+ C->cw[T->rows_org] = s; // sum up positive weights
167
+ }
168
+ if ( k==0 && FILE_err&2 ) break; // NOTE(review): FILE_err&2 presumably flags end of file, so an empty final line is not counted -- confirm in stdlib2
169
+ T->rows_org++; // increase #transaction
170
+
171
+ if ( !wf ) s = k; // un-weighted case; weighted sum is #included-items
172
+ if ( k==0 ){
173
+ T->str_num++; // increase #streams if empty transaction is read
174
+ } else {
175
+ T->eles_org += k;
176
+ if ( (!(T->flag&LOAD_TPOSE) && !RANGE (T->row_lb, k, T->row_ub))
177
+ || ((T->flag&LOAD_TPOSE) && (!RANGE(T->w_lb, s, T->w_ub) || !RANGE (T->clm_lb, k, T->clm_ub)) ) ) FLOOP (i, 0, k) C->clmt[jump[i]]--;
178
+ }
179
+ } while ( (FILE_err&2)==0);
180
+ free2 (jump);
181
+ // no weight array: both original weight totals are set to the number of transactions
182
+ if ( C->rw == NULL ){ T->total_w_org = T->total_pw_org = T->rows_org; return; }
183
+ C->clm_btm = MIN(kk, T->rows_org);
184
+ reallocx (C->rw, kk, T->rows_org, TRSACT_DEFAULT_WEIGHT, goto ERR);
185
+ FLOOP (k, 0, T->rows_org){
186
+ T->total_w_org += C->rw[k];
187
+ T->total_pw_org += MAX(C->rw[k],0);
188
+ }
189
+ return;
190
+ ERR:;
191
+ FILE2_close (&wfp);
192
+ mfree (C->rw, C->cw, C->clmt, C->rowt, jump);
193
+ EXIT;
194
+ }
195
+
196
+ /* allocate memory, set permutation, and free C.cw and C.rw (C.clmt and C.rowt are freed later, by TRSACT_file_read) */
197
+ int TRSACT_alloc (TRSACT *T, FILE_COUNT *C){
198
+ VEC_ID t, tt=0, ttt, ttt_max, h, flag, org;
199
+ FILE_COUNT_INT *ct;
200
+ size_t s=0;
201
+ PERM *q, *p=NULL;
202
+ char *buf;
203
+
204
+ // swap variables in the case of transpose
205
+ if ( T->flag & LOAD_TPOSE ){
206
+ common_QUEUE_INT = T->clms_org; T->clms_org = (QUEUE_INT)T->rows_org; T->rows_org = (VEC_ID)common_QUEUE_INT;
207
+ SWAP_PNT (C->clmt, C->rowt);
208
+ }
209
+ ttt_max = ttt = T->clms_org;
210
+
211
+ if ( T->flag2&TRSACT_SHRINK ) T->flag |= LOAD_DBLBUF;
212
+ // count valid columns/elements
213
+
214
+ if ( T->pfname && !(T->flag2&TRSACT_WRITE_PERM) ){
215
+ ARY_LOAD (p, QUEUE_INT, ttt, T->pfname, 1, EXIT0);
216
+ ARY_MAX (ttt_max, tt, p, 0, ttt);
217
+ // ENMAX (T->clms_org, ttt_max+1);
218
+ T->T.clms = ttt_max+1;
219
+ } else {
220
+ if ( T->flag&LOAD_PERM ){
221
+ if ( T->flag2&TRSACT_FRQSORT )
222
+ p = qsort_perm_WEIGHT (C->cw, T->clms_org, (T->flag&LOAD_INCSORT)?1:-1);
223
+ else p = qsort_perm_FILE_COUNT_INT (C->clmt, T->clms_org, (T->flag&LOAD_INCSORT)?1:-1);
224
+ }
225
+ if ( T->pfname ) ARY_WRITE (T->pfname, p, T->clms_org, PERMF " ", EXIT0);
226
+ }
227
+ T->clms_end = MAX (T->clms_org, T->T.clms);
228
+
229
+ malloc2 (C->cperm, T->clms_org+1, EXIT0);
230
+ ARY_FILL (C->cperm, 0, T->clms_org, T->clms_org+1);
231
+ FLOOP (t, 0, ttt){
232
+ tt = p? p[t]: t;
233
+ if ( tt >= T->clms_org ) continue;
234
+ if ( RANGE(T->w_lb, C->cw[tt], T->w_ub) && RANGE (T->clm_lb, C->clmt[tt], T->clm_ub)){
235
+ s += C->clmt[tt];
236
+ C->cperm[tt] = (T->pfname && !(T->flag2&TRSACT_WRITE_PERM))? t: T->T.clms++;
237
+ } else C->cperm[tt] = T->clms_end+1;
238
+ }
239
+ free2 (p);
240
+ if ( T->T.clms == 0 ) error ("there is no frequent item", return 0);
241
+
242
+ // count valid rows/elements
243
+ if ( T->flag&(LOAD_SIZSORT+LOAD_WSORT) ){
244
+ if ( T->flag&LOAD_WSORT && C->rw )
245
+ p = qsort_perm_WEIGHT (C->rw, T->rows_org, (T->flag&LOAD_DECROWSORT)?-1:1);
246
+ else p = qsort_perm_FILE_COUNT_INT (C->rowt, T->rows_org, (T->flag&LOAD_DECROWSORT)?-1:1);
247
+ }
248
+ malloc2 (C->rperm, T->rows_org, EXIT0);
249
+ FLOOP (t, 0, T->rows_org){ // compute #elements according to rowt, and set rperm
250
+ tt = p? p[t]: t;
251
+ if ( RANGE (T->row_lb, C->rowt[tt], T->row_ub) ){
252
+ C->rperm[tt] = T->T.t++;
253
+ T->T.eles += C->rowt[t];
254
+ } else C->rperm[tt] = T->rows_org+1;
255
+ }
256
+
257
+ free2 (p); free2 (C->cw);
258
+ flag = (T->T.eles > s && !(T->flag & LOAD_TPOSE) );
259
+ if ( flag ) T->T.eles = s;
260
+ T->T.ele_end = T->T.eles;
261
+
262
+ T->T.end = T->T.t * ((T->flag&LOAD_DBLBUF)? 2: 1)+1;
263
+ malloc2 (T->w, T->T.end, EXIT0);
264
+ if ( TRSACT_NEGATIVE ) malloc2 (T->pw, T->T.end, EXIT0);
265
+ else T->pw = NULL;
266
+ malloc2 (T->trperm, T->T.t, EXIT0);
267
+ malloc2 (T->T.v, T->T.end, EXIT0);
268
+ malloc2 (buf, (T->T.eles+T->T.end+1)*T->T.unit, EXIT0);
269
+ T->T.buf = (QUEUE_INT *)buf;
270
+ calloc2 (T->perm, T->T.clms+1, EXIT0);
271
+ QUEUE_alloc (&T->jump, T->T.clms+1);
272
+ BASE_alloc (&T->buf, sizeof(QUEUE_INT), MAX((int)T->row_max*4, (int)(T->T.eles+1)/10+T->T.clms+100));
273
+ BASE_alloc (&T->wbuf, sizeof(WEIGHT), MAX((int)T->row_max*4, (int)(T->T.eles+1)/10+T->T.clms+100));
274
+ if ( T->flag2&TRSACT_SHRINK ){
275
+ malloc2 (T->mark, T->T.end, EXIT0);
276
+ malloc2 (T->shift, T->T.end, EXIT0);
277
+ calloc2 (T->sc, T->T.clms, EXIT0);
278
+ }
279
+ if ( T->flag2&TRSACT_MULTI_STREAM ){
280
+ malloc2 (T->head, T->str_num+2, EXIT0);
281
+ malloc2 (T->strID, (T->flag&LOAD_TPOSE)?T->T.clms:T->T.end, EXIT0);
282
+ }
283
+ if ( T->T.w==NULL && (T->flag2&TRSACT_UNION)) calloc2 (T->T.w, T->T.end, EXIT0);
284
+ if ( T->T.w==NULL && T->item_wfname ) SETFAMILY_alloc_weight (&T->T, C->rowt);
285
+
286
+ if ( ERROR_MES ) return(0);
287
+
288
+ // set variables w.r.t rows
289
+ tt=0; FLOOP (t, 0, T->rows_org){
290
+ if ( C->rperm[t] <= T->rows_org ){
291
+ T->T.v[tt] = INIT_QUEUE;
292
+ T->trperm[tt] = t;
293
+ C->rperm[t] = tt;
294
+ T->w[tt] = C->rw? C->rw[t]: 1;
295
+ if ( T->pw ) T->pw[tt] = MAX (T->w[tt], 0);
296
+ if ( !flag ){
297
+ T->T.v[tt].v = (QUEUE_INT *)buf;
298
+ buf += (C->rowt[t]+1)*T->T.unit;
299
+ }
300
+ tt++;
301
+ }
302
+ }
303
+ free2 (C->rw);
304
+ // make the inverse perm of items
305
+ FLOOP (t, 0, T->clms_org)
306
+ if ( C->cperm[t] <= T->clms_end ) T->perm[C->cperm[t]] = t;
307
+
308
+ // set head of each stream, and stream ID of each transaction
309
+ if ( T->flag2&TRSACT_MULTI_STREAM ){
310
+ malloc2 (T->head, T->str_num+2, EXIT0);
311
+ malloc2 (T->strID, (T->flag&LOAD_TPOSE)?T->T.clms:T->T.end, EXIT0);
312
+ }
313
+ org = (T->flag&LOAD_TPOSE)? T->clms_org: T->rows_org;
314
+ q = (T->flag&LOAD_TPOSE)? C->cperm: C->rperm;
315
+ ct = (T->flag&LOAD_TPOSE)? C->clmt: C->rowt;
316
+ h=1; tt=0; FLOOP (t, 0, org){
317
+ if ( q[t] <= org ){
318
+ if ( t == T->end1 && T->sep==0 ) T->sep = tt;
319
+ if ( t == T->sep && T->sep>0 ) T->sep = tt;
320
+ if ( T->strID ) T->strID[tt] = h;
321
+ tt++;
322
+ }
323
+ if ( T->head && ct[t]==0 ) T->head[h++] = tt+1;
324
+ }
325
+
326
+ T->new_t = T->T.t;
327
+ return ( flag );
328
+ }
329
+
330
+
331
+ void TRSACT_file_read_ (TRSACT *T, FILE2 *fp, FILE_COUNT *C, VEC_ID *t, int flag, char *iwfname){
332
+ QUEUE_INT item;
333
+ QUEUE_ID tt;
334
+ LONG jj;
335
+ FILE2 fp2;
336
+ WEIGHT w=0;
337
+ int fc=0, f, ff;
338
+
339
+ FILE2_reset (fp);
340
+ if ( iwfname ) FILE2_open (fp2, iwfname, "r", EXIT);
341
+ do {
342
+ if ( flag ) T->T.v[*t].v = *t? T->T.v[*t-1].v + T->T.v[*t-1].t +1: T->T.buf;
343
+ do {
344
+ jj = FILE2_read_int (fp);
345
+ item = (QUEUE_INT)jj;
346
+ if ( (FILE_err&4)==0 && jj<TRSACT_MAXNUM && jj>=0 ){
347
+ // printf ("%d %d %d %d\n", C->rperm[*t], T->rows_org, C->cperm[item], T->clms_org );
348
+ if ( iwfname ){
349
+ f = FILE_err;
350
+ w = FILE2_read_double (&fp2);
351
+ if ( (FILE_err&4) && fc ) w = FILE2_read_double (&fp2);
352
+ ff = FILE_err;
353
+ FILE_err = f;
354
+ }
355
+ if ( T->flag&LOAD_TPOSE ){
356
+ if ( C->rperm[item]<=T->rows_org && C->cperm[*t]<=T->clms_end ){
357
+ if ( iwfname) T->T.w[C->rperm[item]][T->T.v[ C->rperm[item] ].t] = w;
358
+ QUE_INS (T->T.v[ C->rperm[item] ], C->cperm[*t]);
359
+ }
360
+ } else if ( C->rperm[*t]<=T->rows_org && C->cperm[item]<=T->clms_end ){
361
+ if ( iwfname) T->T.w[C->rperm[*t]][T->T.v[ C->rperm[*t] ].t] = w;
362
+ QUE_INS (T->T.v[ C->rperm[*t] ], C->cperm[item]);
363
+ }
364
+ }
365
+ fc = 0;
366
+ } while ( (FILE_err&3)==0);
367
+ (*t)++;
368
+ fc = ff? 0: 1; ff=0; // even if next weight is not written, it is the rest of the previous line
369
+ } while ( (FILE_err&2)==0 );
370
+ FLOOP (tt, 0, T->T.t) T->T.v[tt].v[T->T.v[tt].t] = T->T.clms;
371
+ if ( T->item_wfname ) FILE2_close (&fp2);
372
+ }
373
+ /* load the file to allocated memory according to permutation */
374
/* Second pass over fp: read items with FILE2_read_item and insert them into the rows of T->T according to C->rperm / C->cperm.
   t       : running row counter, advanced at each end of line (in/out; the caller passes the same counter for the 1st and 2nd file).
   flag    : if nonzero, row buffer pointers are laid out on the fly inside T->T.buf.
   iwfname : item-weight file read in parallel, or NULL.
   At the end the sentinel T->T.clms is written behind every transaction and C->rowt / C->clmt are freed.
   NOTE(review): from their use here, FILE_err&4 seems to mean "no value read", &3 "end of line or file", &2 "end of file" -- confirm in stdlib2. */
+ void TRSACT_file_read (TRSACT *T, FILE2 *fp, FILE_COUNT *C, VEC_ID *t, int flag, char *iwfname){
375
+ QUEUE_ID tt;
376
+ LONG x, y;
377
+ FILE2 wfp;
378
+ WEIGHT w=0;
379
+ int fc=0, FILE_err_=0;
380
+
381
+ FILE2_reset (fp);
382
+ if ( T->flag&(LOAD_NUM+LOAD_GRAPHNUM) ) FILE2_read_until_newline (fp); // skip the first line in these modes
383
+ if ( iwfname ) FILE2_open (wfp, iwfname, "r", EXIT);
384
+ if ( flag ) T->T.v[0].v = T->T.buf;
385
+
386
+ do {
387
+ //if ( flag ) printf ("t = %d; %d\n", *t, T->flag&LOAD_TPOSE);
388
+
389
+ if ( flag ){
390
+ if ( C->rperm[*t] < T->rows_org ){
391
+ if ( C->rperm[*t] > 0 ) T->T.v[C->rperm[*t]].v = T->T.v[C->rperm[*t]-1].v + T->T.v[C->rperm[*t]-1].t +1; // start right behind the previous row and its sentinel cell
392
+ }
393
+ }
394
+
395
+ // jj = FILE2_read_int (fp);
396
+ // item = (QUEUE_INT)jj;
397
+
398
+ x = *t;
399
+ FILE_err_ = FILE2_read_item (fp, iwfname?&wfp:NULL, &x, &y, &w, fc, T->flag);
400
+ if ( FILE_err&4 ) goto LOOP_END;
401
+ //printf ("%d %d %d %d\n", x, y, w, T->flag&LOAD_ID1);
402
+ // printf ("%d %d %d %d\n", C->rperm[*t], T->rows_org, C->cperm[x], T->clms_org );
403
+ if ( C->rperm[x]<=T->rows_org && C->cperm[y]<=T->clms_end ){ // both the row and the column survived the filters
404
+ //printf ("%d %d :: %d %d\n", cnt, T->T.v[*t].v - T->T.buf + T->T.v[*t].t, T->T.eles, T->eles_org);
405
+ if ( iwfname ) T->T.w[C->rperm[x]][T->T.v[ C->rperm[x] ].t] = w;
406
+ QUE_INS (T->T.v[ C->rperm[x] ], C->cperm[y]);
407
+ }
408
+
409
+ if ( FILE_err&3 ){
410
+ LOOP_END:;
411
+ (*t)++;
412
+ fc = FILE_err_? 0: 1; FILE_err_=0; // even if next weight is not written, it is the rest of the previous line
413
+ }
414
+ } while ( (FILE_err&2)==0 );
415
+ FLOOP (tt, 0, T->T.t) T->T.v[tt].v[T->T.v[tt].t] = T->T.clms;
416
+ if ( iwfname ) FILE2_close (&wfp);
417
+ mfree (C->rowt, C->clmt);
418
+ }
419
+
420
+ /* sort the transactions and items according to the flag, allocate OQ, and database reduction */
421
+ /* caution! not adapted for itemweights!!!!! */
422
/* Post-process the loaded database:
   (1) write the sentinel T->T.clms behind the last item of every transaction;
   (2) if LOAD_SIZSORT or LOAD_WSORT is set, reorder the transactions (and T->T.w, T->trperm) through C->rperm;
   (3) free C->rperm and C->cperm;
   (4) sort the items inside each transaction when LOAD_PERM / LOAD_INCSORT / LOAD_DECSORT is set, and remove duplicates under LOAD_RM_DUP;
   (5) under TRSACT_ALLOC_OCC or TRSACT_SHRINK, allocate the occurrence queues T->OQ, with OQ[T->T.clms] holding all transaction IDs;
   (6) under TRSACT_1ST_SHRINK, merge identical transactions.
   The incoming value of the parameter flag is not read; it is overwritten before its first use.
   NOTE(review): step (6) dereferences T->OQ and T->mark, so it presumably requires the allocations of step (5) and TRSACT_SHRINK -- confirm. */
+ void TRSACT_sort (TRSACT *T, FILE_COUNT *C, int flag){
423
+ VEC_ID t, *p;
424
+ int f;
425
+ PERM pp;
426
+ QUEUE Q;
427
+ QUEUE_ID i;
428
+ WEIGHT *ww;
429
+
430
+ FLOOP (t, 0, T->T.t) T->T.v[t].v[T->T.v[t].t] = T->T.clms;
431
+ /* if ( flag )
432
+ flag = (T->flag&(LOAD_SIZSORT+LOAD_WSORT)? ((T->flag&LOAD_DECROWSORT)? -1:1):0) *sizeof(QUEUE);
433
+ if ( flag ){ // sort rows for the case that some columns are not read
434
+ qsort_perm__VECt ((VEC *)T->T.v, T->T.t, C->rperm, flag);
435
+ ARY_INVPERMUTE (T->T.v, C->rperm, Q, T->T.t, "TRSACT_sort: ARY_INVPERMUTE", EXIT);
436
+ ARY_INVPERMUTE_ (T->trperm, C->rperm, pp, T->T.t);
437
+ }
438
+ */
439
+ flag = (T->flag&(LOAD_SIZSORT+LOAD_WSORT)? ((T->flag&LOAD_DECROWSORT)? -1:1):0) *sizeof(QUEUE); // 0 = no row sort; otherwise +/- sizeof(QUEUE), sign giving the direction
440
+ if ( flag ){ // sort rows for the case that some columns are not read
441
+ qsort_perm__VECt ((VEC *)T->T.v, T->T.t, C->rperm, flag); // determine the order of transactions
442
+ ARY_INVPERMUTE (T->T.v, C->rperm, Q, T->T.t, EXIT); // sort transactions
443
+ if ( T->T.w ) ARY_INVPERMUTE (T->T.w, C->rperm, ww, T->T.t, EXIT); // sort rows of itemweights
444
+ ARY_INVPERMUTE_ (T->trperm, C->rperm, pp, T->T.t);
445
+ }
446
+
447
+ free2 (C->rperm); free2 (C->cperm);
448
+
449
+ ///////////////////
450
+ if ( T->flag & LOAD_PERM ) flag = 1;
451
+ else flag = (T->flag&LOAD_INCSORT)? 1: ((T->flag&LOAD_DECSORT)? -1: 0);
452
+ if ( flag ){
453
+ FLOOP (t, 0, T->T.t) qsort_QUEUE_INT (T->T.v[t].v, T->T.v[t].t, flag);
454
+ }
455
+ if ( T->flag & LOAD_RM_DUP ){
456
+ FLOOP (t, 0, T->T.t){ MQUE_RM_DUP (T->T.v[t]); T->T.v[t].v[T->T.v[t].t] = T->T.clms; }
457
+ }
458
+ ST_MAX (T->row_max, i, T->T.v, t, 0, T->T.t);
459
+ /////////////////////
460
+ if ( T->flag2&(TRSACT_ALLOC_OCC+TRSACT_SHRINK) ){
461
+ calloc2 (p, T->T.clms, EXIT);
462
+ QUEUE_delivery (NULL, p, NULL, T->T.v, NULL, T->T.t, T->T.clms);
463
+ ARY_MAX (T->clm_max, i, p, 0, T->T.clms);
464
+ MQUE_ALLOC (T->OQ, T->T.clms, p, T->occ_unit, 1, EXIT);
465
+ QUEUE_alloc (&T->OQ[T->T.clms], MAX(T->T.t, T->clm_max));
466
+ FLOOP (i, 0, T->T.clms+1) T->OQ[i].end = 0; // end is illegally set to 0, for the use in "TRSACT_find_same"
467
+ ARY_INIT_PERM (T->OQ[T->T.clms].v, T->T.t); // initial occurrence := all transactions
468
+ T->OQ[T->T.clms].t = T->T.t;
469
+ free (p);
470
+ }
471
+
472
+ // shrinking database
473
+ if ( T->flag2&TRSACT_1ST_SHRINK ){
474
+ Q = T->OQ[T->T.clms];
475
+ T->OQ[T->T.clms].t = 0;
476
+ TRSACT_find_same (T, &Q, T->T.clms);
477
+ f = T->flag2; // preserve the flag
478
+ BITRM (T->flag2 ,TRSACT_MAKE_NEW +TRSACT_UNION +TRSACT_INTSEC);
479
+ TRSACT_merge_trsact (T, &T->OQ[T->T.clms], T->T.clms); // just remove duplicated trsacts
480
+ T->flag2 = f; // recover flag
481
+ T->OQ[T->T.clms].t = 0;
482
+ FLOOP (t, 0, T->T.t) if ( T->mark[t]>0 ) QUE_INS(T->OQ[T->T.clms], t); // make resulted occ
483
+ }
484
+
485
+ // QUEUE_delivery (T->OQ, NULL, NULL, T->T.v, &T->OQ[T->T.clms], T->T.t, T->T.clms);
486
+ }
487
+
488
+ /*****************************************/
489
+ /* load transaction file and its weight */
490
+ /*****************************************/
491
+ void TRSACT_load (TRSACT *T){
492
+ FILE2 fp = INIT_FILE2, fp2 = INIT_FILE2;
493
+ FILE_COUNT C = INIT_FILE_COUNT;
494
+ VEC_ID t=0;
495
+ int f;
496
+
497
+ FILE2_open (fp, T->fname, "r", EXIT);
498
+ if ( T->fname2 ) FILE2_open (fp2, T->fname2, "r", EXIT);
499
+ TRSACT_file_count (T, &C, &fp, T->wfname); if (ERROR_MES) goto END;
500
+ T->end1 = T->rows_org;
501
+ if ( T->fname2 ) TRSACT_file_count (T, &C, &fp2, T->wfname2); if (ERROR_MES) goto END;
502
+ f = TRSACT_alloc (T, &C); if (ERROR_MES){ mfree (C.clmt, C.cperm, C.rowt, C.cw); goto END; }
503
+ TRSACT_file_read (T, &fp, &C, &t, f, T->item_wfname); if (ERROR_MES) goto END;
504
+ if ( T->fname2 ) TRSACT_file_read (T, &fp2, &C, &t, f, T->item_wfname2); if (ERROR_MES){ mfree (C.rowt, C.clmt); goto END; }
505
+ TRSACT_sort (T, &C, f);
506
+
507
+ END:;
508
+ FILE2_close (&fp);
509
+ FILE2_close (&fp2);
510
+ if (ERROR_MES) TRSACT_end (T); else TRSACT_prop_print (T);
511
+ return;
512
+ }
513
+
514
+ /* iteration of delivery; operate one transaction */
515
+ /* use OQ.end to count the number of items */
516
+ /* jump will be cleared (t := s) at the beginning */
517
+ void TRSACT_delivery_iter (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, VEC_ID t, QUEUE_INT m){
518
+ WEIGHT *y=0;
519
+ QUEUE_INT *x;
520
+ int f = T->flag2&TRSACT_NEGATIVE;
521
+
522
+ if ( T->T.w ) y = T->T.w[t];
523
+ MQUE_MLOOP (T->T.v[t], x, m){
524
+ if ( T->OQ[*x].end == 0 ){ QUE_INS (*jump, *x); w[*x] = 0; if ( f ) pw[*x] = 0; }
525
+ T->OQ[*x].end++;
526
+ if ( y ){
527
+ w[*x] += *y; if ( *y>0 && f) pw[*x] += *y;
528
+ y++;
529
+ } else {
530
+ w[*x] += T->w[t]; if ( f ) pw[*x] += T->pw[t];
531
+ }
532
+ }
533
+ }
534
+
535
+ void TRSACT_delivery (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, QUEUE *occ, QUEUE_INT m){
536
+ VEC_ID i, t;
537
+ char *b = (char *)(occ?occ->v: NULL);
538
+ jump->t = jump->s;
539
+ FLOOP (i, occ?occ->s:0, occ?occ->t:T->T.t){
540
+ t = occ? *((QUEUE_INT *)b): i;
541
+ TRSACT_delivery_iter (T, jump, w, pw, t, m);
542
+ b += T->occ_unit;
543
+ }
544
+ }
545
+
546
+ /* usual delivery (make transpose) with checking sc
547
+ don't touch jump */
548
+ /* if (T->flag2&TRSACT_DELIV_SC), do not stack to items e with non-zero T->sc[e] */
549
+ void TRSACT_deliv (TRSACT *T, QUEUE *occ, QUEUE_INT m){
550
+ VEC_ID i, t;
551
+ QUEUE_INT *x;
552
+ char *b = (char *)(occ?occ->v: NULL);
553
+ if ( T->flag2&TRSACT_DELIV_SC ){
554
+ FLOOP (i, occ?occ->s:0, occ?occ->t:T->T.t){
555
+ t = occ? *((QUEUE_INT *)b): i;
556
+ MQUE_MLOOP (T->T.v[t], x, m)
557
+ if ( !T->sc[*x] ) QUE_INS (T->OQ[*x], t);
558
+ b += T->occ_unit;
559
+ }
560
+ } else {
561
+ FLOOP (i, occ?occ->s:0, occ?occ->t:T->T.t){
562
+ t = occ? *((QUEUE_INT *)b): i;
563
+ MQUE_MLOOP (T->T.v[t], x, m) QUE_INS (T->OQ[*x], t);
564
+ b += T->occ_unit;
565
+ }
566
+ }
567
+ }
568
+
569
+ /**************************************************************/
570
+ /* Find identical transactions in a subset of transactions, by radix-sort like method */
571
+ /* infrequent items (refer LCM_occ) and items larger than item_max are ignored */
572
+ /* INPUT: T:transactions, occ:subset of T represented by indices, result:array for output, end:largest item not to be ignored */
573
+ /* OUTPUT: if transactions i1, i2,..., ik are the same, they have same value in T->mark[i]
574
+ (not all) isolated transaction may have mark 1 */
575
+ /* use 0 to end-1 of T->mark, T->jump, T->shift and T->OQ temporary
576
+ T->OQ[i].t and OQ[i].s have to be 0. */
577
+ /*************************************************************************/
578
/* Group identical transactions of occ by repeatedly distributing them into the buckets T->OQ[e], where e is
   their next item (items with non-zero T->sc[e] are skipped; an item >= end sends the transaction to bucket T->T.clms).
   T->shift[t] is the read position inside transaction t; T->mark[t] receives the group label (1 = no identical partner found).
   T->jump is used as the stack of non-empty item buckets still to be processed. occ->t is restored on exit unless occ is the bucket T->OQ[T->T.clms] itself. */
+ void TRSACT_find_same (TRSACT *T, QUEUE *occ, QUEUE_INT end){
579
+ VEC_ID mark=2, t_end;
580
+ QUEUE *o=occ, *Q = T->T.v, *EQ, *QQ = T->OQ;
581
+ QUEUE_INT *x, *y, e;
582
+ QUEUE_ID ot = occ->t;
583
+
584
+ // initialization
585
+ MQUE_FLOOP (*occ, x){ T->mark[*x] = mark; T->shift[*x] = Q[*x].v; }
586
+ T->jump.t = T->jump.s; QQ[T->T.clms].s = 0;
587
+
588
+ while (1){
589
+ if ( o->t - o->s == 1 ) T->mark[o->v[--o->t]] = 1; // no same transactions; mark by 1
590
+ if ( o->t == 0 ) goto END;
591
+ // if previously inserted transactions are in different group, then change their marks with incrementing mark by one
592
+ mark++; for (x=&o->v[o->s] ; x < &o->v[o->t] ; x++) T->mark[*x] = mark;
593
+ t_end = o->t;
594
+ o->s = o->t = 0; // the bucket is emptied before its members are redistributed
595
+
596
+ // insert each t to buckets
597
+ for (x=o->v ; x<o->v+t_end ; x++){
598
+ // get next item in transaction t
599
+ do {
600
+ e = *(T->shift[*x]);
601
+ T->shift[*x]++;
602
+ if ( e >= end ){ e = T->T.clms; break; }
603
+ } while ( T->sc[e] );
604
+ EQ = &QQ[e];
605
+ // if previously inserted transactions are in different group, then change their mark to the transaction ID of top transaction.
606
+ y = &(EQ->v[EQ->s]);
607
+ if ( EQ->s < EQ->t && T->mark[*y] != T->mark[*x] ){
608
+ if ( EQ->t - EQ->s == 1 ) T->mark[EQ->v[--EQ->t]] = 1; // the tail of the queue has no same transaction; mark the tail by 1
609
+ else {
610
+ mark++; for ( ; y< EQ->v + EQ->t ; y++) T->mark[*y] = mark;
611
+ EQ->s = EQ->t;
612
+ }
613
+ } else if ( EQ->t == 0 && e<T->T.clms ) QUE_INS (T->jump, e); // first transaction in an item bucket: schedule that bucket
614
+ QUE_INS (*EQ, *x); // insert t to bucket of e
615
+ }
616
+ END:;
617
+ if ( QUEUE_LENGTH_(T->jump) == 0 ) break;
618
+ o = &QQ[QUEUE_ext_tail_ (&T->jump)];
619
+ }
620
+
621
+ // same transactions are in queue of item_max
622
+ if ( QQ[T->T.clms].t -QQ[T->T.clms].s == 1 ) T->mark[QQ[T->T.clms].v[--QQ[T->T.clms].t]] = 1;
623
+ if ( occ != &QQ[T->T.clms] ) occ->t = ot;
624
+ }
625
+
626
+
627
+ /****************************************************************************/
628
+ /* copy transaction t to tt (only items i s.t. sc[i]==0) **/
629
+ /* T->w has to be allocated. itemweight will be allocated even if T->w[t] == NULL */
630
+ /****************************************************************************/
631
+ void TRSACT_copy (TRSACT *T, VEC_ID tt, VEC_ID t, QUEUE_INT end){
632
+ QUEUE_INT *x, *buf;
633
+ WEIGHT *wbuf = NULL, tw = T->w[t], *w = T->T.w? T->T.w[t]: NULL;
634
+ int bnum = T->buf.num, bblock = T->buf.block_num, wflag = (w || (T->flag2&TRSACT_UNION));
635
+
636
+ buf = (QUEUE_INT *)BASE_get_memory (&T->buf, T->T.v[t].t+1);
637
+ if ( ERROR_MES ) return;
638
+ if ( wflag ) T->T.w[tt] = wbuf = (WEIGHT *)BASE_get_memory (&T->wbuf, T->T.v[t].t+1);
639
+ if ( ERROR_MES ){ T->buf.num = bnum; T->buf.block_num = bblock; return; }
640
+ T->T.v[tt].v = buf;
641
+ T->w[tt] = T->w[t];
642
+ if ( T->flag2&TRSACT_NEGATIVE ) T->pw[tt] = T->pw[t];
643
+ MQUE_MLOOP (T->T.v[t], x, end){
644
+ if ( !T->sc[*x] ){
645
+ *buf = *x; buf++;
646
+ if ( wflag ){ *wbuf = w? *w: tw; wbuf++; }
647
+ }
648
+ if ( w ) w++;
649
+ }
650
+ T->T.v[tt].t = (VEC_ID)(buf - T->T.v[tt].v);
651
+ *buf = T->T.clms;
652
+ T->buf.num = (int)(buf - ((QUEUE_INT *)T->buf.base[T->buf.block_num]) + 1);
653
+ if ( wflag ) T->wbuf.num = (int)(wbuf - ((WEIGHT *)T->wbuf.base[T->wbuf.block_num]) + 1);
654
+ }
655
+
656
+ /****************************************************************************/
657
+ /* intersection of transaction t and tt (only items i s.t. sc[i]==0) **/
658
+ /* shift is the array of pointers indicates the start of each transaction **/
659
+ /****************************************************************************/
660
+ void TRSACT_suffix_and (TRSACT *T, VEC_ID tt, VEC_ID t){
661
+ QUEUE_INT *x=T->shift[tt], *y=T->shift[t], *xx=T->shift[tt];
662
+ while ( *x < T->T.clms && *y < T->T.clms ){
663
+ if ( *x > *y ) y++;
664
+ else {
665
+ if ( *x == *y ){
666
+ if ( !T->sc[*x] ){ *xx = *x; xx++; }
667
+ y++;
668
+ }
669
+ x++;
670
+ }
671
+ }
672
+ T->T.v[tt].t = (VEC_ID)(xx - T->T.v[tt].v);
673
+ *xx = T->T.clms;
674
+ T->buf.num = (int)(xx - ((QUEUE_INT *)T->buf.base[T->buf.block_num]) + 1);
675
+ }
676
+
677
+
678
+ /***************************************************************************/
679
+ /* take union of transaction t to tt (only items i s.t. pw[i]>=th) */
680
+ /* CAUSION: t has to be placed at the last of trsact_buf2. */
681
+ /* if the size of t inclreases, the following memory will be overwrited */
682
+ /* if memory (T->buf, T->wbuf) is short, do nothing and return 1 */
683
+ /* T->T.w[t] can be NULL, but T->T.w[x] can not */
684
+ /***************************************************************************/
685
+ void TRSACT_itemweight_union (TRSACT *T, VEC_ID tt, VEC_ID t){
686
+ int bnum = T->buf.num, bblock = T->buf.block_num;
687
+ QUEUE_ID siz = T->T.v[tt].t +T->T.v[t].t;
688
+ QUEUE_INT *xx_end = T->T.v[tt].v + siz, *xx = xx_end;
689
+ QUEUE_INT *x = T->T.v[tt].v + T->T.v[tt].t-1, *y = T->T.v[t].v + T->T.v[t].t-1;
690
+ WEIGHT *ww = T->T.w[tt] +siz, *wx = T->T.w[tt] +T->T.v[tt].t-1, *wy = T->T.w[t] +T->T.v[t].t-1;
691
+ WEIGHT tw = T->w[t];
692
+ int flag=0, wf = (T->T.w[t]!=NULL);
693
+
694
+ // if sufficiently large memory can not be taken from the current memory block, use the next block
695
+ if ( xx_end >= (QUEUE_INT *)T->buf.base[T->buf.block_num] +T->buf.block_siz ){
696
+ xx_end = xx = ((QUEUE_INT*)BASE_get_memory (&T->buf, T->buf.block_siz)) +siz;
697
+ if (ERROR_MES) return;
698
+ ww = ((WEIGHT *)BASE_get_memory (&T->wbuf, T->wbuf.block_siz)) +siz;
699
+ if ( ERROR_MES ){ T->buf.num = bnum; T->buf.block_num = bblock; return; }
700
+ flag =1;
701
+ }
702
+ if ( ERROR_MES ) return;
703
+
704
+ // take union and store it in the allocated memory
705
+ while ( x >= T->T.v[tt].v && y >= T->T.v[t].v ){
706
+ if ( *x > *y ){
707
+ if ( !T->sc[*x] ){ *xx = *x; *ww = *wx; xx--; ww--; }
708
+ x--; wx--;
709
+ if ( x < T->T.v[tt].v ){
710
+ while ( y >= T->T.v[t].v ){
711
+ if ( !T->sc[*y] ){ *xx = *y; *ww = wf? *wy: tw; xx--; ww--; }
712
+ y--; wy--;
713
+ }
714
+ }
715
+ } else {
716
+ if ( !T->sc[*y] ){
717
+ *ww = wf? *wy: tw; *xx = *y;
718
+ if ( *x == *y ){ *ww += *wx; x--; wx--; }
719
+ xx--; ww--;
720
+ }
721
+ y--; wy--;
722
+ if ( y < T->T.v[t].v ){
723
+ while ( x >= T->T.v[tt].v ){
724
+ if ( !T->sc[*x] ){ *xx = *x; *ww = *wx; xx--; ww--; }
725
+ x--; wx--;
726
+ }
727
+ }
728
+ }
729
+ }
730
+ T->T.v[tt].t = (VEC_ID)(xx_end -xx);
731
+
732
+ // if [tt].v will overflow, set [tt].v to the top of next memory block
733
+ if ( flag ){
734
+ if ( T->T.v[tt].v + T->T.v[tt].t+1 >= (QUEUE_INT *)T->buf.base[T->buf.block_num-1] +T->buf.block_siz ){
735
+ T->T.v[tt].v = (QUEUE_INT *)T->buf.base[T->buf.block_num];
736
+ T->T.w[tt] = (WEIGHT *)T->wbuf.base[T->wbuf.block_num];
737
+ } else { // new memory block is allocated, but the transaction fits in the previous block
738
+ T->buf.block_num--;
739
+ T->wbuf.block_num--;
740
+ }
741
+ }
742
+
743
+ // copy the union to the original position
744
+ for ( x=T->T.v[tt].v,wx=T->T.w[tt] ; xx<xx_end ; ){
745
+ xx++; ww++;
746
+ *x = *xx; *wx = *ww;
747
+ x++; wx++;
748
+ }
749
+ *x = T->T.clms;
750
+ T->wbuf.num = T->buf.num = (int)(x - ((QUEUE_INT *)T->buf.base[T->buf.block_num]) +1);
751
+ return;
752
+ }
753
+
754
+
755
+
756
+ /*****/
757
+ /* merge duplicated transactions in occ according to those having same value in T->mark
758
+ the mark except for the representative will be zero, for each group of the same transactions
759
+ the mark of the representative will be its (new) ID +2 (one is for identical transaction) */
760
+ /* T->flag2&TRSACT_MAKE_NEW: make new trsact for representative
761
+ T->flag2&TRSACT_INTSEC: take suffix intersection of the same trsacts
762
+ T->flag2&TRSACT_UNION: take union of the same trsacts */
763
+ /* o will be cleard after the execution */
764
+ void TRSACT_merge_trsact (TRSACT *T, QUEUE *o, QUEUE_INT end){
765
+ VEC_ID mark = 0, tt=0;
766
+ QUEUE_INT *x;
767
+
768
+ MQUE_FLOOP (*o, x){
769
+ if ( mark == T->mark[*x] ){
770
+ T->mark[*x] = 0; // mark of unified (deleted) transaction
771
+ T->w[tt] += T->w[*x]; if ( T->pw ) T->pw[tt] += T->pw[*x];
772
+ if ( T->flag2 & TRSACT_INTSEC ){
773
+ TRSACT_suffix_and (T, tt, *x);
774
+ T->buf.num = (int)(T->T.v[tt].v - (QUEUE_INT *)T->buf.base[T->buf.block_num] +T->T.v[tt].t +1);
775
+ }
776
+ if ( T->flag2 & TRSACT_UNION ){
777
+ TRSACT_itemweight_union (T, tt, *x);
778
+ if ( ERROR_MES ) T->mark[*x] = *x+2; // do not merge if not enough memory
779
+ }
780
+ }
781
+ if ( mark != T->mark[*x] && T->mark[*x] > 1 ){ // *x is not the same to the previous, or memory short
782
+ mark = T->mark[*x];
783
+ if ( T->flag2&TRSACT_MAKE_NEW ){
784
+ tt = T->new_t++;
785
+ TRSACT_copy (T, tt, *x, (T->flag2&(TRSACT_INTSEC+TRSACT_UNION))? T->T.clms: end);
786
+ if ( ERROR_MES ){ T->new_t--; tt = *x; }
787
+ else for (T->shift[tt]=T->T.v[tt].v ; *(T->shift[tt])<end ; T->shift[tt]++);
788
+ } else tt = *x;
789
+ T->mark[*x] = tt+2;
790
+ }
791
+ }
792
+ o->t = o->s = 0;
793
+ }
794
+
795
+ /* remove the unified transactions from occ (consider T->occ_unit) */
796
+ void TRSACT_reduce_occ (TRSACT *T, QUEUE *occ){
797
+ QUEUE_INT *x, *y=occ->v;
798
+ QUEUE_ID i=0;
799
+ if ( T->occ_unit == sizeof(QUEUE_INT) ){
800
+ MQUE_FLOOP (*occ, x){
801
+ if ( T->mark[*x] == 0 ) continue;
802
+ *y = T->mark[*x]>1? T->mark[*x]-2: *x;
803
+ y++; i++;
804
+ }
805
+ } else {
806
+ MQUE_FLOOP_ (*occ, x, T->occ_unit){
807
+ if ( T->mark[*x] == 0 ) continue;
808
+ memcpy (y, x, T->occ_unit);
809
+ *y = T->mark[*x]>1? T->mark[*x]-2: *x;
810
+ y = (QUEUE_INT *)(((char *)y)+T->occ_unit);
811
+ i++;
812
+ }
813
+ }
814
+ occ->t = i;
815
+ }
816
+
817
+ #endif