nysol-take 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (161) hide show
  1. checksums.yaml +7 -0
  2. data/bin/mbiclique.rb +317 -0
  3. data/bin/mbipolish.rb +362 -0
  4. data/bin/mccomp.rb +235 -0
  5. data/bin/mclique.rb +295 -0
  6. data/bin/mclique2g.rb +105 -0
  7. data/bin/mcliqueInfo.rb +203 -0
  8. data/bin/mfriends.rb +202 -0
  9. data/bin/mgdiff.rb +252 -0
  10. data/bin/mhifriend.rb +456 -0
  11. data/bin/mhipolish.rb +465 -0
  12. data/bin/mitemset.rb +168 -0
  13. data/bin/mpal.rb +410 -0
  14. data/bin/mpolishing.rb +399 -0
  15. data/bin/msequence.rb +165 -0
  16. data/bin/mtra2g.rb +476 -0
  17. data/bin/mtra2gc.rb +360 -0
  18. data/ext/grhfilrun/extconf.rb +12 -0
  19. data/ext/grhfilrun/grhfilrun.c +85 -0
  20. data/ext/grhfilrun/src/_sspc.c +358 -0
  21. data/ext/grhfilrun/src/aheap.c +545 -0
  22. data/ext/grhfilrun/src/aheap.h +251 -0
  23. data/ext/grhfilrun/src/base.c +92 -0
  24. data/ext/grhfilrun/src/base.h +59 -0
  25. data/ext/grhfilrun/src/fstar.c +497 -0
  26. data/ext/grhfilrun/src/fstar.h +80 -0
  27. data/ext/grhfilrun/src/grhfil.c +214 -0
  28. data/ext/grhfilrun/src/itemset.c +713 -0
  29. data/ext/grhfilrun/src/itemset.h +170 -0
  30. data/ext/grhfilrun/src/problem.c +415 -0
  31. data/ext/grhfilrun/src/problem.h +179 -0
  32. data/ext/grhfilrun/src/queue.c +533 -0
  33. data/ext/grhfilrun/src/queue.h +182 -0
  34. data/ext/grhfilrun/src/sample.c +19 -0
  35. data/ext/grhfilrun/src/sspc.c +597 -0
  36. data/ext/grhfilrun/src/sspc2.c +491 -0
  37. data/ext/grhfilrun/src/stdlib2.c +1482 -0
  38. data/ext/grhfilrun/src/stdlib2.h +892 -0
  39. data/ext/grhfilrun/src/trsact.c +817 -0
  40. data/ext/grhfilrun/src/trsact.h +160 -0
  41. data/ext/grhfilrun/src/vec.c +745 -0
  42. data/ext/grhfilrun/src/vec.h +172 -0
  43. data/ext/lcmrun/extconf.rb +20 -0
  44. data/ext/lcmrun/lcmrun.cpp +99 -0
  45. data/ext/lcmrun/src/aheap.c +216 -0
  46. data/ext/lcmrun/src/aheap.h +111 -0
  47. data/ext/lcmrun/src/base.c +92 -0
  48. data/ext/lcmrun/src/base.h +59 -0
  49. data/ext/lcmrun/src/itemset.c +496 -0
  50. data/ext/lcmrun/src/itemset.h +157 -0
  51. data/ext/lcmrun/src/lcm.c +427 -0
  52. data/ext/lcmrun/src/problem.c +349 -0
  53. data/ext/lcmrun/src/problem.h +177 -0
  54. data/ext/lcmrun/src/queue.c +528 -0
  55. data/ext/lcmrun/src/queue.h +176 -0
  56. data/ext/lcmrun/src/sgraph.c +359 -0
  57. data/ext/lcmrun/src/sgraph.h +173 -0
  58. data/ext/lcmrun/src/stdlib2.c +1282 -0
  59. data/ext/lcmrun/src/stdlib2.h +823 -0
  60. data/ext/lcmrun/src/trsact.c +747 -0
  61. data/ext/lcmrun/src/trsact.h +159 -0
  62. data/ext/lcmrun/src/vec.c +731 -0
  63. data/ext/lcmrun/src/vec.h +171 -0
  64. data/ext/lcmseq0run/extconf.rb +20 -0
  65. data/ext/lcmseq0run/lcmseq0run.cpp +59 -0
  66. data/ext/lcmseq0run/src/aheap.c +216 -0
  67. data/ext/lcmseq0run/src/aheap.h +111 -0
  68. data/ext/lcmseq0run/src/base.c +92 -0
  69. data/ext/lcmseq0run/src/base.h +59 -0
  70. data/ext/lcmseq0run/src/itemset.c +518 -0
  71. data/ext/lcmseq0run/src/itemset.h +157 -0
  72. data/ext/lcmseq0run/src/itemset_zero.c +522 -0
  73. data/ext/lcmseq0run/src/lcm_seq.c +446 -0
  74. data/ext/lcmseq0run/src/lcm_seq_zero.c +446 -0
  75. data/ext/lcmseq0run/src/problem.c +439 -0
  76. data/ext/lcmseq0run/src/problem.h +179 -0
  77. data/ext/lcmseq0run/src/problem_zero.c +439 -0
  78. data/ext/lcmseq0run/src/queue.c +533 -0
  79. data/ext/lcmseq0run/src/queue.h +182 -0
  80. data/ext/lcmseq0run/src/stdlib2.c +1350 -0
  81. data/ext/lcmseq0run/src/stdlib2.h +864 -0
  82. data/ext/lcmseq0run/src/trsact.c +747 -0
  83. data/ext/lcmseq0run/src/trsact.h +159 -0
  84. data/ext/lcmseq0run/src/vec.c +779 -0
  85. data/ext/lcmseq0run/src/vec.h +172 -0
  86. data/ext/lcmseqrun/extconf.rb +20 -0
  87. data/ext/lcmseqrun/lcmseqrun.cpp +101 -0
  88. data/ext/lcmseqrun/src/aheap.c +216 -0
  89. data/ext/lcmseqrun/src/aheap.h +111 -0
  90. data/ext/lcmseqrun/src/base.c +92 -0
  91. data/ext/lcmseqrun/src/base.h +59 -0
  92. data/ext/lcmseqrun/src/itemset.c +518 -0
  93. data/ext/lcmseqrun/src/itemset.h +157 -0
  94. data/ext/lcmseqrun/src/itemset_zero.c +522 -0
  95. data/ext/lcmseqrun/src/lcm_seq.c +447 -0
  96. data/ext/lcmseqrun/src/lcm_seq_zero.c +446 -0
  97. data/ext/lcmseqrun/src/problem.c +439 -0
  98. data/ext/lcmseqrun/src/problem.h +179 -0
  99. data/ext/lcmseqrun/src/problem_zero.c +439 -0
  100. data/ext/lcmseqrun/src/queue.c +533 -0
  101. data/ext/lcmseqrun/src/queue.h +182 -0
  102. data/ext/lcmseqrun/src/stdlib2.c +1350 -0
  103. data/ext/lcmseqrun/src/stdlib2.h +864 -0
  104. data/ext/lcmseqrun/src/trsact.c +747 -0
  105. data/ext/lcmseqrun/src/trsact.h +159 -0
  106. data/ext/lcmseqrun/src/vec.c +779 -0
  107. data/ext/lcmseqrun/src/vec.h +172 -0
  108. data/ext/lcmtransrun/extconf.rb +18 -0
  109. data/ext/lcmtransrun/lcmtransrun.cpp +264 -0
  110. data/ext/macerun/extconf.rb +20 -0
  111. data/ext/macerun/macerun.cpp +57 -0
  112. data/ext/macerun/src/aheap.c +217 -0
  113. data/ext/macerun/src/aheap.h +112 -0
  114. data/ext/macerun/src/itemset.c +491 -0
  115. data/ext/macerun/src/itemset.h +158 -0
  116. data/ext/macerun/src/mace.c +503 -0
  117. data/ext/macerun/src/problem.c +346 -0
  118. data/ext/macerun/src/problem.h +174 -0
  119. data/ext/macerun/src/queue.c +529 -0
  120. data/ext/macerun/src/queue.h +177 -0
  121. data/ext/macerun/src/sgraph.c +360 -0
  122. data/ext/macerun/src/sgraph.h +174 -0
  123. data/ext/macerun/src/stdlib2.c +993 -0
  124. data/ext/macerun/src/stdlib2.h +811 -0
  125. data/ext/macerun/src/vec.c +634 -0
  126. data/ext/macerun/src/vec.h +170 -0
  127. data/ext/sspcrun/extconf.rb +20 -0
  128. data/ext/sspcrun/src/_sspc.c +358 -0
  129. data/ext/sspcrun/src/aheap.c +545 -0
  130. data/ext/sspcrun/src/aheap.h +251 -0
  131. data/ext/sspcrun/src/base.c +92 -0
  132. data/ext/sspcrun/src/base.h +59 -0
  133. data/ext/sspcrun/src/fstar.c +496 -0
  134. data/ext/sspcrun/src/fstar.h +80 -0
  135. data/ext/sspcrun/src/grhfil.c +213 -0
  136. data/ext/sspcrun/src/itemset.c +713 -0
  137. data/ext/sspcrun/src/itemset.h +170 -0
  138. data/ext/sspcrun/src/problem.c +415 -0
  139. data/ext/sspcrun/src/problem.h +179 -0
  140. data/ext/sspcrun/src/queue.c +533 -0
  141. data/ext/sspcrun/src/queue.h +182 -0
  142. data/ext/sspcrun/src/sample.c +19 -0
  143. data/ext/sspcrun/src/sspc.c +598 -0
  144. data/ext/sspcrun/src/sspc2.c +491 -0
  145. data/ext/sspcrun/src/stdlib2.c +1482 -0
  146. data/ext/sspcrun/src/stdlib2.h +892 -0
  147. data/ext/sspcrun/src/trsact.c +817 -0
  148. data/ext/sspcrun/src/trsact.h +160 -0
  149. data/ext/sspcrun/src/vec.c +745 -0
  150. data/ext/sspcrun/src/vec.h +172 -0
  151. data/ext/sspcrun/sspcrun.cpp +54 -0
  152. data/lib/nysol/enumLcmEp.rb +338 -0
  153. data/lib/nysol/enumLcmEsp.rb +284 -0
  154. data/lib/nysol/enumLcmIs.rb +275 -0
  155. data/lib/nysol/enumLcmSeq.rb +143 -0
  156. data/lib/nysol/items.rb +201 -0
  157. data/lib/nysol/seqDB.rb +256 -0
  158. data/lib/nysol/take.rb +39 -0
  159. data/lib/nysol/taxonomy.rb +113 -0
  160. data/lib/nysol/traDB.rb +257 -0
  161. metadata +239 -0
@@ -0,0 +1,747 @@
1
+ /* QUEUE based Transaction library, including database reduction.
2
+ 25/Nov/2007 by Takeaki Uno e-mail:uno@nii.jp,
3
+ homepage: http://research.nii.ac.jp/~uno/index.html */
4
+ /* This program is available for only academic use, basically.
5
+ Anyone can modify this program, but he/she has to write down
6
+ the change of the modification on the top of the source code.
7
+ Neither contact nor appointment to Takeaki Uno is needed.
8
+ If one wants to re-distribute this code, please
9
+ refer the newest code, and show the link to homepage of
10
+ Takeaki Uno, to notify the news about the codes for the users. */
11
+
12
+ #ifndef _trsact_c_
13
+ #define _trsact_c_
14
+
15
+ #include"trsact.h"
16
+ #include"base.c"
17
+ #include"vec.c"
18
+
19
+ /***********************************/
20
+ /* print transactions */
21
+ /***********************************/
22
+ void TRSACT_print (TRSACT *T, QUEUE *occ, PERM *p){
23
+ VEC_ID i, t;
24
+ QUEUE_ID j;
25
+ QUEUE_INT e;
26
+ FLOOP (i, 0, occ? occ->t: T->T.t){
27
+ t = occ? *((QUEUE_INT *)(&(((char *)(occ->v))[i*T->occ_unit]))): i;
28
+ if ( occ ) printf (QUEUE_INTF "::: ", t);
29
+ for (j=0; j<T->T.v[t].t ; j++){
30
+ e = T->T.v[t].v[j];
31
+ printf (QUEUE_INTF, p? p[e]: e);
32
+ if ( T->T.w ) printf ("(" WEIGHTF ")", T->T.w[t][j]);
33
+ printf (",");
34
+ }
35
+ if ( T->w ) printf (" :" WEIGHTF " ", T->w[t]);
36
+ printf (" (" QUEUE_INTF ")\n", T->T.v[t].end);
37
+ }
38
+ }
39
+
40
+ /* print the profiles of the transaction database */
41
+ void TRSACT_prop_print (TRSACT *T){
42
+ if ( !(T->flag & SHOW_MESSAGE) ) return;
43
+ print_err ("trsact: %s", T->fname);
44
+ // print_fname (" ,2nd-trsact %s (from ID %d)", T->fname2, T->end1);
45
+ print_err (" ,#transactions %d ,#items %d ,size %zd", T->rows_org, T->clms_org, T->eles_org);
46
+ print_err (" extracted database: #transactions %d ,#items %d ,size %zd", T->T.t, T->T.clms, T->T.eles);
47
+ print_fname (" ,weightfile %s", T->wfname);
48
+ print_fname (" ,2nd-weightfile %s", T->wfname2);
49
+ print_fname (" ,itemweightfile %s", T->item_fname);
50
+ print_fname (" ,2nd-itemweightfile %s", T->item_fname2);
51
+ print_fname (" ,item-order-file %s", T->pfname);
52
+ print_err ("\n");
53
+ }
54
+
55
+
56
+ /* initialization of structure TRSACT */
57
+ void TRSACT_init (TRSACT *T){
58
+ T->type = TYPE_TRSACT;
59
+ T->fname = T->fname2 = T->wfname = T->wfname2 = T->item_fname = T->item_fname2 = T->pfname =NULL;
60
+ T->flag = 0;
61
+ T->T = INIT_SETFAMILY;
62
+ T->clms_org = T->clm_max = T->clms_end = 0;
63
+ T->rows_org = T->row_max = T->end1 = T->sep = 0;
64
+ T->perm = NULL;
65
+ T->trperm = NULL;
66
+ T->w = T->pw = NULL;
67
+
68
+ T->clm_lb = 0;
69
+ T->clm_ub = VEC_ID_END;
70
+ T->row_lb = 0;
71
+ T->row_ub = QUEUE_IDHUGE;
72
+ T->w_lb = -WEIGHTHUGE; T->w_ub = WEIGHTHUGE;
73
+
74
+ T->eles_org = 0;
75
+ T->total_w = T->total_pw = T->total_w_org = T->total_pw_org =0;
76
+
77
+ T->jump = INIT_QUEUE;
78
+ T->str_num = 0;
79
+ T->head = T->strID = NULL;
80
+
81
+ T->th = 1;
82
+ T->mark = NULL;
83
+ T->shift = NULL;
84
+ T->occ_unit = sizeof(QUEUE_INT);
85
+ T->OQ = NULL;
86
+ T->sc = NULL;
87
+
88
+ T->new_t = 0;
89
+ T->buf = INIT_BASE;
90
+ T->wbuf = INIT_BASE;
91
+ }
92
+
93
+ /**************************************************************/
94
+ void TRSACT_end (TRSACT *T){
95
+ if ( T->OQ ){ free2 (T->OQ->v ); free2 (T->OQ[T->T.clms].v); }
96
+ free2 (T->T.w);
97
+ SETFAMILY_end (&T->T);
98
+ if ( T->w != T->pw ) free2 (T->pw);
99
+ mfree (T->w, T->perm, T->trperm);
100
+ mfree (T->mark, T->shift, T->sc, T->OQ, T->head, T->strID);
101
+ QUEUE_end (&T->jump);
102
+ BASE_end (&T->buf);
103
+ BASE_end (&T->wbuf);
104
+ TRSACT_init (T);
105
+ }
106
+
107
+ #ifndef TRSACT_MAXNUM
108
+ #define TRSACT_MAXNUM 20000000LL
109
+ #endif
110
+
111
+ /*****************************************/
112
+ /* scan file "fp" (and the weight file named "wf", if any) and count #items, #transactions in the file. */
113
+ /* weights are loaded only if wf!=NULL */
114
+ /* T->rows_org, clms_org, eles_org := #transactions, largest item+1, #all elements */
115
+ /* transactions whose size/weight is out of range (T->row_lb - row_ub, or in transpose mode
   T->w_lb - w_ub and T->clm_lb - clm_ub) are removed from the column counts C->clmt */
116
+ /* T->total_w_org, total_pw_org := sum of (positive) weights of transactions */
117
+ /* C->clmt[i],C->cw[i] := the number/(sum of weights) of transactions including i;
   C->rowt[t] := size of transaction t.
   On an allocation/open error the weight file is closed, the arrays of C are freed and EXIT is executed.
   NOTE(review): the meaning of the FILE_err bits (1/2/4) comes from FILE2_read_int, which is not
   visible here; presumably 1 = end of line, 2 = end of file, 4 = no number read -- confirm. */
118
+ /****************************************/
119
+ void TRSACT_file_count (TRSACT *T, FILE_COUNT *C, FILE2 *fp, char *wf){
120
+ QUEUE_INT i, item, kk=0, k, jump_end=0; // k: #items read in the current transaction, kk: end index of the loaded weights
121
+ WEIGHT w, s; // w: weight of the current transaction, s: sum of the (item-indexed) weights in it
122
+ VEC_ID *jump=NULL; // items of the current transaction, kept so that their counts can be undone
123
+ FILE2 wfp = INIT_FILE2;
124
+ LONG jj;
125
+
126
+ if ( wf ){ // load the weight file into C->rw
127
+ FILE2_open (wfp, wf, "r", goto ERR);
128
+ ARY_SCAN (kk, WEIGHT, wfp, 1); // presumably counts the weights in the file into kk -- confirm against ARY_SCAN
129
+ kk += T->rows_org; // offset by the transactions counted so far (non-zero for the 2nd file)
130
+ realloc2 (C->rw, kk+1, goto ERR);
131
+ FILE2_reset (&wfp);
132
+ ARY_READ (C->rw, double, kk, wfp);
133
+ ARY_MIN (w, i, C->rw, 0, kk);
134
+ if ( w<0 ) T->flag |= TRSACT_NEGATIVE; // remember that a negative weight exists
135
+ FILE2_close (&wfp);
136
+ }
137
+ do { // one iteration per transaction
138
+ s=0;
139
+ k=0;
140
+ w = wf? (T->rows_org<kk? C->rw[T->rows_org]: TRSACT_DEFAULT_WEIGHT): 1;
141
+ do { // one iteration per number read
142
+ jj = FILE2_read_int (fp);
143
+ item = (QUEUE_INT)jj;
144
+ if ( (FILE_err&4)==0 && jj<TRSACT_MAXNUM && jj>=0 ){ // numbers out of [0,TRSACT_MAXNUM) are ignored
145
+ ENMAX (T->clms_org, item+1); // update #items
146
+ reallocx (jump, jump_end, k, 0, goto ERR);
147
+ jump[k] = item;
148
+ k++;
149
+ s += wf? (item<kk? MAX(C->rw[item],0): TRSACT_DEFAULT_WEIGHT): 1;
150
+
151
+ // count/weight-sum for the transpose mode
152
+ reallocx (C->clmt, C->clm_end, item, 0, goto ERR);
153
+ C->clmt[item]++;
154
+ if ( !(T->flag&LOAD_TPOSE) ){
155
+ reallocx (C->cw, C->cw_end, item, 0, goto ERR);
156
+ C->cw[item] += MAX(w,0); // sum up positive weights
157
+ }
158
+ }
159
+ } while ( (FILE_err&3)==0);
160
+
161
+ // count/weight-sum for the transpose mode
162
+ reallocx (C->rowt, C->row_end, T->rows_org, 0, goto ERR);
163
+ C->rowt[T->rows_org] = k;
164
+ if ( T->flag&LOAD_TPOSE ){
165
+ reallocx (C->cw, C->cw_end, T->rows_org, 0, goto ERR);
166
+ C->cw[T->rows_org] = s; // sum up positive weights
167
+ }
168
+ if ( k==0 && FILE_err&2 ) break; // nothing read and bit 2 set: do not count this as a transaction
169
+ T->rows_org++; // increase #transaction
170
+
171
+ if ( !wf ) s = k; // un-weighted case; weighted sum is #included-items
172
+ if ( k==0 ){
173
+ T->str_num++; // increase #streams if empty transaction is read
174
+ } else {
175
+ T->eles_org += k;
176
+ if ( (!(T->flag&LOAD_TPOSE) && !RANGE (T->row_lb, k, T->row_ub))
177
+ || ((T->flag&LOAD_TPOSE) && (!RANGE(T->w_lb, s, T->w_ub) || !RANGE (T->clm_lb, k, T->clm_ub)) ) ) FLOOP (i, 0, k) C->clmt[jump[i]]--;
178
+ }
179
+ } while ( (FILE_err&2)==0);
180
+ free2 (jump);
181
+ // without loaded weights every transaction has weight 1, so both totals are #transactions
182
+ if ( C->rw == NULL ){ T->total_w_org = T->total_pw_org = T->rows_org; return; }
183
+ C->clm_btm = MIN(kk, T->rows_org);
184
+ reallocx (C->rw, kk, T->rows_org, TRSACT_DEFAULT_WEIGHT, goto ERR);
185
+ FLOOP (k, 0, T->rows_org){
186
+ T->total_w_org += C->rw[k];
187
+ T->total_pw_org += MAX(C->rw[k],0);
188
+ }
189
+ return;
190
+ ERR:; // error exit: close the weight file, release the counters, and leave by EXIT
191
+ FILE2_close (&wfp);
192
+ mfree (C->rw, C->cw, C->clmt, C->rowt, jump);
193
+ EXIT;
194
+ }
195
+
196
+ /* allocate memory, set permutation, and free C.clmt,rowt,rw,cw */
197
+ int TRSACT_alloc (TRSACT *T, FILE_COUNT *C){
198
+ VEC_ID t, tt=0, ttt, ttt_max, h, flag, org;
199
+ FILE_COUNT_INT *ct;
200
+ size_t s=0;
201
+ PERM *q, *p=NULL;
202
+ char *buf;
203
+
204
+ // swap variables in the case of transpose
205
+ if ( T->flag & LOAD_TPOSE ){
206
+ common_QUEUE_INT = T->clms_org; T->clms_org = (QUEUE_INT)T->rows_org; T->rows_org = (VEC_ID)common_QUEUE_INT;
207
+ SWAP_PNT (C->clmt, C->rowt);
208
+ }
209
+ ttt_max = ttt = T->clms_org;
210
+
211
+ if ( T->flag&TRSACT_SHRINK ) T->flag |= LOAD_DBLBUF;
212
+ // count valid columns/elements
213
+
214
+ if ( T->pfname && !(T->flag&TRSACT_WRITE_PERM) ){
215
+ ARY_LOAD (p, QUEUE_INT, ttt, T->pfname, 1, EXIT0);
216
+ ARY_MAX (ttt_max, tt, p, 0, ttt);
217
+ // ENMAX (T->clms_org, ttt_max+1);
218
+ T->T.clms = ttt_max+1;
219
+ } else {
220
+ if ( T->flag&LOAD_PERM ){
221
+ if ( T->flag&TRSACT_FRQSORT )
222
+ p = qsort_perm_WEIGHT (C->cw, T->clms_org, (T->flag&LOAD_INCSORT)?1:-1);
223
+ else p = qsort_perm_FILE_COUNT_INT (C->clmt, T->clms_org, (T->flag&LOAD_INCSORT)?1:-1);
224
+ }
225
+ if ( T->pfname ) ARY_WRITE (T->pfname, p, T->clms_org, PERMF " ", EXIT0);
226
+ }
227
+ T->clms_end = MAX (T->clms_org, T->T.clms);
228
+
229
+ malloc2 (C->cperm, T->clms_org+1, EXIT0);
230
+ ARY_FILL (C->cperm, 0, T->clms_org, T->clms_org+1);
231
+ FLOOP (t, 0, ttt){
232
+ tt = p? p[t]: t;
233
+ if ( tt >= T->clms_org ) continue;
234
+ if ( RANGE(T->w_lb, C->cw[tt], T->w_ub) && RANGE (T->clm_lb, C->clmt[tt], T->clm_ub)){
235
+ s += C->clmt[tt];
236
+ C->cperm[tt] = (T->pfname && !(T->flag&TRSACT_WRITE_PERM))? t: T->T.clms++;
237
+ } else C->cperm[tt] = T->clms_end+1;
238
+ }
239
+ free2 (p);
240
+ if ( T->T.clms == 0 ) error ("there is no frequent item", return 0);
241
+
242
+ // count valid rows/elements
243
+ if ( T->flag&(LOAD_SIZSORT+LOAD_WSORT) ){
244
+ if ( T->flag&LOAD_WSORT && C->rw )
245
+ p = qsort_perm_WEIGHT (C->rw, T->rows_org, (T->flag&LOAD_DECROWSORT)?-1:1);
246
+ else p = qsort_perm_FILE_COUNT_INT (C->rowt, T->rows_org, (T->flag&LOAD_DECROWSORT)?-1:1);
247
+ }
248
+ malloc2 (C->rperm, T->rows_org, EXIT0);
249
+ FLOOP (t, 0, T->rows_org){ // compute #elements according to rowt, and set rperm
250
+ tt = p? p[t]: t;
251
+ if ( RANGE (T->row_lb, C->rowt[tt], T->row_ub) ){
252
+ C->rperm[tt] = T->T.t++;
253
+ T->T.eles += C->rowt[t];
254
+ } else C->rperm[tt] = T->rows_org+1;
255
+ }
256
+
257
+ free2 (p); free2 (C->cw);
258
+ flag = (T->T.eles > s && !(T->flag & LOAD_TPOSE) );
259
+ if ( flag ) T->T.eles = s;
260
+
261
+ T->T.end = T->T.t * ((T->flag&LOAD_DBLBUF)? 2: 1)+1;
262
+ malloc2 (T->w, T->T.end, EXIT0);
263
+ if ( TRSACT_NEGATIVE ) malloc2 (T->pw, T->T.end, EXIT0);
264
+ else T->pw = NULL;
265
+ malloc2 (T->trperm, T->T.t, EXIT0);
266
+ malloc2 (T->T.v, T->T.end, EXIT0);
267
+ malloc2 (buf, (T->T.eles+T->T.end+1)*T->T.unit, EXIT0);
268
+ T->T.buf = (QUEUE_INT *)buf;
269
+ calloc2 (T->perm, T->T.clms+1, EXIT0);
270
+ QUEUE_alloc (&T->jump, T->T.clms+1);
271
+ BASE_alloc (&T->buf, sizeof(QUEUE_INT), MAX((int)T->row_max*4, (int)(T->T.eles+1)/10+T->T.clms+100));
272
+ BASE_alloc (&T->wbuf, sizeof(WEIGHT), MAX((int)T->row_max*4, (int)(T->T.eles+1)/10+T->T.clms+100));
273
+ if ( T->flag&TRSACT_SHRINK ){
274
+ malloc2 (T->mark, T->T.end, EXIT0);
275
+ malloc2 (T->shift, T->T.end, EXIT0);
276
+ calloc2 (T->sc, T->T.clms, EXIT0);
277
+ }
278
+ if ( T->flag&TRSACT_MULTI_STREAM ){
279
+ malloc2 (T->head, T->str_num+2, EXIT0);
280
+ malloc2 (T->strID, (T->flag&LOAD_TPOSE)?T->T.clms:T->T.end, EXIT0);
281
+ }
282
+ if ( T->flag&TRSACT_UNION ) calloc2 (T->T.w, T->T.end, EXIT0);
283
+
284
+ if ( ERROR_MES ) return(0);
285
+
286
+ // set variables w.r.t rows
287
+ tt=0; FLOOP (t, 0, T->rows_org){
288
+ if ( C->rperm[t] <= T->rows_org ){
289
+ T->T.v[tt] = INIT_QUEUE;
290
+ T->trperm[tt] = t;
291
+ C->rperm[t] = tt;
292
+ T->w[tt] = C->rw? C->rw[t]: 1;
293
+ if ( T->pw ) T->pw[tt] = MAX (T->w[tt], 0);
294
+ if ( !flag ){
295
+ T->T.v[tt].v = (QUEUE_INT *)buf;
296
+ buf += (C->rowt[t]+1)*T->T.unit;
297
+ }
298
+ tt++;
299
+ }
300
+ }
301
+ free2 (C->rw);
302
+ // make the inverse perm of items
303
+ FLOOP (t, 0, T->clms_org)
304
+ if ( C->cperm[t] <= T->clms_end ) T->perm[C->cperm[t]] = t;
305
+
306
+ // set head of each stream, and stream ID of each transaction
307
+ if ( T->flag&TRSACT_MULTI_STREAM ){
308
+ malloc2 (T->head, T->str_num+2, EXIT0);
309
+ malloc2 (T->strID, (T->flag&LOAD_TPOSE)?T->T.clms:T->T.end, EXIT0);
310
+ }
311
+ org = (T->flag&LOAD_TPOSE)? T->clms_org: T->rows_org;
312
+ q = (T->flag&LOAD_TPOSE)? C->cperm: C->rperm;
313
+ ct = (T->flag&LOAD_TPOSE)? C->clmt: C->rowt;
314
+ h=1; tt=0; FLOOP (t, 0, org){
315
+ if ( q[t] <= org ){
316
+ if ( t == T->end1 && T->sep==0 ) T->sep = tt;
317
+ if ( t == T->sep && T->sep>0 ) T->sep = tt;
318
+ if ( T->strID ) T->strID[tt] = h;
319
+ tt++;
320
+ }
321
+ if ( T->head && ct[t]==0 ) T->head[h++] = tt+1;
322
+ }
323
+
324
+ T->new_t = T->T.t;
325
+ free2 (C->rowt); free2 (C->clmt);
326
+ return ( flag );
327
+ }
328
+
329
+
330
+ /* load the file to allocated memory according to permutation */
331
+ void TRSACT_file_read (TRSACT *T, FILE2 *fp, FILE_COUNT *C, VEC_ID *t, int flag){
332
+ QUEUE_INT item;
333
+ QUEUE_ID tt;
334
+ LONG jj;
335
+
336
+ FILE2_reset (fp);
337
+ do {
338
+ if ( flag ) T->T.v[*t].v = *t? T->T.v[*t-1].v + T->T.v[*t-1].t +1: T->T.buf;
339
+ do {
340
+ jj = FILE2_read_int (fp);
341
+ item = (QUEUE_INT)jj;
342
+ if ( (FILE_err&4)==0 && jj<TRSACT_MAXNUM && jj>=0 ){
343
+ // printf ("%d %d %d %d\n", C->rperm[*t], T->rows_org, C->cperm[item], T->clms_org );
344
+ if ( T->flag&LOAD_TPOSE ){
345
+ if ( C->rperm[item]<=T->rows_org && C->cperm[*t]<=T->clms_end )
346
+ QUE_INS (T->T.v[ C->rperm[item] ], C->cperm[*t]);
347
+ } else if ( C->rperm[*t]<=T->rows_org && C->cperm[item]<=T->clms_end )
348
+ QUE_INS (T->T.v[ C->rperm[*t] ], C->cperm[item]);
349
+ }
350
+ } while ( (FILE_err&3)==0);
351
+ (*t)++;
352
+ } while ( (FILE_err&2)==0 );
353
+ FLOOP (tt, 0, T->T.t) T->T.v[tt].v[T->T.v[tt].t] = T->T.clms;
354
+ }
355
+
356
+ /* sort the transactions and items according to the flag, allocate OQ, and database reduction */
357
+ void TRSACT_sort (TRSACT *T, FILE_COUNT *C, int flag){
358
+ VEC_ID t, *p;
359
+ int f;
360
+ PERM pp;
361
+ QUEUE Q;
362
+ QUEUE_ID i;
363
+
364
+ FLOOP (t, 0, T->T.t) T->T.v[t].v[T->T.v[t].t] = T->T.clms;
365
+ /* if ( flag )
366
+ flag = (T->flag&(LOAD_SIZSORT+LOAD_WSORT)? ((T->flag&LOAD_DECROWSORT)? -1:1):0) *sizeof(QUEUE);
367
+ if ( flag ){ // sort rows for the case that some columns are not read
368
+ qsort_perm__VECt ((VEC *)T->T.v, T->T.t, C->rperm, flag);
369
+ ARY_INVPERMUTE (T->T.v, C->rperm, Q, T->T.t, "TRSACT_sort: ARY_INVPERMUTE", EXIT);
370
+ ARY_INVPERMUTE_ (T->trperm, C->rperm, pp, T->T.t);
371
+ }
372
+ */
373
+ flag = (T->flag&(LOAD_SIZSORT+LOAD_WSORT)? ((T->flag&LOAD_DECROWSORT)? -1:1):0) *sizeof(QUEUE);
374
+ if ( flag ){ // sort rows for the case that some columns are not read
375
+ qsort_perm__VECt ((VEC *)T->T.v, T->T.t, C->rperm, flag);
376
+ ARY_INVPERMUTE (T->T.v, C->rperm, Q, T->T.t, EXIT);
377
+ ARY_INVPERMUTE_ (T->trperm, C->rperm, pp, T->T.t);
378
+ }
379
+
380
+ free2 (C->rperm); free2 (C->cperm);
381
+
382
+ if ( T->flag & LOAD_PERM ) flag = 1;
383
+ else flag = (T->flag&LOAD_INCSORT)? 1: ((T->flag&LOAD_DECSORT)? -1: 0);
384
+ if ( flag ) FLOOP (t, 0, T->T.t) qsort_QUEUE_INT (T->T.v[t].v, T->T.v[t].t, flag);
385
+ if ( T->flag & LOAD_RM_DUP ){
386
+ FLOOP (t, 0, T->T.t){ MQUE_RM_DUP (T->T.v[t]); T->T.v[t].v[T->T.v[t].t] = T->T.clms; }
387
+ }
388
+ ST_MAX (T->row_max, i, T->T.v, t, 0, T->T.t);
389
+
390
+ if ( T->flag&(TRSACT_ALLOC_OCC+TRSACT_SHRINK) ){
391
+ calloc2 (p, T->T.clms, EXIT);
392
+ QUEUE_delivery (NULL, p, NULL, T->T.v, NULL, T->T.t, T->T.clms);
393
+ ARY_MAX (T->clm_max, i, p, 0, T->T.clms);
394
+ MQUE_ALLOC (T->OQ, T->T.clms, p, T->occ_unit, 1, EXIT);
395
+ QUEUE_alloc (&T->OQ[T->T.clms], MAX(T->T.t, T->clm_max));
396
+ FLOOP (i, 0, T->T.clms+1) T->OQ[i].end = 0; // end is illegally set to 0, for the use in "TRSACT_find_same"
397
+ ARY_INIT_PERM (T->OQ[T->T.clms].v, T->T.t); // initial occurrence := all transactions
398
+ T->OQ[T->T.clms].t = T->T.t;
399
+ free (p);
400
+ }
401
+
402
+ // shrinking database
403
+ if ( T->flag&TRSACT_1ST_SHRINK ){
404
+ Q = T->OQ[T->T.clms];
405
+ T->OQ[T->T.clms].t = 0;
406
+ TRSACT_find_same (T, &Q, T->T.clms);
407
+ f = T->flag; // preserve the flag
408
+ BITRM (T->flag ,TRSACT_MAKE_NEW +TRSACT_UNION +TRSACT_INTSEC);
409
+ TRSACT_merge_trsact (T, &T->OQ[T->T.clms], T->T.clms); // just remove duplicated trsacts
410
+ T->flag = f; // recover flag
411
+ T->OQ[T->T.clms].t = 0;
412
+ FLOOP (t, 0, T->T.t) if ( T->mark[t]>0 ) QUE_INS(T->OQ[T->T.clms], t); // make resulted occ
413
+ }
414
+
415
+ // QUEUE_delivery (T->OQ, NULL, NULL, T->T.v, &T->OQ[T->T.clms], T->T.t, T->T.clms);
416
+ }
417
+
418
+ /*****************************************/
419
+ /* load transaction file and its weight */
420
+ /*****************************************/
421
+ void TRSACT_load (TRSACT *T){
422
+ FILE2 fp = INIT_FILE2, fp2 = INIT_FILE2;
423
+ FILE_COUNT C = INIT_FILE_COUNT;
424
+ VEC_ID t=0;
425
+ int f;
426
+
427
+ FILE2_open (fp, T->fname, "r", EXIT);
428
+ if ( T->fname2 ) FILE2_open (fp2, T->fname2, "r", EXIT);
429
+ TRSACT_file_count (T, &C, &fp, T->wfname); if (ERROR_MES) goto END;
430
+ T->end1 = T->rows_org;
431
+ if ( T->fname2 ) TRSACT_file_count (T, &C, &fp2, T->wfname2); if (ERROR_MES) goto END;
432
+ f = TRSACT_alloc (T, &C); if (ERROR_MES) goto END;
433
+ TRSACT_file_read (T, &fp, &C, &t, f); if (ERROR_MES) goto END;
434
+ if ( T->fname2 ) TRSACT_file_read (T, &fp2, &C, &t, f); if (ERROR_MES) goto END;
435
+ TRSACT_sort (T, &C, f);
436
+
437
+ END:;
438
+ FILE2_close (&fp);
439
+ FILE2_close (&fp2);
440
+ if (ERROR_MES) TRSACT_end (T); else TRSACT_prop_print (T);
441
+ return;
442
+ }
443
+
444
+ /* iteration of delivery; operate one transaction */
445
+ /* use OQ.end to count the number of items */
446
+ /* jump will be cleared (t := s) at the beginning */
447
+ void TRSACT_delivery_iter (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, VEC_ID t, QUEUE_INT m){
448
+ WEIGHT *y=0;
449
+ QUEUE_INT *x;
450
+ int f = T->flag&TRSACT_NEGATIVE;
451
+
452
+ if ( T->T.w ) y = T->T.w[t];
453
+ MQUE_MLOOP (T->T.v[t], x, m){
454
+ if ( T->OQ[*x].end == 0 ){ QUE_INS (*jump, *x); w[*x] = 0; if ( f ) pw[*x] = 0; }
455
+ T->OQ[*x].end++;
456
+ if ( y ){
457
+ w[*x] += *y; if ( *y>0 && f) pw[*x] += *y;
458
+ y++;
459
+ } else {
460
+ w[*x] += T->w[t]; if ( f ) pw[*x] += T->pw[t];
461
+ }
462
+ }
463
+ }
464
+
465
+ void TRSACT_delivery (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, QUEUE *occ, QUEUE_INT m){
466
+ VEC_ID i, t;
467
+ char *b = (char *)(occ?occ->v: NULL);
468
+ jump->t = jump->s;
469
+ FLOOP (i, occ?occ->s:0, occ?occ->t:T->T.t){
470
+ t = occ? *((QUEUE_INT *)b): i;
471
+ TRSACT_delivery_iter (T, jump, w, pw, t, m);
472
+ b += T->occ_unit;
473
+ }
474
+ }
475
+
476
+ /* usual delivery (make transpose) with checking sc
477
+ don't touch jump */
478
+ /* if (T->flag&TRSACT_DELIV_SC), do not stack to items e with non-zero T->sc[e] */
479
+ void TRSACT_deliv (TRSACT *T, QUEUE *occ, QUEUE_INT m){
480
+ VEC_ID i, t;
481
+ QUEUE_INT *x;
482
+ char *b = (char *)(occ?occ->v: NULL);
483
+ if ( T->flag&TRSACT_DELIV_SC ){
484
+ FLOOP (i, occ?occ->s:0, occ?occ->t:T->T.t){
485
+ t = occ? *((QUEUE_INT *)b): i;
486
+ MQUE_MLOOP (T->T.v[t], x, m)
487
+ if ( !T->sc[*x] ) QUE_INS (T->OQ[*x], t);
488
+ b += T->occ_unit;
489
+ }
490
+ } else {
491
+ FLOOP (i, occ?occ->s:0, occ?occ->t:T->T.t){
492
+ t = occ? *((QUEUE_INT *)b): i;
493
+ MQUE_MLOOP (T->T.v[t], x, m) QUE_INS (T->OQ[*x], t);
494
+ b += T->occ_unit;
495
+ }
496
+ }
497
+ }
498
+
499
+ /**************************************************************/
500
+ /* Find identical transactions in a subset of transactions, by radix-sort like method */
501
+ /* items e with non-zero T->sc[e] and items not smaller than "end" are ignored */
502
+ /* INPUT: T:transactions, occ:subset of T represented by indices, end:items >= end are ignored */
503
+ /* OUTPUT: if transactions i1, i2,..., ik are the same, they have same value in T->mark[i]
504
+ (not all) isolated transaction may have mark 1 */
505
+ /* use 0 to end-1 of T->mark, T->jump, T->shift and T->OQ temporary
506
+ T->OQ[i].t and OQ[i].s have to be 0.
   The transactions are distributed to the bucket T->OQ[e] of their next item e, repeatedly;
   transactions with no more item go to the bucket T->OQ[T->T.clms].
   occ->t is restored at the end unless occ is T->OQ[T->T.clms] itself. */
507
+ /*************************************************************************/
508
+ void TRSACT_find_same (TRSACT *T, QUEUE *occ, QUEUE_INT end){
509
+ VEC_ID mark=2, t_end; // mark: the last used group ID (1 is reserved for isolated transactions)
510
+ QUEUE *o=occ, *Q = T->T.v, *EQ, *QQ = T->OQ; // o: the bucket being distributed, EQ: the bucket receiving a transaction
511
+ QUEUE_INT *x, *y, e;
512
+ QUEUE_ID ot = occ->t; // saved to restore occ->t at the end
513
+
514
+ // initialization: all the transactions are in one group, and are scanned from their heads
515
+ MQUE_FLOOP (*occ, x){ T->mark[*x] = mark; T->shift[*x] = Q[*x].v; }
516
+ T->jump.t = T->jump.s; QQ[T->T.clms].s = 0;
517
+
518
+ while (1){
519
+ if ( o->t - o->s == 1 ) T->mark[o->v[--o->t]] = 1; // no same transactions; mark by 1
520
+ if ( o->t == 0 ) goto END; // nothing to distribute from this bucket
521
+ // if previously inserted transactions are in different group, then change their marks with incrementing mark by one
522
+ mark++; for (x=&o->v[o->s] ; x < &o->v[o->t] ; x++) T->mark[*x] = mark;
523
+ t_end = o->t;
524
+ o->s = o->t = 0; // empty the bucket; its contents v[0..t_end-1] are still read below
525
+
526
+ // insert each t to buckets
527
+ for (x=o->v ; x<o->v+t_end ; x++){
528
+ // get next item in transaction t
529
+ do {
530
+ e = *(T->shift[*x]);
531
+ T->shift[*x]++;
532
+ if ( e >= end ){ e = T->T.clms; break; } // no more item: use the bucket of T->T.clms
533
+ } while ( T->sc[e] ); // skip the items e with non-zero sc[e]
534
+ EQ = &QQ[e];
535
+ // if previously inserted transactions are in different group, then change their mark to the transaction ID of top transacion.
536
+ y = &(EQ->v[EQ->s]);
537
+ if ( EQ->s < EQ->t && T->mark[*y] != T->mark[*x] ){ // the bucket holds an open group different from the group of *x
538
+ if ( EQ->t - EQ->s == 1 ) T->mark[EQ->v[--EQ->t]] = 1; // the tail of the queue has no same transaction; mark the tail by 1
539
+ else {
540
+ mark++; for ( ; y< EQ->v + EQ->t ; y++) T->mark[*y] = mark;
541
+ EQ->s = EQ->t; // close the group; the next group starts here
542
+ }
543
+ } else if ( EQ->t == 0 && e<T->T.clms ) QUE_INS (T->jump, e); // bucket e gets its first transaction; remember e to process later
544
+ QUE_INS (*EQ, *x); // insert t to bucket of e
545
+ }
546
+ END:;
547
+ if ( QUEUE_LENGTH_(T->jump) == 0 ) break; // no bucket is left
548
+ o = &QQ[QUEUE_ext_tail_ (&T->jump)]; // next bucket; presumably QUEUE_ext_tail_ removes and returns the tail of jump -- confirm
549
+ }
550
+
551
+ // same transactions are in queue of item_max
552
+ if ( QQ[T->T.clms].t -QQ[T->T.clms].s == 1 ) T->mark[QQ[T->T.clms].v[--QQ[T->T.clms].t]] = 1;
553
+ if ( occ != &QQ[T->T.clms] ) occ->t = ot;
554
+ }
555
+
556
+
557
+ /****************************************************************************/
558
+ /* copy transaction t to tt (only items i s.t. sc[i]==0) **/
559
+ /* T->w has to be allocated. itemweight will be alocated even if T->w[t] == NULL */
560
+ /****************************************************************************/
561
+ void TRSACT_copy (TRSACT *T, VEC_ID tt, VEC_ID t, QUEUE_INT end){
562
+ QUEUE_INT *x, *buf;
563
+ WEIGHT *wbuf = NULL, tw = T->w[t], *w = T->T.w? T->T.w[t]: NULL;
564
+ int bnum = T->buf.num, bblock = T->buf.block_num, wflag = (w || (T->flag&TRSACT_UNION));
565
+
566
+ buf = (QUEUE_INT *)BASE_get_memory (&T->buf, T->T.v[t].t+1);
567
+ if ( ERROR_MES ) return;
568
+ if ( wflag ) T->T.w[tt] = wbuf = (WEIGHT *)BASE_get_memory (&T->wbuf, T->T.v[t].t+1);
569
+ if ( ERROR_MES ){ T->buf.num = bnum; T->buf.block_num = bblock; return; }
570
+ T->T.v[tt].v = buf;
571
+ T->w[tt] = T->w[t];
572
+ if ( T->flag&TRSACT_NEGATIVE ) T->pw[tt] = T->pw[t];
573
+ MQUE_MLOOP (T->T.v[t], x, end){
574
+ if ( !T->sc[*x] ){
575
+ *buf = *x; buf++;
576
+ if ( wflag ){ *wbuf = w? *w: tw; wbuf++; }
577
+ }
578
+ if ( w ) w++;
579
+ }
580
+ T->T.v[tt].t = (VEC_ID)(buf - T->T.v[tt].v);
581
+ *buf = T->T.clms;
582
+ T->buf.num = (int)(buf - ((QUEUE_INT *)T->buf.base[T->buf.block_num]) + 1);
583
+ if ( wflag ) T->wbuf.num = (int)(wbuf - ((WEIGHT *)T->wbuf.base[T->wbuf.block_num]) + 1);
584
+ }
585
+
586
+ /****************************************************************************/
587
+ /* intersection of transaction t and tt (only items i s.t. sc[i]==0) **/
588
+ /* shift is the array of pointers indicates the start of each transaction **/
589
+ /****************************************************************************/
590
+ void TRSACT_suffix_and (TRSACT *T, VEC_ID tt, VEC_ID t){
591
+ QUEUE_INT *x=T->shift[tt], *y=T->shift[t], *xx=T->shift[tt];
592
+ while ( *x < T->T.clms && *y < T->T.clms ){
593
+ if ( *x > *y ) y++;
594
+ else {
595
+ if ( *x == *y ){
596
+ if ( !T->sc[*x] ){ *xx = *x; xx++; }
597
+ y++;
598
+ }
599
+ x++;
600
+ }
601
+ }
602
+ T->T.v[tt].t = (VEC_ID)(xx - T->T.v[tt].v);
603
+ *xx = T->T.clms;
604
+ T->buf.num = (int)(xx - ((QUEUE_INT *)T->buf.base[T->buf.block_num]) + 1);
605
+ }
606
+
607
+
608
+ /***************************************************************************/
609
+ /* take union of transaction t to tt (only items i s.t. pw[i]>=th) */
610
+ /* CAUTION: t has to be placed at the end of trsact_buf2. */
611
+ /* if the size of t increases, the following memory will be overwritten */
612
+ /* if memory (T->buf, T->wbuf) is short, do nothing and return 1 */
613
+ /* T->T.w[t] can be NULL, but T->T.w[x] can not */
614
+ /***************************************************************************/
615
+ void TRSACT_itemweight_union (TRSACT *T, VEC_ID tt, VEC_ID t){
616
+ int bnum = T->buf.num, bblock = T->buf.block_num;
617
+ QUEUE_ID siz = T->T.v[tt].t +T->T.v[t].t;
618
+ QUEUE_INT *xx_end = T->T.v[tt].v + siz, *xx = xx_end;
619
+ QUEUE_INT *x = T->T.v[tt].v + T->T.v[tt].t-1, *y = T->T.v[t].v + T->T.v[t].t-1;
620
+ WEIGHT *ww = T->T.w[tt] +siz, *wx = T->T.w[tt] +T->T.v[tt].t-1, *wy = T->T.w[t] +T->T.v[t].t-1;
621
+ WEIGHT tw = T->w[t];
622
+ int flag=0, wf = (T->T.w[t]!=NULL);
623
+
624
+ // if sufficiently large memory can not be taken from the current memory block, use the next block
625
+ if ( xx_end >= (QUEUE_INT *)T->buf.base[T->buf.block_num] +T->buf.block_siz ){
626
+ xx_end = xx = ((QUEUE_INT*)BASE_get_memory (&T->buf, T->buf.block_siz)) +siz;
627
+ if (ERROR_MES) return;
628
+ ww = ((WEIGHT *)BASE_get_memory (&T->wbuf, T->wbuf.block_siz)) +siz;
629
+ if ( ERROR_MES ){ T->buf.num = bnum; T->buf.block_num = bblock; return; }
630
+ flag =1;
631
+ }
632
+ if ( ERROR_MES ) return;
633
+
634
+ // take union and store it in the allocated memory
635
+ while ( x >= T->T.v[tt].v && y >= T->T.v[t].v ){
636
+ if ( *x > *y ){
637
+ if ( !T->sc[*x] ){ *xx = *x; *ww = *wx; xx--; ww--; }
638
+ x--; wx--;
639
+ if ( x < T->T.v[tt].v ){
640
+ while ( y >= T->T.v[t].v ){
641
+ if ( !T->sc[*y] ){ *xx = *y; *ww = wf? *wy: tw; xx--; ww--; }
642
+ y--; wy--;
643
+ }
644
+ }
645
+ } else {
646
+ if ( !T->sc[*y] ){
647
+ *ww = wf? *wy: tw; *xx = *y;
648
+ if ( *x == *y ){ *ww += *wx; x--; wx--; }
649
+ xx--; ww--;
650
+ }
651
+ y--; wy--;
652
+ if ( y < T->T.v[t].v ){
653
+ while ( x >= T->T.v[tt].v ){
654
+ if ( !T->sc[*x] ){ *xx = *x; *ww = *wx; xx--; ww--; }
655
+ x--; wx--;
656
+ }
657
+ }
658
+ }
659
+ }
660
+ T->T.v[tt].t = (VEC_ID)(xx_end -xx);
661
+
662
+ // if [tt].v will overflow, set [tt].v to the top of next memory block
663
+ if ( flag ){
664
+ if ( T->T.v[tt].v + T->T.v[tt].t+1 >= (QUEUE_INT *)T->buf.base[T->buf.block_num-1] +T->buf.block_siz ){
665
+ T->T.v[tt].v = (QUEUE_INT *)T->buf.base[T->buf.block_num];
666
+ T->T.w[tt] = (WEIGHT *)T->wbuf.base[T->wbuf.block_num];
667
+ } else { // new memory block is allocated, but the transaction fits in the previous block
668
+ T->buf.block_num--;
669
+ T->wbuf.block_num--;
670
+ }
671
+ }
672
+
673
+ // copy the union to the original position
674
+ for ( x=T->T.v[tt].v,wx=T->T.w[tt] ; xx<xx_end ; ){
675
+ xx++; ww++;
676
+ *x = *xx; *wx = *ww;
677
+ x++; wx++;
678
+ }
679
+ *x = T->T.clms;
680
+ T->wbuf.num = T->buf.num = (int)(x - ((QUEUE_INT *)T->buf.base[T->buf.block_num]) +1);
681
+ return;
682
+ }
683
+
684
+
685
+
686
+ /*****/
687
+ /* merge duplicated transactions in occ according to those having same value in T->mark
688
+ the mark except for the representative will be zero, for each group of the same transactions
689
+ the mark of the representative will be its (new) ID +2 (one is for identical transaction) */
690
+ /* T->flag&TRSACT_MAKE_NEW: make new trsact for representative
691
+ T->flag&TRSACT_INTSEC: take suffix intersection of the same trsacts
692
+ T->flag&TRSACT_UNION: take union of the same trsacts */
693
+ /* o will be cleard after the execution */
694
+ void TRSACT_merge_trsact (TRSACT *T, QUEUE *o, QUEUE_INT end){
695
+ VEC_ID mark = 0, tt=0;
696
+ QUEUE_INT *x;
697
+
698
+ MQUE_FLOOP (*o, x){
699
+ if ( mark == T->mark[*x] ){
700
+ T->mark[*x] = 0; // mark of unified (deleted) transaction
701
+ T->w[tt] += T->w[*x]; if ( T->pw ) T->pw[tt] += T->pw[*x];
702
+ if ( T->flag & TRSACT_INTSEC ){
703
+ TRSACT_suffix_and (T, tt, *x);
704
+ T->buf.num = (int)(T->T.v[tt].v - (QUEUE_INT *)T->buf.base[T->buf.block_num] +T->T.v[tt].t +1);
705
+ }
706
+ if ( T->flag & TRSACT_UNION ){
707
+ TRSACT_itemweight_union (T, tt, *x);
708
+ if ( ERROR_MES ) T->mark[*x] = *x+2; // do not merge if not enough memory
709
+ }
710
+ }
711
+ if ( mark != T->mark[*x] && T->mark[*x] > 1 ){ // *x is not the same to the previous, or memory short
712
+ mark = T->mark[*x];
713
+ if ( T->flag&TRSACT_MAKE_NEW ){
714
+ tt = T->new_t++;
715
+ TRSACT_copy (T, tt, *x, (T->flag&(TRSACT_INTSEC+TRSACT_UNION))? T->T.clms: end);
716
+ if ( ERROR_MES ){ T->new_t--; tt = *x; }
717
+ else for (T->shift[tt]=T->T.v[tt].v ; *(T->shift[tt])<end ; T->shift[tt]++);
718
+ } else tt = *x;
719
+ T->mark[*x] = tt+2;
720
+ }
721
+ }
722
+ o->t = o->s = 0;
723
+ }
724
+
725
+ /* remove the unified transactions from occ (consider T->occ_unit) */
726
+ void TRSACT_reduce_occ (TRSACT *T, QUEUE *occ){
727
+ QUEUE_INT *x, *y=occ->v;
728
+ QUEUE_ID i=0;
729
+ if ( T->occ_unit == sizeof(QUEUE_INT) ){
730
+ MQUE_FLOOP (*occ, x){
731
+ if ( T->mark[*x] == 0 ) continue;
732
+ *y = T->mark[*x]>1? T->mark[*x]-2: *x;
733
+ y++; i++;
734
+ }
735
+ } else {
736
+ MQUE_FLOOP_ (*occ, x, T->occ_unit){
737
+ if ( T->mark[*x] == 0 ) continue;
738
+ memcpy (y, x, T->occ_unit);
739
+ *y = T->mark[*x]>1? T->mark[*x]-2: *x;
740
+ y = (QUEUE_INT *)(((char *)y)+T->occ_unit);
741
+ i++;
742
+ }
743
+ }
744
+ occ->t = i;
745
+ }
746
+
747
+ #endif