nysol-zdd 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/ext/zdd_so/BDD.cc +495 -0
  3. data/ext/zdd_so/BDD.h +356 -0
  4. data/ext/zdd_so/BDDDG.cc +1818 -0
  5. data/ext/zdd_so/BDDDG.h +107 -0
  6. data/ext/zdd_so/BDDHASH.cc +91 -0
  7. data/ext/zdd_so/BtoI.cc +503 -0
  8. data/ext/zdd_so/BtoI.h +144 -0
  9. data/ext/zdd_so/CtoI.cc +1072 -0
  10. data/ext/zdd_so/CtoI.h +186 -0
  11. data/ext/zdd_so/MLZBDDV.cc +153 -0
  12. data/ext/zdd_so/MLZBDDV.h +42 -0
  13. data/ext/zdd_so/SOP.cc +608 -0
  14. data/ext/zdd_so/SOP.h +199 -0
  15. data/ext/zdd_so/ZBDD.cc +1035 -0
  16. data/ext/zdd_so/ZBDD.h +243 -0
  17. data/ext/zdd_so/ZBDDDG.cc +1834 -0
  18. data/ext/zdd_so/ZBDDDG.h +105 -0
  19. data/ext/zdd_so/ZBDDHASH.cc +91 -0
  20. data/ext/zdd_so/bddc.c +2816 -0
  21. data/ext/zdd_so/bddc.h +132 -0
  22. data/ext/zdd_so/extconf.rb +25 -0
  23. data/ext/zdd_so/include/aheap.c +211 -0
  24. data/ext/zdd_so/include/aheap.h +111 -0
  25. data/ext/zdd_so/include/base.c +93 -0
  26. data/ext/zdd_so/include/base.h +60 -0
  27. data/ext/zdd_so/include/itemset.c +473 -0
  28. data/ext/zdd_so/include/itemset.h +153 -0
  29. data/ext/zdd_so/include/problem.c +371 -0
  30. data/ext/zdd_so/include/problem.h +160 -0
  31. data/ext/zdd_so/include/queue.c +518 -0
  32. data/ext/zdd_so/include/queue.h +177 -0
  33. data/ext/zdd_so/include/sgraph.c +331 -0
  34. data/ext/zdd_so/include/sgraph.h +170 -0
  35. data/ext/zdd_so/include/stdlib2.c +832 -0
  36. data/ext/zdd_so/include/stdlib2.h +746 -0
  37. data/ext/zdd_so/include/trsact.c +723 -0
  38. data/ext/zdd_so/include/trsact.h +167 -0
  39. data/ext/zdd_so/include/vec.c +583 -0
  40. data/ext/zdd_so/include/vec.h +159 -0
  41. data/ext/zdd_so/lcm-vsop.cc +596 -0
  42. data/ext/zdd_so/print.cc +683 -0
  43. data/ext/zdd_so/table.cc +330 -0
  44. data/ext/zdd_so/vsop.h +88 -0
  45. data/ext/zdd_so/zdd_so.cpp +3277 -0
  46. data/lib/nysol/zdd.rb +31 -0
  47. metadata +131 -0
@@ -0,0 +1,723 @@
1
+ /* QUEUE based Transaction library, including database reduction.
2
+ 25/Nov/2007 by Takeaki Uno e-mail:uno@nii.jp,
3
+ homepage: http://research.nii.ac.jp/~uno/index.html */
4
+ /* This program is available for only academic use, basically.
5
+ Anyone can modify this program, but he/she has to write down
6
+ the change of the modification on the top of the source code.
7
+ Neither contact nor appointment to Takeaki Uno is needed.
8
+ If one wants to re-distribute this code, do not forget to
9
+ refer the newest code, and show the link to homepage of
10
+ Takeaki Uno, to notify the news about the codes for the users.
11
+ For the commercial use, please make a contact to Takeaki Uno. */
12
+
13
+ #ifndef _trsact_c_
14
+ #define _trsact_c_
15
+
16
+ // #define WEIGHT_DOUBLE
17
+
18
+ #include"trsact.h"
19
+ #include"base.c"
20
+ #include"vec.c"
21
+
22
+ /***********************************/
23
+ /* print transactions */
24
+ /***********************************/
25
+ void TRSACT_print (TRSACT *T, QUEUE *occ, PERM *p){
26
+ VEC_ID i, t;
27
+ QUEUE_ID j;
28
+ QUEUE_INT e;
29
+ FLOOP (i, 0, occ? occ->t: T->T.t){
30
+ t = occ? *((QUEUE_INT *)(&(((char *)(occ->v))[i*T->occ_unit]))): i;
31
+ if ( occ ) printf (QUEUE_INTF "::: ", t);
32
+ ARY_FLOOP (T->T.v[t], j, e){
33
+ printf (QUEUE_INTF, p? p[e]: e);
34
+ if ( T->T.w ) printf ("(" WEIGHTF ")", T->T.w[t][j]);
35
+ printf (",");
36
+ }
37
+ if ( T->w ) printf (" :" WEIGHTF " ", T->w[t]);
38
+ printf (" (" QUEUE_INTF ")\n", T->T.v[t].end);
39
+ }
40
+ }
41
+
42
+ /*
43
+ void TRSACT_prop_print (TRSACT *T){
44
+ print_err ("trsact: %s", P->trsact_fname);
45
+ if ( P->trsact2_fname2 ) print_err (" ,2nd-trsact2 %s (from ID %d)", P->trsact_fname2, P->TT.end1);
46
+ print_err (" ,#transactions %d ,#items %d ,size %zd", P->TT.rows_org, P->TT.clms_org, P->TT.eles_org);
47
+ print_err (" extracted database: #transactions %d ,#items %d ,size %zd", P->TT.T.t, P->TT.T.clms, P->TT.T.eles);
48
+ if ( P->trsact_wfname ) print_err (" ,weightfile %s", P->trsact_wfname);
49
+ if ( P->trsact_wfname2 ) print_err (" ,2nd-weightfile %s", P->trsact_wfname2);
50
+ if ( P->trsact_pfname ) print_err (" ,item-order-file %s", P->trsact_pfname);
51
+ print_err ("\n");
52
+ }
53
+ */
54
+
55
+ /* initialization of structure TRSACT */
56
+ void TRSACT_init (TRSACT *T){
57
+ T->type = TYPE_TRSACT;
58
+ T->flag = 0;
59
+ T->T = INIT_SETFAMILY;
60
+ T->clms_org = T->clm_max = T->clms_end = T->non_empty_clms = 0;
61
+ T->rows_org = T->row_max = T->end1 = T->sep = 0;
62
+ T->perm = NULL;
63
+ T->trperm = NULL;
64
+ T->w = T->pw = NULL;
65
+
66
+ T->clm_lb = 0;
67
+ T->clm_ub = VEC_ID_END;
68
+ T->row_lb = 0;
69
+ T->row_ub = QUEUE_IDHUGE;
70
+ T->w_lb = -WEIGHTHUGE; T->w_ub = WEIGHTHUGE;
71
+
72
+ T->eles_org = 0;
73
+ T->total_w = T->total_pw = T->total_w_org = T->total_pw_org =0;
74
+
75
+ T->jump = INIT_QUEUE;
76
+ T->str_num = 0;
77
+ T->head = T->strID = NULL;
78
+
79
+ T->th = 1;
80
+ T->mark = NULL;
81
+ T->shift = NULL;
82
+ T->occ_unit = sizeof(QUEUE_INT);
83
+ T->OQ = NULL;
84
+ T->sc = NULL;
85
+
86
+ T->new_t = 0;
87
+ T->buf = INIT_BASE;
88
+ T->wbuf = INIT_BASE;
89
+ }
90
+
91
+ /**************************************************************/
92
+ void TRSACT_end (TRSACT *T){
93
+ if ( T->OQ ){ free2 (T->OQ->v ); free2 (T->OQ[T->T.clms].v); }
94
+ free2 (T->T.w);
95
+ SETFAMILY_end (&T->T);
96
+ if ( T->w != T->pw ) free2 (T->pw);
97
+ mfree (T->w, T->perm, T->trperm);
98
+ mfree (T->mark, T->shift, T->sc, T->OQ, T->head, T->strID);
99
+ QUEUE_end (&T->jump);
100
+ BASE_end (&T->buf);
101
+ BASE_end (&T->wbuf);
102
+ TRSACT_init (T);
103
+ }
104
+
105
+ /*****************************************/
106
+ /* scan file "fp" with weight file wfp and count #items, #transactions in the file. */
107
+ /* count weight only if wfp!=NULL */
108
+ /* T->rows_org, clms_org, eles_org := #items, #transactions, #all items */
109
+ /* ignore the transactions of size not in range T->clm_lb - clm_ub */
110
+ /* T->total_w, total_pw := sum of (positive) weights of transactions */
111
+ /* C->clmt[i],C->cw[i] := the number/(sum of weights) of transactions including i */
112
+ /****************************************/
113
+ void TRSACT_file_count (TRSACT *T, FILE_COUNT *C, FILE2 *fp, char *wf){
114
+ QUEUE_INT i, item, kk=0, k, jump_end=0;
115
+ WEIGHT w, s;
116
+ VEC_ID *jump=NULL;
117
+ FILE2 wfp;
118
+
119
+ if ( wf ){
120
+ FILE2_open (wfp, wf, "r", "TRSACT_file_count:weight file", goto ERR);
121
+ ARY_SCAN (kk, WEIGHT, wfp, 1);
122
+ kk += T->rows_org;
123
+ realloc2 (C->rw, kk+1, "TRSACT_file_count: C->rw", goto ERR);
124
+ FILE2_reset (&wfp);
125
+ ARY_READ (C->rw, double, kk, wfp);
126
+ ARY_MIN (w, i, C->rw, 0, kk);
127
+ if ( w<0 ) T->flag |= TRSACT_NEGATIVE;
128
+ FILE2_close (&wfp);
129
+ }
130
+ do {
131
+ s=0;
132
+ k=0;
133
+ w = wf? (T->rows_org<kk? C->rw[T->rows_org]: TRSACT_DEFAULT_WEIGHT): 1;
134
+ do {
135
+ item = (QUEUE_INT)FILE2_read_int (fp);
136
+ if ( (FILE_err&4)==0 ){
137
+ ENMAX (T->clms_org, item+1); // update #items
138
+ reallocx (jump, jump_end, k, 0, "TRSACT_file_count: jump", goto ERR);
139
+ jump[k] = item;
140
+ k++;
141
+ s += wf? (item<kk? MAX(C->rw[item],0): TRSACT_DEFAULT_WEIGHT): 1;
142
+
143
+ // count/weight-sum for the transpose mode
144
+ reallocx (C->clmt, C->clm_end, item, 0, "TRSACT_file_count:clmt",goto ERR);
145
+ C->clmt[item]++;
146
+ if ( !(T->flag&LOAD_TPOSE) ){
147
+ reallocx (C->cw, C->cw_end, item, 0, "TRSACT_file_count: cw", goto ERR);
148
+ C->cw[item] += MAX(w,0); // sum up positive weights
149
+ }
150
+ }
151
+ } while ( (FILE_err&3)==0);
152
+
153
+ // count/weight-sum for the transpose mode
154
+ reallocx (C->rowt, C->row_end, T->rows_org, 0, "TRSACT_file_count:rowt", goto ERR);
155
+ C->rowt[T->rows_org] = k;
156
+ if ( T->flag&LOAD_TPOSE ){
157
+ reallocx (C->cw, C->cw_end, T->rows_org, 0, "TRSACT_file_count: cw", goto ERR);
158
+ C->cw[T->rows_org] = s; // sum up positive weights
159
+ }
160
+ if ( k==0 && FILE_err&2 ) break;
161
+ T->rows_org++; // increase #transaction
162
+
163
+ if ( !wf ) s = k; // un-weighted case; weighted sum is #included-items
164
+ if ( k==0 ){
165
+ T->str_num++; // increase #streams if empty transaction is read
166
+ } else {
167
+ T->eles_org += k;
168
+ if ( (!(T->flag&LOAD_TPOSE) && !RANGE (T->row_lb, k, T->row_ub))
169
+ || ((T->flag&LOAD_TPOSE) && (!RANGE(T->w_lb, s, T->w_ub) || !RANGE (T->clm_lb, k, T->clm_ub)) ) ) FLOOP (i, 0, k) C->clmt[jump[i]]--;
170
+ }
171
+ } while ( (FILE_err&2)==0);
172
+ free2 (jump);
173
+ // swap the variables in transpose mode
174
+ if ( C->rw == NULL ){ T->total_w_org = T->total_pw_org = T->rows_org; return; }
175
+ C->clm_btm = MIN(kk, T->rows_org);
176
+ reallocx (C->rw, kk, T->rows_org, TRSACT_DEFAULT_WEIGHT, "TRSACT_file_count: rw", goto ERR);
177
+ FLOOP (k, 0, T->rows_org){
178
+ T->total_w_org += C->rw[k];
179
+ T->total_pw_org += MAX(C->rw[k],0);
180
+ }
181
+ return;
182
+ ERR:;
183
+ mfree (C->rw, C->cw, C->clmt, C->rowt, jump);
184
+ EXIT;
185
+ }
186
+
187
+ /* allocate memory, set permutation, and free C.clmt,rowt,rw,cw */
188
+ int TRSACT_alloc (TRSACT *T, char *pfname, FILE_COUNT *C){
189
+ VEC_ID t, tt=0, ttt=T->clms_org, ttt_max = ttt, h, flag, org;
190
+ FILE_COUNT_INT *ct;
191
+ size_t s=0;
192
+ PERM *q, *p=NULL;
193
+ char *buf;
194
+
195
+ // swap variables in the case of transpose
196
+ if ( T->flag & LOAD_TPOSE ){
197
+ common_QUEUE_INT = T->clms_org; T->clms_org = (QUEUE_INT)T->rows_org; T->rows_org = (VEC_ID)common_QUEUE_INT;
198
+ SWAP_PNT (C->clmt, C->rowt);
199
+ }
200
+
201
+ if ( T->flag&TRSACT_SHRINK ) T->flag |= LOAD_DBLBUF;
202
+ // count valid columns/elements
203
+
204
+ if ( pfname && !(T->flag&TRSACT_WRITE_PERM) ){
205
+ ARY_LOAD (p, QUEUE_INT, ttt, pfname, 1, "TRSACT_load: item order file", EXIT0);
206
+ ARY_MAX (ttt_max, tt, p, 0, ttt);
207
+ // ENMAX (T->clms_org, ttt_max+1);
208
+ T->T.clms = ttt_max+1;
209
+ } else {
210
+ if ( T->flag&LOAD_PERM ){
211
+ if ( T->flag&TRSACT_FRQSORT )
212
+ p = qsort_perm_WEIGHT (C->cw, T->clms_org, (T->flag&LOAD_INCSORT)?1:-1);
213
+ else p = qsort_perm_FILE_COUNT_INT (C->clmt, T->clms_org, (T->flag&LOAD_INCSORT)?1:-1);
214
+ }
215
+ if ( pfname ) ARY_WRITE (pfname, p, T->clms_org, PERMF " ", "TRSACT_alloc: item-order output", EXIT0);
216
+ }
217
+ T->clms_end = MAX (T->clms_org, T->T.clms);
218
+
219
+ malloc2 (C->cperm, T->clms_org+1, "TRSACT_alloc: cperm", EXIT0);
220
+ ARY_FILL (C->cperm, 0, T->clms_org, T->clms_org+1);
221
+ FLOOP (t, 0, ttt){
222
+ tt = p? p[t]: t;
223
+ if ( tt >= T->clms_org ) continue;
224
+ if ( RANGE(T->w_lb, C->cw[tt], T->w_ub) && RANGE (T->clm_lb, C->clmt[tt], T->clm_ub)){
225
+ s += C->clmt[tt];
226
+ C->cperm[tt] = (pfname && !(T->flag&TRSACT_WRITE_PERM))? t: T->T.clms++;
227
+ T->non_empty_clms++;
228
+ } else C->cperm[tt] = T->clms_end+1;
229
+ }
230
+ free2 (p);
231
+
232
+ // count valid rows/elements
233
+ if ( T->flag&(LOAD_SIZSORT+LOAD_WSORT) ){
234
+ if ( T->flag&LOAD_WSORT && C->rw )
235
+ p = qsort_perm_WEIGHT (C->rw, T->rows_org, (T->flag&LOAD_DECROWSORT)?-1:1);
236
+ else p = qsort_perm_FILE_COUNT_INT (C->rowt, T->rows_org, (T->flag&LOAD_DECROWSORT)?-1:1);
237
+ }
238
+ malloc2 (C->rperm, T->rows_org, "TRSACT_alloc: rperm", EXIT0);
239
+ FLOOP (t, 0, T->rows_org){ // compute #elements according to rowt, and set rperm
240
+ tt = p? p[t]: t;
241
+ if ( RANGE (T->row_lb, C->rowt[tt], T->row_ub) ){
242
+ C->rperm[tt] = T->T.t++;
243
+ T->T.eles += C->rowt[t];
244
+ } else C->rperm[tt] = T->rows_org+1;
245
+ }
246
+
247
+ free2 (p); free2 (C->cw);
248
+ flag = (T->T.eles > s && !(T->flag & LOAD_TPOSE) );
249
+ if ( flag ) T->T.eles = s;
250
+
251
+ T->T.end = T->T.t * ((T->flag&LOAD_DBLBUF)? 2: 1)+1;
252
+ malloc2 (T->w, T->T.end, "TRSACT_alloc: T->w", EXIT0);
253
+ if ( TRSACT_NEGATIVE ) malloc2 (T->pw, T->T.end, "TRSACT_alloc: T->pw", EXIT0);
254
+ else T->pw = NULL;
255
+ malloc2 (T->trperm, T->T.t, "TRSACT_alloc: T->trperm", EXIT0);
256
+ malloc2 (T->T.v, T->T.end, "TRSACT_alloc: T->T.v", EXIT0);
257
+ malloc2 (buf, (T->T.eles+T->T.end+1)*T->T.unit, "TRSACT_alloc: T->T.buf", EXIT0);
258
+ T->T.buf = (QUEUE_INT *)buf;
259
+ calloc2 (T->perm, T->T.clms+1, "TRSACT_alloc: T->perm", EXIT0);
260
+ QUEUE_alloc (&T->jump, T->T.clms+1);
261
+ BASE_alloc (&T->buf, sizeof(QUEUE_INT), MAX((int)T->row_max*4,(int)(T->T.eles+T->T.end+1)/10+100));
262
+ BASE_alloc (&T->wbuf, sizeof(WEIGHT), MAX((int)T->row_max*4, (int)(T->T.eles+T->T.end+1)/10+100));
263
+ if ( T->flag&TRSACT_SHRINK ){
264
+ malloc2 (T->mark, T->T.end, "TRSACT_alloc: mark", EXIT0);
265
+ malloc2 (T->shift, T->T.end, "TRSACT_alloc: shift", EXIT0);
266
+ calloc2 (T->sc, T->T.clms, "TRSACT_alloc: sc", EXIT0);
267
+ }
268
+ if ( T->flag&TRSACT_MULTI_STREAM ){
269
+ malloc2 (T->head, T->str_num+2, "TRSACT_alloc: haed", EXIT0);
270
+ malloc2 (T->strID, (T->flag&LOAD_TPOSE)?T->T.clms:T->T.end, "TRSACT_alloc:ID", EXIT0);
271
+ }
272
+ if ( T->flag&TRSACT_UNION )
273
+ calloc2 (T->T.w, T->T.end, "TRSACT_alloc: T->T.w", EXIT0);
274
+
275
+ if ( ERROR_MES ) return(0);
276
+
277
+ // set variables w.r.t rows
278
+ tt=0; FLOOP (t, 0, T->rows_org){
279
+ if ( C->rperm[t] <= T->rows_org ){
280
+ T->T.v[tt] = INIT_QUEUE;
281
+ T->trperm[tt] = t;
282
+ C->rperm[t] = tt;
283
+ T->w[tt] = C->rw? C->rw[t]: 1;
284
+ if ( T->pw ) T->pw[tt] = MAX (T->w[tt], 0);
285
+ if ( !flag ){
286
+ T->T.v[tt].v = (QUEUE_INT *)buf;
287
+ buf += (C->rowt[t]+1)*T->T.unit;
288
+ }
289
+ tt++;
290
+ }
291
+ }
292
+ free2 (C->rw);
293
+ // make the inverse perm of items
294
+ FLOOP (t, 0, T->clms_org)
295
+ if ( C->cperm[t] <= T->clms_end ) T->perm[C->cperm[t]] = t;
296
+
297
+ // set head of each stream, and stream ID of each transaction
298
+ if ( T->flag&TRSACT_MULTI_STREAM ){
299
+ malloc2 (T->head, T->str_num+2, "TRSACT_alloc: haed", EXIT0);
300
+ malloc2 (T->strID, (T->flag&LOAD_TPOSE)?T->T.clms:T->T.end, "TRSACT_alloc:ID", EXIT0);
301
+ }
302
+ org = (T->flag&LOAD_TPOSE)? T->clms_org: T->rows_org;
303
+ q = (T->flag&LOAD_TPOSE)? C->cperm: C->rperm;
304
+ ct = (T->flag&LOAD_TPOSE)? C->clmt: C->rowt;
305
+ h=1; tt=0; FLOOP (t, 0, org){
306
+ if ( q[t] <= org ){
307
+ if ( t == T->end1 && T->sep==0 ) T->sep = tt;
308
+ if ( t == T->sep && T->sep>0 ) T->sep = tt;
309
+ if ( T->strID ) T->strID[tt] = h;
310
+ tt++;
311
+ }
312
+ if ( T->head && ct[t]==0 ) T->head[h++] = tt+1;
313
+ }
314
+
315
+ T->new_t = T->T.t;
316
+ free2 (C->rowt); free2 (C->clmt);
317
+ return ( flag );
318
+ }
319
+
320
+
321
+ /* load the file to allocated memory according to permutation, and free C.rw, C.cw */
322
+ void TRSACT_file_read (TRSACT *T, FILE2 *fp, FILE_COUNT *C, VEC_ID *t, int flag){
323
+ QUEUE_INT item;
324
+
325
+ FILE2_reset (fp);
326
+ do {
327
+ if ( flag ) T->T.v[*t].v = *t? T->T.v[*t-1].v + T->T.v[*t-1].t +1: T->T.buf;
328
+ do {
329
+ item = (QUEUE_INT)FILE2_read_int (fp);
330
+ if ( (FILE_err&4)==0 ){
331
+ // printf ("%d %d %d %d\n", C->rperm[*t], T->rows_org, C->cperm[item], T->clms_org );
332
+ if ( T->flag&LOAD_TPOSE ){
333
+ if ( C->rperm[item]<=T->rows_org && C->cperm[*t]<=T->clms_end )
334
+ ARY_INS (T->T.v[ C->rperm[item] ], C->cperm[*t]);
335
+ } else if ( C->rperm[*t]<=T->rows_org && C->cperm[item]<=T->clms_end )
336
+ ARY_INS (T->T.v[ C->rperm[*t] ], C->cperm[item]);
337
+ }
338
+ } while ( (FILE_err&3)==0);
339
+ (*t)++;
340
+ } while ( (FILE_err&2)==0 );
341
+ }
342
+
343
+ /* sort the transactions and items according to the flag, allocate OQ, and database reduction */
344
+ void TRSACT_sort (TRSACT *T, FILE_COUNT *C, int flag){
345
+ VEC_ID t, *p;
346
+ int f;
347
+ PERM pp;
348
+ QUEUE Q;
349
+ QUEUE_ID i;
350
+
351
+ FLOOP (t, 0, T->T.t)
352
+ T->T.v[t].v[T->T.v[t].t] = T->T.clms;
353
+ if ( flag )
354
+ flag = (T->flag&(LOAD_SIZSORT+LOAD_WSORT)? ((T->flag&LOAD_DECROWSORT)? -1:1):0) *sizeof(QUEUE);
355
+ if ( flag ){ // sort rows for the case that some columns are not read
356
+ qsort_perm__VECt ((VEC *)T->T.v, T->T.t, C->rperm, flag);
357
+ ARY_INVPERMUTE (T->T.v, C->rperm, Q, T->T.t, "TRSACT_sort: ARY_INVPERMUTE", EXIT);
358
+ ARY_INVPERMUTE_ (T->trperm, C->rperm, pp, T->T.t);
359
+ }
360
+ free2 (C->rperm); free2 (C->cperm);
361
+
362
+ if ( T->flag & LOAD_PERM ) flag = 1;
363
+ else flag = (T->flag&LOAD_INCSORT)? 1: ((T->flag&LOAD_DECSORT)? -1: 0);
364
+ if ( flag ) FLOOP (t, 0, T->T.t) qsort_QUEUE_INT (T->T.v[t].v, T->T.v[t].t, flag);
365
+ if ( T->flag & LOAD_RM_DUP ) FLOOP (t, 0, T->T.t) MQUE_RM_DUP (T->T.v[t]);
366
+ ST_MAX (T->row_max, i, T->T.v, t, 0, T->T.t);
367
+
368
+ if ( T->flag&(TRSACT_ALLOC_OCC+TRSACT_SHRINK) ){
369
+ calloc2 (p, T->T.clms, "TRSACT_sort: p", EXIT);
370
+ QUEUE_delivery (NULL, p, NULL, T->T.v, NULL, T->T.t, T->T.clms);
371
+ ARY_MAX (T->clm_max, i, p, 0, T->T.clms);
372
+ MQUE_ALLOC (T->OQ, T->T.clms, p, T->occ_unit, 1, EXIT);
373
+ QUEUE_alloc (&T->OQ[T->T.clms], MAX(T->T.t, T->clm_max));
374
+ FLOOP (i, 0, T->T.clms+1) T->OQ[i].end = 0; // end is illegally set to 0, for the use in "TRSACT_find_same"
375
+ ARY_INIT_PERM (T->OQ[T->T.clms].v, T->T.t); // initial occurrence := all transactions
376
+ T->OQ[T->T.clms].t = T->T.t;
377
+ free (p);
378
+ }
379
+
380
+ // shrinking database
381
+ if ( T->flag&TRSACT_SHRINK ){
382
+ Q = T->OQ[T->T.clms];
383
+ T->OQ[T->T.clms].t = 0;
384
+ TRSACT_find_same (T, &Q, T->T.clms);
385
+ f = T->flag; // preserve the flag
386
+ BITRM (T->flag ,TRSACT_MAKE_NEW +TRSACT_UNION +TRSACT_INTSEC);
387
+ TRSACT_merge_trsact (T, &T->OQ[T->T.clms], T->T.clms); // just remove duplicated trsacts
388
+ T->flag = f; // recover flag
389
+ T->OQ[T->T.clms].t = 0;
390
+ FLOOP (t, 0, T->T.t) if ( T->mark[t]>0 ) ARY_INS(T->OQ[T->T.clms], t); // make resulted occ
391
+ }
392
+
393
+ // QUEUE_delivery (T->OQ, NULL, NULL, T->T.v, &T->OQ[T->T.clms], T->T.t, T->T.clms);
394
+ }
395
+
396
+ /*****************************************/
397
+ /* load transaction file and its weight */
398
+ /*****************************************/
399
+ void TRSACT_load (TRSACT *T, char *fname, char *fname2, char *wfname, char *wfname2, char *pfname){
400
+ FILE2 fp, fp2;
401
+ FILE_COUNT C = INIT_FILE_COUNT;
402
+ VEC_ID t=0;
403
+ int f;
404
+
405
+ FILE2_open (fp, fname, "r", "input-file open error", EXIT);
406
+ if ( fname2 ) FILE2_open (fp2, fname2, "r", "input-file2 open error", EXIT);
407
+ TRSACT_file_count (T, &C, &fp, wfname); if (ERROR_MES) goto END;
408
+ T->end1 = T->rows_org;
409
+ if ( fname2 ) TRSACT_file_count (T, &C, &fp2, wfname2); if (ERROR_MES) goto END;
410
+ f = TRSACT_alloc (T, pfname, &C); if (ERROR_MES) goto END;
411
+ TRSACT_file_read (T, &fp, &C, &t, f); if (ERROR_MES) goto END;
412
+ if ( fname2 ) TRSACT_file_read (T, &fp2, &C, &t, f); if (ERROR_MES) goto END;
413
+ TRSACT_sort (T, &C, f);
414
+
415
+ END:;
416
+ FILE2_close (&fp);
417
+ if (ERROR_MES) TRSACT_end (T);
418
+ return;
419
+ }
420
+
421
+ /* iteration of delivery; operate one transaction */
422
+ /* use OQ.end to count the number of items */
423
+ /* jump will be cleared (t := s) at the beginning */
424
+ void TRSACT_delivery_iter (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, VEC_ID t, QUEUE_INT m){
425
+ WEIGHT *y=0;
426
+ QUEUE_INT *x;
427
+ int f = T->flag&TRSACT_NEGATIVE;
428
+
429
+ if ( T->T.w ) y = T->T.w[t];
430
+ MQUE_MLOOP (T->T.v[t], x, m){
431
+ if ( T->OQ[*x].end == 0 ){ ARY_INS (*jump, *x); w[*x] = 0; if ( f ) pw[*x] = 0; }
432
+ T->OQ[*x].end++;
433
+ if ( y ){
434
+ w[*x] += *y; if ( *y>0 && f) pw[*x] += *y;
435
+ y++;
436
+ } else {
437
+ w[*x] += T->w[t]; if ( f ) pw[*x] += T->pw[t];
438
+ }
439
+ }
440
+ }
441
+
442
+ void TRSACT_delivery (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, QUEUE *occ, QUEUE_INT m){
443
+ VEC_ID i, t;
444
+ char *b = (char *)(occ?occ->v: NULL);
445
+ jump->t = jump->s;
446
+ FLOOP (i, occ?occ->s:0, occ?occ->t:T->T.t){
447
+ t = occ? *((QUEUE_INT *)b): i;
448
+ TRSACT_delivery_iter (T, jump, w, pw, t, m);
449
+ b += T->occ_unit;
450
+ }
451
+ }
452
+
453
+ /* usual delivery (make transpose) with checking sc
454
+ don't touch jump */
455
+ /* if (T->flag&TRSACT_DELIV_SC), do not stack to items e with non-zero T->sc[e] */
456
+ void TRSACT_deliv (TRSACT *T, QUEUE *occ, QUEUE_INT m){
457
+ VEC_ID i, t;
458
+ QUEUE_INT *x;
459
+ char *b = (char *)(occ?occ->v: NULL);
460
+ if ( T->flag&TRSACT_DELIV_SC ){
461
+ FLOOP (i, occ?occ->s:0, occ?occ->t:T->T.t){
462
+ t = occ? *((QUEUE_INT *)b): i;
463
+ MQUE_MLOOP (T->T.v[t], x, m)
464
+ if ( !T->sc[*x] ) ARY_INS (T->OQ[*x], t);
465
+ b += T->occ_unit;
466
+ }
467
+ } else {
468
+ FLOOP (i, occ?occ->s:0, occ?occ->t:T->T.t){
469
+ t = occ? *((QUEUE_INT *)b): i;
470
+ MQUE_MLOOP (T->T.v[t], x, m) ARY_INS (T->OQ[*x], t);
471
+ b += T->occ_unit;
472
+ }
473
+ }
474
+ }
475
+
476
+ /**************************************************************/
477
+ /* Find identical transactions in a subset of transactions, by radix-sort like method */
478
+ /* items e with non-zero T->sc[e] are skipped, and an item not smaller than "end" is treated as the end of the transaction */
479
+ /* INPUT: T:transactions, occ:subset of T represented by transaction indices, end:items >= end are ignored */
480
+ /* OUTPUT: if transactions i1, i2,..., ik are the same, they have same value in T->mark[i]
481
+ (not all) isolated transaction may have mark 1 */
482
+ /* the queues T->OQ[0..T->T.clms] are used as buckets (one for each item, and T->OQ[T->T.clms] for
   "end of transaction"), T->jump as the stack of buckets to be processed, and T->shift[t] as the
   read cursor of transaction t.
   NOTE(review): the original comment requires QQ[i].t and QQ[i].s to be 0 on entry - confirm with the callers. */
483
+ /*************************************************************************/
484
+ void TRSACT_find_same (TRSACT *T, QUEUE *occ, QUEUE_INT end){
485
+ VEC_ID mark=0, t_end;  // mark: the last group ID used; t_end: #transactions in the current bucket
486
+ QUEUE *o=occ, *Q = T->T.v, *EQ, *QQ = T->OQ;  // o: the bucket being processed; EQ: the bucket receiving a transaction
487
+ QUEUE_INT *x, *y, e;
488
+ QUEUE_ID ot = occ->t;  // original size of occ, restored at the end
489
+
490
+ // initialization
491
+ MQUE_FLOOP (*occ, x){ T->mark[*x] = mark; T->shift[*x] = Q[*x].v; }  // all in one group (mark 0), cursors at the first items
492
+ T->jump.t = T->jump.s; QQ[T->T.clms].s = 0;  // clear the stack of buckets
493
+
494
+ while (1){
495
+ if ( o->t - o->s == 1 ) T->mark[o->v[--o->t]] = 1; // no same transactions; mark by 1
496
+ if ( o->t == 0 ) goto END;  // nothing to distribute from this bucket
497
+ // if previously inserted transactions are in different group, then change their marks with incrementing mark by one
498
+ mark++; for (x=&o->v[o->s] ; x <&o->v[o->t] ; x++) T->mark[*x] = mark;
499
+ t_end = o->t;
500
+ o->s = o->t = 0;  // the bucket becomes empty; its contents are still read through o->v below
501
+
502
+ // insert each t to buckets
503
+ for (x=o->v ; x<o->v+t_end ; x++){
504
+ // get next item in transaction t
505
+ do {
506
+ e = *(T->shift[*x]);
507
+ T->shift[*x]++;
508
+ if ( e >= end ){ e = T->T.clms; break; }  // end of transaction: goes to the last bucket
509
+ } while ( T->sc[e] );  // skip the items with non-zero sc
510
+ EQ = &QQ[e];
511
+ // if previously inserted transactions are in different group, then change their mark to the transaction ID of top transacion.
512
+ y = &(EQ->v[EQ->s]);  // the first transaction of the group at the tail of the bucket
513
+ if ( EQ->s < EQ->t && T->mark[*y] != T->mark[*x] ){
514
+ if ( EQ->t - EQ->s == 1 ) T->mark[EQ->v[--EQ->t]] = 1; // the tail of the queue has no same transaction; mark the tail by 1
515
+ else {
516
+ mark++; for ( ; y< EQ->v + EQ->t ; y++) T->mark[*y] = mark;  // close the tail group by giving it a new mark
517
+ EQ->s = EQ->t;
518
+ }
519
+ } else if ( EQ->t == 0 && e<T->T.clms ) ARY_INS (T->jump, e);  // bucket used for the first time: push it (the last bucket is never pushed)
520
+ ARY_INS (*EQ, *x); // insert t to bucket of e
521
+ }
522
+ END:;
523
+ if ( QUEUE_LENGTH_(T->jump) == 0 ) break;  // no bucket is left
524
+ o = &QQ[QUEUE_ext_tail_ (&T->jump)];  // take the next bucket from the tail of jump
525
+ }
526
+
527
+ // same transactions are in queue of item_max
528
+ if ( QQ[T->T.clms].t -QQ[T->T.clms].s == 1 ) T->mark[QQ[T->T.clms].v[--QQ[T->T.clms].t]] = 1;  // a single transaction at the tail has no twin
529
+ if ( occ != &QQ[T->T.clms] ) occ->t = ot;  // occ was emptied when it was processed; recover its size
530
+ }
531
+
532
+
533
+ /****************************************************************************/
534
+ /* copy transaction t to tt (only items i s.t. sc[i]==0) **/
535
+ /* T->w has to be allocated. itemweight will be alocated even if T->w[t] == NULL */
536
+ /****************************************************************************/
537
+ void TRSACT_copy (TRSACT *T, VEC_ID tt, VEC_ID t, QUEUE_INT end){
538
+ QUEUE_INT *x, *buf;
539
+ WEIGHT *wbuf = NULL, tw = T->w[t], *w = T->T.w? T->T.w[t]: NULL;
540
+ int bnum = T->buf.num, bblock = T->buf.block_num, wflag = (w || (T->flag&TRSACT_UNION));
541
+
542
+ buf = (QUEUE_INT *)BASE_get_memory (&T->buf, T->T.v[t].t+1);
543
+ if ( ERROR_MES ) return;
544
+ if ( wflag ) T->T.w[tt] = wbuf = (WEIGHT *)BASE_get_memory (&T->wbuf, T->T.v[t].t+1);
545
+ if ( ERROR_MES ){ T->buf.num = bnum; T->buf.block_num = bblock; return; }
546
+ T->T.v[tt].v = buf;
547
+ T->w[tt] = T->w[t];
548
+ if ( T->flag&TRSACT_NEGATIVE ) T->pw[tt] = T->pw[t];
549
+ MQUE_MLOOP (T->T.v[t], x, end){
550
+ if ( !T->sc[*x] ){
551
+ *buf = *x; buf++;
552
+ if ( wflag ){ *wbuf = w? *w: tw; wbuf++; }
553
+ }
554
+ if ( w ) w++;
555
+ }
556
+ T->T.v[tt].t = (VEC_ID)(buf - T->T.v[tt].v);
557
+ *buf = T->T.clms;
558
+ T->buf.num = (int)(buf - ((QUEUE_INT *)T->buf.base[T->buf.block_num]) + 1);
559
+ if ( wflag ) T->wbuf.num = (int)(wbuf - ((WEIGHT *)T->wbuf.base[T->wbuf.block_num]) + 1);
560
+ }
561
+
562
+ /****************************************************************************/
563
+ /* intersection of transaction t and tt (only items i s.t. sc[i]==0) **/
564
+ /* shift is the array of pointers indicates the start of each transaction **/
565
+ /****************************************************************************/
566
+ void TRSACT_suffix_and (TRSACT *T, VEC_ID tt, VEC_ID t){
567
+ QUEUE_INT *x=T->shift[tt], *y=T->shift[t], *xx=T->shift[tt];
568
+ while ( *x < T->T.clms && *y < T->T.clms ){
569
+ if ( *x > *y ) y++;
570
+ else {
571
+ if ( *x == *y ){
572
+ if ( !T->sc[*x] ){ *xx = *x; xx++; }
573
+ y++;
574
+ }
575
+ x++;
576
+ }
577
+ }
578
+ T->T.v[tt].t = (VEC_ID)(xx - T->T.v[tt].v);
579
+ *xx = T->T.clms;
580
+ T->buf.num = (int)(xx - ((QUEUE_INT *)T->buf.base[T->buf.block_num]) + 1);
581
+ }
582
+
583
+
584
+ /***************************************************************************/
585
+ /* take union of transaction t to tt (only items i with T->sc[i]==0 are kept); the weights of common items are summed up */
586
+ /* CAUTION: tt has to be placed at the last of T->buf, since the result is written behind it. */
587
+ /* if the size of tt increases, the following memory will be overwritten */
588
+ /* if memory (T->buf, T->wbuf) is short, do nothing and return with ERROR_MES set */
589
+ /* T->T.w[t] can be NULL (then T->w[t] is used as the weight of each item of t), but T->T.w[tt] can not */
590
+ /***************************************************************************/
591
+ void TRSACT_itemweight_union (TRSACT *T, VEC_ID tt, VEC_ID t){
592
+ int bnum = T->buf.num, bblock = T->buf.block_num;  // state of T->buf, to be recovered on memory shortage
593
+ QUEUE_ID siz = T->T.v[tt].t +T->T.v[t].t;  // upper bound of the size of the union
594
+ QUEUE_INT *xx_end = T->T.v[tt].v + siz, *xx = xx_end;  // xx: write cursor, moving backward from xx_end
595
+ QUEUE_INT *x = T->T.v[tt].v + T->T.v[tt].t-1, *y = T->T.v[t].v + T->T.v[t].t-1;  // read cursors at the last items of tt and t
596
+ WEIGHT *ww = T->T.w[tt] +siz, *wx = T->T.w[tt] +T->T.v[tt].t-1, *wy = T->T.w[t] +T->T.v[t].t-1;  // weight cursors; wy is dereferenced only if wf
597
+ WEIGHT tw = T->w[t];
598
+ int flag=0, wf = (T->T.w[t]!=NULL);  // flag: a new memory block has been taken; wf: t has item weights
599
+
600
+ // if sufficiently large memory can not be taken from the current memory block, use the next block
601
+ if ( xx_end >= (QUEUE_INT *)T->buf.base[T->buf.block_num] +T->buf.block_siz ){
602
+ xx_end = xx = ((QUEUE_INT*)BASE_get_memory (&T->buf, T->buf.block_siz)) +siz;
603
+ if (ERROR_MES) return;
604
+ ww = ((WEIGHT *)BASE_get_memory (&T->wbuf, T->wbuf.block_siz)) +siz;
605
+ if ( ERROR_MES ){ T->buf.num = bnum; T->buf.block_num = bblock; return; }  // undo the allocation from T->buf
606
+ flag =1;
607
+ }
608
+ if ( ERROR_MES ) return;
609
+
610
+ // take union and store it in the allocated memory
611
+ while ( x >= T->T.v[tt].v && y >= T->T.v[t].v ){  // merge from the last (largest) items to the first
612
+ if ( *x > *y ){
613
+ if ( !T->sc[*x] ){ *xx = *x; *ww = *wx; xx--; ww--; }
614
+ x--; wx--;
615
+ if ( x < T->T.v[tt].v ){  // tt is exhausted; output the rest of t
616
+ while ( y >= T->T.v[t].v ){
617
+ if ( !T->sc[*y] ){ *xx = *y; *ww = wf? *wy: tw; xx--; ww--; }
618
+ y--; wy--;
619
+ }
620
+ }
621
+ } else {
622
+ if ( !T->sc[*y] ){
623
+ *ww = wf? *wy: tw; *xx = *y;
624
+ if ( *x == *y ){ *ww += *wx; x--; wx--; }  // common item: add the weight of tt
625
+ xx--; ww--;
626
+ }
627
+ y--; wy--;
628
+ if ( y < T->T.v[t].v ){  // t is exhausted; output the rest of tt
629
+ while ( x >= T->T.v[tt].v ){
630
+ if ( !T->sc[*x] ){ *xx = *x; *ww = *wx; xx--; ww--; }
631
+ x--; wx--;
632
+ }
633
+ }
634
+ }
635
+ }
636
+ T->T.v[tt].t = (VEC_ID)(xx_end -xx);  // the number of items written
637
+
638
+ // if [tt].v will overflow, set [tt].v to the top of next memory block
639
+ if ( flag ){
640
+ if ( T->T.v[tt].v + T->T.v[tt].t+1 >= (QUEUE_INT *)T->buf.base[T->buf.block_num-1] +T->buf.block_siz ){
641
+ T->T.v[tt].v = (QUEUE_INT *)T->buf.base[T->buf.block_num];
642
+ T->T.w[tt] = (WEIGHT *)T->wbuf.base[T->wbuf.block_num];
643
+ } else { // new memory block is allocated, but the transaction fits in the previous block
644
+ T->buf.block_num--;
645
+ T->wbuf.block_num--;
646
+ }
647
+ }
648
+
649
+ // copy the union to the original position
650
+ for ( x=T->T.v[tt].v,wx=T->T.w[tt] ; xx<xx_end ; ){
651
+ xx++; ww++;  // the cursors point one cell before the first written item
652
+ *x = *xx; *wx = *ww;
653
+ x++; wx++;
654
+ }
655
+ *x = T->T.clms;  // sentinel
656
+ T->wbuf.num = T->buf.num = (int)(x - ((QUEUE_INT *)T->buf.base[T->buf.block_num]) +1);  // memory behind the sentinel is free again
657
+ return;
658
+ }
659
+
660
+
661
+
662
+ /*****/
663
+ /* merge duplicated transactions in occ according to those having same value in T->mark
664
+ the mark except for the representative will be zero, for each group of the same transactions
665
+ the mark of the representative will be its (new) ID +2 (one is for identical transaction) */
666
+ /* T->flag&TRSACT_MAKE_NEW: make new trsact for representative
667
+ T->flag&TRSACT_INTSEC: take suffix intersection of the same trsacts
668
+ T->flag&TRSACT_UNION: take union of the same trsacts */
669
+ /* o will be cleard after the execution */
670
+ void TRSACT_merge_trsact (TRSACT *T, QUEUE *o, QUEUE_INT end){
671
+ VEC_ID mark = 0, tt=0;
672
+ QUEUE_INT *x;
673
+
674
+ MQUE_FLOOP (*o, x){
675
+ if ( mark == T->mark[*x] ){
676
+ T->mark[*x] = 0; // mark of unified (deleted) transaction
677
+ T->w[tt] += T->w[*x]; if ( T->pw ) T->pw[tt] += T->pw[*x];
678
+ if ( T->flag & TRSACT_INTSEC ){
679
+ TRSACT_suffix_and (T, tt, *x);
680
+ T->buf.num = (int)(T->T.v[tt].v - (QUEUE_INT *)T->buf.base[T->buf.block_num] +T->T.v[tt].t +1);
681
+ }
682
+ if ( T->flag & TRSACT_UNION ){
683
+ TRSACT_itemweight_union (T, tt, *x);
684
+ if ( ERROR_MES ) T->mark[*x] = *x+2; // do not merge if not enough memory
685
+ }
686
+ }
687
+ if ( mark != T->mark[*x] && T->mark[*x] > 1 ){ // *x is not the same to the previous, or memory short
688
+ mark = T->mark[*x];
689
+ if ( T->flag&TRSACT_MAKE_NEW ){
690
+ tt = T->new_t++;
691
+ TRSACT_copy (T, tt, *x, (T->flag&(TRSACT_INTSEC+TRSACT_UNION))? T->T.clms: end);
692
+ if ( ERROR_MES ){ T->new_t--; tt = *x; }
693
+ else for (T->shift[tt]=T->T.v[tt].v ; *(T->shift[tt])<end ; T->shift[tt]++);
694
+ } else tt = *x;
695
+ T->mark[*x] = tt+2;
696
+ }
697
+ }
698
+ o->t = o->s = 0;
699
+ }
700
+
701
+ /* remove the unified transactions from occ (consider T->occ_unit) */
702
+ void TRSACT_reduce_occ (TRSACT *T, QUEUE *occ){
703
+ QUEUE_INT *x, *y=occ->v;
704
+ QUEUE_ID i=0;
705
+ if ( T->occ_unit == sizeof(QUEUE_INT) ){
706
+ MQUE_FLOOP (*occ, x){
707
+ if ( T->mark[*x] == 0 ) continue;
708
+ *y = T->mark[*x]>1? T->mark[*x]-2: *x;
709
+ y++; i++;
710
+ }
711
+ } else {
712
+ MQUE_FLOOP_ (*occ, x, T->occ_unit){
713
+ if ( T->mark[*x] == 0 ) continue;
714
+ memcpy (y, x, T->occ_unit);
715
+ *y = T->mark[*x]>1? T->mark[*x]-2: *x;
716
+ y = (QUEUE_INT *)(((char *)y)+T->occ_unit);
717
+ i++;
718
+ }
719
+ }
720
+ occ->t = i;
721
+ }
722
+
723
+ #endif