nysol-zdd 3.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/ext/zdd_so/BDD.cc +495 -0
  3. data/ext/zdd_so/BDD.h +356 -0
  4. data/ext/zdd_so/BDDDG.cc +1818 -0
  5. data/ext/zdd_so/BDDDG.h +107 -0
  6. data/ext/zdd_so/BDDHASH.cc +91 -0
  7. data/ext/zdd_so/BtoI.cc +503 -0
  8. data/ext/zdd_so/BtoI.h +144 -0
  9. data/ext/zdd_so/CtoI.cc +1072 -0
  10. data/ext/zdd_so/CtoI.h +186 -0
  11. data/ext/zdd_so/MLZBDDV.cc +153 -0
  12. data/ext/zdd_so/MLZBDDV.h +42 -0
  13. data/ext/zdd_so/SOP.cc +608 -0
  14. data/ext/zdd_so/SOP.h +199 -0
  15. data/ext/zdd_so/ZBDD.cc +1035 -0
  16. data/ext/zdd_so/ZBDD.h +243 -0
  17. data/ext/zdd_so/ZBDDDG.cc +1834 -0
  18. data/ext/zdd_so/ZBDDDG.h +105 -0
  19. data/ext/zdd_so/ZBDDHASH.cc +91 -0
  20. data/ext/zdd_so/bddc.c +2816 -0
  21. data/ext/zdd_so/bddc.h +132 -0
  22. data/ext/zdd_so/extconf.rb +25 -0
  23. data/ext/zdd_so/include/aheap.c +211 -0
  24. data/ext/zdd_so/include/aheap.h +111 -0
  25. data/ext/zdd_so/include/base.c +93 -0
  26. data/ext/zdd_so/include/base.h +60 -0
  27. data/ext/zdd_so/include/itemset.c +473 -0
  28. data/ext/zdd_so/include/itemset.h +153 -0
  29. data/ext/zdd_so/include/problem.c +371 -0
  30. data/ext/zdd_so/include/problem.h +160 -0
  31. data/ext/zdd_so/include/queue.c +518 -0
  32. data/ext/zdd_so/include/queue.h +177 -0
  33. data/ext/zdd_so/include/sgraph.c +331 -0
  34. data/ext/zdd_so/include/sgraph.h +170 -0
  35. data/ext/zdd_so/include/stdlib2.c +832 -0
  36. data/ext/zdd_so/include/stdlib2.h +746 -0
  37. data/ext/zdd_so/include/trsact.c +723 -0
  38. data/ext/zdd_so/include/trsact.h +167 -0
  39. data/ext/zdd_so/include/vec.c +583 -0
  40. data/ext/zdd_so/include/vec.h +159 -0
  41. data/ext/zdd_so/lcm-vsop.cc +596 -0
  42. data/ext/zdd_so/print.cc +683 -0
  43. data/ext/zdd_so/table.cc +330 -0
  44. data/ext/zdd_so/vsop.h +88 -0
  45. data/ext/zdd_so/zdd_so.cpp +3277 -0
  46. data/lib/nysol/zdd.rb +31 -0
  47. metadata +131 -0
@@ -0,0 +1,723 @@
1
+ /* QUEUE based Transaction library, including database reduction.
2
+ 25/Nov/2007 by Takeaki Uno e-mail:uno@nii.jp,
3
+ homepage: http://research.nii.ac.jp/~uno/index.html */
4
+ /* This program is available for only academic use, basically.
5
+ Anyone can modify this program, but he/she has to write down
6
+ the change of the modification on the top of the source code.
7
+ Neither contact nor appointment to Takeaki Uno is needed.
8
+ If one wants to re-distribute this code, do not forget to
9
+ refer the newest code, and show the link to homepage of
10
+ Takeaki Uno, to notify the news about the codes for the users.
11
+ For the commercial use, please make a contact to Takeaki Uno. */
12
+
13
+ #ifndef _trsact_c_
14
+ #define _trsact_c_
15
+
16
+ // #define WEIGHT_DOUBLE
17
+
18
+ #include"trsact.h"
19
+ #include"base.c"
20
+ #include"vec.c"
21
+
22
+ /***********************************/
23
+ /* print transactions
+    Writes transactions of T to stdout, one transaction per output line.
+    occ: when non-NULL, only the entries of occ are printed; the transaction
+         ID of entry i is read from byte offset i*T->occ_unit of occ->v and is
+         printed first, followed by ":::". When NULL, the loop index itself
+         is the transaction ID and the loop bound is T->T.t.
+    p:   when non-NULL, item e is printed as p[e]; otherwise e is printed.
+    Each item is followed by its weight in parentheses if T->T.w is set, then
+    by ",". The line ends with the transaction weight (if T->w is set) and
+    the value of T->T.v[t].end in parentheses. */
24
+ /***********************************/
25
+ void TRSACT_print (TRSACT *T, QUEUE *occ, PERM *p){
26
+ VEC_ID i, t;
27
+ QUEUE_ID j;
28
+ QUEUE_INT e;
29
+ FLOOP (i, 0, occ? occ->t: T->T.t){
30
+ t = occ? *((QUEUE_INT *)(&(((char *)(occ->v))[i*T->occ_unit]))): i; /* entries of occ are occ_unit bytes apart; the ID is the leading QUEUE_INT of each entry */
31
+ if ( occ ) printf (QUEUE_INTF "::: ", t);
32
+ ARY_FLOOP (T->T.v[t], j, e){ /* presumably j is the position and e the item at that position -- TODO confirm against the macro */
33
+ printf (QUEUE_INTF, p? p[e]: e);
34
+ if ( T->T.w ) printf ("(" WEIGHTF ")", T->T.w[t][j]);
35
+ printf (",");
36
+ }
37
+ if ( T->w ) printf (" :" WEIGHTF " ", T->w[t]);
38
+ printf (" (" QUEUE_INTF ")\n", T->T.v[t].end);
39
+ }
40
+ }
41
+
42
+ /*
43
+ void TRSACT_prop_print (TRSACT *T){
44
+ print_err ("trsact: %s", P->trsact_fname);
45
+ if ( P->trsact2_fname2 ) print_err (" ,2nd-trsact2 %s (from ID %d)", P->trsact_fname2, P->TT.end1);
46
+ print_err (" ,#transactions %d ,#items %d ,size %zd", P->TT.rows_org, P->TT.clms_org, P->TT.eles_org);
47
+ print_err (" extracted database: #transactions %d ,#items %d ,size %zd", P->TT.T.t, P->TT.T.clms, P->TT.T.eles);
48
+ if ( P->trsact_wfname ) print_err (" ,weightfile %s", P->trsact_wfname);
49
+ if ( P->trsact_wfname2 ) print_err (" ,2nd-weightfile %s", P->trsact_wfname2);
50
+ if ( P->trsact_pfname ) print_err (" ,item-order-file %s", P->trsact_pfname);
51
+ print_err ("\n");
52
+ }
53
+ */
54
+
55
+ /* initialization of structure TRSACT
+    Puts every field of *T into its "empty" state: counters to 0, all owned
+    pointers to NULL, embedded containers to their INIT_* values, and the
+    filter ranges to their widest setting. Nothing is allocated or freed
+    here, so calling it on a loaded structure would drop its pointers;
+    TRSACT_end calls it only after releasing them. */
56
+ void TRSACT_init (TRSACT *T){
57
+ T->type = TYPE_TRSACT;
58
+ T->flag = 0;
59
+ T->T = INIT_SETFAMILY;
60
+ T->clms_org = T->clm_max = T->clms_end = T->non_empty_clms = 0;
61
+ T->rows_org = T->row_max = T->end1 = T->sep = 0;
62
+ T->perm = NULL;
63
+ T->trperm = NULL;
64
+ T->w = T->pw = NULL;
65
+
66
+ T->clm_lb = 0; /* default ranges below accept everything: lower bounds 0 / -WEIGHTHUGE, upper bounds the *_END / *HUGE constants */
67
+ T->clm_ub = VEC_ID_END;
68
+ T->row_lb = 0;
69
+ T->row_ub = QUEUE_IDHUGE;
70
+ T->w_lb = -WEIGHTHUGE; T->w_ub = WEIGHTHUGE;
71
+
72
+ T->eles_org = 0;
73
+ T->total_w = T->total_pw = T->total_w_org = T->total_pw_org =0;
74
+
75
+ T->jump = INIT_QUEUE;
76
+ T->str_num = 0;
77
+ T->head = T->strID = NULL;
78
+
79
+ T->th = 1;
80
+ T->mark = NULL;
81
+ T->shift = NULL;
82
+ T->occ_unit = sizeof(QUEUE_INT); /* an occurrence entry is a bare transaction ID by default; other code in this file strides by occ_unit bytes */
83
+ T->OQ = NULL;
84
+ T->sc = NULL;
85
+
86
+ T->new_t = 0;
87
+ T->buf = INIT_BASE;
88
+ T->wbuf = INIT_BASE;
89
+ }
90
+
91
+ /**************************************************************/
92
+ void TRSACT_end (TRSACT *T){ /* Release everything owned by *T and return it to the
+    state produced by TRSACT_init, so the structure can be reused. */
93
+ if ( T->OQ ){ free2 (T->OQ->v ); free2 (T->OQ[T->T.clms].v); } /* frees the buffer held by OQ[0] and the buffer of the extra queue OQ[clms]; T->T.clms must be read before SETFAMILY_end below */
94
+ free2 (T->T.w);
95
+ SETFAMILY_end (&T->T);
96
+ if ( T->w != T->pw ) free2 (T->pw); /* pw is freed separately only when it is not the same array as w, avoiding a double free */
97
+ mfree (T->w, T->perm, T->trperm);
98
+ mfree (T->mark, T->shift, T->sc, T->OQ, T->head, T->strID);
99
+ QUEUE_end (&T->jump);
100
+ BASE_end (&T->buf);
101
+ BASE_end (&T->wbuf);
102
+ TRSACT_init (T); /* reset all fields so no dangling pointer remains */
103
+ }
104
+
105
+ /*****************************************/
106
+ /* scan transaction file "fp" (and the optional weight file named by wf) and count items and transactions; nothing is stored yet.
+    Counting continues from the current T->rows_org, so a second call appends the rows of a second file. */
107
+ /* weights are read only if wf!=NULL; if the smallest weight read is negative, TRSACT_NEGATIVE is set in T->flag */
108
+ /* T->rows_org, clms_org, eles_org := #transactions, (largest item)+1, #all elements read */
109
+ /* a non-empty transaction of size k is removed again from C->clmt when it is out of range:
+    without LOAD_TPOSE the range is T->row_lb..row_ub for k; with LOAD_TPOSE it is
+    T->clm_lb..clm_ub for k and T->w_lb..w_ub for the weight sum s */
110
+ /* T->total_w_org, total_pw_org := sum of (positive) weights of transactions; equal to T->rows_org when there is no weight array */
111
+ /* C->clmt[i],C->cw[i] := the number/(sum of positive weights) of transactions including i; C->rowt[t] := size of transaction t.
+    With LOAD_TPOSE, C->cw[t] is instead the weight sum s of the items of transaction t.
+    On an open/allocation failure, C->rw, C->cw, C->clmt, C->rowt and the local jump array are freed and EXIT is executed. */
112
+ /****************************************/
113
+ void TRSACT_file_count (TRSACT *T, FILE_COUNT *C, FILE2 *fp, char *wf){
114
+ QUEUE_INT i, item, kk=0, k, jump_end=0;
115
+ WEIGHT w, s;
116
+ VEC_ID *jump=NULL; /* items of the transaction being read, kept so their clmt counts can be undone */
117
+ FILE2 wfp;
118
+
119
+ if ( wf ){
120
+ FILE2_open (wfp, wf, "r", "TRSACT_file_count:weight file", goto ERR);
121
+ ARY_SCAN (kk, WEIGHT, wfp, 1); /* first pass: presumably counts the numbers in the weight file into kk -- TODO confirm */
122
+ kk += T->rows_org; /* weights of this file are stored after those of rows counted earlier */
123
+ realloc2 (C->rw, kk+1, "TRSACT_file_count: C->rw", goto ERR); /* NOTE(review): a failure here jumps to ERR with wfp still open */
124
+ FILE2_reset (&wfp);
125
+ ARY_READ (C->rw, double, kk, wfp); /* NOTE(review): reads as double while the scan above uses WEIGHT -- confirm this is intended when WEIGHT is not double */
126
+ ARY_MIN (w, i, C->rw, 0, kk);
127
+ if ( w<0 ) T->flag |= TRSACT_NEGATIVE;
128
+ FILE2_close (&wfp);
129
+ }
130
+ do { /* one iteration per transaction (line) of the file */
131
+ s=0;
132
+ k=0;
133
+ w = wf? (T->rows_org<kk? C->rw[T->rows_org]: TRSACT_DEFAULT_WEIGHT): 1; /* weight of this transaction; default when the weight file is shorter than the data */
134
+ do { /* one iteration per number read */
135
+ item = (QUEUE_INT)FILE2_read_int (fp);
136
+ if ( (FILE_err&4)==0 ){ /* presumably bit 4 of FILE_err means "no number was read" -- TODO confirm */
137
+ ENMAX (T->clms_org, item+1); // update #items
138
+ reallocx (jump, jump_end, k, 0, "TRSACT_file_count: jump", goto ERR);
139
+ jump[k] = item;
140
+ k++;
141
+ s += wf? (item<kk? MAX(C->rw[item],0): TRSACT_DEFAULT_WEIGHT): 1; /* weight sum indexed by item: used as the row weight in transpose mode */
142
+
143
+ // per-item occurrence count, and (non-transpose mode only) per-item sum of transaction weights
144
+ reallocx (C->clmt, C->clm_end, item, 0, "TRSACT_file_count:clmt",goto ERR);
145
+ C->clmt[item]++;
146
+ if ( !(T->flag&LOAD_TPOSE) ){
147
+ reallocx (C->cw, C->cw_end, item, 0, "TRSACT_file_count: cw", goto ERR);
148
+ C->cw[item] += MAX(w,0); // sum up positive weights
149
+ }
150
+ }
151
+ } while ( (FILE_err&3)==0); /* presumably bits 1|2 mean end-of-line / end-of-file -- TODO confirm */
152
+
153
+ // size of this transaction, and (transpose mode only) its weight sum
154
+ reallocx (C->rowt, C->row_end, T->rows_org, 0, "TRSACT_file_count:rowt", goto ERR);
155
+ C->rowt[T->rows_org] = k;
156
+ if ( T->flag&LOAD_TPOSE ){
157
+ reallocx (C->cw, C->cw_end, T->rows_org, 0, "TRSACT_file_count: cw", goto ERR);
158
+ C->cw[T->rows_org] = s; // sum up positive weights
159
+ }
160
+ if ( k==0 && FILE_err&2 ) break; /* nothing read and bit 2 set: do not count a trailing empty row */
161
+ T->rows_org++; // increase #transaction
162
+
163
+ if ( !wf ) s = k; // un-weighted case; weighted sum is #included-items
164
+ if ( k==0 ){
165
+ T->str_num++; // increase #streams if empty transaction is read
166
+ } else {
167
+ T->eles_org += k;
168
+ if ( (!(T->flag&LOAD_TPOSE) && !RANGE (T->row_lb, k, T->row_ub))
169
+ || ((T->flag&LOAD_TPOSE) && (!RANGE(T->w_lb, s, T->w_ub) || !RANGE (T->clm_lb, k, T->clm_ub)) ) ) FLOOP (i, 0, k) C->clmt[jump[i]]--; /* out-of-range transaction: undo its contribution to the item counts */
170
+ }
171
+ } while ( (FILE_err&2)==0);
172
+ free2 (jump);
173
+ // no weight array: both weight totals are set to the number of transactions
174
+ if ( C->rw == NULL ){ T->total_w_org = T->total_pw_org = T->rows_org; return; }
175
+ C->clm_btm = MIN(kk, T->rows_org);
176
+ reallocx (C->rw, kk, T->rows_org, TRSACT_DEFAULT_WEIGHT, "TRSACT_file_count: rw", goto ERR); /* presumably pads rw up to rows_org entries with the default weight -- TODO confirm */
177
+ FLOOP (k, 0, T->rows_org){
178
+ T->total_w_org += C->rw[k];
179
+ T->total_pw_org += MAX(C->rw[k],0);
180
+ }
181
+ return;
182
+ ERR:;
183
+ mfree (C->rw, C->cw, C->clmt, C->rowt, jump);
184
+ EXIT;
185
+ }
186
+
187
+ /* allocate memory, set permutation, and free C.clmt,rowt,rw,cw
+    Sizes and allocates the storage of T from the counts gathered in C by
+    TRSACT_file_count, and builds the item / transaction renumbering.
+    T:      transaction database; T->flag selects transpose, sorting, shrink...
+    pfname: item-order file. It is loaded unless TRSACT_WRITE_PERM is set, in
+            which case the computed order is written to it. May be NULL.
+    C:      counters. C->cperm and C->rperm are allocated here and left for
+            the caller; C->cw, C->rw, C->rowt and C->clmt are freed.
+    Returns "flag": non-zero when the element count exceeds the number of
+    elements in accepted columns (non-transpose mode), in which case row
+    buffers are not laid out here and TRSACT_file_read places them while
+    reading. Returns 0 on error (EXIT0 / ERROR_MES).
+    T->head and T->strID are allocated exactly once, and T->T.eles is summed
+    over the rows that pass the range test. */
188
+ int TRSACT_alloc (TRSACT *T, char *pfname, FILE_COUNT *C){
189
+ VEC_ID t, tt=0, ttt=T->clms_org, ttt_max = ttt, h, flag, org;
190
+ FILE_COUNT_INT *ct;
191
+ size_t s=0;
192
+ PERM *q, *p=NULL;
193
+ char *buf;
194
+
195
+ // swap variables in the case of transpose
196
+ if ( T->flag & LOAD_TPOSE ){
197
+ common_QUEUE_INT = T->clms_org; T->clms_org = (QUEUE_INT)T->rows_org; T->rows_org = (VEC_ID)common_QUEUE_INT;
198
+ SWAP_PNT (C->clmt, C->rowt);
199
+ }
200
+
201
+ if ( T->flag&TRSACT_SHRINK ) T->flag |= LOAD_DBLBUF; /* shrinking creates merged transactions, so room for twice the rows is reserved (see T->T.end below) */
202
+ // count valid columns/elements
203
+
204
+ if ( pfname && !(T->flag&TRSACT_WRITE_PERM) ){
205
+ ARY_LOAD (p, QUEUE_INT, ttt, pfname, 1, "TRSACT_load: item order file", EXIT0);
206
+ ARY_MAX (ttt_max, tt, p, 0, ttt);
207
+ // ENMAX (T->clms_org, ttt_max+1);
208
+ T->T.clms = ttt_max+1;
209
+ } else {
210
+ if ( T->flag&LOAD_PERM ){
211
+ if ( T->flag&TRSACT_FRQSORT )
212
+ p = qsort_perm_WEIGHT (C->cw, T->clms_org, (T->flag&LOAD_INCSORT)?1:-1);
213
+ else p = qsort_perm_FILE_COUNT_INT (C->clmt, T->clms_org, (T->flag&LOAD_INCSORT)?1:-1);
214
+ }
215
+ if ( pfname ) ARY_WRITE (pfname, p, T->clms_org, PERMF " ", "TRSACT_alloc: item-order output", EXIT0);
216
+ }
217
+ T->clms_end = MAX (T->clms_org, T->T.clms);
218
+
219
+ malloc2 (C->cperm, T->clms_org+1, "TRSACT_alloc: cperm", EXIT0);
220
+ ARY_FILL (C->cperm, 0, T->clms_org, T->clms_org+1);
221
+ FLOOP (t, 0, ttt){
222
+ tt = p? p[t]: t;
223
+ if ( tt >= T->clms_org ) continue;
224
+ if ( RANGE(T->w_lb, C->cw[tt], T->w_ub) && RANGE (T->clm_lb, C->clmt[tt], T->clm_ub)){
225
+ s += C->clmt[tt];
226
+ C->cperm[tt] = (pfname && !(T->flag&TRSACT_WRITE_PERM))? t: T->T.clms++;
227
+ T->non_empty_clms++;
228
+ } else C->cperm[tt] = T->clms_end+1; /* any value above clms_end marks a rejected item */
229
+ }
230
+ free2 (p);
231
+
232
+ // count valid rows/elements
233
+ if ( T->flag&(LOAD_SIZSORT+LOAD_WSORT) ){
234
+ if ( T->flag&LOAD_WSORT && C->rw )
235
+ p = qsort_perm_WEIGHT (C->rw, T->rows_org, (T->flag&LOAD_DECROWSORT)?-1:1);
236
+ else p = qsort_perm_FILE_COUNT_INT (C->rowt, T->rows_org, (T->flag&LOAD_DECROWSORT)?-1:1);
237
+ }
238
+ malloc2 (C->rperm, T->rows_org, "TRSACT_alloc: rperm", EXIT0);
239
+ FLOOP (t, 0, T->rows_org){ // compute #elements according to rowt, and set rperm
240
+ tt = p? p[t]: t;
241
+ if ( RANGE (T->row_lb, C->rowt[tt], T->row_ub) ){
242
+ C->rperm[tt] = T->T.t++;
243
+ T->T.eles += C->rowt[tt]; /* size of the accepted row tt itself; indexing by the sorted position t mis-sized the buffer when rows are both sorted and filtered */
244
+ } else C->rperm[tt] = T->rows_org+1; /* any value above rows_org marks a rejected row */
245
+ }
246
+
247
+ free2 (p); free2 (C->cw);
248
+ flag = (T->T.eles > s && !(T->flag & LOAD_TPOSE) );
249
+ if ( flag ) T->T.eles = s;
250
+
251
+ T->T.end = T->T.t * ((T->flag&LOAD_DBLBUF)? 2: 1)+1;
252
+ malloc2 (T->w, T->T.end, "TRSACT_alloc: T->w", EXIT0);
253
+ if ( TRSACT_NEGATIVE ) malloc2 (T->pw, T->T.end, "TRSACT_alloc: T->pw", EXIT0); /* NOTE(review): tests the constant, not T->flag&TRSACT_NEGATIVE, so pw is presumably always allocated; kept as is because code outside this file may rely on T->pw being non-NULL */
254
+ else T->pw = NULL;
255
+ malloc2 (T->trperm, T->T.t, "TRSACT_alloc: T->trperm", EXIT0);
256
+ malloc2 (T->T.v, T->T.end, "TRSACT_alloc: T->T.v", EXIT0);
257
+ malloc2 (buf, (T->T.eles+T->T.end+1)*T->T.unit, "TRSACT_alloc: T->T.buf", EXIT0); /* one extra slot per row holds the end sentinel written by TRSACT_sort */
258
+ T->T.buf = (QUEUE_INT *)buf;
259
+ calloc2 (T->perm, T->T.clms+1, "TRSACT_alloc: T->perm", EXIT0);
260
+ QUEUE_alloc (&T->jump, T->T.clms+1);
261
+ BASE_alloc (&T->buf, sizeof(QUEUE_INT), MAX((int)T->row_max*4,(int)(T->T.eles+T->T.end+1)/10+100));
262
+ BASE_alloc (&T->wbuf, sizeof(WEIGHT), MAX((int)T->row_max*4, (int)(T->T.eles+T->T.end+1)/10+100));
263
+ if ( T->flag&TRSACT_SHRINK ){
264
+ malloc2 (T->mark, T->T.end, "TRSACT_alloc: mark", EXIT0);
265
+ malloc2 (T->shift, T->T.end, "TRSACT_alloc: shift", EXIT0);
266
+ calloc2 (T->sc, T->T.clms, "TRSACT_alloc: sc", EXIT0);
267
+ }
268
+ if ( T->flag&TRSACT_MULTI_STREAM ){ /* the only allocation of head / strID; they are filled in by the stream loop near the end */
269
+ malloc2 (T->head, T->str_num+2, "TRSACT_alloc: haed", EXIT0);
270
+ malloc2 (T->strID, (T->flag&LOAD_TPOSE)?T->T.clms:T->T.end, "TRSACT_alloc:ID", EXIT0);
271
+ }
272
+ if ( T->flag&TRSACT_UNION )
273
+ calloc2 (T->T.w, T->T.end, "TRSACT_alloc: T->T.w", EXIT0);
274
+
275
+ if ( ERROR_MES ) return(0);
276
+
277
+ // set variables w.r.t rows
278
+ tt=0; FLOOP (t, 0, T->rows_org){
279
+ if ( C->rperm[t] <= T->rows_org ){
280
+ T->T.v[tt] = INIT_QUEUE;
281
+ T->trperm[tt] = t;
282
+ C->rperm[t] = tt; /* accepted rows are renumbered in file order here; TRSACT_sort applies the row ordering later */
283
+ T->w[tt] = C->rw? C->rw[t]: 1;
284
+ if ( T->pw ) T->pw[tt] = MAX (T->w[tt], 0);
285
+ if ( !flag ){
286
+ T->T.v[tt].v = (QUEUE_INT *)buf;
287
+ buf += (C->rowt[t]+1)*T->T.unit;
288
+ }
289
+ tt++;
290
+ }
291
+ }
292
+ free2 (C->rw);
293
+ // make the inverse perm of items
294
+ FLOOP (t, 0, T->clms_org)
295
+ if ( C->cperm[t] <= T->clms_end ) T->perm[C->cperm[t]] = t;
296
+
297
+ // set head of each stream, and stream ID of each transaction (T->head / T->strID were allocated above; allocating them again here leaked the first buffers)
302
+ org = (T->flag&LOAD_TPOSE)? T->clms_org: T->rows_org;
303
+ q = (T->flag&LOAD_TPOSE)? C->cperm: C->rperm;
304
+ ct = (T->flag&LOAD_TPOSE)? C->clmt: C->rowt;
305
+ h=1; tt=0; FLOOP (t, 0, org){
306
+ if ( q[t] <= org ){
307
+ if ( t == T->end1 && T->sep==0 ) T->sep = tt;
308
+ if ( t == T->sep && T->sep>0 ) T->sep = tt;
309
+ if ( T->strID ) T->strID[tt] = h;
310
+ tt++;
311
+ }
312
+ if ( T->head && ct[t]==0 ) T->head[h++] = tt+1; /* an empty original row ends the current stream */
313
+ }
314
+
315
+ T->new_t = T->T.t;
316
+ free2 (C->rowt); free2 (C->clmt);
317
+ return ( flag );
318
+ }
319
+
320
+
321
+ /* load the file to allocated memory according to permutation
+    Second pass over fp: rewinds it and appends every accepted element to the
+    transaction it belongs to, using the renumbering in C->rperm / C->cperm.
+    Nothing is freed here.
+    t:    in/out counter of file rows read so far; it is advanced once per
+          row, so a second file continues where the first one stopped.
+    flag: value returned by TRSACT_alloc; when non-zero, the buffer of row *t
+          is placed directly behind row *t-1 (first row at T->T.buf).
+    With LOAD_TPOSE the roles are exchanged: the number read selects the
+    transaction and the file row *t becomes the item. */
322
+ void TRSACT_file_read (TRSACT *T, FILE2 *fp, FILE_COUNT *C, VEC_ID *t, int flag){
323
+ QUEUE_INT item;
324
+
325
+ FILE2_reset (fp);
326
+ do {
327
+ if ( flag ) T->T.v[*t].v = *t? T->T.v[*t-1].v + T->T.v[*t-1].t +1: T->T.buf; /* +1 leaves a slot after the previous row for its end sentinel */
328
+ do {
329
+ item = (QUEUE_INT)FILE2_read_int (fp);
330
+ if ( (FILE_err&4)==0 ){
331
+ // printf ("%d %d %d %d\n", C->rperm[*t], T->rows_org, C->cperm[item], T->clms_org );
332
+ if ( T->flag&LOAD_TPOSE ){
333
+ if ( C->rperm[item]<=T->rows_org && C->cperm[*t]<=T->clms_end ) /* both the row and the column must have been accepted by TRSACT_alloc */
334
+ ARY_INS (T->T.v[ C->rperm[item] ], C->cperm[*t]);
335
+ } else if ( C->rperm[*t]<=T->rows_org && C->cperm[item]<=T->clms_end )
336
+ ARY_INS (T->T.v[ C->rperm[*t] ], C->cperm[item]);
337
+ }
338
+ } while ( (FILE_err&3)==0);
339
+ (*t)++;
340
+ } while ( (FILE_err&2)==0 );
341
+ }
342
+
343
+ /* sort the transactions and items according to the flag, allocate OQ, and database reduction
+    Steps, in order:
+     1. write the sentinel T->T.clms behind the last item of every transaction;
+     2. if flag is set and row sorting is requested, sort the rows and permute
+        T->T.v and T->trperm accordingly;
+     3. free C->rperm and C->cperm;
+     4. sort the items inside each transaction (LOAD_PERM / LOAD_INCSORT /
+        LOAD_DECSORT) and optionally remove duplicates (LOAD_RM_DUP);
+     5. with TRSACT_ALLOC_OCC or TRSACT_SHRINK, allocate the occurrence queues
+        T->OQ and fill OQ[clms] with all transaction IDs;
+     6. with TRSACT_SHRINK, mark and merge identical transactions.
+    flag: value returned by TRSACT_alloc. */
344
+ void TRSACT_sort (TRSACT *T, FILE_COUNT *C, int flag){
345
+ VEC_ID t, *p;
346
+ int f;
347
+ PERM pp;
348
+ QUEUE Q;
349
+ QUEUE_ID i;
350
+
351
+ FLOOP (t, 0, T->T.t)
352
+ T->T.v[t].v[T->T.v[t].t] = T->T.clms; /* end sentinel: a value no valid item can have */
353
+ if ( flag )
354
+ flag = (T->flag&(LOAD_SIZSORT+LOAD_WSORT)? ((T->flag&LOAD_DECROWSORT)? -1:1):0) *sizeof(QUEUE); /* sign = direction, magnitude = element size; presumably the encoding expected by qsort_perm__VECt -- TODO confirm */
355
+ if ( flag ){ // sort rows for the case that some columns are not read
356
+ qsort_perm__VECt ((VEC *)T->T.v, T->T.t, C->rperm, flag);
357
+ ARY_INVPERMUTE (T->T.v, C->rperm, Q, T->T.t, "TRSACT_sort: ARY_INVPERMUTE", EXIT);
358
+ ARY_INVPERMUTE_ (T->trperm, C->rperm, pp, T->T.t);
359
+ }
360
+ free2 (C->rperm); free2 (C->cperm);
361
+
362
+ if ( T->flag & LOAD_PERM ) flag = 1;
363
+ else flag = (T->flag&LOAD_INCSORT)? 1: ((T->flag&LOAD_DECSORT)? -1: 0);
364
+ if ( flag ) FLOOP (t, 0, T->T.t) qsort_QUEUE_INT (T->T.v[t].v, T->T.v[t].t, flag);
365
+ if ( T->flag & LOAD_RM_DUP ) FLOOP (t, 0, T->T.t) MQUE_RM_DUP (T->T.v[t]);
366
+ ST_MAX (T->row_max, i, T->T.v, t, 0, T->T.t); /* presumably row_max := largest .t over all transactions -- TODO confirm */
367
+
368
+ if ( T->flag&(TRSACT_ALLOC_OCC+TRSACT_SHRINK) ){
369
+ calloc2 (p, T->T.clms, "TRSACT_sort: p", EXIT); /* p[e] receives the number of occurrences of item e */
370
+ QUEUE_delivery (NULL, p, NULL, T->T.v, NULL, T->T.t, T->T.clms);
371
+ ARY_MAX (T->clm_max, i, p, 0, T->T.clms);
372
+ MQUE_ALLOC (T->OQ, T->T.clms, p, T->occ_unit, 1, EXIT);
373
+ QUEUE_alloc (&T->OQ[T->T.clms], MAX(T->T.t, T->clm_max));
374
+ FLOOP (i, 0, T->T.clms+1) T->OQ[i].end = 0; // end is illegally set to 0, for the use in "TRSACT_find_same"
375
+ ARY_INIT_PERM (T->OQ[T->T.clms].v, T->T.t); // initial occurrence := all transactions
376
+ T->OQ[T->T.clms].t = T->T.t;
377
+ free (p);
378
+ }
379
+
380
+ // shrinking database
381
+ if ( T->flag&TRSACT_SHRINK ){
382
+ Q = T->OQ[T->T.clms]; /* by-value copy: Q shares the buffer of OQ[clms], whose length is reset on the next line */
383
+ T->OQ[T->T.clms].t = 0;
384
+ TRSACT_find_same (T, &Q, T->T.clms);
385
+ f = T->flag; // preserve the flag
386
+ BITRM (T->flag ,TRSACT_MAKE_NEW +TRSACT_UNION +TRSACT_INTSEC);
387
+ TRSACT_merge_trsact (T, &T->OQ[T->T.clms], T->T.clms); // just remove duplicated trsacts
388
+ T->flag = f; // recover flag
389
+ T->OQ[T->T.clms].t = 0;
390
+ FLOOP (t, 0, T->T.t) if ( T->mark[t]>0 ) ARY_INS(T->OQ[T->T.clms], t); // make resulted occ
391
+ }
392
+
393
+ // QUEUE_delivery (T->OQ, NULL, NULL, T->T.v, &T->OQ[T->T.clms], T->T.t, T->T.clms);
394
+ }
395
+
396
+ /*****************************************/
397
+ /* load transaction file and its weight
+    Full loading pipeline: count (TRSACT_file_count), allocate (TRSACT_alloc),
+    read (TRSACT_file_read) and sort / reduce (TRSACT_sort).
+    fname:   transaction file (required).
+    fname2:  optional second transaction file appended after the first;
+             T->end1 records the number of rows counted in the first file.
+    wfname, wfname2: optional transaction-weight files for fname / fname2.
+    pfname:  optional item-order file, passed to TRSACT_alloc.
+    Errors are reported through ERROR_MES; on error T is released with
+    TRSACT_end. Every input file that was opened is closed again on all
+    paths that reach END, including the second file. */
398
+ /*****************************************/
399
+ void TRSACT_load (TRSACT *T, char *fname, char *fname2, char *wfname, char *wfname2, char *pfname){
400
+ FILE2 fp, fp2;
401
+ FILE_COUNT C = INIT_FILE_COUNT;
402
+ VEC_ID t=0;
403
+ int f, fp2_open=0; /* fp2_open: set only after fp2 was really opened, so END closes it exactly when needed */
404
+
405
+ FILE2_open (fp, fname, "r", "input-file open error", EXIT);
406
+ if ( fname2 ){ FILE2_open (fp2, fname2, "r", "input-file2 open error", goto END); fp2_open = 1; } /* goto END instead of EXIT so that the already opened fp is closed */
407
+ TRSACT_file_count (T, &C, &fp, wfname); if (ERROR_MES) goto END;
408
+ T->end1 = T->rows_org;
409
+ if ( fname2 ) TRSACT_file_count (T, &C, &fp2, wfname2); if (ERROR_MES) goto END;
410
+ f = TRSACT_alloc (T, pfname, &C); if (ERROR_MES) goto END;
411
+ TRSACT_file_read (T, &fp, &C, &t, f); if (ERROR_MES) goto END;
412
+ if ( fname2 ) TRSACT_file_read (T, &fp2, &C, &t, f); if (ERROR_MES) goto END;
413
+ TRSACT_sort (T, &C, f);
414
+
415
+ END:;
416
+ FILE2_close (&fp);
+ if ( fp2_open ) FILE2_close (&fp2); /* the second input file was never closed before */
417
+ if (ERROR_MES) TRSACT_end (T);
418
+ return;
419
+ }
420
+
421
+ /* iteration of delivery; operate one transaction
+    For each item *x visited in transaction t, adds the weight of t to w[*x]
+    (and its positive part to pw[*x] when T->flag has TRSACT_NEGATIVE).
+    m bounds the items visited through MQUE_MLOOP -- presumably items below m
+    only; TODO confirm against the macro. */
422
+ /* use OQ.end to count the number of items: T->OQ[*x].end is incremented per visit; an item whose counter
+    is still 0 is seen for the first time, so it is pushed on jump and its w / pw entries are zeroed */
423
+ /* jump is NOT cleared here; TRSACT_delivery resets it (t := s) before iterating */
424
+ void TRSACT_delivery_iter (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, VEC_ID t, QUEUE_INT m){
425
+ WEIGHT *y=0;
426
+ QUEUE_INT *x;
427
+ int f = T->flag&TRSACT_NEGATIVE;
428
+
429
+ if ( T->T.w ) y = T->T.w[t]; /* per-item weights of t, if the database has them */
430
+ MQUE_MLOOP (T->T.v[t], x, m){
431
+ if ( T->OQ[*x].end == 0 ){ ARY_INS (*jump, *x); w[*x] = 0; if ( f ) pw[*x] = 0; }
432
+ T->OQ[*x].end++;
433
+ if ( y ){
434
+ w[*x] += *y; if ( *y>0 && f) pw[*x] += *y;
435
+ y++; /* y walks in step with x */
436
+ } else {
437
+ w[*x] += T->w[t]; if ( f ) pw[*x] += T->pw[t];
438
+ }
439
+ }
440
+ }
441
+
442
+ void TRSACT_delivery (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, QUEUE *occ, QUEUE_INT m){ /* Accumulate item
+    weights over a set of transactions by calling TRSACT_delivery_iter once
+    per transaction. jump is emptied first (t := s) and afterwards lists the
+    items that were touched; w / pw receive the weight sums.
+    occ: transactions to process (entries are T->occ_unit bytes apart and
+         start with the transaction ID); NULL means all of 0..T->T.t-1.
+    m:   passed through to TRSACT_delivery_iter. */
443
+ VEC_ID i, t;
444
+ char *b = (char *)(occ?occ->v: NULL);
445
+ jump->t = jump->s;
446
+ FLOOP (i, occ?occ->s:0, occ?occ->t:T->T.t){ /* NOTE(review): the counter starts at occ->s but b starts at occ->v, so entries are read from the head of the buffer -- confirm callers always pass occ->s==0 */
447
+ t = occ? *((QUEUE_INT *)b): i;
448
+ TRSACT_delivery_iter (T, jump, w, pw, t, m);
449
+ b += T->occ_unit;
450
+ }
451
+ }
452
+
453
+ /* usual delivery (make transpose) with checking sc
454
+ don't touch jump
+    For every transaction t taken from occ (or all transactions when occ is
+    NULL) and every item *x visited in it, t is appended to the occurrence
+    queue T->OQ[*x]. m bounds the visited items through MQUE_MLOOP.
+    The two loops differ only in the sc test, which is hoisted out of the
+    loop by testing the flag once. */
455
+ /* if (T->flag&TRSACT_DELIV_SC), do not stack to items e with non-zero T->sc[e] */
456
+ void TRSACT_deliv (TRSACT *T, QUEUE *occ, QUEUE_INT m){
457
+ VEC_ID i, t;
458
+ QUEUE_INT *x;
459
+ char *b = (char *)(occ?occ->v: NULL); /* byte cursor over occ; entries are T->occ_unit bytes apart */
460
+ if ( T->flag&TRSACT_DELIV_SC ){
461
+ FLOOP (i, occ?occ->s:0, occ?occ->t:T->T.t){
462
+ t = occ? *((QUEUE_INT *)b): i;
463
+ MQUE_MLOOP (T->T.v[t], x, m)
464
+ if ( !T->sc[*x] ) ARY_INS (T->OQ[*x], t);
465
+ b += T->occ_unit;
466
+ }
467
+ } else {
468
+ FLOOP (i, occ?occ->s:0, occ?occ->t:T->T.t){
469
+ t = occ? *((QUEUE_INT *)b): i;
470
+ MQUE_MLOOP (T->T.v[t], x, m) ARY_INS (T->OQ[*x], t);
471
+ b += T->occ_unit;
472
+ }
473
+ }
474
+ }
475
+
476
+ /**************************************************************/
477
+ /* Find identical transactions in a subset of transactions, by radix-sort like method:
+    transactions are distributed into the bucket T->OQ[e] of their next item e, and each bucket is refined in turn */
478
+ /* items e with non-zero T->sc[e] are skipped, and an item >= end terminates the scan of a transaction (it is then put in bucket T->T.clms) */
479
+ /* INPUT: T:transactions, occ:subset of T represented by indices, end:first item value that is ignored */
480
+ /* OUTPUT: if transactions i1, i2,..., ik are the same, they have same value in T->mark[i]
481
+ (not all) isolated transaction may have mark 1. occ->t is restored on return unless occ is the bucket T->OQ[T->T.clms] itself */
482
+ /* use 0 to end-1 of QQ temporary, and QQ[i].t and QQ[i].s have to be 0. T->shift and T->jump are used as work space */
483
+ /*************************************************************************/
484
+ void TRSACT_find_same (TRSACT *T, QUEUE *occ, QUEUE_INT end){
485
+ VEC_ID mark=0, t_end;
486
+ QUEUE *o=occ, *Q = T->T.v, *EQ, *QQ = T->OQ;
487
+ QUEUE_INT *x, *y, e;
488
+ QUEUE_ID ot = occ->t; /* saved because the loop below empties the queue it works on */
489
+
490
+ // initialization
491
+ MQUE_FLOOP (*occ, x){ T->mark[*x] = mark; T->shift[*x] = Q[*x].v; } /* shift[t] = read cursor into transaction t, starting at its first item */
492
+ T->jump.t = T->jump.s; QQ[T->T.clms].s = 0;
493
+
494
+ while (1){ /* o is the bucket being refined; the first round refines occ itself */
495
+ if ( o->t - o->s == 1 ) T->mark[o->v[--o->t]] = 1; // no same transactions; mark by 1
496
+ if ( o->t == 0 ) goto END;
497
+ // if previously inserted transactions are in different group, then change their marks with incrementing mark by one
498
+ mark++; for (x=&o->v[o->s] ; x <&o->v[o->t] ; x++) T->mark[*x] = mark;
499
+ t_end = o->t;
500
+ o->s = o->t = 0;
501
+
502
+ // insert each t to buckets
503
+ for (x=o->v ; x<o->v+t_end ; x++){
504
+ // get next item in transaction t
505
+ do {
506
+ e = *(T->shift[*x]);
507
+ T->shift[*x]++;
508
+ if ( e >= end ){ e = T->T.clms; break; }
509
+ } while ( T->sc[e] );
510
+ EQ = &QQ[e];
511
+ // if previously inserted transactions are in different group, then change their mark to the transaction ID of top transaction.
512
+ y = &(EQ->v[EQ->s]);
513
+ if ( EQ->s < EQ->t && T->mark[*y] != T->mark[*x] ){
514
+ if ( EQ->t - EQ->s == 1 ) T->mark[EQ->v[--EQ->t]] = 1; // the tail of the queue has no same transaction; mark the tail by 1
515
+ else {
516
+ mark++; for ( ; y< EQ->v + EQ->t ; y++) T->mark[*y] = mark;
517
+ EQ->s = EQ->t; /* close the finished group; the next group starts here */
518
+ }
519
+ } else if ( EQ->t == 0 && e<T->T.clms ) ARY_INS (T->jump, e); /* first use of bucket e: remember it for a later refinement round */
520
+ ARY_INS (*EQ, *x); // insert t to bucket of e
521
+ }
522
+ END:;
523
+ if ( QUEUE_LENGTH_(T->jump) == 0 ) break;
524
+ o = &QQ[QUEUE_ext_tail_ (&T->jump)];
525
+ }
526
+
527
+ // same transactions are in the bucket T->T.clms
528
+ if ( QQ[T->T.clms].t -QQ[T->T.clms].s == 1 ) T->mark[QQ[T->T.clms].v[--QQ[T->T.clms].t]] = 1;
529
+ if ( occ != &QQ[T->T.clms] ) occ->t = ot;
530
+ }
531
+
532
+
533
+ /****************************************************************************/
534
+ /* copy transaction t to tt (only items i s.t. sc[i]==0)
+    The copy is stored in memory taken from T->buf (items) and T->wbuf (item
+    weights) and is terminated by the sentinel T->T.clms. "end" bounds the
+    items visited through MQUE_MLOOP. The weights T->w[t] (and T->pw[t] with
+    TRSACT_NEGATIVE) are copied to tt. **/
535
+ /* T->w has to be allocated. item weights are allocated for tt when T->T.w[t] is non-NULL or TRSACT_UNION is set;
+    if T->T.w[t] is NULL each copied item gets the transaction weight T->w[t].
+    On a memory error (ERROR_MES) the function returns without writing tt, restoring T->buf if needed */
536
+ /****************************************************************************/
537
+ void TRSACT_copy (TRSACT *T, VEC_ID tt, VEC_ID t, QUEUE_INT end){
538
+ QUEUE_INT *x, *buf;
539
+ WEIGHT *wbuf = NULL, tw = T->w[t], *w = T->T.w? T->T.w[t]: NULL;
540
+ int bnum = T->buf.num, bblock = T->buf.block_num, wflag = (w || (T->flag&TRSACT_UNION)); /* bnum / bblock: state of T->buf before the request, for roll-back */
541
+
542
+ buf = (QUEUE_INT *)BASE_get_memory (&T->buf, T->T.v[t].t+1); /* room for all items of t plus the sentinel */
543
+ if ( ERROR_MES ) return;
544
+ if ( wflag ) T->T.w[tt] = wbuf = (WEIGHT *)BASE_get_memory (&T->wbuf, T->T.v[t].t+1);
545
+ if ( ERROR_MES ){ T->buf.num = bnum; T->buf.block_num = bblock; return; } /* undo the item-buffer request made above */
546
+ T->T.v[tt].v = buf;
547
+ T->w[tt] = T->w[t];
548
+ if ( T->flag&TRSACT_NEGATIVE ) T->pw[tt] = T->pw[t];
549
+ MQUE_MLOOP (T->T.v[t], x, end){
550
+ if ( !T->sc[*x] ){
551
+ *buf = *x; buf++;
552
+ if ( wflag ){ *wbuf = w? *w: tw; wbuf++; }
553
+ }
554
+ if ( w ) w++; /* source weights advance for skipped items too */
555
+ }
556
+ T->T.v[tt].t = (VEC_ID)(buf - T->T.v[tt].v);
557
+ *buf = T->T.clms;
558
+ T->buf.num = (int)(buf - ((QUEUE_INT *)T->buf.base[T->buf.block_num]) + 1); /* hand the unused tail of the request back to the buffer */
559
+ if ( wflag ) T->wbuf.num = (int)(wbuf - ((WEIGHT *)T->wbuf.base[T->wbuf.block_num]) + 1);
560
+ }
561
+
562
+ /****************************************************************************/
563
+ /* intersection of transaction t and tt (only items i s.t. sc[i]==0)
+    Only the suffixes starting at T->shift[tt] and T->shift[t] are
+    intersected; the result overwrites the suffix of tt in place, followed by
+    the sentinel T->T.clms, and T->T.v[tt].t is updated. The walk assumes both
+    suffixes are in increasing order and end with a value >= T->T.clms. **/
564
+ /* shift is the array of pointers that indicate the start of the suffix of each transaction **/
565
+ /****************************************************************************/
566
+ void TRSACT_suffix_and (TRSACT *T, VEC_ID tt, VEC_ID t){
567
+ QUEUE_INT *x=T->shift[tt], *y=T->shift[t], *xx=T->shift[tt]; /* x, y: read cursors; xx: write cursor into tt (never ahead of x) */
568
+ while ( *x < T->T.clms && *y < T->T.clms ){
569
+ if ( *x > *y ) y++;
570
+ else {
571
+ if ( *x == *y ){
572
+ if ( !T->sc[*x] ){ *xx = *x; xx++; }
573
+ y++;
574
+ }
575
+ x++;
576
+ }
577
+ }
578
+ T->T.v[tt].t = (VEC_ID)(xx - T->T.v[tt].v);
579
+ *xx = T->T.clms;
580
+ T->buf.num = (int)(xx - ((QUEUE_INT *)T->buf.base[T->buf.block_num]) + 1); /* the buffer's used length now ends right after tt; this presumes tt lies in the current block -- TODO confirm */
581
+ }
582
+
583
+
584
+ /***************************************************************************/
585
+ /* take union of transaction t to tt (only items i s.t. sc[i]==0); the weights of items present in both are added */
586
+ /* CAUTION: tt has to be placed at the end of the used part of T->buf. */
587
+ /* if the size of tt increases, the memory following it will be overwritten */
588
+ /* if memory (T->buf, T->wbuf) is short, nothing is merged and the function returns with ERROR_MES set (the return type is void) */
589
+ /* T->T.w[t] can be NULL (then T->w[t] is used for every item of t), but T->T.w[tt] can not */
590
+ /***************************************************************************/
591
+ void TRSACT_itemweight_union (TRSACT *T, VEC_ID tt, VEC_ID t){
592
+ int bnum = T->buf.num, bblock = T->buf.block_num;
593
+ QUEUE_ID siz = T->T.v[tt].t +T->T.v[t].t; /* upper bound on the size of the union */
594
+ QUEUE_INT *xx_end = T->T.v[tt].v + siz, *xx = xx_end; /* the union is built backwards from xx_end */
595
+ QUEUE_INT *x = T->T.v[tt].v + T->T.v[tt].t-1, *y = T->T.v[t].v + T->T.v[t].t-1; /* x, y: last items of tt and t */
596
+ WEIGHT *ww = T->T.w[tt] +siz, *wx = T->T.w[tt] +T->T.v[tt].t-1, *wy = T->T.w[t] +T->T.v[t].t-1; /* NOTE(review): wy is computed from T->T.w[t] even when that is NULL; it is only dereferenced when wf is set */
597
+ WEIGHT tw = T->w[t];
598
+ int flag=0, wf = (T->T.w[t]!=NULL);
599
+
600
+ // if sufficiently large memory can not be taken from the current memory block, use the next block
601
+ if ( xx_end >= (QUEUE_INT *)T->buf.base[T->buf.block_num] +T->buf.block_siz ){
602
+ xx_end = xx = ((QUEUE_INT*)BASE_get_memory (&T->buf, T->buf.block_siz)) +siz;
603
+ if (ERROR_MES) return;
604
+ ww = ((WEIGHT *)BASE_get_memory (&T->wbuf, T->wbuf.block_siz)) +siz;
605
+ if ( ERROR_MES ){ T->buf.num = bnum; T->buf.block_num = bblock; return; } /* roll back the item-buffer request */
606
+ flag =1;
607
+ }
608
+ if ( ERROR_MES ) return;
609
+
610
+ // take union and store it in the allocated memory
611
+ while ( x >= T->T.v[tt].v && y >= T->T.v[t].v ){ /* backward merge of two increasing lists, largest item first */
612
+ if ( *x > *y ){
613
+ if ( !T->sc[*x] ){ *xx = *x; *ww = *wx; xx--; ww--; }
614
+ x--; wx--;
615
+ if ( x < T->T.v[tt].v ){ /* tt exhausted: flush the rest of t */
616
+ while ( y >= T->T.v[t].v ){
617
+ if ( !T->sc[*y] ){ *xx = *y; *ww = wf? *wy: tw; xx--; ww--; }
618
+ y--; wy--;
619
+ }
620
+ }
621
+ } else {
622
+ if ( !T->sc[*y] ){
623
+ *ww = wf? *wy: tw; *xx = *y;
624
+ if ( *x == *y ){ *ww += *wx; x--; wx--; } /* common item: weights are summed */
625
+ xx--; ww--;
626
+ }
627
+ y--; wy--;
628
+ if ( y < T->T.v[t].v ){ /* t exhausted: flush the rest of tt */
629
+ while ( x >= T->T.v[tt].v ){
630
+ if ( !T->sc[*x] ){ *xx = *x; *ww = *wx; xx--; ww--; }
631
+ x--; wx--;
632
+ }
633
+ }
634
+ }
635
+ }
636
+ T->T.v[tt].t = (VEC_ID)(xx_end -xx);
637
+
638
+ // if [tt].v will overflow, set [tt].v to the top of next memory block
639
+ if ( flag ){
640
+ if ( T->T.v[tt].v + T->T.v[tt].t+1 >= (QUEUE_INT *)T->buf.base[T->buf.block_num-1] +T->buf.block_siz ){
641
+ T->T.v[tt].v = (QUEUE_INT *)T->buf.base[T->buf.block_num];
642
+ T->T.w[tt] = (WEIGHT *)T->wbuf.base[T->wbuf.block_num];
643
+ } else { // new memory block is allocated, but the transaction fits in the previous block
644
+ T->buf.block_num--;
645
+ T->wbuf.block_num--;
646
+ }
647
+ }
648
+
649
+ // copy the union to the original position
650
+ for ( x=T->T.v[tt].v,wx=T->T.w[tt] ; xx<xx_end ; ){
651
+ xx++; ww++; /* xx / ww point one slot before the first stored element, so advance before reading */
652
+ *x = *xx; *wx = *ww;
653
+ x++; wx++;
654
+ }
655
+ *x = T->T.clms;
656
+ T->wbuf.num = T->buf.num = (int)(x - ((QUEUE_INT *)T->buf.base[T->buf.block_num]) +1);
657
+ return;
658
+ }
659
+
660
+
661
+
662
+ /*****/
663
+ /* merge duplicated transactions in o according to those having same value in T->mark
664
+ the mark except for the representative will be zero, for each group of the same transactions
665
+ the mark of the representative will be its (new) ID +2 (marks 0 and 1 are reserved: 0 = merged away, 1 = has no identical transaction).
+ Members of a group are expected to be consecutive in o: a transaction is merged when its mark equals that of the current representative.
+ The weights T->w (and T->pw if present) of merged transactions are added to the representative tt. */
666
+ /* T->flag&TRSACT_MAKE_NEW: make new trsact for representative (ID taken from T->new_t; falls back to *x itself if the copy fails)
667
+ T->flag&TRSACT_INTSEC: take suffix intersection of the same trsacts
668
+ T->flag&TRSACT_UNION: take union of the same trsacts; if memory is short the transaction is not merged and becomes a representative of its own */
669
+ /* o will be cleared after the execution */
670
+ void TRSACT_merge_trsact (TRSACT *T, QUEUE *o, QUEUE_INT end){
671
+ VEC_ID mark = 0, tt=0; /* mark: mark value of the current group; tt: ID of its representative */
672
+ QUEUE_INT *x;
673
+
674
+ MQUE_FLOOP (*o, x){
675
+ if ( mark == T->mark[*x] ){
676
+ T->mark[*x] = 0; // mark of unified (deleted) transaction
677
+ T->w[tt] += T->w[*x]; if ( T->pw ) T->pw[tt] += T->pw[*x];
678
+ if ( T->flag & TRSACT_INTSEC ){
679
+ TRSACT_suffix_and (T, tt, *x);
680
+ T->buf.num = (int)(T->T.v[tt].v - (QUEUE_INT *)T->buf.base[T->buf.block_num] +T->T.v[tt].t +1);
681
+ }
682
+ if ( T->flag & TRSACT_UNION ){
683
+ TRSACT_itemweight_union (T, tt, *x);
684
+ if ( ERROR_MES ) T->mark[*x] = *x+2; // do not merge if not enough memory
685
+ }
686
+ }
687
+ if ( mark != T->mark[*x] && T->mark[*x] > 1 ){ // *x is not the same to the previous, or memory short
688
+ mark = T->mark[*x];
689
+ if ( T->flag&TRSACT_MAKE_NEW ){
690
+ tt = T->new_t++;
691
+ TRSACT_copy (T, tt, *x, (T->flag&(TRSACT_INTSEC+TRSACT_UNION))? T->T.clms: end);
692
+ if ( ERROR_MES ){ T->new_t--; tt = *x; }
693
+ else for (T->shift[tt]=T->T.v[tt].v ; *(T->shift[tt])<end ; T->shift[tt]++); /* empty loop body: moves shift[tt] to the first item >= end */
694
+ } else tt = *x;
695
+ T->mark[*x] = tt+2;
696
+ }
697
+ }
698
+ o->t = o->s = 0;
699
+ }
700
+
701
+ /* remove the unified transactions from occ (consider T->occ_unit)
+    Compacts occ in place according to T->mark: entries whose transaction
+    has mark 0 are dropped; an entry with mark > 1 has its ID replaced by
+    mark-2 (the representative's ID, as set by TRSACT_merge_trsact); an
+    entry with mark 1 keeps its ID. occ->t becomes the number of entries
+    kept. When entries are wider than a QUEUE_INT, the whole entry of
+    T->occ_unit bytes is moved and only its leading ID is rewritten. */
702
+ void TRSACT_reduce_occ (TRSACT *T, QUEUE *occ){
703
+ QUEUE_INT *x, *y=occ->v; /* x: read cursor, y: write cursor (never ahead of x) */
704
+ QUEUE_ID i=0;
705
+ if ( T->occ_unit == sizeof(QUEUE_INT) ){
706
+ MQUE_FLOOP (*occ, x){
707
+ if ( T->mark[*x] == 0 ) continue;
708
+ *y = T->mark[*x]>1? T->mark[*x]-2: *x;
709
+ y++; i++;
710
+ }
711
+ } else {
712
+ MQUE_FLOOP_ (*occ, x, T->occ_unit){
713
+ if ( T->mark[*x] == 0 ) continue;
714
+ memcpy (y, x, T->occ_unit); /* carry the payload that follows the ID */
715
+ *y = T->mark[*x]>1? T->mark[*x]-2: *x;
716
+ y = (QUEUE_INT *)(((char *)y)+T->occ_unit);
717
+ i++;
718
+ }
719
+ }
720
+ occ->t = i;
721
+ }
722
+
723
+ #endif