nysol-zdd 3.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/ext/zdd_so/BDD.cc +495 -0
  3. data/ext/zdd_so/BDD.h +356 -0
  4. data/ext/zdd_so/BDDDG.cc +1818 -0
  5. data/ext/zdd_so/BDDDG.h +107 -0
  6. data/ext/zdd_so/BDDHASH.cc +91 -0
  7. data/ext/zdd_so/BtoI.cc +503 -0
  8. data/ext/zdd_so/BtoI.h +144 -0
  9. data/ext/zdd_so/CtoI.cc +1072 -0
  10. data/ext/zdd_so/CtoI.h +186 -0
  11. data/ext/zdd_so/MLZBDDV.cc +153 -0
  12. data/ext/zdd_so/MLZBDDV.h +42 -0
  13. data/ext/zdd_so/SOP.cc +608 -0
  14. data/ext/zdd_so/SOP.h +199 -0
  15. data/ext/zdd_so/ZBDD.cc +1035 -0
  16. data/ext/zdd_so/ZBDD.h +243 -0
  17. data/ext/zdd_so/ZBDDDG.cc +1834 -0
  18. data/ext/zdd_so/ZBDDDG.h +105 -0
  19. data/ext/zdd_so/ZBDDHASH.cc +91 -0
  20. data/ext/zdd_so/bddc.c +2816 -0
  21. data/ext/zdd_so/bddc.h +132 -0
  22. data/ext/zdd_so/extconf.rb +25 -0
  23. data/ext/zdd_so/include/aheap.c +211 -0
  24. data/ext/zdd_so/include/aheap.h +111 -0
  25. data/ext/zdd_so/include/base.c +93 -0
  26. data/ext/zdd_so/include/base.h +60 -0
  27. data/ext/zdd_so/include/itemset.c +473 -0
  28. data/ext/zdd_so/include/itemset.h +153 -0
  29. data/ext/zdd_so/include/problem.c +371 -0
  30. data/ext/zdd_so/include/problem.h +160 -0
  31. data/ext/zdd_so/include/queue.c +518 -0
  32. data/ext/zdd_so/include/queue.h +177 -0
  33. data/ext/zdd_so/include/sgraph.c +331 -0
  34. data/ext/zdd_so/include/sgraph.h +170 -0
  35. data/ext/zdd_so/include/stdlib2.c +832 -0
  36. data/ext/zdd_so/include/stdlib2.h +746 -0
  37. data/ext/zdd_so/include/trsact.c +723 -0
  38. data/ext/zdd_so/include/trsact.h +167 -0
  39. data/ext/zdd_so/include/vec.c +583 -0
  40. data/ext/zdd_so/include/vec.h +159 -0
  41. data/ext/zdd_so/lcm-vsop.cc +596 -0
  42. data/ext/zdd_so/print.cc +683 -0
  43. data/ext/zdd_so/table.cc +330 -0
  44. data/ext/zdd_so/vsop.h +88 -0
  45. data/ext/zdd_so/zdd_so.cpp +3277 -0
  46. data/lib/nysol/zdd.rb +31 -0
  47. metadata +131 -0
@@ -0,0 +1,167 @@
1
+ /* QUEUE based Transaction library, including database reduction.
2
+ 25/Nov/2007 by Takeaki Uno e-mail:uno@nii.jp,
3
+ homepage: http://research.nii.ac.jp/~uno/index.html */
4
+ /* This program is available for only academic use, basically.
5
+ Anyone can modify this program, but he/she has to write down
6
+ the change of the modification on the top of the source code.
7
+ Neither contact nor appointment to Takeaki Uno is needed.
8
+ If one wants to re-distribute this code, do not forget to
9
+ refer the newest code, and show the link to homepage of
10
+ Takeaki Uno, to notify the news about the codes for the users.
11
+ For the commercial use, please make a contact to Takeaki Uno. */
12
+
13
+ #ifndef _trsact_h_
14
+ #define _trsact_h_
15
+
16
+ // #define WEIGHT double
17
+ // #define WEIGHT_DOUBLE
18
+
19
+ #include"vec.h"
20
+ #include"base.h"
21
+
22
+ #ifndef WEIGHT
23
+ #define WEIGHT int
24
+ #ifdef WEIGHT_DOUBLE
25
+ #undef WEIGHT_DOUBLE
26
+ #endif
27
+ #endif
28
+
29
+ typedef struct {
30
+ unsigned char type; // mark to identify type of the structure
31
+ SETFAMILY T; // the transactions themselves, stored as a set family
32
+ int flag; // option bits; tested against the TRSACT_* flags (e.g. T->flag&TRSACT_UNION)
33
+ WEIGHT *w, *pw; // weight/positive-weight of transactions
34
+
35
+ QUEUE_INT clms_org, clm_max, clms_end, non_empty_clms; // #items in original file, max size of clms, and max of (original item, internal item); NOTE(review): four fields, three descriptions -- non_empty_clms is undocumented here
36
+ VEC_ID rows_org, row_max; // #transactions in the original file
37
+ VEC_ID end1, sep; // #trsact in 1st file, the ID of the last permed trsact of 1st file
38
+ size_t eles_org; // #elements in the original file
39
+ WEIGHT total_w, total_pw, total_w_org, total_pw_org; // sums of (positive) transaction weights; the _org variants presumably refer to the original file -- confirm
40
+ WEIGHT th; // threshold for frequency of items
41
+ PERM *perm, *trperm; // original item permutation loaded from permutation file (and inverse)
42
+
43
+ // lower/upper bound of #elements in a column/row. A column or row out of range will be ignored
44
+ VEC_ID clm_lb, clm_ub;
45
+ QUEUE_ID row_lb, row_ub;
46
+ WEIGHT w_lb, w_ub; // NOTE(review): presumably lower/upper bound on weights -- confirm against trsact.c
47
+
48
+ VEC_ID str_num; // number of database (itemset stream/string datasets) in T
49
+ VEC_ID *head, *strID; // the head (beginning) of each stream, stream ID of each transaction
50
+ int occ_unit; // consulted by TRSACT_reduce_occ; exact meaning not visible in this header
51
+
52
+ // for finding same transactions
53
+ QUEUE jump, *OQ; // queue of non-empty buckets, used in find_same_transactions
54
+ VEC_ID *mark; // marks for transactions
55
+ QUEUE_INT **shift; // memory for shift positions of each transaction
56
+ char *sc; // flag for non-active (in-frequent) items
57
+
58
+ // for extra transactions
59
+ VEC_ID new_t; // the start ID of un-used transactions
60
+ BASE buf; // buffer for transaction
61
+ BASE wbuf; // buffer for itemweights
62
+ } TRSACT;
63
+
64
+ #define TRSACT_FRQSORT 65536 // sort transactions in decreasing order
65
+ #define TRSACT_ITEMWEIGHT 131072 // initialize itemweight by transaction weights
66
+ #define TRSACT_SHRINK 262144 // do not allocate memory for shrink, but do for mining
67
+ #define TRSACT_MULTI_STREAM 524288 // separate the datasets at each empty transaction
68
+ #define TRSACT_UNION 1048576 // take union of transactions, at the database reduction
69
+ #define TRSACT_INTSEC 2097152 // take intersection of transactions, at the database reduction
70
+ #define TRSACT_MAKE_NEW 4194304 // make new transaction for each
71
+ #define TRSACT_ALLOC_OCC 8388608 // presumably: allocate memory for occurrences (the original comment duplicated TRSACT_MAKE_NEW's; confirm)
72
+ #define TRSACT_DELIV_SC 16777216 // look T->sc when delivery
73
+ #define TRSACT_NEGATIVE 33554432 // flag for whether some transaction weights are negative or not
74
+ //#define TRSACT_INIT_SHRINK 65536 // allocate memory for database reduction
75
+ #define TRSACT_WRITE_PERM 67108864 // write item-order to file
76
+
77
+ #ifndef TRSACT_DEFAULT_WEIGHT
78
+ #define TRSACT_DEFAULT_WEIGHT 0 // default weight of the transaction, for missing weights in weight file
79
+ #endif
80
+
81
+ /* print transactions */
82
+ void TRSACT_print (TRSACT *T, QUEUE *occ, PERM *p);
83
+ void TRSACT_prop_print (TRSACT *T);
84
+
85
+ /**************************************************************/
86
+ void TRSACT_init (TRSACT *T);
87
+
88
+ /**************************************************************/
89
+ void TRSACT_end (TRSACT *T);
90
+
91
+ /*****************************************/
92
+ /* scan file "fp" with weight file wfp and count #items, #transactions in the file. */
93
+ /* count weight only if wfp!=NULL */
94
+ /* T->rows_org, clms_org, eles_org := #transactions, #items, #all elements */
95
+ /* ignore the transactions of size not in range T->clm_lb - clm_ub */
96
+ /* T->total_w, total_pw := sum of (positive) weights of transactions */
97
+ /* C.clmt[i],C.cw[i] := the number/(sum of weights) of transactions including i */
98
+ /****************************************/
99
+ void TRSACT_file_count (TRSACT *T, FILE_COUNT *C, FILE2 *fp, char *wf);
100
+
101
+ /* allocate memory, set permutation, and free C.clmt,rowt,rw,cw */
102
+ int TRSACT_alloc (TRSACT *T, char *pfname, FILE_COUNT *C);
103
+
104
+ /* load the file to allocated memory according to permutation, and free C.rw, C.cw */
105
+ void TRSACT_file_read (TRSACT *T, FILE2 *fp, FILE_COUNT *C, VEC_ID *t, int flag);
106
+
107
+ /*****************************************/
108
+ /* load transaction file to TRSACT */
109
+ void TRSACT_load (TRSACT *T, char *fname, char *fname2, char *wfname, char *wfname2, char *pfname);
110
+
111
+ /* occurrence deliver (only counting) */
112
+ /* WARNING: next cell of the last item of each transaction must be INTHUGE */
113
+ /* compute occurrence for items less than max item, in the database induced
114
+ by occ */
115
+ /* if jump!=0, all i with non-zero occ[i].t will be inserted to jump */
116
+ /* be careful for overflow of jump */
117
+ /* if occ==NULL, scan all transactions */
118
+ /* flag&1: count only positive weights */
119
+ void TRSACT_delivery_iter (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, VEC_ID t, QUEUE_INT m);
120
+ void TRSACT_delivery (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, QUEUE *occ, QUEUE_INT m);
121
+ // QUEUE *TRSACT_alloc_occ (TRSACT *T, QUEUE_INT end);
122
+ //QUEUE_ID TRSACT_occ_dup (SETFAMILY *S, QUEUE *OQ, QUEUE *jump, WEIGHT *occ_w, WEIGHT *occ_pw);
123
+
124
+ /**************************************************************/
125
+ /* Find identical transactions in a subset of transactions, by radix-sort like method */
126
+ /* infrequent items (refer LCM_occ) and items larger than item_max are ignored */
127
+ /* INPUT: T:transactions, occ:subset of T represented by indices, result:array for output, item_max:largest item not to be ignored */
128
+ /* OUTPUT: if transactions i1, i2,..., ik are the same, they have same value in T->mark[i]
129
+ (not all) isolated transaction may have mark 1 */
130
+ /* use 0 to end-1 of QQ temporary, and QQ[i].t and QQ[i].s have to be 0. */
131
+ /*************************************************************************/
132
+ void TRSACT_find_same (TRSACT *T, QUEUE *occ, QUEUE_INT end);
133
+
134
+ /* copy transaction t to tt (only items i s.t. pw[i]>=th) **/
135
+ void TRSACT_copy (TRSACT *T, VEC_ID tt, VEC_ID t, QUEUE_INT end);
136
+
137
+ /* intersection of transaction t and tt (only items i s.t. pw[i]>=th) **/
138
+ /* shift is the array of pointers indicates the start of each transaction **/
139
+ void TRSACT_suffix_and (TRSACT *T, VEC_ID tt, VEC_ID t);
140
+
141
+ /* take union of transaction t to tt (only items i s.t. pw[i]>=th) */
142
+ /* CAUTION: t has to be placed at the last of trsact_buf2. */
143
+ /* if the size of t increases, the following memory will be overwritten */
144
+ /* if memory (T->buf) is short, do nothing and return 1 */
145
+ void TRSACT_itemweight_union (TRSACT *T, VEC_ID tt, VEC_ID t);
146
+
147
+
148
+ /*****/
149
+ /* remove duplicated transactions from occ, and add the weight of the removed trsacts to the representative one */
150
+ /* duplicated trsacts are in occ[item_max]. Clear the queue when return */
151
+ /* T->flag&TRSACT_MAKE_NEW: make new trsact for representative
152
+ T->flag&TRSACT_INTSEC: take suffix intersection of the same trsacts
153
+ T->flag&TRSACT_UNION: take union of the same trsacts */
154
+ void TRSACT_merge_trsact (TRSACT *T, QUEUE *o, QUEUE_INT end);
155
+
156
+ /* remove the unified transactions from occ (consider T->occ_unit) */
157
+ void TRSACT_reduce_occ (TRSACT *T, QUEUE *occ);
158
+
159
+ #ifdef _alist_h_
160
+
161
+ /* occurrence deliver (only counting), for MALIST */
162
+ //void TRSACT_MALIST_delivery (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, MALIST *occ, ALIST_ID l, QUEUE_INT m);
163
+ //void TRSACT_MALIST_occ_deliver (TRSACT *TT, MALIST *occ, int l, int item_max);
164
+
165
+ #endif
166
+
167
+ #endif
@@ -0,0 +1,583 @@
1
+ /* library for vector and sparse vector, and matrix */
2
+ /* Takeaki Uno 27/Dec/2008 */
3
+
4
+ #ifndef _vec_c_
5
+ #define _vec_c_
6
+
7
+ #include"vec.h"
8
+ #include"stdlib2.c"
9
+ #include"queue.c"
10
+
11
+ MAT INIT_MAT = {TYPE_MAT,NULL,0,0,NULL,NULL,0,0,0}; // pristine MAT; MAT_end assigns it to reset a matrix
12
+ SVEC INIT_SVEC_ELE = {0,0}; // NOTE(review): declared as SVEC but named and initialized like a single element -- confirm the intended type
13
+ SVEC INIT_SVEC = {TYPE_SVEC,NULL,0,0}; // pristine SVEC; used by SVEC_alloc, SVEC_end and SMAT_alloc
14
+ SMAT INIT_SMAT = {TYPE_SMAT,NULL,0,0,NULL,NULL,0,0,0,0}; // pristine SMAT; SMAT_end assigns it to reset a matrix
15
+ SETFAMILY INIT_SETFAMILY = INIT_SETFAMILY_; // pristine SETFAMILY; the initializer macro is defined outside this file
16
+
17
+ QSORT_TYPE (SVEC_VAL, SVEC_VAL) // presumably instantiates sort routines for SVEC_VAL (macro defined outside this file)
18
+ QSORT_TYPE (SVEC_VAL2, SVEC_VAL2) // likewise for SVEC_VAL2
19
+
20
+ /* allocate memory according to rows and rowt */
21
+ void VEC_alloc (VEC *V, VEC_ID clms){
22
+ *V = INIT_VEC;
23
+ V->end = clms;
24
+ calloc2 (V->v, clms+1, "VEC_alloc: V->v", EXIT);
25
+ }
26
+
27
+ /* terminate routine for VEC */
28
+ void VEC_end (VEC *V){
29
+ free2 (V->v);
30
+ *V = INIT_VEC;
31
+ }
32
+
33
+ /* allocate memory according to rows and rowt */
34
+ void MAT_alloc (MAT *M, VEC_ID rows, VEC_ID clms){
35
+ VEC_ID i;
36
+ calloc2 (M->v, rows+1, "MAT_alloc: M->v", EXIT);
37
+ calloc2 (M->buf, (clms+1) * (rows+1), "MAT_alloc: M->v", {free(M->v);EXIT;});
38
+ M->end = rows;
39
+ M->clms = clms;
40
+ FLOOP (i, 0, rows){
41
+ M->v[i].end = M->v[i].t = clms;
42
+ M->v[i].v = M->buf + i*(clms+1);
43
+ }
44
+ }
45
+
46
+ /* terminate routine for MAT */
47
+ void MAT_end (MAT *M){
48
+ free2 (M->buf);
49
+ free2 (M->buf2);
50
+ free2 (M->v);
51
+ *M = INIT_MAT;
52
+ }
53
+
54
+ /* allocate memory */
55
+ void SVEC_alloc (SVEC *V, VEC_ID end){
56
+ *V = INIT_SVEC;
57
+ calloc2 (V->v, end+1, "SVEC_alloc: V->v", EXIT);
58
+ V->end = end;
59
+ V->t = 0;
60
+ }
61
+
62
+ /* terminate routine for SVEC */
63
+ void SVEC_end (SVEC *V){
64
+ free2 (V->v);
65
+ *V = INIT_SVEC;
66
+ }
67
+
68
+ /* allocate memory according to rows and rowt */
69
+ void SMAT_alloc (SMAT *M, VEC_ID rows, VEC_ID *rowt, VEC_ID clms, size_t eles){
70
+ VEC_ID i;
71
+ if ( eles == 0 ) ARY_SUM (M->ele_end, rowt, 0, rows); else M->ele_end = eles;
72
+ calloc2 (M->buf, M->ele_end*((M->flag&LOAD_DBLBUF)?2:1) +rows +2, "SMAT_alloc: buf", EXIT);
73
+ malloc2 (M->v, rows+1, "SMAT_alloc: M->v", {free(M->buf);EXIT;});
74
+ ARY_FILL (M->v, 0, rows, INIT_SVEC);
75
+ M->end = rows;
76
+ M->clms = clms;
77
+ if ( rowt ){
78
+ FLOOP (i, 0, rows){
79
+ M->v[i].v = i? M->v[i-1].v + rowt[i-1] +1: M->buf;
80
+ M->v[i].end = rowt[i];
81
+ }
82
+ }
83
+ }
84
+
85
+ /* terminate routine for MAT */
86
+ void SMAT_end (SMAT *M){
87
+ free2 (M->buf);
88
+ free2 (M->buf2);
89
+ free2 (M->v);
90
+ *M = INIT_SMAT;
91
+ }
92
+
93
+
94
+
95
+ /* allocate memory according to rows and rowt */
96
+ /* if eles == 0, compute eles from rowt and rows */
97
+ void SETFAMILY_alloc (SETFAMILY *M, VEC_ID rows, VEC_ID *rowt, VEC_ID clms, size_t eles){
98
+ VEC_ID i;
99
+ char *buf;
100
+ if ( eles == 0 ) ARY_SUM (M->ele_end, rowt, 0, rows); else M->ele_end = eles;
101
+ calloc2 (buf, (M->ele_end*((M->flag&LOAD_DBLBUF)?2:1) +((M->flag&LOAD_DBLBUF)?MAX(rows,clms):rows)+2)*M->unit, "SETFAMILY_alloc: buf", EXIT);
102
+ M->buf = (QUEUE_INT *)buf;
103
+ malloc2 (M->v, rows+1, "SETFAMILY_alloc: M->v", {free(M->buf);EXIT;});
104
+ ARY_FILL (M->v, 0, rows, INIT_QUEUE);
105
+ M->end = rows;
106
+ M->clms = clms;
107
+ if ( rowt ){
108
+ FLOOP (i, 0, rows){
109
+ M->v[i].v = (QUEUE_INT *)buf;
110
+ buf += (rowt[i] +1)*M->unit;
111
+ M->v[i].end = rowt[i]+1;
112
+ }
113
+ }
114
+ }
115
+
116
+ /* allocate memory according to rows and rowt */
117
+ /* if eles == 0, compute eles from rowt and rows */
118
+ void SETFAMILY_alloc_weight (SETFAMILY *M){
119
+ VEC_ID i;
120
+ calloc2 (M->w, M->end +1, "SETFAMILY_alloc_weight: w", EXIT);
121
+ calloc2 (M->wbuf, M->ele_end*((M->flag&LOAD_DBLBUF)?2:1)+1, "SETFAMILY_alloc_weight: *w", {free(M->w);EXIT;});
122
+ FLOOP (i, 1, M->t) M->w[i] = i? M->w[i-1] + M->v[i-1].t: M->wbuf;
123
+ }
124
+
125
+ /* terminate routine for MAT */
126
+ void SETFAMILY_end (SETFAMILY *M){
127
+ free2 (M->buf);
128
+ free2 (M->buf2);
129
+ free2 (M->v);
130
+ free2 (M->wbuf);
131
+ free2 (M->w);
132
+ *M = INIT_SETFAMILY;
133
+ }
134
+
135
+ /****************************************************************/
136
+ /****************************************************************/
137
+ /****************************************************************/
138
+
139
+ /* Read a binary file into the dense matrix M. M->clms (and optionally M->t) must be set by the caller. */
140
+ /* Each group of |unit| bytes forms one number, accumulated most-significant byte first.
   If unit<0, its sign is flipped and half of the value range (128*256^(|unit|-1)) is subtracted from each value. */
141
+ void MAT_load_bin (MAT *M, FILE2 *fp, int unit){
142
+ VEC_ID flag=0, i, j, jj;
143
+ size_t siz=0;
144
+ VEC_VAL z, neg=0;
145
+
146
+ if ( unit < 0 ){
147
+ unit = -unit; flag = 1; neg=128;
148
+ FLOOP (jj, 0, unit-1) neg *= 256; // neg = 128 * 256^(unit-1)
149
+ }
150
+ if ( M->t == 0 ){ // determine #rows if M->t is 0 (not specified)
151
+ fseek(fp->fp, 0, SEEK_END);
152
+ siz = ftell(fp->fp); // file size in bytes
153
+ fseek(fp->fp, 0, SEEK_SET);
154
+ M->t = (VEC_ID)(siz / unit / M->clms); // NOTE(review): divides by M->clms with no zero check
155
+ if ( M->flag & LOAD_TPOSE ) SWAP_VEC_ID (M->t, M->clms);
156
+ }
157
+ MAT_alloc (M, M->t, M->clms); if (ERROR_MES) return;
158
+ M->end = M->t;
159
+ FLOOP (i, 0, M->t){
160
+ FLOOP (j, 0, M->clms){
161
+ z=0; FLOOP (jj, 0, unit){ z *= 256; z += FILE2_getc (fp); } // assemble one value from unit bytes
162
+ if ( flag ) z -= neg; // centering requested via negative unit
163
+ if ( M->flag & LOAD_TPOSE ) M->v[j].v[i] = z; // NOTE(review): j ranges over M->clms but M->v has M->t rows -- looks out of range unless the matrix is square; confirm
164
+ else M->v[i].v[j] = z;
165
+ }
166
+ }
167
+ }
168
+
169
+ /* WARNING: no validation of the input -- malformed files may cause a segmentation fault */
170
+ /* Load a text file of numbers into the dense matrix M: a first scan determines any dimension left at 0, then the file is reset, M is allocated and the values are read. */
171
+ /* A dimension that the caller already set (non-zero) is kept; the first scan stops early once both dimensions are known. */
172
+ void MAT_file_load (MAT *M, FILE2 *fp){
173
+ QUEUE_ID c;
174
+ VEC_ID t=0;
175
+ double p;
176
+
177
+ for ( t=0 ; (FILE_err&2)==0 ; t++){ // first scan; t counts the lines visited
178
+ ARY_SCAN (c, double, *fp, 0); // presumably counts the numbers on the current line into c -- macro defined elsewhere
179
+ if ( M->flag & LOAD_TPOSE ){
180
+ if ( M->t == 0 ){ M->t = c; if ( M->clms>0 ) break; }
181
+ } else if ( M->clms == 0 ){ M->clms = c; if ( M->t>0 ) break; }
182
+ }
183
+ if ( M->flag & LOAD_TPOSE ){ if ( M->clms==0 ) M->clms = t;} else if ( M->t==0 ) M->t = t; // the other dimension defaults to the number of lines scanned
184
+ FILE2_reset (fp);
185
+ M->end = M->t;
186
+ MAT_alloc (M, M->t, M->clms); if (ERROR_MES) return;
187
+ FLOOP (t, 0, M->t ){
188
+ FLOOP (c, 0, M->clms){
189
+ p = FILE2_read_double(fp);
190
+ if ( M->flag&LOAD_TPOSE ) M->v[c].v[t] = p; // NOTE(review): c ranges over M->clms but M->v has M->t rows -- confirm the transposed indexing
191
+ else M->v[t].v[c] = p;
192
+ if ( c>= ((M->flag&LOAD_TPOSE)? M->t: M->clms) ) break; // NOTE(review): without LOAD_TPOSE this can never be true inside the loop (c < M->clms)
193
+ }
194
+ if ( !FILE_err ) FILE2_read_until_newline (fp); // skip the rest of the line when no file condition is pending
195
+ if ( c>= ((M->flag&LOAD_TPOSE)? M->clms: M->t) ) break; // NOTE(review): compares the column counter with a row bound -- intent unclear, confirm
196
+ }
197
+ }
198
+
199
+ /* load file with switching the format according to the flag */
200
+ void MAT_load (MAT *M, char *fname){
201
+ FILE2 fp;
202
+ int unit=0;
203
+ #ifdef USE_MATH
204
+ VEC_ID i;
205
+ #endif
206
+ if ( M->flag & VEC_LOAD_BIN ) unit = 1;
207
+ else if ( M->flag & VEC_LOAD_BIN2 ) unit = 2;
208
+ else if ( M->flag & VEC_LOAD_BIN4 ) unit = 4;
209
+ if ( M->flag & VEC_LOAD_CENTERIZE ) unit = -unit;
210
+
211
+ FILE2_open (fp, fname, "rb", "MAT_load", EXIT);
212
+ if ( unit ) MAT_load_bin (M, &fp, unit);
213
+ else MAT_file_load (M, &fp);
214
+ FILE2_close (&fp); if (ERROR_MES) EXIT;
215
+ #ifdef USE_MATH
216
+ if ( M->flag&VEC_NORMALIZE ) FLOOP (i, 0, M->t) ARY_NORMALIZE (M->v[i].v,M->v[i].t);
217
+ #endif
218
+ }
219
+
220
+
221
+ /* Scan the file once to size the structure, allocate it, then re-read the file and store the entries. Serves both SMAT and SETFAMILY. */
222
+ /* M->type==TYPE_SMAT: entries are (column, value) pairs stored in an SMAT; otherwise M is cast to SETFAMILY and only column IDs are stored.
   M->flag&LOAD_ELE: the file lists one (row, column[, value]) tuple per line; otherwise each line is one row.
   M->flag&LOAD_TPOSE: row and column are swapped before storing. */
223
+ void SMAT_file_load (SMAT *M, FILE2 *fp){
224
+ SVEC_VAL z=0;
225
+ VEC_ID flag= (M->type==TYPE_SMAT), t, x, y; // flag: non-zero when values are read too; t: current row in row-per-line mode
226
+ FILE_COUNT C;
227
+
228
+ C = FILE2_count (fp, (M->flag&(LOAD_ELE+LOAD_TPOSE)) | FILE_COUNT_ROWT, 0, 0, 0, 0, 0); // first pass: the result provides C.clms, C.rows and C.rowt
229
+ if ( M->clms == 0 ) M->clms = C.clms; // dimensions preset by the caller take precedence
230
+ if ( M->t == 0 ) M->t = C.rows;
231
+ if ( flag ) SMAT_alloc (M, M->t, C.rowt, M->clms, 0);
232
+ else SETFAMILY_alloc ((SETFAMILY *)M, M->t, C.rowt, M->clms, 0); // NOTE(review): relies on SMAT and SETFAMILY sharing their leading layout -- confirm in vec.h
233
+ free2 (C.rowt);
234
+ if ( ERROR_MES ) return;
235
+ FILE2_reset (fp);
236
+ t=0;
237
+ do {
238
+ if ( M->flag&LOAD_ELE ){
239
+ x = (VEC_ID)FILE2_read_int (fp);
240
+ y = (VEC_ID)FILE2_read_int (fp);
241
+ if ( flag ) z = FILE2_read_double (fp);
242
+ if ( FILE_err&4 ) goto LOOP_END2; // presumably: no number could be read -- skip this entry
243
+ FILE2_read_until_newline (fp);
244
+ } else {
245
+ x = t;
246
+ y = (VEC_ID)FILE2_read_int (fp);
247
+ if ( FILE_err&4 ) goto LOOP_END2; // presumably: no number could be read -- skip this entry
248
+ if ( flag ) z = FILE2_read_double (fp);
249
+ }
250
+ if ( M->flag&LOAD_TPOSE ) SWAP_VEC_ID (x, y);
251
+ // printf ("%d %d %d %d\n", x, M->t, y, M->clms);
252
+ if ( y >= M->clms || x >= M->t ) goto LOOP_END2; // entries outside the matrix are dropped silently
253
+ // printf ("## %d %d\n", x, y);
254
+ if ( flag ){
255
+ M->v[x].v[M->v[x].t].i = y; // append (column, value) to row x
256
+ M->v[x].v[M->v[x].t].a = z;
257
+ M->v[x].t++;
258
+ } else ARY_INS (((SETFAMILY *)M)->v[x], y);
259
+ LOOP_END2:;
260
+ if ( !(M->flag&LOAD_ELE) && (FILE_err&3) ){ t++; if ( t >= M->t ) break; } // row-per-line mode: advance to the next row; FILE_err&3 presumably signals end of line or end of file
261
+ } while ( (FILE_err&2)==0 ); // loop until the FILE_err bit tested here (presumably end of file) is set
262
+ }
263
+
264
+ /* scan file and read the numbers for SMAT */
265
+ /* flag&1? SMAT, SETFAMILY, flag&2? tuple list format: array list :*/
266
+ void SETFAMILY_load_weight (SETFAMILY *M, char *fname){
267
+ FILE2 fp;
268
+ VEC_ID i;
269
+ QUEUE_ID j;
270
+ if ( M->flag&LOAD_TPOSE ) error ("transope and weight can't be specified simultaneously", EXIT);
271
+ FILE2_open (fp, fname, "r", "SETFAMILY_load_weight", EXIT);
272
+ SETFAMILY_alloc_weight (M);
273
+ FLOOP (i, 0, M->t){
274
+ FLOOP (j, 0, M->v[i].t)
275
+ M->w[i][j] = (WEIGHT)FILE2_read_double (&fp);
276
+ FILE2_read_until_newline (&fp);
277
+ }
278
+ }
279
+
280
+
281
+ /* load file with switching the format according to the flag */
282
+ void SMAT_load (SMAT *M, char *fname){
283
+ FILE2 fp;
284
+ VEC_ID i;
285
+ M->type = TYPE_SMAT;
286
+ FILE2_open (fp, fname, "r", "SMAT_load", EXIT);
287
+ SMAT_file_load (M, &fp);
288
+ FILE2_close (&fp); if (ERROR_MES) EXIT;
289
+ FLOOP (i, 0, M->t) M->v[i].v[M->v[i].t].i = M->clms; // end mark
290
+
291
+ #ifdef USE_MATH
292
+ if ( M->flag&VEC_NORMALIZE ) FLOOP (i, 0, M->t) SVEC_normalize (&M->v[i]); // normalize
293
+ #endif
294
+ if (M->flag&LOAD_INCSORT)
295
+ FLOOP (i, 0, M->t) qsort_VEC_ID ((VEC_ID *)(M->v[i].v), M->v[i].t, sizeof(SVEC_ELE));
296
+ if (M->flag&LOAD_DECSORT)
297
+ FLOOP (i, 0, M->t) qsort_VEC_ID ((VEC_ID *)(M->v[i].v), M->v[i].t, -(int)sizeof(SVEC_ELE));
298
+ if (M->flag&LOAD_RM_DUP)
299
+ FLOOP (i, 0, M->t) MQUE_UNIFY (M->v[i], SVEC_VAL);
300
+ M->eles = M->ele_end;
301
+ }
302
+
303
+ /* sort and duplication check */
304
+ void SETFAMILY_sort (SETFAMILY *M){
305
+ VEC_ID i;
306
+ PERM *p;
307
+ WEIGHT *ww;
308
+ QUEUE Q;
309
+ int flag = (M->flag&LOAD_INCSORT)? 1: ((M->flag&LOAD_DECSORT)? -1: 0);
310
+ if ( flag ){ // sort items in each row
311
+ malloc2 (p, M->clms, "SETFAMILY_sort: p", EXIT);
312
+ FLOOP (i, 0, M->t)
313
+ QUEUE_perm_WEIGHT (&M->v[i], M->w?M->w[i]:NULL, p, flag);
314
+ free (p);
315
+ }
316
+ flag = ((M->flag&LOAD_SIZSORT)? ((M->flag&LOAD_DECROWSORT)? -1: 1): 0) *sizeof(QUEUE);
317
+ if ( flag ){ // sort the rows
318
+ p = qsort_perm_VECt ((VEC *)M->v, M->t, flag);
319
+ ARY_INVPERMUTE_ (M->w, p, ww, M->t);
320
+ ARY_INVPERMUTE (M->v, p, Q, M->t, "SETFAMILY_sort: ARY_INVPERMUTE", EXIT);
321
+ free (p);
322
+ }
323
+ if (M->flag&LOAD_RM_DUP){ // unify the duplicated edges
324
+ FLOOP (i, 0, M->t)
325
+ QUEUE_rm_dup_WEIGHT (&M->v[i], M->w?M->w[i]:NULL);
326
+ }
327
+ }
328
+
329
+ /* scan file and load the data from file to SMAT structure */
330
+ void SETFAMILY_load (SETFAMILY *M, char *fname, char *wfname){
331
+ FILE2 fp;
332
+ VEC_ID i;
333
+ M->type = TYPE_SETFAMILY;
334
+ FILE2_open (fp, fname, "r", "SETFAMILY_load", EXIT);
335
+ SMAT_file_load ((SMAT *)M, &fp);
336
+ FILE2_close (&fp); if(ERROR_MES) EXIT;
337
+ FLOOP (i, 0, M->t) M->v[i].v[M->v[i].t] = M->clms; // end mark
338
+
339
+ if ( !(M->flag&LOAD_ELE) && wfname ){
340
+ SETFAMILY_load_weight (M, wfname);
341
+ if ( ERROR_MES ){ SETFAMILY_end (M); EXIT; }
342
+ }
343
+
344
+ SETFAMILY_sort (M);
345
+ M->eles = M->ele_end;
346
+ }
347
+
348
+ /* print routines */
349
+ void MAT_print (FILE *fp, MAT *M){
350
+ VEC *V;
351
+ MQUE_FLOOP (*M, V) ARY_FPRINT (fp, V->v, 0, V->t, VEC_VALF" ");
352
+ }
353
+ void SVEC_print (FILE *fp, SVEC *V){
354
+ SVEC_ELE *x;
355
+ MQUE_FLOOP (*V, x) fprintf (fp, "("QUEUE_IDF","SVEC_VALF") ", (*x).i, (*x).a);
356
+ fputc ('\n', fp);
357
+ }
358
+ void SMAT_print (FILE *fp, SMAT *M){
359
+ SVEC *V;
360
+ MQUE_FLOOP (*M, V) SVEC_print (fp, V);
361
+ }
362
+ void SETFAMILY_print (FILE *fp, SETFAMILY *M){
363
+ QUEUE *V;
364
+ MQUE_FLOOP (*M, V) ARY_FPRINT (fp, V->v, 0, V->t, QUEUE_INTF" ");
365
+ }
366
+
367
+ /*
368
+ void SETFAMILY_print_WEIGHT (FILE *fp, SETFAMILY *M){
369
+ if ( M->w ){
370
+ printf (","); fprint_WEIGHT (stdout, M->w[i][j]); }
371
+ printf ("\n");
372
+ }
373
+ */
374
+
375
+ /****************************************************************/
376
+ /** Inner product routines **************************************/
377
+ /****************************************************************/
378
+ SVEC_VAL2 SVEC_inpro (SVEC *V1, SVEC *V2){
379
+ VEC_ID i1, i2=0;
380
+ SVEC_VAL2 sum=0;
381
+ FLOOP (i1, 0, V1->t){
382
+ while (V2->v[i2].i < V1->v[i1].i) i2++;
383
+ if (V2->v[i2].i == V1->v[i1].i) sum += ((SVEC_VAL2)V2->v[i2].a)*V1->v[i1].a;
384
+ }
385
+ return (sum);
386
+ }
387
+
388
+
389
+ /* get ith vector */
390
+ void *MVEC_getvec (void *M, int i, int flag){
391
+ MAT *MM = (MAT *)M;
392
+ if (MM->type==TYPE_MAT) return (&MM->v[i]);
393
+ if (MM->type==TYPE_SMAT) return (&((SVEC *)M)->v[i]);
394
+ if (MM->type==TYPE_SETFAMILY) return (&((QUEUE *)M)->v[i]);
395
+ return (NULL);
396
+ }
397
+
398
+ #ifdef USE_MATH
399
+
400
+ /****************************************************************/
401
+ /** Norm computation and normalization ************************/
402
+ /****************************************************************/
403
+ double SVEC_norm (SVEC *V){
404
+ SVEC_ELE *v;
405
+ double sum=0;
406
+ MQUE_FLOOP (*V, v) sum += ((double)(v->a)) * (v->a);
407
+ return (sqrt(sum));
408
+ }
409
+ void SVEC_normalize (SVEC *V){
410
+ SVEC_ELE *v;
411
+ double norm = SVEC_norm (V);
412
+ MQUE_FLOOP (*V, v) v->a /= norm;
413
+ }
414
+
415
+ /****************************************************************/
416
+ /** Euclidean distance routines *********************************/
417
+ /****************************************************************/
418
+
419
+ /* compute the inner product of two vectors */
420
+ double VEC_eucdist (VEC *V1, VEC *V2){
421
+ VEC_ID i, end=MIN(V1->end,V2->end);
422
+ double sum=0, a0, a1, a2, a3;
423
+ for (i=0 ; i<end ; i+=4){
424
+ a0 = ((double)V1->v[i])- ((double)V2->v[i]);
425
+ a1 = ((double)V1->v[i+1])- ((double)V2->v[i+1]);
426
+ a2 = ((double)V1->v[i+2])- ((double)V2->v[i+2]);
427
+ a3 = ((double)V1->v[i+3])- ((double)V2->v[i+3]);
428
+ sum += a0*a0 + a1*a1 + a2*a2 + a3*a3;
429
+ }
430
+ if ( i+1<end ){
431
+ a0 = ((double)V1->v[i])- ((double)V2->v[i]);
432
+ a1 = ((double)V1->v[i+1])- ((double)V2->v[i+1]);
433
+ sum += a0*a0 + a1*a1;
434
+ if ( i+2<end ){ a2 = ((double)V1->v[i+2])- ((double)V2->v[i+2]); sum += a2*a2; }
435
+ } else if ( i<end ){ a0 = ((double)V1->v[i])- ((double)V2->v[i]); sum += a0*a0; }
436
+ return (sqrt(sum));
437
+ }
438
+
439
+ /* compute the inner product of two vectors */
440
+ double SVEC_eucdist (SVEC *V1, SVEC *V2){
441
+ VEC_ID i1, i2;
442
+ double sum=0, a;
443
+ for ( i1=i2=0 ; i1<V1->t && i2<V2->t ; ){
444
+ if (V2->v[i2].i > V1->v[i1].i) a = V1->v[i1].a;
445
+ else if (V2->v[i2].i < V1->v[i1].i) a = V2->v[i2].a;
446
+ else a = ((double)V2->v[i2].a) - ((double)V1->v[i1].a);
447
+ sum += a*a;
448
+ }
449
+ return (sqrt(sum));
450
+ }
451
+
452
+ /* compute the inner product of two vectors */
453
+ double VEC_SVEC_eucdist (VEC *V1, SVEC *V2){
454
+ VEC_ID i, i2=0;
455
+ double sum=0, a;
456
+ FLOOP (i, 0, V1->end){
457
+ if ( i < V2->v[i2].i ) a = V1->v[i];
458
+ else { a = ((double)V1->v[i]) - ((double)V2->v[i2].a); i2++; }
459
+ sum += a*a;
460
+ }
461
+ return (sqrt(sum));
462
+ }
463
+
464
+ /**********************************************************/
465
+ /* Euclidean distance of vector and set */
466
+ double VEC_QUEUE_eucdist (VEC *V, QUEUE *Q){
467
+ VEC_ID i;
468
+ QUEUE_ID i2=0;
469
+ double sum=0, a;
470
+ FLOOP (i, 0, V->end){
471
+ if ( i < Q->v[i2] ) a = V->v[i];
472
+ else { a = ((double)V->v[i]) - 1.0; i2++; }
473
+ sum += a*a;
474
+ }
475
+ return (sqrt(sum));
476
+ }
477
+
478
+ /* compute Euclidean distance of two sets */
479
+ double QUEUE_eucdist (QUEUE *Q1, QUEUE *Q2){
480
+ double f;
481
+ MQUE_UNION(f, *Q1, *Q2);
482
+ return (sqrt(f));
483
+ }
484
+
485
+ double MVEC_norm (void *V){
486
+ VEC *VV = (VEC *)V;
487
+ double p;
488
+ if (VV->type==TYPE_VEC){ ARY_NORM (p, VV->v, VV->t); return (p); }
489
+ if (VV->type==TYPE_SVEC) return (SVEC_norm ((SVEC *)V));
490
+ if (VV->type==TYPE_QUEUE) return (sqrt(((QUEUE*)V)->t));
491
+ return (0.0);
492
+ }
493
+
494
+ double MMAT_norm_i (void *M, int i){
495
+ MAT *MM = (MAT *)M;
496
+ double p;
497
+ if (MM->type==TYPE_MAT){ ARY_NORM (p, MM->v[i].v, MM->v[i].t); return (p); }
498
+ if (MM->type==TYPE_SMAT) return (SVEC_norm (&((SMAT *)M)->v[i]));
499
+ if (MM->type==TYPE_SETFAMILY) return (sqrt (((SETFAMILY *)M)->v[i].t));
500
+ return (0.0);
501
+ }
502
+
503
+ double MVEC_eucdist (void *V, void *U){
504
+ VEC *VV = (VEC *)V;
505
+ double p;
506
+ if (VV->type==TYPE_VEC) return (VEC_eucdist ((VEC *)V, (VEC *)U));
507
+ if (VV->type==TYPE_SVEC) return (SVEC_eucdist ((SVEC *)V, (SVEC *)U));
508
+ if (VV->type==TYPE_QUEUE){ MQUE_DIF (p, *((QUEUE *)V), *((QUEUE *)U)); return (sqrt(p));}
509
+ return (0.0);
510
+ }
511
+
512
+ double MMAT_eucdist_ij (void *M, int i, int j){
513
+ MAT *MM=(MAT *)M;
514
+ double p;
515
+ if (MM->type==TYPE_MAT) return (VEC_eucdist ( &MM->v[i], &MM->v[j] ));
516
+ if (MM->type==TYPE_SMAT) return (SVEC_eucdist ( &((SMAT *)M)->v[i], &((SMAT *)M)->v[j]));
517
+ if (MM->type==TYPE_SETFAMILY){ MQUE_DIF (p, ((SETFAMILY *)M)->v[i], ((SETFAMILY *)M)->v[j]); return (sqrt(p)); }
518
+ return (0.0);
519
+ }
520
+
521
+
522
+ #endif
523
+
524
+ /**********************************************************/
525
+ /** multi-vector routines ******************************/
526
+ /**********************************************************/
527
+
528
+ /* compute the inner product, Euclidean distance for multi vector */
529
+ double MVEC_inpro (void *V, void *U){
530
+ VEC *VV = (VEC *)V, *UU = (VEC *)U;
531
+ double p;
532
+ if (VV->type==TYPE_VEC){
533
+ if (UU->type==TYPE_VEC){ ARY_INPRO (p, VV->v, UU->v, VV->t); return (p); }
534
+ if (UU->type==TYPE_SVEC){ ARY_SVEC_INPRO (p, *((SVEC *)U), VV->v); return (p); }
535
+ if (UU->type==TYPE_QUEUE){ ARY_QUEUE_INPRO (p, *((QUEUE *)U), VV->v); return (p); }
536
+ }
537
+ if (VV->type==TYPE_SVEC){
538
+ if (UU->type==TYPE_VEC){ ARY_SVEC_INPRO (p, *((SVEC *)V), UU->v); return (p);}
539
+ if (UU->type==TYPE_SVEC) return (SVEC_inpro ((SVEC *)V, (SVEC *)U));
540
+ // if (UU->type==TYPE_QUEUE) return (VEC_QUEUE_inpro (V, U));
541
+ }
542
+ if (VV->type==TYPE_QUEUE){
543
+ if (UU->type==TYPE_VEC){ ARY_QUEUE_INPRO (p, *((QUEUE *)V), UU->v); return (p); }
544
+ // else if (UU->type==TYPE_SVEC) return (SVEC_inpro (V, U));
545
+ if (UU->type==TYPE_QUEUE){ MQUE_INTSEC (p, *((QUEUE *)V), *((QUEUE *)U)); return (p);}
546
+ }
547
+ return (0.0);
548
+ }
549
+
550
+ double MVEC_double_inpro (void *V, double *w){
551
+ VEC *VV = (VEC *)V;
552
+ double p;
553
+ if (VV->type==TYPE_VEC){ ARY_INPRO (p, VV->v, w, VV->t); return (p); }
554
+ if (VV->type==TYPE_SVEC){ ARY_SVEC_INPRO (p, *((SVEC *)V), w); return (p); }
555
+ if (VV->type==TYPE_QUEUE){ ARY_QUEUE_INPRO (p, *((QUEUE *)V), w); return (p); }
556
+ return (0.0);
557
+ }
558
+
559
+ /* compute the inner product, euclidean distance for i,jth vector */
560
+ double MMAT_inpro_ij (void *M, int i, int j){
561
+ MAT *MM = (MAT *)M;
562
+ double p;
563
+ if (MM->type==TYPE_MAT){ ARY_INPRO (p, MM->v[i].v, MM->v[j].v, MM->v[j].t); return (p); }
564
+ if (MM->type==TYPE_SMAT) return (SVEC_inpro (&((SMAT *)M)->v[i], &((SMAT *)M)->v[j]));
565
+ if (MM->type==TYPE_SETFAMILY){
566
+ p = QUEUE_intsec_ (&((SETFAMILY *)M)->v[i], &((SETFAMILY *)M)->v[j]); return (p); }
567
+ return (0.0);
568
+ }
569
+
570
+ double MMAT_double_inpro_i (void *M, int i, double *w){
571
+ MAT *MM = (MAT *)M;
572
+ double p;
573
+ if (MM->type==TYPE_MAT){ ARY_INPRO (p, MM->v[i].v, w, MM->v[i].t); return (p); }
574
+ if (MM->type==TYPE_SMAT){ ARY_SVEC_INPRO (p, ((SMAT *)M)->v[i], w); return (p); }
575
+ if (MM->type==TYPE_SETFAMILY){ ARY_QUEUE_INPRO (p, ((SETFAMILY *)M)->v[i], w); return (p); }
576
+ return (0.0);
577
+ }
578
+
579
+
580
+
581
+ #endif
582
+
583
+