nysol-zdd 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/ext/zdd_so/BDD.cc +495 -0
  3. data/ext/zdd_so/BDD.h +356 -0
  4. data/ext/zdd_so/BDDDG.cc +1818 -0
  5. data/ext/zdd_so/BDDDG.h +107 -0
  6. data/ext/zdd_so/BDDHASH.cc +91 -0
  7. data/ext/zdd_so/BtoI.cc +503 -0
  8. data/ext/zdd_so/BtoI.h +144 -0
  9. data/ext/zdd_so/CtoI.cc +1072 -0
  10. data/ext/zdd_so/CtoI.h +186 -0
  11. data/ext/zdd_so/MLZBDDV.cc +153 -0
  12. data/ext/zdd_so/MLZBDDV.h +42 -0
  13. data/ext/zdd_so/SOP.cc +608 -0
  14. data/ext/zdd_so/SOP.h +199 -0
  15. data/ext/zdd_so/ZBDD.cc +1035 -0
  16. data/ext/zdd_so/ZBDD.h +243 -0
  17. data/ext/zdd_so/ZBDDDG.cc +1834 -0
  18. data/ext/zdd_so/ZBDDDG.h +105 -0
  19. data/ext/zdd_so/ZBDDHASH.cc +91 -0
  20. data/ext/zdd_so/bddc.c +2816 -0
  21. data/ext/zdd_so/bddc.h +132 -0
  22. data/ext/zdd_so/extconf.rb +25 -0
  23. data/ext/zdd_so/include/aheap.c +211 -0
  24. data/ext/zdd_so/include/aheap.h +111 -0
  25. data/ext/zdd_so/include/base.c +93 -0
  26. data/ext/zdd_so/include/base.h +60 -0
  27. data/ext/zdd_so/include/itemset.c +473 -0
  28. data/ext/zdd_so/include/itemset.h +153 -0
  29. data/ext/zdd_so/include/problem.c +371 -0
  30. data/ext/zdd_so/include/problem.h +160 -0
  31. data/ext/zdd_so/include/queue.c +518 -0
  32. data/ext/zdd_so/include/queue.h +177 -0
  33. data/ext/zdd_so/include/sgraph.c +331 -0
  34. data/ext/zdd_so/include/sgraph.h +170 -0
  35. data/ext/zdd_so/include/stdlib2.c +832 -0
  36. data/ext/zdd_so/include/stdlib2.h +746 -0
  37. data/ext/zdd_so/include/trsact.c +723 -0
  38. data/ext/zdd_so/include/trsact.h +167 -0
  39. data/ext/zdd_so/include/vec.c +583 -0
  40. data/ext/zdd_so/include/vec.h +159 -0
  41. data/ext/zdd_so/lcm-vsop.cc +596 -0
  42. data/ext/zdd_so/print.cc +683 -0
  43. data/ext/zdd_so/table.cc +330 -0
  44. data/ext/zdd_so/vsop.h +88 -0
  45. data/ext/zdd_so/zdd_so.cpp +3277 -0
  46. data/lib/nysol/zdd.rb +31 -0
  47. metadata +131 -0
@@ -0,0 +1,167 @@
1
+ /* QUEUE based Transaction library, including database reduction.
2
+ 25/Nov/2007 by Takeaki Uno e-mail:uno@nii.jp,
3
+ homepage: http://research.nii.ac.jp/~uno/index.html */
4
+ /* This program is available for only academic use, basically.
5
+ Anyone can modify this program, but he/she has to write down
6
+ the change of the modification on the top of the source code.
7
+ Neither contact nor appointment to Takeaki Uno is needed.
8
+ If one wants to re-distribute this code, do not forget to
9
+ refer the newest code, and show the link to homepage of
10
+ Takeaki Uno, to notify the news about the codes for the users.
11
+ For the commercial use, please make a contact to Takeaki Uno. */
12
+
13
+ #ifndef _trsact_h_
14
+ #define _trsact_h_
15
+
16
+ // #define WEIGHT double
17
+ // #define WEIGHT_DOUBLE
18
+
19
+ #include"vec.h"
20
+ #include"base.h"
21
+
22
+ #ifndef WEIGHT
23
+ #define WEIGHT int
24
+ #ifdef WEIGHT_DOUBLE
25
+ #undef WEIGHT_DOUBLE
26
+ #endif
27
+ #endif
28
+
29
+ typedef struct {
30
+ unsigned char type; // mark to identify type of the structure
31
+ SETFAMILY T; // transaction
32
+ int flag; // flag
33
+ WEIGHT *w, *pw; // weight/positive-weight of transactions
34
+
35
+ QUEUE_INT clms_org, clm_max, clms_end, non_empty_clms; // #items in original file, max size of clms, and max of (original item, internal item)
36
+ VEC_ID rows_org, row_max; // #transactions in the original file
37
+ VEC_ID end1, sep; // #trsact in 1st file, the ID of the last permed trsact of 1st file
38
+ size_t eles_org; // #elements in the original file
39
+ WEIGHT total_w, total_pw, total_w_org, total_pw_org;
40
+ WEIGHT th; // threshold for frequency of items
41
+ PERM *perm, *trperm; // original item permutation loaded from permutation file (and inverse)
42
+
43
+ // lower/upper bound of #elements in a column/row. colunmn or row of out of range will be ignored
44
+ VEC_ID clm_lb, clm_ub;
45
+ QUEUE_ID row_lb, row_ub;
46
+ WEIGHT w_lb, w_ub;
47
+
48
+ VEC_ID str_num; // number of database (itemset stream/string datasets) in T
49
+ VEC_ID *head, *strID; // the head (beginning) of each stream, stream ID of each transaction
50
+ int occ_unit;
51
+
52
+ // for finding same transactions
53
+ QUEUE jump, *OQ; // queue of non-empty buckets, used in find_same_transactions
54
+ VEC_ID *mark; // marks for transactions
55
+ QUEUE_INT **shift; // memory for shift positions of each transaction
56
+ char *sc; // flag for non-active (in-frequent) items
57
+
58
+ // for extra transactions
59
+ VEC_ID new_t; // the start ID of un-used transactions
60
+ BASE buf; // buffer for transaction
61
+ BASE wbuf; // buffer for itemweights
62
+ } TRSACT;
63
+
64
+ #define TRSACT_FRQSORT 65536 // sort transactions in decreasing order
65
+ #define TRSACT_ITEMWEIGHT 131072 // initialize itemweight by transaction weights
66
+ #define TRSACT_SHRINK 262144 // do not allocate memory for shrink, but do for mining
67
+ #define TRSACT_MULTI_STREAM 524288 // separate the datasets at each empty transaction
68
+ #define TRSACT_UNION 1048576 // take union of transactions, at the database reduction
69
+ #define TRSACT_INTSEC 2097152 // take intersection of transactions, at the database reduction
70
+ #define TRSACT_MAKE_NEW 4194304 // make new transaction for each
71
+ #define TRSACT_ALLOC_OCC 8388608 // make new transaction for each
72
+ #define TRSACT_DELIV_SC 16777216 // look T->sc when delivery
73
+ #define TRSACT_NEGATIVE 33554432 // flag for whether some transaction weights are negative or not
74
+ //#define TRSACT_INIT_SHRINK 65536 // allocate memory for database reduction
75
+ #define TRSACT_WRITE_PERM 67108864 // write item-order to file
76
+
77
+ #ifndef TRSACT_DEFAULT_WEIGHT
78
+ #define TRSACT_DEFAULT_WEIGHT 0 // default weight of the transaction, for missing weights in weight file
79
+ #endif
80
+
81
+ /* print transactions */
82
+ void TRSACT_print (TRSACT *T, QUEUE *occ, PERM *p);
83
+ void TRSACT_prop_print (TRSACT *T);
84
+
85
+ /**************************************************************/
86
+ void TRSACT_init (TRSACT *T);
87
+
88
+ /**************************************************************/
89
+ void TRSACT_end (TRSACT *T);
90
+
91
+ /*****************************************/
92
+ /* scan file "fp" with weight file wfp and count #items, #transactions in the file. */
93
+ /* count weight only if wfp!=NULL */
94
+ /* T->clms_org, rows_org, eles_org := #items, #transactions, #all elements */
95
+ /* ignore the transactions of size not in range T->clm_lb - clm_ub */
96
+ /* T->total_w, total_pw := sum of (positive) weights of transactions */
97
+ /* C.clmt[i],C.cw[i] := the number/(sum of weights) of transactions including i */
98
+ /****************************************/
99
+ void TRSACT_file_count (TRSACT *T, FILE_COUNT *C, FILE2 *fp, char *wf);
100
+
101
+ /* allocate memory, set permutation, and free C.clmt,rowt,rw,cw */
102
+ int TRSACT_alloc (TRSACT *T, char *pfname, FILE_COUNT *C);
103
+
104
+ /* load the file to allocated memory according to permutation, and free C.rw, C.cw */
105
+ void TRSACT_file_read (TRSACT *T, FILE2 *fp, FILE_COUNT *C, VEC_ID *t, int flag);
106
+
107
+ /*****************************************/
108
+ /* load transaction file to TRSACT */
109
+ void TRSACT_load (TRSACT *T, char *fname, char *fname2, char *wfname, char *wfname2, char *pfname);
110
+
111
+ /* occurrence deliver (only counting) */
112
+ /* WARNING: next cell of the last item of each transaction must be INTHUGE */
113
+ /* compute occurrence for items less than max item, in the database induced
114
+ by occ */
115
+ /* if jump!=0, all i with non-zero occ[i].t will be inserted to jump */
116
+ /* be careful for overflow of jump */
117
+ /* if occ==NULL, scan all transactions */
118
+ /* flag&1: count only positive weights */
119
+ void TRSACT_delivery_iter (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, VEC_ID t, QUEUE_INT m);
120
+ void TRSACT_delivery (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, QUEUE *occ, QUEUE_INT m);
121
+ // QUEUE *TRSACT_alloc_occ (TRSACT *T, QUEUE_INT end);
122
+ //QUEUE_ID TRSACT_occ_dup (SETFAMILY *S, QUEUE *OQ, QUEUE *jump, WEIGHT *occ_w, WEIGHT *occ_pw);
123
+
124
+ /**************************************************************/
125
+ /* Find identical transactions in a subset of transactions, by radix-sort like method */
126
+ /* infrequent items (refer LCM_occ) and items larger than item_max are ignored */
127
+ /* INPUT: T:transactions, occ:subset of T represented by indices, result:array for output, item_max:largest item not to be ignored */
128
+ /* OUTPUT: if transactions i1, i2,..., ik are the same, they have same value in T->mark[i]
129
+ (not all) isolated transaction may have mark 1 */
130
+ /* use 0 to end-1 of QQ temporary, and QQ[i].t and QQ[i].s have to be 0. */
131
+ /*************************************************************************/
132
+ void TRSACT_find_same (TRSACT *T, QUEUE *occ, QUEUE_INT end);
133
+
134
+ /* copy transaction t to tt (only items i s.t. pw[i]>=th) **/
135
+ void TRSACT_copy (TRSACT *T, VEC_ID tt, VEC_ID t, QUEUE_INT end);
136
+
137
+ /* intersection of transaction t and tt (only items i s.t. pw[i]>=th) **/
138
+ /* shift is the array of pointers indicates the start of each transaction **/
139
+ void TRSACT_suffix_and (TRSACT *T, VEC_ID tt, VEC_ID t);
140
+
141
+ /* take union of transaction t to tt (only items i s.t. pw[i]>=th) */
142
+ /* CAUTION: t has to be placed at the last of trsact_buf2. */
143
+ /* if the size of t increases, the following memory will be overwritten */
144
+ /* if memory (T->buf) is short, do nothing and return 1 */
145
+ void TRSACT_itemweight_union (TRSACT *T, VEC_ID tt, VEC_ID t);
146
+
147
+
148
+ /*****/
149
+ /* remove duplicated transactions from occ, and add the weight of the removed trsacts to the representative one */
150
+ /* duplicated trsacts are in occ[item_max]. Clear the queue when return */
151
+ /* T->flag&TRSACT_MAKE_NEW: make new trsact for representative
152
+ T->flag&TRSACT_INTSEC: take suffix intersection of the same trsacts
153
+ T->flag&TRSACT_UNION: take union of the same trsacts */
154
+ void TRSACT_merge_trsact (TRSACT *T, QUEUE *o, QUEUE_INT end);
155
+
156
+ /* remove the unified transactions from occ (consider T->occ_unit) */
157
+ void TRSACT_reduce_occ (TRSACT *T, QUEUE *occ);
158
+
159
+ #ifdef _alist_h_
160
+
161
+ /* occurrence deliver (only counting), for MALIST */
162
+ //void TRSACT_MALIST_delivery (TRSACT *T, QUEUE *jump, WEIGHT *w, WEIGHT *pw, MALIST *occ, ALIST_ID l, QUEUE_INT m);
163
+ //void TRSACT_MALIST_occ_deliver (TRSACT *TT, MALIST *occ, int l, int item_max);
164
+
165
+ #endif
166
+
167
+ #endif
@@ -0,0 +1,583 @@
1
+ /* library for vector and sparse vector, and matrix */
2
+ /* Takeaki Uno 27/Dec/2008 */
3
+
4
+ #ifndef _vec_c_
5
+ #define _vec_c_
6
+
7
+ #include"vec.h"
8
+ #include"stdlib2.c"
9
+ #include"queue.c"
10
+
11
+ MAT INIT_MAT = {TYPE_MAT,NULL,0,0,NULL,NULL,0,0,0};
12
+ SVEC INIT_SVEC_ELE = {0,0};
13
+ SVEC INIT_SVEC = {TYPE_SVEC,NULL,0,0};
14
+ SMAT INIT_SMAT = {TYPE_SMAT,NULL,0,0,NULL,NULL,0,0,0,0};
15
+ SETFAMILY INIT_SETFAMILY = INIT_SETFAMILY_;
16
+
17
+ QSORT_TYPE (SVEC_VAL, SVEC_VAL)
18
+ QSORT_TYPE (SVEC_VAL2, SVEC_VAL2)
19
+
20
+ /* allocate memory according to rows and rowt */
21
+ void VEC_alloc (VEC *V, VEC_ID clms){
22
+ *V = INIT_VEC;
23
+ V->end = clms;
24
+ calloc2 (V->v, clms+1, "VEC_alloc: V->v", EXIT);
25
+ }
26
+
27
+ /* terminate routine for VEC */
28
+ void VEC_end (VEC *V){
29
+ free2 (V->v);
30
+ *V = INIT_VEC;
31
+ }
32
+
33
+ /* allocate memory according to rows and rowt */
34
+ void MAT_alloc (MAT *M, VEC_ID rows, VEC_ID clms){
35
+ VEC_ID i;
36
+ calloc2 (M->v, rows+1, "MAT_alloc: M->v", EXIT);
37
+ calloc2 (M->buf, (clms+1) * (rows+1), "MAT_alloc: M->v", {free(M->v);EXIT;});
38
+ M->end = rows;
39
+ M->clms = clms;
40
+ FLOOP (i, 0, rows){
41
+ M->v[i].end = M->v[i].t = clms;
42
+ M->v[i].v = M->buf + i*(clms+1);
43
+ }
44
+ }
45
+
46
+ /* terminate routine for MAT */
47
+ void MAT_end (MAT *M){
48
+ free2 (M->buf);
49
+ free2 (M->buf2);
50
+ free2 (M->v);
51
+ *M = INIT_MAT;
52
+ }
53
+
54
+ /* allocate memory */
55
+ void SVEC_alloc (SVEC *V, VEC_ID end){
56
+ *V = INIT_SVEC;
57
+ calloc2 (V->v, end+1, "SVEC_alloc: V->v", EXIT);
58
+ V->end = end;
59
+ V->t = 0;
60
+ }
61
+
62
+ /* terminate routine for SVEC */
63
+ void SVEC_end (SVEC *V){
64
+ free2 (V->v);
65
+ *V = INIT_SVEC;
66
+ }
67
+
68
+ /* allocate memory according to rows and rowt */
69
+ void SMAT_alloc (SMAT *M, VEC_ID rows, VEC_ID *rowt, VEC_ID clms, size_t eles){
70
+ VEC_ID i;
71
+ if ( eles == 0 ) ARY_SUM (M->ele_end, rowt, 0, rows); else M->ele_end = eles;
72
+ calloc2 (M->buf, M->ele_end*((M->flag&LOAD_DBLBUF)?2:1) +rows +2, "SMAT_alloc: buf", EXIT);
73
+ malloc2 (M->v, rows+1, "SMAT_alloc: M->v", {free(M->buf);EXIT;});
74
+ ARY_FILL (M->v, 0, rows, INIT_SVEC);
75
+ M->end = rows;
76
+ M->clms = clms;
77
+ if ( rowt ){
78
+ FLOOP (i, 0, rows){
79
+ M->v[i].v = i? M->v[i-1].v + rowt[i-1] +1: M->buf;
80
+ M->v[i].end = rowt[i];
81
+ }
82
+ }
83
+ }
84
+
85
+ /* terminate routine for MAT */
86
+ void SMAT_end (SMAT *M){
87
+ free2 (M->buf);
88
+ free2 (M->buf2);
89
+ free2 (M->v);
90
+ *M = INIT_SMAT;
91
+ }
92
+
93
+
94
+
95
+ /* allocate memory according to rows and rowt */
96
+ /* if eles == 0, compute eles from rowt and rows */
97
+ void SETFAMILY_alloc (SETFAMILY *M, VEC_ID rows, VEC_ID *rowt, VEC_ID clms, size_t eles){
98
+ VEC_ID i;
99
+ char *buf;
100
+ if ( eles == 0 ) ARY_SUM (M->ele_end, rowt, 0, rows); else M->ele_end = eles;
101
+ calloc2 (buf, (M->ele_end*((M->flag&LOAD_DBLBUF)?2:1) +((M->flag&LOAD_DBLBUF)?MAX(rows,clms):rows)+2)*M->unit, "SETFAMILY_alloc: buf", EXIT);
102
+ M->buf = (QUEUE_INT *)buf;
103
+ malloc2 (M->v, rows+1, "SETFAMILY_alloc: M->v", {free(M->buf);EXIT;});
104
+ ARY_FILL (M->v, 0, rows, INIT_QUEUE);
105
+ M->end = rows;
106
+ M->clms = clms;
107
+ if ( rowt ){
108
+ FLOOP (i, 0, rows){
109
+ M->v[i].v = (QUEUE_INT *)buf;
110
+ buf += (rowt[i] +1)*M->unit;
111
+ M->v[i].end = rowt[i]+1;
112
+ }
113
+ }
114
+ }
115
+
116
+ /* allocate memory according to rows and rowt */
117
+ /* if eles == 0, compute eles from rowt and rows */
118
+ void SETFAMILY_alloc_weight (SETFAMILY *M){
119
+ VEC_ID i;
120
+ calloc2 (M->w, M->end +1, "SETFAMILY_alloc_weight: w", EXIT);
121
+ calloc2 (M->wbuf, M->ele_end*((M->flag&LOAD_DBLBUF)?2:1)+1, "SETFAMILY_alloc_weight: *w", {free(M->w);EXIT;});
122
+ FLOOP (i, 1, M->t) M->w[i] = i? M->w[i-1] + M->v[i-1].t: M->wbuf;
123
+ }
124
+
125
+ /* terminate routine for MAT */
126
+ void SETFAMILY_end (SETFAMILY *M){
127
+ free2 (M->buf);
128
+ free2 (M->buf2);
129
+ free2 (M->v);
130
+ free2 (M->wbuf);
131
+ free2 (M->w);
132
+ *M = INIT_SETFAMILY;
133
+ }
134
+
135
+ /****************************************************************/
136
+ /****************************************************************/
137
+ /****************************************************************/
138
+
139
+ /* read binary file for MAT */
140
+ /* each unit-byte will be one number. if unit<0, the sign of unit is flipped, and each value is minesed the half of the maximum */
141
+ void MAT_load_bin (MAT *M, FILE2 *fp, int unit){
142
+ VEC_ID flag=0, i, j, jj;
143
+ size_t siz=0;
144
+ VEC_VAL z, neg=0;
145
+
146
+ if ( unit < 0 ){
147
+ unit = -unit; flag = 1; neg=128;
148
+ FLOOP (jj, 0, unit-1) neg *= 256;
149
+ }
150
+ if ( M->t == 0 ){ // determine #rows if M->t is 0 (not specified)
151
+ fseek(fp->fp, 0, SEEK_END);
152
+ siz = ftell(fp->fp);
153
+ fseek(fp->fp, 0, SEEK_SET);
154
+ M->t = (VEC_ID)(siz / unit / M->clms);
155
+ if ( M->flag & LOAD_TPOSE ) SWAP_VEC_ID (M->t, M->clms);
156
+ }
157
+ MAT_alloc (M, M->t, M->clms); if (ERROR_MES) return;
158
+ M->end = M->t;
159
+ FLOOP (i, 0, M->t){
160
+ FLOOP (j, 0, M->clms){
161
+ z=0; FLOOP (jj, 0, unit){ z *= 256; z += FILE2_getc (fp); }
162
+ if ( flag ) z -= neg;
163
+ if ( M->flag & LOAD_TPOSE ) M->v[j].v[i] = z;
164
+ else M->v[i].v[j] = z;
165
+ }
166
+ }
167
+ }
168
+
169
+ /* segmentation fault for illegal files */
170
+ /* count/read the number in file for MAT */
171
+ /* if *rows>0, only read count the numbers in a row, for the first scan. */
172
+ void MAT_file_load (MAT *M, FILE2 *fp){
173
+ QUEUE_ID c;
174
+ VEC_ID t=0;
175
+ double p;
176
+
177
+ for ( t=0 ; (FILE_err&2)==0 ; t++){
178
+ ARY_SCAN (c, double, *fp, 0);
179
+ if ( M->flag & LOAD_TPOSE ){
180
+ if ( M->t == 0 ){ M->t = c; if ( M->clms>0 ) break; }
181
+ } else if ( M->clms == 0 ){ M->clms = c; if ( M->t>0 ) break; }
182
+ }
183
+ if ( M->flag & LOAD_TPOSE ){ if ( M->clms==0 ) M->clms = t;} else if ( M->t==0 ) M->t = t;
184
+ FILE2_reset (fp);
185
+ M->end = M->t;
186
+ MAT_alloc (M, M->t, M->clms); if (ERROR_MES) return;
187
+ FLOOP (t, 0, M->t ){
188
+ FLOOP (c, 0, M->clms){
189
+ p = FILE2_read_double(fp);
190
+ if ( M->flag&LOAD_TPOSE ) M->v[c].v[t] = p;
191
+ else M->v[t].v[c] = p;
192
+ if ( c>= ((M->flag&LOAD_TPOSE)? M->t: M->clms) ) break;
193
+ }
194
+ if ( !FILE_err ) FILE2_read_until_newline (fp);
195
+ if ( c>= ((M->flag&LOAD_TPOSE)? M->clms: M->t) ) break;
196
+ }
197
+ }
198
+
199
+ /* load file with switching the format according to the flag */
200
+ void MAT_load (MAT *M, char *fname){
201
+ FILE2 fp;
202
+ int unit=0;
203
+ #ifdef USE_MATH
204
+ VEC_ID i;
205
+ #endif
206
+ if ( M->flag & VEC_LOAD_BIN ) unit = 1;
207
+ else if ( M->flag & VEC_LOAD_BIN2 ) unit = 2;
208
+ else if ( M->flag & VEC_LOAD_BIN4 ) unit = 4;
209
+ if ( M->flag & VEC_LOAD_CENTERIZE ) unit = -unit;
210
+
211
+ FILE2_open (fp, fname, "rb", "MAT_load", EXIT);
212
+ if ( unit ) MAT_load_bin (M, &fp, unit);
213
+ else MAT_file_load (M, &fp);
214
+ FILE2_close (&fp); if (ERROR_MES) EXIT;
215
+ #ifdef USE_MATH
216
+ if ( M->flag&VEC_NORMALIZE ) FLOOP (i, 0, M->t) ARY_NORMALIZE (M->v[i].v,M->v[i].t);
217
+ #endif
218
+ }
219
+
220
+
221
+ /* scan file and read the numbers for SMAT */
222
+ /* flag&1? SMAT, SETFAMILY, flag&2? tuple list format: array list :*/
223
+ void SMAT_file_load (SMAT *M, FILE2 *fp){
224
+ SVEC_VAL z=0;
225
+ VEC_ID flag= (M->type==TYPE_SMAT), t, x, y;
226
+ FILE_COUNT C;
227
+
228
+ C = FILE2_count (fp, (M->flag&(LOAD_ELE+LOAD_TPOSE)) | FILE_COUNT_ROWT, 0, 0, 0, 0, 0);
229
+ if ( M->clms == 0 ) M->clms = C.clms;
230
+ if ( M->t == 0 ) M->t = C.rows;
231
+ if ( flag ) SMAT_alloc (M, M->t, C.rowt, M->clms, 0);
232
+ else SETFAMILY_alloc ((SETFAMILY *)M, M->t, C.rowt, M->clms, 0);
233
+ free2 (C.rowt);
234
+ if ( ERROR_MES ) return;
235
+ FILE2_reset (fp);
236
+ t=0;
237
+ do {
238
+ if ( M->flag&LOAD_ELE ){
239
+ x = (VEC_ID)FILE2_read_int (fp);
240
+ y = (VEC_ID)FILE2_read_int (fp);
241
+ if ( flag ) z = FILE2_read_double (fp);
242
+ if ( FILE_err&4 ) goto LOOP_END2;
243
+ FILE2_read_until_newline (fp);
244
+ } else {
245
+ x = t;
246
+ y = (VEC_ID)FILE2_read_int (fp);
247
+ if ( FILE_err&4 ) goto LOOP_END2;
248
+ if ( flag ) z = FILE2_read_double (fp);
249
+ }
250
+ if ( M->flag&LOAD_TPOSE ) SWAP_VEC_ID (x, y);
251
+ // printf ("%d %d %d %d\n", x, M->t, y, M->clms);
252
+ if ( y >= M->clms || x >= M->t ) goto LOOP_END2;
253
+ // printf ("## %d %d\n", x, y);
254
+ if ( flag ){
255
+ M->v[x].v[M->v[x].t].i = y;
256
+ M->v[x].v[M->v[x].t].a = z;
257
+ M->v[x].t++;
258
+ } else ARY_INS (((SETFAMILY *)M)->v[x], y);
259
+ LOOP_END2:;
260
+ if ( !(M->flag&LOAD_ELE) && (FILE_err&3) ){ t++; if ( t >= M->t ) break; }
261
+ } while ( (FILE_err&2)==0 );
262
+ }
263
+
264
+ /* scan file and read the numbers for SMAT */
265
+ /* flag&1? SMAT, SETFAMILY, flag&2? tuple list format: array list :*/
266
+ void SETFAMILY_load_weight (SETFAMILY *M, char *fname){
267
+ FILE2 fp;
268
+ VEC_ID i;
269
+ QUEUE_ID j;
270
+ if ( M->flag&LOAD_TPOSE ) error ("transope and weight can't be specified simultaneously", EXIT);
271
+ FILE2_open (fp, fname, "r", "SETFAMILY_load_weight", EXIT);
272
+ SETFAMILY_alloc_weight (M);
273
+ FLOOP (i, 0, M->t){
274
+ FLOOP (j, 0, M->v[i].t)
275
+ M->w[i][j] = (WEIGHT)FILE2_read_double (&fp);
276
+ FILE2_read_until_newline (&fp);
277
+ }
278
+ }
279
+
280
+
281
+ /* load file with switching the format according to the flag */
282
+ void SMAT_load (SMAT *M, char *fname){
283
+ FILE2 fp;
284
+ VEC_ID i;
285
+ M->type = TYPE_SMAT;
286
+ FILE2_open (fp, fname, "r", "SMAT_load", EXIT);
287
+ SMAT_file_load (M, &fp);
288
+ FILE2_close (&fp); if (ERROR_MES) EXIT;
289
+ FLOOP (i, 0, M->t) M->v[i].v[M->v[i].t].i = M->clms; // end mark
290
+
291
+ #ifdef USE_MATH
292
+ if ( M->flag&VEC_NORMALIZE ) FLOOP (i, 0, M->t) SVEC_normalize (&M->v[i]); // normalize
293
+ #endif
294
+ if (M->flag&LOAD_INCSORT)
295
+ FLOOP (i, 0, M->t) qsort_VEC_ID ((VEC_ID *)(M->v[i].v), M->v[i].t, sizeof(SVEC_ELE));
296
+ if (M->flag&LOAD_DECSORT)
297
+ FLOOP (i, 0, M->t) qsort_VEC_ID ((VEC_ID *)(M->v[i].v), M->v[i].t, -(int)sizeof(SVEC_ELE));
298
+ if (M->flag&LOAD_RM_DUP)
299
+ FLOOP (i, 0, M->t) MQUE_UNIFY (M->v[i], SVEC_VAL);
300
+ M->eles = M->ele_end;
301
+ }
302
+
303
+ /* sort and duplication check */
304
+ void SETFAMILY_sort (SETFAMILY *M){
305
+ VEC_ID i;
306
+ PERM *p;
307
+ WEIGHT *ww;
308
+ QUEUE Q;
309
+ int flag = (M->flag&LOAD_INCSORT)? 1: ((M->flag&LOAD_DECSORT)? -1: 0);
310
+ if ( flag ){ // sort items in each row
311
+ malloc2 (p, M->clms, "SETFAMILY_sort: p", EXIT);
312
+ FLOOP (i, 0, M->t)
313
+ QUEUE_perm_WEIGHT (&M->v[i], M->w?M->w[i]:NULL, p, flag);
314
+ free (p);
315
+ }
316
+ flag = ((M->flag&LOAD_SIZSORT)? ((M->flag&LOAD_DECROWSORT)? -1: 1): 0) *sizeof(QUEUE);
317
+ if ( flag ){ // sort the rows
318
+ p = qsort_perm_VECt ((VEC *)M->v, M->t, flag);
319
+ ARY_INVPERMUTE_ (M->w, p, ww, M->t);
320
+ ARY_INVPERMUTE (M->v, p, Q, M->t, "SETFAMILY_sort: ARY_INVPERMUTE", EXIT);
321
+ free (p);
322
+ }
323
+ if (M->flag&LOAD_RM_DUP){ // unify the duplicated edges
324
+ FLOOP (i, 0, M->t)
325
+ QUEUE_rm_dup_WEIGHT (&M->v[i], M->w?M->w[i]:NULL);
326
+ }
327
+ }
328
+
329
+ /* scan file and load the data from file to SMAT structure */
330
+ void SETFAMILY_load (SETFAMILY *M, char *fname, char *wfname){
331
+ FILE2 fp;
332
+ VEC_ID i;
333
+ M->type = TYPE_SETFAMILY;
334
+ FILE2_open (fp, fname, "r", "SETFAMILY_load", EXIT);
335
+ SMAT_file_load ((SMAT *)M, &fp);
336
+ FILE2_close (&fp); if(ERROR_MES) EXIT;
337
+ FLOOP (i, 0, M->t) M->v[i].v[M->v[i].t] = M->clms; // end mark
338
+
339
+ if ( !(M->flag&LOAD_ELE) && wfname ){
340
+ SETFAMILY_load_weight (M, wfname);
341
+ if ( ERROR_MES ){ SETFAMILY_end (M); EXIT; }
342
+ }
343
+
344
+ SETFAMILY_sort (M);
345
+ M->eles = M->ele_end;
346
+ }
347
+
348
+ /* print routines */
349
+ void MAT_print (FILE *fp, MAT *M){
350
+ VEC *V;
351
+ MQUE_FLOOP (*M, V) ARY_FPRINT (fp, V->v, 0, V->t, VEC_VALF" ");
352
+ }
353
+ void SVEC_print (FILE *fp, SVEC *V){
354
+ SVEC_ELE *x;
355
+ MQUE_FLOOP (*V, x) fprintf (fp, "("QUEUE_IDF","SVEC_VALF") ", (*x).i, (*x).a);
356
+ fputc ('\n', fp);
357
+ }
358
+ void SMAT_print (FILE *fp, SMAT *M){
359
+ SVEC *V;
360
+ MQUE_FLOOP (*M, V) SVEC_print (fp, V);
361
+ }
362
+ void SETFAMILY_print (FILE *fp, SETFAMILY *M){
363
+ QUEUE *V;
364
+ MQUE_FLOOP (*M, V) ARY_FPRINT (fp, V->v, 0, V->t, QUEUE_INTF" ");
365
+ }
366
+
367
+ /*
368
+ void SETFAMILY_print_WEIGHT (FILE *fp, SETFAMILY *M){
369
+ if ( M->w ){
370
+ printf (","); fprint_WEIGHT (stdout, M->w[i][j]); }
371
+ printf ("\n");
372
+ }
373
+ */
374
+
375
+ /****************************************************************/
376
+ /** Inner product routines **************************************/
377
+ /****************************************************************/
378
+ SVEC_VAL2 SVEC_inpro (SVEC *V1, SVEC *V2){
379
+ VEC_ID i1, i2=0;
380
+ SVEC_VAL2 sum=0;
381
+ FLOOP (i1, 0, V1->t){
382
+ while (V2->v[i2].i < V1->v[i1].i) i2++;
383
+ if (V2->v[i2].i == V1->v[i1].i) sum += ((SVEC_VAL2)V2->v[i2].a)*V1->v[i1].a;
384
+ }
385
+ return (sum);
386
+ }
387
+
388
+
389
+ /* get ith vector */
390
+ void *MVEC_getvec (void *M, int i, int flag){
391
+ MAT *MM = (MAT *)M;
392
+ if (MM->type==TYPE_MAT) return (&MM->v[i]);
393
+ if (MM->type==TYPE_SMAT) return (&((SVEC *)M)->v[i]);
394
+ if (MM->type==TYPE_SETFAMILY) return (&((QUEUE *)M)->v[i]);
395
+ return (NULL);
396
+ }
397
+
398
+ #ifdef USE_MATH
399
+
400
+ /****************************************************************/
401
+ /** Norm computation and normalization ************************/
402
+ /****************************************************************/
403
+ double SVEC_norm (SVEC *V){
404
+ SVEC_ELE *v;
405
+ double sum=0;
406
+ MQUE_FLOOP (*V, v) sum += ((double)(v->a)) * (v->a);
407
+ return (sqrt(sum));
408
+ }
409
+ void SVEC_normalize (SVEC *V){
410
+ SVEC_ELE *v;
411
+ double norm = SVEC_norm (V);
412
+ MQUE_FLOOP (*V, v) v->a /= norm;
413
+ }
414
+
415
+ /****************************************************************/
416
+ /** Euclidean distance routines *********************************/
417
+ /****************************************************************/
418
+
419
+ /* compute the inner product of two vectors */
420
+ double VEC_eucdist (VEC *V1, VEC *V2){
421
+ VEC_ID i, end=MIN(V1->end,V2->end);
422
+ double sum=0, a0, a1, a2, a3;
423
+ for (i=0 ; i<end ; i+=4){
424
+ a0 = ((double)V1->v[i])- ((double)V2->v[i]);
425
+ a1 = ((double)V1->v[i+1])- ((double)V2->v[i+1]);
426
+ a2 = ((double)V1->v[i+2])- ((double)V2->v[i+2]);
427
+ a3 = ((double)V1->v[i+3])- ((double)V2->v[i+3]);
428
+ sum += a0*a0 + a1*a1 + a2*a2 + a3*a3;
429
+ }
430
+ if ( i+1<end ){
431
+ a0 = ((double)V1->v[i])- ((double)V2->v[i]);
432
+ a1 = ((double)V1->v[i+1])- ((double)V2->v[i+1]);
433
+ sum += a0*a0 + a1*a1;
434
+ if ( i+2<end ){ a2 = ((double)V1->v[i+2])- ((double)V2->v[i+2]); sum += a2*a2; }
435
+ } else if ( i<end ){ a0 = ((double)V1->v[i])- ((double)V2->v[i]); sum += a0*a0; }
436
+ return (sqrt(sum));
437
+ }
438
+
439
+ /* compute the inner product of two vectors */
440
+ double SVEC_eucdist (SVEC *V1, SVEC *V2){
441
+ VEC_ID i1, i2;
442
+ double sum=0, a;
443
+ for ( i1=i2=0 ; i1<V1->t && i2<V2->t ; ){
444
+ if (V2->v[i2].i > V1->v[i1].i) a = V1->v[i1].a;
445
+ else if (V2->v[i2].i < V1->v[i1].i) a = V2->v[i2].a;
446
+ else a = ((double)V2->v[i2].a) - ((double)V1->v[i1].a);
447
+ sum += a*a;
448
+ }
449
+ return (sqrt(sum));
450
+ }
451
+
452
+ /* compute the inner product of two vectors */
453
+ double VEC_SVEC_eucdist (VEC *V1, SVEC *V2){
454
+ VEC_ID i, i2=0;
455
+ double sum=0, a;
456
+ FLOOP (i, 0, V1->end){
457
+ if ( i < V2->v[i2].i ) a = V1->v[i];
458
+ else { a = ((double)V1->v[i]) - ((double)V2->v[i2].a); i2++; }
459
+ sum += a*a;
460
+ }
461
+ return (sqrt(sum));
462
+ }
463
+
464
+ /**********************************************************/
465
+ /* Euclidean distance of vector and set */
466
+ double VEC_QUEUE_eucdist (VEC *V, QUEUE *Q){
467
+ VEC_ID i;
468
+ QUEUE_ID i2=0;
469
+ double sum=0, a;
470
+ FLOOP (i, 0, V->end){
471
+ if ( i < Q->v[i2] ) a = V->v[i];
472
+ else { a = ((double)V->v[i]) - 1.0; i2++; }
473
+ sum += a*a;
474
+ }
475
+ return (sqrt(sum));
476
+ }
477
+
478
+ /* compute Euclidean distance of two sets */
479
+ double QUEUE_eucdist (QUEUE *Q1, QUEUE *Q2){
480
+ double f;
481
+ MQUE_UNION(f, *Q1, *Q2);
482
+ return (sqrt(f));
483
+ }
484
+
485
+ double MVEC_norm (void *V){
486
+ VEC *VV = (VEC *)V;
487
+ double p;
488
+ if (VV->type==TYPE_VEC){ ARY_NORM (p, VV->v, VV->t); return (p); }
489
+ if (VV->type==TYPE_SVEC) return (SVEC_norm ((SVEC *)V));
490
+ if (VV->type==TYPE_QUEUE) return (sqrt(((QUEUE*)V)->t));
491
+ return (0.0);
492
+ }
493
+
494
+ double MMAT_norm_i (void *M, int i){
495
+ MAT *MM = (MAT *)M;
496
+ double p;
497
+ if (MM->type==TYPE_MAT){ ARY_NORM (p, MM->v[i].v, MM->v[i].t); return (p); }
498
+ if (MM->type==TYPE_SMAT) return (SVEC_norm (&((SMAT *)M)->v[i]));
499
+ if (MM->type==TYPE_SETFAMILY) return (sqrt (((SETFAMILY *)M)->v[i].t));
500
+ return (0.0);
501
+ }
502
+
503
+ double MVEC_eucdist (void *V, void *U){
504
+ VEC *VV = (VEC *)V;
505
+ double p;
506
+ if (VV->type==TYPE_VEC) return (VEC_eucdist ((VEC *)V, (VEC *)U));
507
+ if (VV->type==TYPE_SVEC) return (SVEC_eucdist ((SVEC *)V, (SVEC *)U));
508
+ if (VV->type==TYPE_QUEUE){ MQUE_DIF (p, *((QUEUE *)V), *((QUEUE *)U)); return (sqrt(p));}
509
+ return (0.0);
510
+ }
511
+
512
+ double MMAT_eucdist_ij (void *M, int i, int j){
513
+ MAT *MM=(MAT *)M;
514
+ double p;
515
+ if (MM->type==TYPE_MAT) return (VEC_eucdist ( &MM->v[i], &MM->v[j] ));
516
+ if (MM->type==TYPE_SMAT) return (SVEC_eucdist ( &((SMAT *)M)->v[i], &((SMAT *)M)->v[j]));
517
+ if (MM->type==TYPE_SETFAMILY){ MQUE_DIF (p, ((SETFAMILY *)M)->v[i], ((SETFAMILY *)M)->v[j]); return (sqrt(p)); }
518
+ return (0.0);
519
+ }
520
+
521
+
522
+ #endif
523
+
524
+ /**********************************************************/
525
+ /** multi-vector routines ******************************/
526
+ /**********************************************************/
527
+
528
+ /* compute the inner product, Euclidean distance for multi vector */
529
+ double MVEC_inpro (void *V, void *U){
530
+ VEC *VV = (VEC *)V, *UU = (VEC *)U;
531
+ double p;
532
+ if (VV->type==TYPE_VEC){
533
+ if (UU->type==TYPE_VEC){ ARY_INPRO (p, VV->v, UU->v, VV->t); return (p); }
534
+ if (UU->type==TYPE_SVEC){ ARY_SVEC_INPRO (p, *((SVEC *)U), VV->v); return (p); }
535
+ if (UU->type==TYPE_QUEUE){ ARY_QUEUE_INPRO (p, *((QUEUE *)U), VV->v); return (p); }
536
+ }
537
+ if (VV->type==TYPE_SVEC){
538
+ if (UU->type==TYPE_VEC){ ARY_SVEC_INPRO (p, *((SVEC *)V), UU->v); return (p);}
539
+ if (UU->type==TYPE_SVEC) return (SVEC_inpro ((SVEC *)V, (SVEC *)U));
540
+ // if (UU->type==TYPE_QUEUE) return (VEC_QUEUE_inpro (V, U));
541
+ }
542
+ if (VV->type==TYPE_QUEUE){
543
+ if (UU->type==TYPE_VEC){ ARY_QUEUE_INPRO (p, *((QUEUE *)V), UU->v); return (p); }
544
+ // else if (UU->type==TYPE_SVEC) return (SVEC_inpro (V, U));
545
+ if (UU->type==TYPE_QUEUE){ MQUE_INTSEC (p, *((QUEUE *)V), *((QUEUE *)U)); return (p);}
546
+ }
547
+ return (0.0);
548
+ }
549
+
550
+ double MVEC_double_inpro (void *V, double *w){
551
+ VEC *VV = (VEC *)V;
552
+ double p;
553
+ if (VV->type==TYPE_VEC){ ARY_INPRO (p, VV->v, w, VV->t); return (p); }
554
+ if (VV->type==TYPE_SVEC){ ARY_SVEC_INPRO (p, *((SVEC *)V), w); return (p); }
555
+ if (VV->type==TYPE_QUEUE){ ARY_QUEUE_INPRO (p, *((QUEUE *)V), w); return (p); }
556
+ return (0.0);
557
+ }
558
+
559
+ /* compute the inner product, euclidean distance for i,jth vector */
560
+ double MMAT_inpro_ij (void *M, int i, int j){
561
+ MAT *MM = (MAT *)M;
562
+ double p;
563
+ if (MM->type==TYPE_MAT){ ARY_INPRO (p, MM->v[i].v, MM->v[j].v, MM->v[j].t); return (p); }
564
+ if (MM->type==TYPE_SMAT) return (SVEC_inpro (&((SMAT *)M)->v[i], &((SMAT *)M)->v[j]));
565
+ if (MM->type==TYPE_SETFAMILY){
566
+ p = QUEUE_intsec_ (&((SETFAMILY *)M)->v[i], &((SETFAMILY *)M)->v[j]); return (p); }
567
+ return (0.0);
568
+ }
569
+
570
+ double MMAT_double_inpro_i (void *M, int i, double *w){
571
+ MAT *MM = (MAT *)M;
572
+ double p;
573
+ if (MM->type==TYPE_MAT){ ARY_INPRO (p, MM->v[i].v, w, MM->v[i].t); return (p); }
574
+ if (MM->type==TYPE_SMAT){ ARY_SVEC_INPRO (p, ((SMAT *)M)->v[i], w); return (p); }
575
+ if (MM->type==TYPE_SETFAMILY){ ARY_QUEUE_INPRO (p, ((SETFAMILY *)M)->v[i], w); return (p); }
576
+ return (0.0);
577
+ }
578
+
579
+
580
+
581
+ #endif
582
+
583
+