nysol-take 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (161) hide show
  1. checksums.yaml +7 -0
  2. data/bin/mbiclique.rb +317 -0
  3. data/bin/mbipolish.rb +362 -0
  4. data/bin/mccomp.rb +235 -0
  5. data/bin/mclique.rb +295 -0
  6. data/bin/mclique2g.rb +105 -0
  7. data/bin/mcliqueInfo.rb +203 -0
  8. data/bin/mfriends.rb +202 -0
  9. data/bin/mgdiff.rb +252 -0
  10. data/bin/mhifriend.rb +456 -0
  11. data/bin/mhipolish.rb +465 -0
  12. data/bin/mitemset.rb +168 -0
  13. data/bin/mpal.rb +410 -0
  14. data/bin/mpolishing.rb +399 -0
  15. data/bin/msequence.rb +165 -0
  16. data/bin/mtra2g.rb +476 -0
  17. data/bin/mtra2gc.rb +360 -0
  18. data/ext/grhfilrun/extconf.rb +12 -0
  19. data/ext/grhfilrun/grhfilrun.c +85 -0
  20. data/ext/grhfilrun/src/_sspc.c +358 -0
  21. data/ext/grhfilrun/src/aheap.c +545 -0
  22. data/ext/grhfilrun/src/aheap.h +251 -0
  23. data/ext/grhfilrun/src/base.c +92 -0
  24. data/ext/grhfilrun/src/base.h +59 -0
  25. data/ext/grhfilrun/src/fstar.c +497 -0
  26. data/ext/grhfilrun/src/fstar.h +80 -0
  27. data/ext/grhfilrun/src/grhfil.c +214 -0
  28. data/ext/grhfilrun/src/itemset.c +713 -0
  29. data/ext/grhfilrun/src/itemset.h +170 -0
  30. data/ext/grhfilrun/src/problem.c +415 -0
  31. data/ext/grhfilrun/src/problem.h +179 -0
  32. data/ext/grhfilrun/src/queue.c +533 -0
  33. data/ext/grhfilrun/src/queue.h +182 -0
  34. data/ext/grhfilrun/src/sample.c +19 -0
  35. data/ext/grhfilrun/src/sspc.c +597 -0
  36. data/ext/grhfilrun/src/sspc2.c +491 -0
  37. data/ext/grhfilrun/src/stdlib2.c +1482 -0
  38. data/ext/grhfilrun/src/stdlib2.h +892 -0
  39. data/ext/grhfilrun/src/trsact.c +817 -0
  40. data/ext/grhfilrun/src/trsact.h +160 -0
  41. data/ext/grhfilrun/src/vec.c +745 -0
  42. data/ext/grhfilrun/src/vec.h +172 -0
  43. data/ext/lcmrun/extconf.rb +20 -0
  44. data/ext/lcmrun/lcmrun.cpp +99 -0
  45. data/ext/lcmrun/src/aheap.c +216 -0
  46. data/ext/lcmrun/src/aheap.h +111 -0
  47. data/ext/lcmrun/src/base.c +92 -0
  48. data/ext/lcmrun/src/base.h +59 -0
  49. data/ext/lcmrun/src/itemset.c +496 -0
  50. data/ext/lcmrun/src/itemset.h +157 -0
  51. data/ext/lcmrun/src/lcm.c +427 -0
  52. data/ext/lcmrun/src/problem.c +349 -0
  53. data/ext/lcmrun/src/problem.h +177 -0
  54. data/ext/lcmrun/src/queue.c +528 -0
  55. data/ext/lcmrun/src/queue.h +176 -0
  56. data/ext/lcmrun/src/sgraph.c +359 -0
  57. data/ext/lcmrun/src/sgraph.h +173 -0
  58. data/ext/lcmrun/src/stdlib2.c +1282 -0
  59. data/ext/lcmrun/src/stdlib2.h +823 -0
  60. data/ext/lcmrun/src/trsact.c +747 -0
  61. data/ext/lcmrun/src/trsact.h +159 -0
  62. data/ext/lcmrun/src/vec.c +731 -0
  63. data/ext/lcmrun/src/vec.h +171 -0
  64. data/ext/lcmseq0run/extconf.rb +20 -0
  65. data/ext/lcmseq0run/lcmseq0run.cpp +59 -0
  66. data/ext/lcmseq0run/src/aheap.c +216 -0
  67. data/ext/lcmseq0run/src/aheap.h +111 -0
  68. data/ext/lcmseq0run/src/base.c +92 -0
  69. data/ext/lcmseq0run/src/base.h +59 -0
  70. data/ext/lcmseq0run/src/itemset.c +518 -0
  71. data/ext/lcmseq0run/src/itemset.h +157 -0
  72. data/ext/lcmseq0run/src/itemset_zero.c +522 -0
  73. data/ext/lcmseq0run/src/lcm_seq.c +446 -0
  74. data/ext/lcmseq0run/src/lcm_seq_zero.c +446 -0
  75. data/ext/lcmseq0run/src/problem.c +439 -0
  76. data/ext/lcmseq0run/src/problem.h +179 -0
  77. data/ext/lcmseq0run/src/problem_zero.c +439 -0
  78. data/ext/lcmseq0run/src/queue.c +533 -0
  79. data/ext/lcmseq0run/src/queue.h +182 -0
  80. data/ext/lcmseq0run/src/stdlib2.c +1350 -0
  81. data/ext/lcmseq0run/src/stdlib2.h +864 -0
  82. data/ext/lcmseq0run/src/trsact.c +747 -0
  83. data/ext/lcmseq0run/src/trsact.h +159 -0
  84. data/ext/lcmseq0run/src/vec.c +779 -0
  85. data/ext/lcmseq0run/src/vec.h +172 -0
  86. data/ext/lcmseqrun/extconf.rb +20 -0
  87. data/ext/lcmseqrun/lcmseqrun.cpp +101 -0
  88. data/ext/lcmseqrun/src/aheap.c +216 -0
  89. data/ext/lcmseqrun/src/aheap.h +111 -0
  90. data/ext/lcmseqrun/src/base.c +92 -0
  91. data/ext/lcmseqrun/src/base.h +59 -0
  92. data/ext/lcmseqrun/src/itemset.c +518 -0
  93. data/ext/lcmseqrun/src/itemset.h +157 -0
  94. data/ext/lcmseqrun/src/itemset_zero.c +522 -0
  95. data/ext/lcmseqrun/src/lcm_seq.c +447 -0
  96. data/ext/lcmseqrun/src/lcm_seq_zero.c +446 -0
  97. data/ext/lcmseqrun/src/problem.c +439 -0
  98. data/ext/lcmseqrun/src/problem.h +179 -0
  99. data/ext/lcmseqrun/src/problem_zero.c +439 -0
  100. data/ext/lcmseqrun/src/queue.c +533 -0
  101. data/ext/lcmseqrun/src/queue.h +182 -0
  102. data/ext/lcmseqrun/src/stdlib2.c +1350 -0
  103. data/ext/lcmseqrun/src/stdlib2.h +864 -0
  104. data/ext/lcmseqrun/src/trsact.c +747 -0
  105. data/ext/lcmseqrun/src/trsact.h +159 -0
  106. data/ext/lcmseqrun/src/vec.c +779 -0
  107. data/ext/lcmseqrun/src/vec.h +172 -0
  108. data/ext/lcmtransrun/extconf.rb +18 -0
  109. data/ext/lcmtransrun/lcmtransrun.cpp +264 -0
  110. data/ext/macerun/extconf.rb +20 -0
  111. data/ext/macerun/macerun.cpp +57 -0
  112. data/ext/macerun/src/aheap.c +217 -0
  113. data/ext/macerun/src/aheap.h +112 -0
  114. data/ext/macerun/src/itemset.c +491 -0
  115. data/ext/macerun/src/itemset.h +158 -0
  116. data/ext/macerun/src/mace.c +503 -0
  117. data/ext/macerun/src/problem.c +346 -0
  118. data/ext/macerun/src/problem.h +174 -0
  119. data/ext/macerun/src/queue.c +529 -0
  120. data/ext/macerun/src/queue.h +177 -0
  121. data/ext/macerun/src/sgraph.c +360 -0
  122. data/ext/macerun/src/sgraph.h +174 -0
  123. data/ext/macerun/src/stdlib2.c +993 -0
  124. data/ext/macerun/src/stdlib2.h +811 -0
  125. data/ext/macerun/src/vec.c +634 -0
  126. data/ext/macerun/src/vec.h +170 -0
  127. data/ext/sspcrun/extconf.rb +20 -0
  128. data/ext/sspcrun/src/_sspc.c +358 -0
  129. data/ext/sspcrun/src/aheap.c +545 -0
  130. data/ext/sspcrun/src/aheap.h +251 -0
  131. data/ext/sspcrun/src/base.c +92 -0
  132. data/ext/sspcrun/src/base.h +59 -0
  133. data/ext/sspcrun/src/fstar.c +496 -0
  134. data/ext/sspcrun/src/fstar.h +80 -0
  135. data/ext/sspcrun/src/grhfil.c +213 -0
  136. data/ext/sspcrun/src/itemset.c +713 -0
  137. data/ext/sspcrun/src/itemset.h +170 -0
  138. data/ext/sspcrun/src/problem.c +415 -0
  139. data/ext/sspcrun/src/problem.h +179 -0
  140. data/ext/sspcrun/src/queue.c +533 -0
  141. data/ext/sspcrun/src/queue.h +182 -0
  142. data/ext/sspcrun/src/sample.c +19 -0
  143. data/ext/sspcrun/src/sspc.c +598 -0
  144. data/ext/sspcrun/src/sspc2.c +491 -0
  145. data/ext/sspcrun/src/stdlib2.c +1482 -0
  146. data/ext/sspcrun/src/stdlib2.h +892 -0
  147. data/ext/sspcrun/src/trsact.c +817 -0
  148. data/ext/sspcrun/src/trsact.h +160 -0
  149. data/ext/sspcrun/src/vec.c +745 -0
  150. data/ext/sspcrun/src/vec.h +172 -0
  151. data/ext/sspcrun/sspcrun.cpp +54 -0
  152. data/lib/nysol/enumLcmEp.rb +338 -0
  153. data/lib/nysol/enumLcmEsp.rb +284 -0
  154. data/lib/nysol/enumLcmIs.rb +275 -0
  155. data/lib/nysol/enumLcmSeq.rb +143 -0
  156. data/lib/nysol/items.rb +201 -0
  157. data/lib/nysol/seqDB.rb +256 -0
  158. data/lib/nysol/take.rb +39 -0
  159. data/lib/nysol/taxonomy.rb +113 -0
  160. data/lib/nysol/traDB.rb +257 -0
  161. metadata +239 -0
@@ -0,0 +1,170 @@
1
+ /* library for sparse vector */
2
+ /* Takeaki Uno 27/Dec/2008 */
3
+
4
+ #ifndef _vec_h_
5
+ #define _vec_h_
6
+
7
+ #define STDLIB2_USE_MATH
8
+
9
+ #include"math.h"
10
+ #include"queue.h"
11
+
12
+ #ifndef SVEC_VAL
13
+ #ifdef SVEC_VAL_INT
14
+ #define SVEC_VAL int
15
+ #define SVEC_VAL2 LONG
16
+ #define SVEC_VAL_END INTHUGE
17
+ #define SVEC_VAL2_END LONGHUGE
18
+ #define SVEC_VALF "%d"
19
+ #else
20
+ #define SVEC_VAL double
21
+ #define SVEC_VAL2 double
22
+ #define SVEC_VAL_END DOUBLEHUGE
23
+ #define SVEC_VAL2_END DOUBLEHUGE
24
+ #define SVEC_VALF "%f"
25
+ #endif
26
+ #endif
27
+
28
+ #define VEC_LOAD_BIN 16777216 // (1<<24) read binary file
29
+ #define VEC_LOAD_BIN2 33554432 // (1<<25) read binary file with 2byte for each number
30
+ #define VEC_LOAD_BIN4 67108864 // (1<<26) read binary file with 4byte for each number
31
+ #define VEC_LOAD_CENTERIZE 134217728 // (1<<27) read binary file, and minus the half(128) from each number
32
+ #define VEC_NORMALIZE 268435456 // (1<<28) normalize the vectors -- presumably applied after loading; TODO confirm in vec.c (the earlier comment here was a copy of VEC_LOAD_CENTERIZE's)
33
+
34
+ /* matrix: a set of VEC rows; field meanings below marked "presumably" are not established by this header -- confirm in vec.c */
35
+ typedef struct {
36
+ unsigned char type; // mark to identify type of the structure
37
+ char *fname; // input file name
38
+ int flag; // flag
39
+
40
+ VEC *v; // presumably the array of row vectors
41
+ VEC_ID end; // presumably the allocated number of rows -- TODO confirm
42
+ VEC_ID t; // presumably the number of rows in use -- TODO confirm
43
+ VEC_VAL *buf, *buf2; // presumably the storage backing the rows -- TODO confirm
44
+ VEC_ID clms; // presumably the number of columns
45
+ size_t eles; // presumably the total number of elements -- TODO confirm
46
+ } MAT;
47
+
48
+ /* sparse vector, element: one (index, value) pair */
49
+ typedef struct {
50
+ QUEUE_ID i; // index of the element; ARY_SVEC_INPRO uses it to subscript the dense operand
51
+ SVEC_VAL a; // value of the element; ARY_SVEC_INPRO multiplies it (as double) into the sum
52
+ } SVEC_ELE;
53
+
54
+ /* sparse vector, vector: an array of SVEC_ELE */
55
+ typedef struct {
56
+ unsigned char type; // mark to identify type of the structure
57
+ SVEC_ELE *v; // the elements
58
+ VEC_ID end; // presumably the allocated number of elements -- TODO confirm
59
+ VEC_ID t; // number of elements in use, going by ARY_SVEC_INPRO, which scans indices 0..t-1 of v
60
+ } SVEC;
61
+
62
+ /* sparse vector, matrix: a set of SVEC rows; "presumably" notes are not established by this header -- confirm in vec.c */
63
+ typedef struct {
64
+ unsigned char type; // mark to identify type of the structure
65
+ char *fname; // input file name
66
+ int flag; // flag
67
+
68
+ SVEC *v; // presumably the array of sparse row vectors
69
+ VEC_ID end; // presumably the allocated number of rows -- TODO confirm
70
+ VEC_ID t; // presumably the number of rows in use -- TODO confirm
71
+ SVEC_ELE *buf, *buf2; // presumably the element storage shared by the rows -- TODO confirm
72
+ VEC_ID clms; // presumably the number of columns
73
+ size_t eles, ele_end; // presumably the used / allocated number of elements -- TODO confirm
74
+ } SMAT;
75
+
76
+ /* set family: a collection of QUEUE sets with optional weights; field order must stay in sync with the positional initializer INIT_SETFAMILY_ below */
77
+ typedef struct {
78
+ unsigned char type; // mark to identify type of the structure
79
+ char *fname; // input file name
80
+ int flag; // flag
81
+
82
+ QUEUE *v; // presumably the array of sets, one QUEUE each
83
+ VEC_ID end; // presumably the allocated number of sets -- TODO confirm
84
+ VEC_ID t; // presumably the number of sets in use -- TODO confirm
85
+ QUEUE_INT *buf, *buf2; // presumably the storage backing the sets -- TODO confirm
86
+ VEC_ID clms; // presumably the number of distinct items (columns) -- TODO confirm
87
+ size_t eles, ele_end; // presumably the used / allocated number of elements -- TODO confirm
88
+ WEIGHT *cw, *rw, **w, *wbuf; // weights; going by the file-name fields below: cw column weights, rw row weights, w per-element weights (wbuf presumably their storage)
89
+ int unit; // initialized to sizeof(QUEUE_INT) by INIT_SETFAMILY_
90
+ char *wfname, *cwfname, *rwfname; // weight file name
91
+ } SETFAMILY;
92
+
93
+ #define INIT_SETFAMILY_ {TYPE_SETFAMILY,NULL,0,NULL,0,0,NULL,NULL,0,0,0,NULL,NULL,NULL,NULL,sizeof(QUEUE_INT),NULL,NULL,NULL}
94
+
95
+ extern MAT INIT_MAT; // initial-value objects for each structure; defined outside this header
96
+ extern SVEC INIT_SVEC;
97
+ extern SMAT INIT_SMAT;
98
+ extern SETFAMILY INIT_SETFAMILY;
99
+ /* QSORT_TYPE_HEADER is a project macro not visible here; presumably it declares sort routines for the given value type */
100
+ QSORT_TYPE_HEADER (SVEC_VAL, SVEC_VAL)
101
+ QSORT_TYPE_HEADER (SVEC_VAL2, SVEC_VAL2)
102
+ /* inner products against a dense array V: ARY_QUEUE_INPRO sets f to the sum of V[U.v[k]], ARY_SVEC_INPRO to the sum of U.v[k].a*V[U.v[k].i], for k over the U.t entries of U (assuming FLOOP(v,a,b) iterates v from a to b-1); both use the common_* variables as loop counters */
103
+ #define ARY_QUEUE_INPRO(f,U,V) do{(f)=0;FLOOP(common_QUEUE_ID, 0, (QUEUE_ID)(U).t)(f)+=(V)[(U).v[common_QUEUE_ID]];}while(0)
104
+ #define ARY_SVEC_INPRO(f,U,V) do{(f)=0;FLOOP(common_VEC_ID, 0, (VEC_ID)(U).t)(f)+=((double)(U).v[common_VEC_ID].a)*(V)[(U).v[common_VEC_ID].i];}while(0)
105
+
106
+ /* terminate routine for VEC */
107
+ void VEC_end (VEC *V);
108
+ void MAT_end (MAT *M);
109
+ void SVEC_end (SVEC *V);
110
+ void SMAT_end (SMAT *M);
111
+ void SETFAMILY_end (SETFAMILY *M);
112
+
113
+ /* allocate memory according to rows and rowt */
114
+ void VEC_alloc (VEC *V, VEC_ID clms);
115
+ void MAT_alloc (MAT *M, VEC_ID rows, VEC_ID clms);
116
+ void SVEC_alloc (SVEC *V, VEC_ID end);
117
+ void SMAT_alloc (SMAT *M, VEC_ID rows, VEC_ID *rowt, VEC_ID clms, size_t eles);
118
+ void SETFAMILY_alloc (SETFAMILY *M, VEC_ID rows, VEC_ID *rowt, VEC_ID clms, size_t eles);
119
+ void SETFAMILY_alloc_weight (SETFAMILY *M);
120
+
121
+ /* count/read the number in file for MAT */
122
+ /* if *rows>0, only read count the numbers in a row, for the first scan. */
123
+ void MAT_load_bin (MAT *M, FILE2 *fp, int unit);
124
+ void MAT_file_load (MAT *M, FILE2 *fp);
125
+ void MAT_load (MAT *M);
126
+ void SMAT_load (SMAT *M);
127
+ void SETFAMILY_load (SETFAMILY *M);
128
+ void SETFAMILY_load_weight (SETFAMILY *M);
129
+ void SETFAMILY_load_row_weight (SETFAMILY *M);
130
+ void SETFAMILY_load_column_weight (SETFAMILY *M);
131
+
132
+ void MAT_print (FILE *fp, MAT *M);
133
+ void SVEC_print (FILE *fp, SVEC *M);
134
+ void SMAT_print (FILE *fp, SMAT *M);
135
+ void SETFAMILY_print (FILE *fp, SETFAMILY *M);
136
+ void SETFAMILY_print_weight (FILE *fp, SETFAMILY *M);
137
+
138
+
139
+ /* norm, normalization **************************/
140
+ double SVEC_norm (SVEC *V);
141
+ void SVEC_normalize (SVEC *V);
142
+
143
+ /* inner product **************************/
144
+ SVEC_VAL2 SVEC_inpro (SVEC *V1, SVEC *V2);
145
+
146
+ /** Euclidean distance routines *********************************/
147
+ double VEC_eucdist (VEC *V1, VEC *V2);
148
+ double SVEC_eucdist (SVEC *V1, SVEC *V2);
149
+ double VEC_SVEC_eucdist (VEC *V1, SVEC *V2);
150
+ double QUEUE_eucdist (QUEUE *Q1, QUEUE *Q2);
151
+ double VEC_QUEUE_eucdist (VEC *V, QUEUE *Q);
152
+
153
+ void VEC_rand_gaussian (VEC *V);
154
+
155
+ double VEC_linfdist (VEC *V1, VEC *V2);
156
+
157
+ /* compute the inner product, Euclidean distance for multi vector */
158
+ double MVEC_norm (void *V);
159
+ double MVEC_inpro (void *V, void *U);
160
+ double MVEC_double_inpro (void *V, double *p);
161
+ double MVEC_eucdist (void *V, void *U);
162
+
163
+ /* compute the inner product, euclidean distance for i,jth vector */
164
+ double MMAT_inpro_ij (void *M, int i, int j);
165
+ double MMAT_double_inpro_i (void *M, int i, double *p);
166
+ double MMAT_eucdist_ij (void *M, int i, int j);
167
+ double MMAT_norm_i (void *M, int i);
168
+
169
+
170
+ #endif
@@ -0,0 +1,20 @@
1
+ require "rubygems" # build script for the nysol/sspcrun native extension
2
+ require "mkmf"
3
+
4
+ unless have_library("kgmod3") # abort the build early when the kgmod3 library cannot be linked
5
+ puts("need libkgmod.") # NOTE(review): the library probed for and linked below is kgmod3
6
+ puts("refer https://github.com/nysol/mcmd")
7
+ exit 1
8
+ end
9
+
10
+
11
+ cp = "$(srcdir)" # kept as literal text; presumably expanded by make in the generated Makefile
12
+ # the same flag set is assigned to all three variables below; these assignments replace, rather than extend, the previous values
13
+ $CFLAGS = " -O3 -Wall -I. -I#{cp}/src -DB_STATIC -D_NO_MAIN_ -DLINE -fPIC -Wno-error=format-security"
14
+ $CPPFLAGS = " -O3 -Wall -I. -I#{cp}/src -DB_STATIC -D_NO_MAIN_ -DLINE -fPIC -Wno-error=format-security"
15
+ $CXXFLAGS = " -O3 -Wall -I. -I#{cp}/src -DB_STATIC -D_NO_MAIN_ -DLINE -fPIC -Wno-error=format-security"
16
+
17
+ $LOCAL_LIBS += " -lstdc++ -lkgmod3 -lm"
18
+
19
+ create_makefile("nysol/sspcrun")
20
+
@@ -0,0 +1,358 @@
1
+ /* SSPC: Similar Set Pair Comparison */
2
+ /* 2007/11/30 Takeaki Uno, e-mail:uno@nii.jp,
3
+ homepage: http://research.nii.ac.jp/~uno/index.html */
4
+ /* This program is available for only academic use, basically.
5
+ Anyone can modify this program, but he/she has to write down
6
+ the change of the modification on the top of the source code.
7
+ Neither contact nor appointment to Takeaki Uno is needed.
8
+ If one wants to re-distribute this code, do not forget to
9
+ refer the newest code, and show the link to homepage of
10
+ Takeaki Uno, to notify the news about this code for the users.
11
+ For the commercial use, please make a contact to Takeaki Uno. */
12
+
13
+ #ifndef _sspc_c_
14
+ #define _sspc_c_
15
+
16
+ #define WEIGHT_DOUBLE
17
+
18
+ #include"trsact.c"
19
+ #include"problem.c"
20
+
21
+ #define SSPC_INCLUSION 1 // command 'i': one set is included in the other by [ratio]
22
+ #define SSPC_SIMILARITY 2 // command 'I': both-inclusion
23
+ #define SSPC_INTERSECTION 4 // command 'T': size of the intersection
24
+ #define SSPC_RESEMBLANCE 8 // command 'R': |A cap B| / |A cup B|
25
+ #define SSPC_INNERPRODUCT 16 // command 'C': inner product of normalized vectors
26
+ #define SSPC_MININT 32 // command 's': |A cap B| / min{|A|,|B|}
27
+ #define SSPC_MAXINT 64 // command 'S': |A cap B| / max{|A|,|B|}
28
+ #define SSPC_COUNT 128 // modifier '#': count the similar records instead of printing pairs
29
+ #define SSPC_MATRIX 256 // modifier 'M': use the matrix version (SSPCmat)
30
+
31
+
32
+ void SSPC_error (){ // set ERROR_MES and print the command-line usage through print_err; the text does not list the 'M' letter that SSPC_read_param also accepts
33
+ ERROR_MES = "command explanation";
34
+ print_err ("SSPC: [ISCfQq] [options] input-filename ratio/threshold [output-filename]\n\
35
+ %%:show progress, _:no message, +:write solutions in append mode\n\
36
+ #:count the number of similar records for each record\n\
37
+ i(inclusion): find pairs [ratio] of items (weighted sum) of one is included in the other (1st is included in 2nd)\n\
38
+ I(both-inclusion): find pairs s.t. the size (weight sum) of intersection is [ratio] of both\n\
39
+ S:set similarity measure to |A\\cap B| / max{|A|,|B|}\n\
40
+ s:set similarity measure to |A\\cap B| / min{|A|,|B|}\n\
41
+ T(intersection): find pairs having common [threshld] items\n\
42
+ R(resemblance): find pairs s.t. |A\\capB|/|A\\cupB| >= [threshld]\n\
43
+ C(cosign distance): find pairs s.t. inner product of their normalized vectors >= [threshld]\n\
44
+ f,Q:output ratio/size of pairs following/preceding to the pairs\n\
45
+ N:normalize the ID of latter sets, in -c mode\n\
46
+ t:transpose the database so that i-th transaction will be item i\n\
47
+ [options]\n\
48
+ -2 [num]:2nd input file name\n\
49
+ -K [num]:output [num] pairs of most large intersections\n\
50
+ -w [filename]:read item weights from [filename]\n\
51
+ -l,-u [num]:ignore transactions with size (weight sum) less/more than [num]\n\
52
+ -L,-U [num]: ignore items appearing less/more than [num]\n\
53
+ -c [num]:compare transactions of IDs less than num and the others (if 0 is given, automatically set to the boundary of the 1st and 2nd file)\n\
54
+ -b [num]:ignore pairs having no common item of at least [num]th frequency\n\
55
+ -B [num]:ignore pairs having no common item of frequency at most [num]\n\
56
+ -# [num]:stop after outputting [num] solutions\n\
57
+ -, [char]:give the separator of the numbers in the output\n\
58
+ -Q [filename]:replace the output numbers according to the permutation table given by [filename]\n\
59
+ # the 1st letter of input-filename cannot be '-'.\n\
60
+ # if the output file name is -, the solutions will be output to standard output.\n");
61
+ EXIT; // EXIT is a project macro not visible here
62
+ // leftover fragment of the usage text: items have to begin from 1
63
+ }
64
+
65
+ // c:multi stream transaction mode, separated by an empty transaction
66
+
67
+ /***********************************************************************/
68
+ /* read parameters given by command line: argv[1] is the command-letter string, then come "-x value" option pairs, the input file name, the ratio/threshold and an optional output file name; results are stored into *PP. On a too-short command line SSPC_error() is called and the function returns early (callers check ERROR_MES). */
69
+ /***********************************************************************/
70
+ void SSPC_read_param (int argc, char *argv[], PROBLEM *PP){
71
+ int c=1; // index of the argument currently examined
72
+ ITEMSET *II = &PP->II;
73
+ TRSACT *TT = &PP->TT;
74
+
75
+ if ( argc < c+3 ){ SSPC_error (); return; }
76
+
77
+ if ( !strchr (argv[c], '_') ){ II->flag |= SHOW_MESSAGE; TT->flag |= SHOW_MESSAGE; }
78
+ if ( strchr (argv[c], '%') ) II->flag |= SHOW_PROGRESS;
79
+ if ( strchr (argv[c], '+') ) II->flag |= ITEMSET_APPEND;
80
+ if ( strchr (argv[c], 'f') ) II->flag |= ITEMSET_FREQ;
81
+ if ( strchr (argv[c], 'Q') ) II->flag |= ITEMSET_PRE_FREQ;
82
+ if ( strchr (argv[c], 'i') ) PP->problem = SSPC_INCLUSION; // exactly one measure is chosen; note the plain assignment
83
+ else if ( strchr (argv[c], 'I') ) PP->problem = SSPC_SIMILARITY;
84
+ else if ( strchr (argv[c], 'T') ) PP->problem = SSPC_INTERSECTION;
85
+ else if ( strchr (argv[c], 's') ) PP->problem = SSPC_MININT;
86
+ else if ( strchr (argv[c], 'S') ) PP->problem = SSPC_MAXINT;
87
+ else if ( strchr (argv[c], 'R') ) PP->problem = SSPC_RESEMBLANCE;
88
+ else if ( strchr (argv[c], 'C') ) PP->problem = SSPC_INNERPRODUCT;
89
+ else error ("i, I, s, S, R, T or C command has to be specified", EXIT);
90
+ if ( strchr (argv[c], 'M') ) PP->problem |= SSPC_MATRIX; // modifier bits are OR-ed in only after the measure has been assigned, otherwise the '=' above would erase them
91
+ if ( strchr (argv[c], '#') ) PP->problem |= SSPC_COUNT;
92
+ if ( strchr (argv[c], 'N') ) PP->problem |= PROBLEM_NORMALIZE;
93
+ if ( !strchr (argv[c], 't') ) TT->flag |= LOAD_TPOSE; // LOAD_TPOSE is set when 't' is absent
94
+ c++;
95
+
96
+ while ( argv[c][0] == '-' ){ // each recognized option consumes two arguments; an unknown letter ends option parsing
97
+ switch (argv[c][1]){
98
+ case 'K': II->topk.end = atoi(argv[c+1]);
99
+ break; case 'L': TT->row_lb = atoi(argv[c+1]);
100
+ break; case 'U': TT->row_ub = atoi(argv[c+1]);
101
+ break; case 'l': TT->w_lb = atof(argv[c+1]);
102
+ break; case 'u': TT->w_ub = atof(argv[c+1]);
103
+ break; case 'w': PP->TT.wfname = argv[c+1];
104
+ break; case 'c': PP->dir = 1; TT->sep = atoi(argv[c+1]);
105
+ break; case '2': PP->TT.fname2 = argv[c+1];
106
+ break; case 'b': PP->II.len_lb = atoi(argv[c+1]);
107
+ break; case 'B': PP->II.len_ub = atoi(argv[c+1]);
108
+ break; case '#': II->max_solutions = atoi(argv[c+1]);
109
+ break; case ',': II->separator = argv[c+1][0];
110
+ break; case 'Q': PP->outperm_fname = argv[c+1];
111
+ break; default: goto NEXT;
112
+ }
113
+ c += 2;
114
+ if ( argc<c+2 ){ SSPC_error (); return; } // the input file name and the threshold must still follow
115
+ }
116
+
117
+ NEXT:;
118
+ if ( PP->problem & SSPC_MATRIX ) PP->MM.fname = argv[c];
119
+ else PP->TT.fname = argv[c];
120
+ if ( II->topk.end==0 ) II->frq_lb = atof(argv[c+1]); // the threshold argument is not parsed when -K gave a non-zero value
121
+ if ( argc>c+2 ) PP->output_fname = argv[c+2];
122
+ }
123
+
124
+
125
+ /*************************************************************************/
126
+ /* SSPC main routine: for every i from `begin` up to TT->T.clms, score the pairs (i, *x) for the entries *x of TT->jump under the measure selected in PP->problem, and output each pair reaching II->frq_lb (or, with SSPC_COUNT, write the number of such pairs per i) */
127
+ /*************************************************************************/
128
+ void SSPC (PROBLEM *PP){
129
+ ITEMSET *II = &PP->II;
130
+ TRSACT *TT = &PP->TT;
131
+ QUEUE_ID i, j, begin = PP->dir>0?TT->sep:1, f, ii=0;
132
+ QUEUE_INT *x, **o = NULL, *oi, *oj, cnt;
133
+ WEIGHT *w, f1, f2, c, cc;
134
+ double sq =0;
135
+ int count=0, fs = SSPC_INTERSECTION +SSPC_RESEMBLANCE +SSPC_INNERPRODUCT+SSPC_MAXINT+SSPC_MININT; // fs: the measures whose value is compared directly with frq_lb
136
+
137
+ // initialization
138
+ calloc2 (w, TT->T.clms*2, EXIT); // w[i] is read below as the size (weight sum) of i; presumably filled by the TRSACT_delivery call two lines down
139
+ if ( PP->problem & SSPC_INNERPRODUCT ) FLOOP (i, 0, TT->T.clms) TT->w[i] *= TT->w[i]; // squares the weights in place for the inner-product measure
140
+ TRSACT_delivery (TT, &TT->jump, w, w+TT->T.clms, NULL, TT->T.clms);
141
+ FLOOP (i, 0, PP->dir?TT->sep:1) TT->OQ[i].end = 0;
142
+ II->itemset.t = 2; // every output is a pair
143
+
144
+ // skipping items of large frequencies
145
+ if ( TT->flag & LOAD_SIZSORT ){
146
+ malloc2 (o, TT->T.clms, EXIT); // o[i] keeps the original start of OQ[i].v; restored in the termination part
147
+ FLOOP (i, 0, TT->T.clms){
148
+ o[i] = TT->OQ[i].v;
149
+ TT->OQ[i].v[TT->OQ[i].t] = INTHUGE; // put end-mark at the last; also used in main loop
150
+ for ( j=0 ; TT->OQ[i].v[j] < PP->II.len_lb ; j++ );
151
+ TT->OQ[i].v = &TT->OQ[i].v[j]; TT->OQ[i].t -= j; // drop the leading entries smaller than len_lb from the queue
152
+ }
153
+ }
154
+ // main loop
155
+
156
+ FLOOP (i, begin, TT->T.clms){
157
+ if ( II->flag & SHOW_PROGRESS ){
158
+ if ( count < i*100/TT->T.clms ){ count++; fprintf (stderr, "%d%%\n", count); } // progress in percent
159
+ }
160
+ cnt = 0;
161
+ II->itemset.v[0] = ((PP->problem&PROBLEM_NORMALIZE)&& PP->dir>0)? i-TT->sep: i;
162
+ if ( PP->problem & SSPC_INNERPRODUCT ) sq = sqrt (w[i]);
163
+ TRSACT_delivery (TT, &TT->jump, PP->occ_w, PP->occ_pw, &TT->OQ[i], (PP->dir>0)?TT->sep:i);
164
+
165
+ MQUE_FLOOP (TT->jump, x){
166
+ II->itemset.v[1] = *x;
167
+ c = PP->occ_w[*x]; // c: weight of the part common to i and *x, as far as the delivery counted it
168
+
169
+ //if ( TT->OQ[i].t>0 ) printf ("%f %d\n", c, TT->OQ[i].t);
170
+ if ( TT->flag & LOAD_SIZSORT ){
171
+ for (oi=o[i],oj=o[*x] ; *oi<PP->II.len_lb ; oi++ ){ // add the common entries among those skipped above (values below len_lb); relies on the INTHUGE end-mark
172
+ while ( *oj < *oi ) oj++;
173
+ if ( *oi == *oj ) c += TT->w[*oi];
174
+ }
175
+ }
176
+ if ( PP->problem & fs ){
177
+ if ( PP->problem & SSPC_INTERSECTION ) II->frq = c;
178
+ else if ( PP->problem & SSPC_INNERPRODUCT ) II->frq = c / sq / sqrt(w[*x]);
179
+ else if ( (PP->problem & SSPC_RESEMBLANCE) && (cc= w[i] +w[*x] -c) != 0 ) II->frq = c/cc;
180
+ else if ( (PP->problem & SSPC_MAXINT) && (cc=MAX(w[i],w[*x])) != 0 ) II->frq = c/cc;
181
+ else if ( (PP->problem & SSPC_MININT) && (cc=MIN(w[i],w[*x])) != 0 ) II->frq = c/cc;
182
+ else continue; // the denominator cc was 0; NOTE(review): assuming MQUE_FLOOP is an ordinary loop, this also skips the TT->OQ[*x].end = 0 reset below -- confirm intended
183
+ if ( II->frq >= II->frq_lb ){
184
+ if ( PP->problem & SSPC_COUNT ) cnt++;
185
+ else ITEMSET_output_itemset (II, NULL, 0);
186
+ }
187
+ } else {
188
+ f1 = w[i]*II->frq_lb; f2 = w[*x]*II->frq_lb; // size of i and *x
189
+ if ( PP->problem & SSPC_SIMILARITY ){
190
+ f = ( (c >= f1) && (c >= f2) );
191
+ II->frq = MIN(c/w[i], c/w[*x]);
192
+ } else if ( PP->problem & SSPC_INCLUSION ){
193
+ if ( c >= f2 ){ // the pair also qualifies in the other direction: output it with the two IDs swapped, then restore them
194
+ II->frq = c/w[*x];
195
+ II->itemset.v[0] = *x; II->itemset.v[1] = i-PP->root;
196
+ if ( PP->problem & SSPC_COUNT ) cnt++;
197
+ else ITEMSET_output_itemset (II, NULL, 0);
198
+ II->itemset.v[0] = i-PP->root; II->itemset.v[1] = *x;
199
+ }
200
+ f = ( c >= f1 );
201
+ II->frq = c/w[i];
202
+ } else continue;
203
+ if ( f ){
204
+ if ( PP->problem & SSPC_COUNT ) cnt++;
205
+ else ITEMSET_output_itemset (II, NULL, 0);
206
+ }
207
+ }
208
+ TT->OQ[*x].end = 0;
209
+ }
210
+ TT->OQ[i].end = 0;
211
+ if ( PP->problem & SSPC_COUNT ){
212
+ while ( ii<II->perm[i] ){ // write empty lines until the output line number reaches II->perm[i]
213
+ FILE2_putc (&II->multi_fp[0], '\n');
214
+ FILE2_flush (&II->multi_fp[0]);
215
+ ii++;
216
+ }
217
+ FILE2_print_int (&II->multi_fp[0], cnt, 0);
218
+ FILE2_putc (&II->multi_fp[0], '\n');
219
+ FILE2_flush (&II->multi_fp[0]);
220
+ II->sc[2] += cnt;
221
+ ii++;
222
+ }
223
+ }
224
+
225
+ // termination
226
+ if ( TT->flag & LOAD_SIZSORT ){
227
+ FLOOP (i, 0, TT->T.clms){
228
+ TT->OQ[i].t += TT->OQ[i].v - o[i]; // undo the shortening done before the main loop
229
+ TT->OQ[i].v = o[i];
230
+ }
231
+ }
232
+ mfree (w, o);
233
+ }
234
+
235
+
236
+ /*************************************************************************/
237
+ /* SSPC matrix version: compares row i of PP->MM with the rows j before it through their inner product, and outputs (or, with SSPC_COUNT, counts) the pairs exceeding the squared threshold. Notes below assume FLOOP(v,a,b) iterates v from a to b-1. */
238
+ /*************************************************************************/
239
+ void SSPCmat (PROBLEM *PP){
240
+ ITEMSET *II = &PP->II;
241
+ MAT *MM = &PP->MM;
242
+ QUEUE_ID i, j, x, begin = PP->dir>0?0:1, f, ii=0;
243
+ QUEUE_INT cnt;
244
+ WEIGHT *w, f1, f2, c, cc; // NOTE(review): none of these is used outside the commented-out code below
245
+ double sq =0;
246
+ int fs = SSPC_INTERSECTION +SSPC_RESEMBLANCE +SSPC_INNERPRODUCT+SSPC_MAXINT+SSPC_MININT; // NOTE(review): unused in this function
247
+
248
+ II->frq_lb = II->frq_lb * II->frq_lb; // the threshold is squared in place; II->frq_lb stays modified after the call
249
+
250
+ // initialization
251
+ // calloc2 (w, MM->t, EXIT);
252
+ // if ( PP->problem & SSPC_INNERPRODUCT ) FLOOP (i, 0, MM->clms) TT->w[i] *= TT->w[i];
253
+ // TRSACT_delivery (TT, &TT->jump, w, w+TT->T.clms, NULL, TT->T.clms);
254
+ // FLOOP (i, 0, PP->dir?TT->sep:1) TT->OQ[i].end = 0;
255
+ II->itemset.t = 2;
256
+
257
+ // skipping items of large frequencies
258
+ // if ( TT->flag & LOAD_SIZSORT ){
259
+ // malloc2 (o, TT->T.clms, EXIT);
260
+ // FLOOP (i, 0, TT->T.clms){
261
+ // o[i] = TT->OQ[i].v;
262
+ // TT->OQ[i].v[TT->OQ[i].t] = INTHUGE; // put end-mark at the last; also used in main loop
263
+ // for ( j=0 ; TT->OQ[i].v[j] < PP->II.len_lb ; j++ );
264
+ // TT->OQ[i].v = &TT->OQ[i].v[j]; TT->OQ[i].t -= j;
265
+ // }
266
+ // }
267
+
268
+ // main loop
269
+ FLOOP (i, begin, MM->t){
270
+ cnt = 0;
271
+ II->itemset.v[0] = ((PP->problem&PROBLEM_NORMALIZE)&& PP->dir>0)? i-MM->clms: i; // i-TT->sep
272
+ if ( PP->problem || 1 ){ // always true because of the "|| 1"
273
+ PP->occ_w[i] = 0;
274
+ FLOOP (x, 0, MM->clms) PP->occ_w[i] += MM->v[i].v[x] * MM->v[i].v[x]; // occ_w[i] = sum of squares of row i
275
+ }
276
+
277
+ FLOOP (j, 0, PP->dir>0?begin:i){ // NOTE(review): with PP->dir>0 the bound is begin (=0), so this loop body never runs; otherwise i starts at 1, so occ_w[0] is read below without being computed in this function
278
+ II->itemset.v[1] = j;
279
+ f = 0; sq = 0;
280
+ FLOOP (x, 0, MM->clms) sq += MM->v[i].v[x] * MM->v[j].v[x]; // sq = inner product of rows i and j
281
+ if ( sq / PP->occ_w[i] / PP->occ_w[j] > II->frq_lb ) f = 1; // NOTE(review): divides the plain inner product by both squared norms and compares with the squared threshold; a squared cosine would need sq*sq -- confirm intended
282
+
283
+ if ( f ){
284
+ if ( PP->problem & SSPC_COUNT ) cnt++;
285
+ else ITEMSET_output_itemset (II, NULL, 0);
286
+ }
287
+ }
288
+ if ( PP->problem & SSPC_COUNT ){
289
+ while ( ii<II->perm[i] ){ // write empty lines until the output line number reaches II->perm[i]
290
+ FILE2_putc (&II->multi_fp[0], '\n');
291
+ FILE2_flush (&II->multi_fp[0]);
292
+ ii++;
293
+ }
294
+ FILE2_print_int (&II->multi_fp[0], cnt, 0);
295
+ FILE2_putc (&II->multi_fp[0], '\n');
296
+ FILE2_flush (&II->multi_fp[0]);
297
+ II->sc[2] += cnt;
298
+ ii++;
299
+ }
300
+ }
301
+
302
+ // termination
303
+ // mfree (w, o);
304
+ }
305
+
306
+
307
+
308
+ /*************************************************************************/
309
+ /* main function of SSPC: parses the command line, loads the data, runs SSPC or SSPCmat, and returns 1 when ERROR_MES is set, 0 otherwise */
310
+ /*************************************************************************/
311
+ int SSPC_main (int argc, char *argv[]){
312
+ PROBLEM PP;
313
+ SETFAMILY *T = &PP.TT.T;
314
+ QUEUE_ID i;
315
+
316
+ PROBLEM_init (&PP);
317
+ SSPC_read_param (argc, argv, &PP);
318
+ if ( ERROR_MES ) return (1); // NOTE(review): returns without calling PROBLEM_end -- confirm PROBLEM_init acquires nothing that needs releasing
319
+
320
+ PP.TT.flag |= LOAD_INCSORT + TRSACT_ALLOC_OCC;
321
+ if ( PP.II.len_ub<INTHUGE || PP.II.len_lb>0 ) PP.TT.flag |= LOAD_SIZSORT+LOAD_DECROWSORT; // size-sorted loading is needed only when -b or -B was given
322
+ PROBLEM_load (&PP);
323
+ if ( PP.II.len_ub < INTHUGE ){
324
+ FLOOP (i, 0, PP.TT.T.t) if ( PP.TT.T.v[i].t <= PP.II.len_ub ){ PP.II.len_lb = i; break; } // turn the -B bound into an index: the first i whose T.v[i].t is at most len_ub; this overwrites any -b value
325
+ }
326
+ PROBLEM_alloc (&PP, T->clms, 0, 0, PP.TT.perm, PROBLEM_OCC_W +PROBLEM_OCC_T);
327
+ PP.TT.perm = NULL; // presumably PROBLEM_alloc took over the permutation -- TODO confirm
328
+ realloc2 (PP.TT.w, MAX(T->t, T->clms)+1, EXIT);
329
+ ARY_FILL (PP.TT.w, 0, MAX(T->t, T->clms)+1, 1); // NOTE(review): presumably sets every weight to 1, which would discard weights loaded through -w -- confirm
330
+
331
+ // delivery
332
+ //TRSACT_print (&PP.TT, NULL, PP.II.perm);
333
+ print_mes (&PP.TT, "separated at %d\n", PP.TT.sep);
334
+ QUEUE_delivery (PP.TT.OQ, NULL, NULL, T->v, &PP.TT.OQ[T->clms], T->t, T->clms);
335
+
336
+ if ( !ERROR_MES && PP.TT.T.clms>1 ){ // nothing to compare with fewer than two columns
337
+ if ( PP.problem & SSPC_MATRIX ){ SSPCmat (&PP); }
338
+ else SSPC (&PP);
339
+ print_mes (&PP.TT, LONGF " pairs are found\n", PP.II.sc[2]);
340
+ }
341
+
342
+ ITEMSET_merge_counters (&PP.II);
343
+ internal_params.l1 = PP.II.solutions; // exposes the number of solutions through the global internal_params
344
+
345
+ PROBLEM_end (&PP);
346
+ return (ERROR_MES?1:0);
347
+ }
348
+
349
+ /*******************************************************************************/
350
+ #ifndef _NO_MAIN_ // stand-alone entry point; left out when _NO_MAIN_ is defined, as the extension build does with -D_NO_MAIN_
351
+ #define _NO_MAIN_ // prevents a second main when further sources are #include-d after this one
352
+ int main (int argc, char *argv[]){
353
+ return (SSPC_main (argc, argv) ); // the exit status is the result of SSPC_main (1 on error, 0 otherwise)
354
+ }
355
+ #endif
356
+ /*******************************************************************************/
357
+
358
+ #endif