nysol-take 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (161) hide show
  1. checksums.yaml +7 -0
  2. data/bin/mbiclique.rb +317 -0
  3. data/bin/mbipolish.rb +362 -0
  4. data/bin/mccomp.rb +235 -0
  5. data/bin/mclique.rb +295 -0
  6. data/bin/mclique2g.rb +105 -0
  7. data/bin/mcliqueInfo.rb +203 -0
  8. data/bin/mfriends.rb +202 -0
  9. data/bin/mgdiff.rb +252 -0
  10. data/bin/mhifriend.rb +456 -0
  11. data/bin/mhipolish.rb +465 -0
  12. data/bin/mitemset.rb +168 -0
  13. data/bin/mpal.rb +410 -0
  14. data/bin/mpolishing.rb +399 -0
  15. data/bin/msequence.rb +165 -0
  16. data/bin/mtra2g.rb +476 -0
  17. data/bin/mtra2gc.rb +360 -0
  18. data/ext/grhfilrun/extconf.rb +12 -0
  19. data/ext/grhfilrun/grhfilrun.c +85 -0
  20. data/ext/grhfilrun/src/_sspc.c +358 -0
  21. data/ext/grhfilrun/src/aheap.c +545 -0
  22. data/ext/grhfilrun/src/aheap.h +251 -0
  23. data/ext/grhfilrun/src/base.c +92 -0
  24. data/ext/grhfilrun/src/base.h +59 -0
  25. data/ext/grhfilrun/src/fstar.c +497 -0
  26. data/ext/grhfilrun/src/fstar.h +80 -0
  27. data/ext/grhfilrun/src/grhfil.c +214 -0
  28. data/ext/grhfilrun/src/itemset.c +713 -0
  29. data/ext/grhfilrun/src/itemset.h +170 -0
  30. data/ext/grhfilrun/src/problem.c +415 -0
  31. data/ext/grhfilrun/src/problem.h +179 -0
  32. data/ext/grhfilrun/src/queue.c +533 -0
  33. data/ext/grhfilrun/src/queue.h +182 -0
  34. data/ext/grhfilrun/src/sample.c +19 -0
  35. data/ext/grhfilrun/src/sspc.c +597 -0
  36. data/ext/grhfilrun/src/sspc2.c +491 -0
  37. data/ext/grhfilrun/src/stdlib2.c +1482 -0
  38. data/ext/grhfilrun/src/stdlib2.h +892 -0
  39. data/ext/grhfilrun/src/trsact.c +817 -0
  40. data/ext/grhfilrun/src/trsact.h +160 -0
  41. data/ext/grhfilrun/src/vec.c +745 -0
  42. data/ext/grhfilrun/src/vec.h +172 -0
  43. data/ext/lcmrun/extconf.rb +20 -0
  44. data/ext/lcmrun/lcmrun.cpp +99 -0
  45. data/ext/lcmrun/src/aheap.c +216 -0
  46. data/ext/lcmrun/src/aheap.h +111 -0
  47. data/ext/lcmrun/src/base.c +92 -0
  48. data/ext/lcmrun/src/base.h +59 -0
  49. data/ext/lcmrun/src/itemset.c +496 -0
  50. data/ext/lcmrun/src/itemset.h +157 -0
  51. data/ext/lcmrun/src/lcm.c +427 -0
  52. data/ext/lcmrun/src/problem.c +349 -0
  53. data/ext/lcmrun/src/problem.h +177 -0
  54. data/ext/lcmrun/src/queue.c +528 -0
  55. data/ext/lcmrun/src/queue.h +176 -0
  56. data/ext/lcmrun/src/sgraph.c +359 -0
  57. data/ext/lcmrun/src/sgraph.h +173 -0
  58. data/ext/lcmrun/src/stdlib2.c +1282 -0
  59. data/ext/lcmrun/src/stdlib2.h +823 -0
  60. data/ext/lcmrun/src/trsact.c +747 -0
  61. data/ext/lcmrun/src/trsact.h +159 -0
  62. data/ext/lcmrun/src/vec.c +731 -0
  63. data/ext/lcmrun/src/vec.h +171 -0
  64. data/ext/lcmseq0run/extconf.rb +20 -0
  65. data/ext/lcmseq0run/lcmseq0run.cpp +59 -0
  66. data/ext/lcmseq0run/src/aheap.c +216 -0
  67. data/ext/lcmseq0run/src/aheap.h +111 -0
  68. data/ext/lcmseq0run/src/base.c +92 -0
  69. data/ext/lcmseq0run/src/base.h +59 -0
  70. data/ext/lcmseq0run/src/itemset.c +518 -0
  71. data/ext/lcmseq0run/src/itemset.h +157 -0
  72. data/ext/lcmseq0run/src/itemset_zero.c +522 -0
  73. data/ext/lcmseq0run/src/lcm_seq.c +446 -0
  74. data/ext/lcmseq0run/src/lcm_seq_zero.c +446 -0
  75. data/ext/lcmseq0run/src/problem.c +439 -0
  76. data/ext/lcmseq0run/src/problem.h +179 -0
  77. data/ext/lcmseq0run/src/problem_zero.c +439 -0
  78. data/ext/lcmseq0run/src/queue.c +533 -0
  79. data/ext/lcmseq0run/src/queue.h +182 -0
  80. data/ext/lcmseq0run/src/stdlib2.c +1350 -0
  81. data/ext/lcmseq0run/src/stdlib2.h +864 -0
  82. data/ext/lcmseq0run/src/trsact.c +747 -0
  83. data/ext/lcmseq0run/src/trsact.h +159 -0
  84. data/ext/lcmseq0run/src/vec.c +779 -0
  85. data/ext/lcmseq0run/src/vec.h +172 -0
  86. data/ext/lcmseqrun/extconf.rb +20 -0
  87. data/ext/lcmseqrun/lcmseqrun.cpp +101 -0
  88. data/ext/lcmseqrun/src/aheap.c +216 -0
  89. data/ext/lcmseqrun/src/aheap.h +111 -0
  90. data/ext/lcmseqrun/src/base.c +92 -0
  91. data/ext/lcmseqrun/src/base.h +59 -0
  92. data/ext/lcmseqrun/src/itemset.c +518 -0
  93. data/ext/lcmseqrun/src/itemset.h +157 -0
  94. data/ext/lcmseqrun/src/itemset_zero.c +522 -0
  95. data/ext/lcmseqrun/src/lcm_seq.c +447 -0
  96. data/ext/lcmseqrun/src/lcm_seq_zero.c +446 -0
  97. data/ext/lcmseqrun/src/problem.c +439 -0
  98. data/ext/lcmseqrun/src/problem.h +179 -0
  99. data/ext/lcmseqrun/src/problem_zero.c +439 -0
  100. data/ext/lcmseqrun/src/queue.c +533 -0
  101. data/ext/lcmseqrun/src/queue.h +182 -0
  102. data/ext/lcmseqrun/src/stdlib2.c +1350 -0
  103. data/ext/lcmseqrun/src/stdlib2.h +864 -0
  104. data/ext/lcmseqrun/src/trsact.c +747 -0
  105. data/ext/lcmseqrun/src/trsact.h +159 -0
  106. data/ext/lcmseqrun/src/vec.c +779 -0
  107. data/ext/lcmseqrun/src/vec.h +172 -0
  108. data/ext/lcmtransrun/extconf.rb +18 -0
  109. data/ext/lcmtransrun/lcmtransrun.cpp +264 -0
  110. data/ext/macerun/extconf.rb +20 -0
  111. data/ext/macerun/macerun.cpp +57 -0
  112. data/ext/macerun/src/aheap.c +217 -0
  113. data/ext/macerun/src/aheap.h +112 -0
  114. data/ext/macerun/src/itemset.c +491 -0
  115. data/ext/macerun/src/itemset.h +158 -0
  116. data/ext/macerun/src/mace.c +503 -0
  117. data/ext/macerun/src/problem.c +346 -0
  118. data/ext/macerun/src/problem.h +174 -0
  119. data/ext/macerun/src/queue.c +529 -0
  120. data/ext/macerun/src/queue.h +177 -0
  121. data/ext/macerun/src/sgraph.c +360 -0
  122. data/ext/macerun/src/sgraph.h +174 -0
  123. data/ext/macerun/src/stdlib2.c +993 -0
  124. data/ext/macerun/src/stdlib2.h +811 -0
  125. data/ext/macerun/src/vec.c +634 -0
  126. data/ext/macerun/src/vec.h +170 -0
  127. data/ext/sspcrun/extconf.rb +20 -0
  128. data/ext/sspcrun/src/_sspc.c +358 -0
  129. data/ext/sspcrun/src/aheap.c +545 -0
  130. data/ext/sspcrun/src/aheap.h +251 -0
  131. data/ext/sspcrun/src/base.c +92 -0
  132. data/ext/sspcrun/src/base.h +59 -0
  133. data/ext/sspcrun/src/fstar.c +496 -0
  134. data/ext/sspcrun/src/fstar.h +80 -0
  135. data/ext/sspcrun/src/grhfil.c +213 -0
  136. data/ext/sspcrun/src/itemset.c +713 -0
  137. data/ext/sspcrun/src/itemset.h +170 -0
  138. data/ext/sspcrun/src/problem.c +415 -0
  139. data/ext/sspcrun/src/problem.h +179 -0
  140. data/ext/sspcrun/src/queue.c +533 -0
  141. data/ext/sspcrun/src/queue.h +182 -0
  142. data/ext/sspcrun/src/sample.c +19 -0
  143. data/ext/sspcrun/src/sspc.c +598 -0
  144. data/ext/sspcrun/src/sspc2.c +491 -0
  145. data/ext/sspcrun/src/stdlib2.c +1482 -0
  146. data/ext/sspcrun/src/stdlib2.h +892 -0
  147. data/ext/sspcrun/src/trsact.c +817 -0
  148. data/ext/sspcrun/src/trsact.h +160 -0
  149. data/ext/sspcrun/src/vec.c +745 -0
  150. data/ext/sspcrun/src/vec.h +172 -0
  151. data/ext/sspcrun/sspcrun.cpp +54 -0
  152. data/lib/nysol/enumLcmEp.rb +338 -0
  153. data/lib/nysol/enumLcmEsp.rb +284 -0
  154. data/lib/nysol/enumLcmIs.rb +275 -0
  155. data/lib/nysol/enumLcmSeq.rb +143 -0
  156. data/lib/nysol/items.rb +201 -0
  157. data/lib/nysol/seqDB.rb +256 -0
  158. data/lib/nysol/take.rb +39 -0
  159. data/lib/nysol/taxonomy.rb +113 -0
  160. data/lib/nysol/traDB.rb +257 -0
  161. metadata +239 -0
@@ -0,0 +1,491 @@
1
+ /* SSPC: Similar Set Pair Comparison */
2
+ /* 2007/11/30 Takeaki Uno, e-mail:uno@nii.jp,
3
+ homepage: http://research.nii.ac.jp/~uno/index.html */
4
+ /* This program is available for only academic use, basically.
5
+ Anyone can modify this program, but he/she has to write down
6
+ the change of the modification on the top of the source code.
7
+ Neither contact nor appointment to Takeaki Uno is needed.
8
+ If one wants to re-distribute this code, do not forget to
9
+ refer the newest code, and show the link to homepage of
10
+ Takeaki Uno, to notify the news about this code for the users.
11
+ For the commercial use, please make a contact to Takeaki Uno. */
12
+
13
+ #ifndef _sspc_c_
14
+ #define _sspc_c_
15
+
16
+ #define WEIGHT_DOUBLE
17
+
18
+ #include"trsact.c"
19
+ #include"problem.c"
20
+
21
/* Bit flags stored in PROBLEM.problem. */
/* similarity measure, selected by the command letters i, I, T, R, C, s, S, P */
#define SSPC_INCLUSION     0x0001
#define SSPC_SIMILARITY    0x0002
#define SSPC_INTERSECTION  0x0004
#define SSPC_RESEMBLANCE   0x0008
#define SSPC_INNERPRODUCT  0x0010
#define SSPC_MININT        0x0020
#define SSPC_MAXINT        0x0040
#define SSPC_PMI           0x0080
/* mode modifiers, selected by the command letters #, M, D, n, Y */
#define SSPC_COUNT         0x0800
#define SSPC_MATRIX        0x1000
#define SSPC_UNIFY         0x2000
#define SSPC_NO_NEIB       0x4000
#define SSPC_POLISH        0x8000
34
+
35
+
36
+ void SSPC_error (){
37
+ ERROR_MES = "command explanation";
38
+ print_err ("SSPC: [ISCfQq] [options] input-filename ratio/threshold [output-filename]\n\
39
+ %%:show progress, _:no message, +:write solutions in append mode\n\
40
+ #:count the number of similar records for each record\n\
41
+ i(inclusion): find pairs [ratio] of items (weighted sum) of one is included in the other (1st is included in 2nd)\n\
42
+ I(both-inclusion): find pairs s.t. the size (weight sum) of intersection is [ratio] of both\n\
43
+ S:set similarity measure to |A\\cap B| / max{|A|,|B|}\n\
44
+ s:set similarity measure to |A\\cap B| / min{|A|,|B|}\n\
45
+ T(intersection): find pairs having common [threshld] items\n\
46
+ R(resemblance): find pairs s.t. |A\\capB|/|A\\cupB| >= [threshld]\n\
47
+ P(PMI): find pairs s.t. log (|A\\capB|*|all| / (|A|*|B|)) >= [threshld]\n\
48
+ C(cosign distance): find pairs s.t. inner product of their normalized vectors >= [threshld]\n\
49
+ f,Q:output ratio/size of pairs following/preceding to the pairs\n\
50
+ D:the first entry is ID, and unify the records with the same ID\n\
51
+ N:normalize the ID of latter sets, in -c mode\n\
52
+ n:do not consider a and b in the set when comparing a and b\n\
53
+ Y:output elements of each set that contribute to no similarity\n\
54
+ t:transpose the database so that i-th transaction will be item i\n\
55
+ [options]\n\
56
+ -2 [num]:2nd input file name\n\
57
+ -K [num]:output [num] pairs of most large intersections\n\
58
+ -w [filename]:read item weights from [filename]\n\
59
+ -l,-u [num]:ignore transactions with size (weight sum) less/more than [num]\n\
60
+ -L,-U [num]: ignore items appearing less/more than [num]\n\
61
+ -c [num]:compare transactions of IDs less than num and the others (if 0 is given, automatically set to the boundary of the 1st and 2nd file)\n\
62
+ -b [num]:ignore pairs having no common item of at least [num]th frequency\n\
63
+ -B [num]:ignore pairs having no common item of frequency at most [num]\n\
64
+ -T [num]:ignore pairs whose intersection size is less than [num]\n\
65
+ -# [num]:stop after outputting [num] solutions\n\
66
+ -, [char]:give the separator of the numbers in the output\n\
67
+ -Q [filename]:replace the output numbers according to the permutation table given by [filename]\n\
68
+ # the 1st letter of input-filename cannot be '-'.\n\
69
+ # if the output file name is -, the solutions will be output to standard output.\n");
70
+ EXIT;
71
+ //items have to begin from 1\n");
72
+ }
73
+
74
+ // c:multi stream transaction mode, separated by an empty transaction
75
+
76
+ /***********************************************************************/
77
+ /* read parameters given by command line */
78
+ /***********************************************************************/
79
+ void SSPC_read_param (int argc, char *argv[], PROBLEM *PP){
80
+ int c=1;
81
+ ITEMSET *II = &PP->II;
82
+ TRSACT *TT = &PP->TT;
83
+ PP->th = 0;
84
+
85
+ if ( argc < c+3 ){ SSPC_error (); return; }
86
+
87
+ if ( !strchr (argv[c], '_') ){ II->flag |= SHOW_MESSAGE; TT->flag |= SHOW_MESSAGE; }
88
+ if ( strchr (argv[c], '%') ) II->flag |= SHOW_PROGRESS;
89
+ if ( strchr (argv[c], '+') ) II->flag |= ITEMSET_APPEND;
90
+ if ( strchr (argv[c], 'f') ) II->flag |= ITEMSET_FREQ;
91
+ if ( strchr (argv[c], 'Q') ) II->flag |= ITEMSET_PRE_FREQ;
92
+ if ( strchr (argv[c], 'M') ) PP->problem |= SSPC_MATRIX;
93
+ if ( strchr (argv[c], 'i') ) PP->problem = SSPC_INCLUSION;
94
+ else if ( strchr (argv[c], 'I') ) PP->problem = SSPC_SIMILARITY;
95
+ else if ( strchr (argv[c], 'T') ) PP->problem = SSPC_INTERSECTION;
96
+ else if ( strchr (argv[c], 's') ) PP->problem = SSPC_MININT;
97
+ else if ( strchr (argv[c], 'S') ) PP->problem = SSPC_MAXINT;
98
+ else if ( strchr (argv[c], 'R') ) PP->problem = SSPC_RESEMBLANCE;
99
+ else if ( strchr (argv[c], 'P') ) PP->problem = SSPC_PMI;
100
+ else if ( strchr (argv[c], 'C') ) PP->problem = SSPC_INNERPRODUCT;
101
+ else error ("i, I, s, S, R, T or C command has to be specified", EXIT);
102
+ if ( strchr (argv[c], '#') ) PP->problem |= SSPC_COUNT;
103
+ if ( strchr (argv[c], 'N') ) PP->problem |= PROBLEM_NORMALIZE;
104
+ if ( strchr (argv[c], 'D') ) PP->problem |= SSPC_UNIFY;
105
+ if ( strchr (argv[c], 'n') ) PP->problem |= SSPC_NO_NEIB;
106
+ if ( strchr (argv[c], 'Y') ) PP->problem |= SSPC_POLISH;
107
+ if ( !strchr (argv[c], 't') ) TT->flag |= LOAD_TPOSE;
108
+ c++;
109
+
110
+ while ( argv[c][0] == '-' ){
111
+ switch (argv[c][1]){
112
+ case 'K': II->topk.end = atoi(argv[c+1]);
113
+ break; case 'L': TT->row_lb = atoi(argv[c+1]);
114
+ break; case 'U': TT->row_ub = atoi(argv[c+1]);
115
+ break; case 'l': TT->w_lb = atof(argv[c+1]);
116
+ break; case 'u': TT->w_ub = atof(argv[c+1]);
117
+ break; case 'w': PP->TT.wfname = argv[c+1];
118
+ break; case 'c': PP->dir = 1; TT->sep = atoi(argv[c+1]);
119
+ break; case '2': PP->TT.fname2 = argv[c+1];
120
+ break; case 'b': PP->II.len_lb = atoi(argv[c+1]);
121
+ break; case 'B': PP->II.len_ub = atoi(argv[c+1]);
122
+ break; case 'T': PP->th = atoi(argv[c+1]);
123
+ break; case '#': II->max_solutions = atoi(argv[c+1]);
124
+ break; case ',': II->separator = argv[c+1][0];
125
+ break; case 'Q': PP->outperm_fname = argv[c+1];
126
+ break; default: goto NEXT;
127
+ }
128
+ c += 2;
129
+ if ( argc<c+2 ){ SSPC_error (); return; }
130
+ }
131
+
132
+ NEXT:;
133
+ if ( PP->problem & SSPC_MATRIX ) PP->MM.fname = argv[c];
134
+ else PP->TT.fname = argv[c];
135
+ if ( II->topk.end==0 ) II->frq_lb = atof(argv[c+1]);
136
+ if ( argc>c+2 ) PP->output_fname = argv[c+2];
137
+ }
138
+
139
+ void SSPC_output (PROBLEM *PP, QUEUE_INT *cnt, QUEUE_INT i){
140
+ QUEUE_INT *x;
141
+ if ( PP->problem & SSPC_POLISH ){
142
+ MQUE_FLOOP (PP->OQ[i], x) PP->vecchr[*x] = 1;
143
+ } else if ( PP->problem & SSPC_COUNT ) (*cnt)++;
144
+ else ITEMSET_output_itemset (&PP->II, NULL, 0);
145
+ }
146
+
147
+ /*************************************************************************/
148
+ /* SSPC main routine */
149
+ /*************************************************************************/
150
+ void SSPC (PROBLEM *PP){
151
+ ITEMSET *II = &PP->II;
152
+ TRSACT *TT = &PP->TT;
153
+ QUEUE J = INIT_QUEUE;
154
+ QUEUE_ID i, j, begin = (PP->problem&SSPC_POLISH)?0:(PP->dir>0?TT->sep:1), f, ii=0, m, t;
155
+ QUEUE_INT *x, **o = NULL, *oi, *oj, cnt, id;
156
+ WEIGHT *w, f1, f2, c, cc, *y;
157
+ char *u = NULL, *mark = NULL;
158
+ int pf = TT->flag&TRSACT_NEGATIVE;
159
+ double sq =0;
160
+ int count=0, fs=SSPC_INTERSECTION +SSPC_RESEMBLANCE +SSPC_INNERPRODUCT +SSPC_MAXINT +SSPC_MININT + SSPC_PMI;
161
+
162
+ // initialization
163
+ calloc2 (w, TT->T.clms*2, EXIT);
164
+ if ( PP->problem & SSPC_NO_NEIB ) calloc2 (mark, TT->T.clms, EXIT);
165
+ if ( PP->problem & SSPC_INNERPRODUCT ) FLOOP (i, 0, TT->T.clms) TT->w[i] *= TT->w[i];
166
+ TRSACT_delivery (TT, &TT->jump, w, w+TT->T.clms, NULL, TT->T.clms);
167
+ // FLOOP (i, 0, PP->dir?TT->sep:1) TT->OQ[i].end = 0;
168
+ FLOOP (i, 0, TT->T.clms) TT->OQ[i].end = 0;
169
+ II->itemset.t = 2;
170
+
171
+ // skipping items of large frequencies
172
+ if ( TT->flag & LOAD_SIZSORT ){
173
+ malloc2 (o, TT->T.clms, EXIT);
174
+ FLOOP (i, 0, TT->T.clms){
175
+ o[i] = TT->OQ[i].v;
176
+ TT->OQ[i].v[TT->OQ[i].t] = INTHUGE; // put end-mark at the last; also used in main loop
177
+ for (j=0 ; TT->OQ[i].v[j] < PP->II.len_lb ; j++);
178
+ TT->OQ[i].v = &TT->OQ[i].v[j]; TT->OQ[i].t -= j;
179
+ }
180
+ }
181
+
182
+ if (PP->problem & SSPC_UNIFY){
183
+ FLOOP (i, 0, TT->T.t){ // first entry is ID
184
+ id = TT->T.v[i].v[0];
185
+ FLOOP (ii, 0, TT->T.v[i].t-1) TT->T.v[i].v[ii] = TT->T.v[i].v[ii+1];
186
+ TT->T.v[i].t--;
187
+ TT->T.v[i].v[TT->T.v[i].t] = id;
188
+ }
189
+ QUEUE_alloc (&J, TT->T.clms);
190
+ calloc2 (u, TT->T.clms, EXIT);
191
+ ARY_FILL (w, 0, TT->T.clms*2, 0); // weight is to be re-computed
192
+ id = TT->T.t; J.t = J.s = 0;
193
+ FLOOP (i, 0, TT->T.t){
194
+ if ( id != TT->T.v[i].v[TT->T.v[i].t] ){
195
+ id = TT->T.v[i].v[TT->T.v[i].t];
196
+ MQUE_FLOOP (J, x) u[*x] = 0;
197
+ J.t = J.s = 0;
198
+ }
199
+
200
+ MQUE_FLOOP (TT->T.v[i], x){
201
+ if ( u[*x] == 0 ){ QUE_INS (J, *x); u[*x] = 1; }
202
+ else continue;
203
+ w[*x] += TT->w[t]; w[*x+TT->T.clms] += TT->pw[t];
204
+ }
205
+ }
206
+ MQUE_FLOOP (J, x){ u[*x] = 0; }
207
+ J.t = J.s = 0;
208
+ }
209
+
210
+ // main loop
211
+ FLOOP (i, begin, TT->T.clms){
212
+ //printf ("## %d\n", i);
213
+ if ( II->flag & SHOW_PROGRESS ){
214
+ if ( count < i*100/TT->T.clms ){ count++; fprintf (stderr, "%d%%\n", count); }
215
+ }
216
+ cnt = 0;
217
+ II->itemset.v[0] = ((PP->problem&PROBLEM_NORMALIZE)&& PP->dir>0)? i-TT->sep: i;
218
+ if ( PP->problem & SSPC_INNERPRODUCT ) sq = sqrt (w[i]);
219
+
220
+ // delivery
221
+ if ( PP->problem & SSPC_POLISH ) m = PP->TT.T.clms;
222
+ else m = (PP->dir>0)?TT->sep:i;
223
+ TT->jump.t = TT->jump.s;
224
+ if (PP->problem & SSPC_UNIFY) id = TT->T.v[t].v[TT->T.v[TT->OQ[i].s].t];
225
+ FLOOP (ii, TT->OQ[i].s, TT->OQ[i].t){
226
+ t = TT->OQ[i].v[ii];
227
+ if (PP->problem & SSPC_NO_NEIB){ mark[t] = 1; } // for no_neib
228
+ if ( TT->T.w ) y = TT->T.w[t]; else y = 0;
229
+ if (PP->problem & SSPC_UNIFY){ // for unify
230
+ if ( id != TT->T.v[t].v[TT->T.v[t].t] ){
231
+ id = TT->T.v[t].v[TT->T.v[t].t];
232
+ if ( t != TT->OQ[i].s ){
233
+ MQUE_FLOOP (J, x) u[*x] = 0;
234
+ J.t = J.s = 0;
235
+ }
236
+ }
237
+ } // unify end
238
+
239
+ MQUE_MLOOP (TT->T.v[t], x, m){
240
+ if ( TT->OQ[*x].end == 0 ){
241
+ QUE_INS (TT->jump, *x);
242
+ PP->occ_w[*x] = 0;
243
+ if ( pf ) PP->occ_pw[*x] = 0;
244
+ }
245
+ if (PP->problem & SSPC_UNIFY){
246
+ if ( u[*x] == 0 ){ QUE_INS (J, *x); u[*x] = 1; }
247
+ else continue;
248
+ }
249
+ TT->OQ[*x].end++;
250
+ if ( y ){
251
+ PP->occ_w[*x] += *y; if ( *y>0 && pf) PP->occ_pw[*x] += *y;
252
+ y++;
253
+ } else {
254
+ PP->occ_w[*x] += TT->w[t]; if ( pf ) PP->occ_pw[*x] += TT->pw[t];
255
+ }
256
+ if ( PP->problem & SSPC_POLISH ) QUE_INS (PP->OQ[*x], t); // make occ for data polish
257
+ }
258
+ }
259
+
260
+ if (PP->problem & SSPC_UNIFY){ // for unify
261
+ MQUE_FLOOP (J, x) u[*x] = 0;
262
+ J.t = J.s = 0;
263
+ } // unify end
264
+
265
+ MQUE_FLOOP (TT->jump, x){
266
+ if ( *x == i ) goto SKIP;
267
+ II->itemset.v[1] = *x;
268
+ c = PP->occ_w[*x];
269
+
270
+ if ( TT->flag & LOAD_SIZSORT ){
271
+ for (oi=o[i],oj=o[*x] ; *oi<PP->II.len_lb ; oi++ ){
272
+ while ( *oj < *oi ) oj++;
273
+ if ( *oi == *oj ) c += TT->w[*oi];
274
+ }
275
+ }
276
+ if (PP->problem & SSPC_NO_NEIB){ // for no_neib
277
+ if ( mark[*x] ){ w[*x] -= TT->w[i]; w[i] -= TT->w[*x]; }
278
+ }
279
+ if ( c < PP->th ) goto SKIP; // threshold for the intersection size
280
+ if ( PP->problem & fs ){
281
+ if ( PP->problem & SSPC_INTERSECTION ) II->frq = c;
282
+ else if ( PP->problem & SSPC_INNERPRODUCT ) II->frq = c / sq / sqrt(w[*x]);
283
+ else if ( (PP->problem & SSPC_RESEMBLANCE) && (cc= w[i] +w[*x] -c) != 0 ) II->frq = c/cc;
284
+ else if ( (PP->problem & SSPC_MAXINT) && (cc=MAX(w[i],w[*x])) != 0 ) II->frq = c/cc;
285
+ else if ( (PP->problem & SSPC_MININT) && (cc=MIN(w[i],w[*x])) != 0 ) II->frq = c/cc;
286
+ else if ( (PP->problem & SSPC_PMI) && (cc=w[i]*w[*x]) != 0 ) II->frq = log( ( c * TT->T.t) / cc ) / -log ( c / TT->T.t);
287
+ else continue;
288
+ if ( II->frq >= II->frq_lb ) SSPC_output (PP, &cnt, *x);
289
+ } else {
290
+ f1 = w[i]*II->frq_lb; f2 = w[*x]*II->frq_lb; // size of i and *x
291
+ if ( PP->problem & SSPC_SIMILARITY ){
292
+ f = ( (c >= f1) && (c >= f2) );
293
+ II->frq = MIN(c/w[i], c/w[*x]);
294
+ } else if ( PP->problem & SSPC_INCLUSION ){
295
+ if ( c >= f2 ){
296
+ II->frq = c/w[*x];
297
+ II->itemset.v[0] = *x; II->itemset.v[1] = i-PP->root;
298
+ SSPC_output (PP, &cnt, *x);
299
+ II->itemset.v[0] = i-PP->root; II->itemset.v[1] = *x;
300
+ }
301
+ f = ( c >= f1 );
302
+ II->frq = c/w[i];
303
+ } else continue;
304
+ if ( f ) SSPC_output (PP, &cnt, *x);
305
+ }
306
+ SKIP:;
307
+ if (PP->problem & SSPC_NO_NEIB){ // for no_neib
308
+ if ( mark[*x] ){ w[*x] += TT->w[i]; w[i] += TT->w[*x]; }
309
+ }
310
+ TT->OQ[*x].end = 0;
311
+ }
312
+
313
+ if ( PP->problem & SSPC_POLISH ){ // data polish; clear OQ, and marks
314
+ f = 0;
315
+ MQUE_FLOOP (PP->TT.OQ[i], x){
316
+ II->frq = DOUBLEHUGE;
317
+ if ( PP->vecchr[*x] == 1 ){
318
+ FILE2_print_int (&PP->II.multi_fp[0], *x, f);
319
+ f = PP->II.separator;
320
+
321
+ // if ( PP->vecchr[*x] == 0 ){
322
+ // II->itemset.v[0] = i; II->itemset.v[1] = *x;
323
+ // ITEMSET_output_itemset (&PP->II, NULL, 0);
324
+ // QUEUE_print__ (&PP->TT.OQ[i]);
325
+ // QUEUE_print__ (&PP->TT.T.v[*x]);
326
+ }
327
+ PP->vecchr[*x] = 0;
328
+ }
329
+ FILE2_putc (&PP->II.multi_fp[0], '\n');
330
+ FILE2_flush (&PP->II.multi_fp[0]);
331
+ MQUE_FLOOP (TT->jump, x) PP->OQ[*x].t = 0;
332
+ }
333
+
334
+ if (PP->problem & SSPC_NO_NEIB) // for no_neib
335
+ FLOOP (ii, TT->OQ[i].s, TT->OQ[i].t){ mark[TT->OQ[i].v[ii]] = 0; }
336
+ TT->OQ[i].end = 0;
337
+ if ( PP->problem & SSPC_COUNT ){
338
+ while ( ii<II->perm[i] ){
339
+ FILE2_putc (&II->multi_fp[0], '\n');
340
+ FILE2_flush (&II->multi_fp[0]);
341
+ ii++;
342
+ }
343
+ FILE2_print_int (&II->multi_fp[0], cnt, 0);
344
+ FILE2_putc (&II->multi_fp[0], '\n');
345
+ FILE2_flush (&II->multi_fp[0]);
346
+ II->sc[2] += cnt;
347
+ ii++;
348
+ }
349
+ }
350
+
351
+ // termination
352
+ if ( TT->flag & LOAD_SIZSORT ){
353
+ FLOOP (i, 0, TT->T.clms){
354
+ TT->OQ[i].t += TT->OQ[i].v - o[i];
355
+ TT->OQ[i].v = o[i];
356
+ }
357
+ }
358
+ mfree (w, o, u, mark);
359
+ QUEUE_end (&J);
360
+ }
361
+
362
+
363
+ /*************************************************************************/
364
+ /* SSPC matrix version */
365
+ /*************************************************************************/
366
+ void SSPCmat (PROBLEM *PP){
367
+ ITEMSET *II = &PP->II;
368
+ MAT *MM = &PP->MM;
369
+ QUEUE_ID i, j, x, begin = PP->dir>0?0:1, f, ii=0;
370
+ QUEUE_INT cnt;
371
+ WEIGHT *w, f1, f2, c, cc;
372
+ double sq =0;
373
+ int fs = SSPC_INTERSECTION +SSPC_RESEMBLANCE +SSPC_INNERPRODUCT+SSPC_MAXINT+SSPC_MININT;
374
+
375
+ II->frq_lb = II->frq_lb * II->frq_lb;
376
+
377
+ // initialization
378
+ // calloc2 (w, MM->t, EXIT);
379
+ // if ( PP->problem & SSPC_INNERPRODUCT ) FLOOP (i, 0, MM->clms) TT->w[i] *= TT->w[i];
380
+ // TRSACT_delivery (TT, &TT->jump, w, w+TT->T.clms, NULL, TT->T.clms);
381
+ // FLOOP (i, 0, PP.TT.T.clms) TT->OQ[i].end = 0;
382
+ II->itemset.t = 2;
383
+
384
+ // skipping items of large frequencies
385
+ // if ( TT->flag & LOAD_SIZSORT ){
386
+ // malloc2 (o, TT->T.clms, EXIT);
387
+ // FLOOP (i, 0, TT->T.clms){
388
+ // o[i] = TT->OQ[i].v;
389
+ // TT->OQ[i].v[TT->OQ[i].t] = INTHUGE; // put end-mark at the last; also used in main loop
390
+ // for ( j=0 ; TT->OQ[i].v[j] < PP->II.len_lb ; j++ );
391
+ // TT->OQ[i].v = &TT->OQ[i].v[j]; TT->OQ[i].t -= j;
392
+ // }
393
+ // }
394
+
395
+ // main loop
396
+ FLOOP (i, begin, MM->t){
397
+ cnt = 0;
398
+ II->itemset.v[0] = ((PP->problem&PROBLEM_NORMALIZE)&& PP->dir>0)? i-MM->clms: i; // i-TT->sep
399
+ if ( PP->problem || 1 ){
400
+ PP->occ_w[i] = 0;
401
+ FLOOP (x, 0, MM->clms) PP->occ_w[i] += MM->v[i].v[x] * MM->v[i].v[x];
402
+ }
403
+
404
+ FLOOP (j, 0, PP->dir>0?begin:i){
405
+ II->itemset.v[1] = j;
406
+ f = 0; sq = 0;
407
+ FLOOP (x, 0, MM->clms) sq += MM->v[i].v[x] * MM->v[j].v[x];
408
+ if ( sq / PP->occ_w[i] / PP->occ_w[j] > II->frq_lb ) f = 1;
409
+
410
+ if ( f ){
411
+ if ( PP->problem & SSPC_COUNT ) cnt++;
412
+ else ITEMSET_output_itemset (II, NULL, 0);
413
+ }
414
+ }
415
+ if ( PP->problem & SSPC_COUNT ){
416
+ while ( ii<II->perm[i] ){
417
+ FILE2_putc (&II->multi_fp[0], '\n');
418
+ FILE2_flush (&II->multi_fp[0]);
419
+ ii++;
420
+ }
421
+ FILE2_print_int (&II->multi_fp[0], cnt, 0);
422
+ FILE2_putc (&II->multi_fp[0], '\n');
423
+ FILE2_flush (&II->multi_fp[0]);
424
+ II->sc[2] += cnt;
425
+ ii++;
426
+ }
427
+ }
428
+
429
+ // termination
430
+ // mfree (w, o);
431
+ }
432
+
433
+
434
+
435
+ /*************************************************************************/
436
+ /* main function of SSPC */
437
+ /*************************************************************************/
438
+ int SSPC_main (int argc, char *argv[]){
439
+ PROBLEM PP;
440
+ SETFAMILY *T = &PP.TT.T;
441
+ QUEUE_ID i;
442
+
443
+ PROBLEM_init (&PP);
444
+ SSPC_read_param (argc, argv, &PP);
445
+ if ( ERROR_MES ) return (1);
446
+
447
+ PP.TT.flag |= LOAD_INCSORT + TRSACT_ALLOC_OCC;
448
+ if ( PP.II.len_ub<INTHUGE || PP.II.len_lb>0 ) PP.TT.flag |= LOAD_SIZSORT+LOAD_DECROWSORT;
449
+ PROBLEM_load (&PP);
450
+ if ( PP.II.len_ub < INTHUGE ){
451
+ FLOOP (i, 0, PP.TT.T.t) if ( PP.TT.T.v[i].t <= PP.II.len_ub ){ PP.II.len_lb = i; break; }
452
+ }
453
+ PROBLEM_alloc (&PP, T->clms, T->t, 0, PP.TT.perm, PROBLEM_OCC_W +PROBLEM_OCC_T +PROBLEM_VECCHR);
454
+ PP.TT.perm = NULL;
455
+ realloc2 (PP.TT.w, MAX(T->t, T->clms)+1, EXIT);
456
+ ARY_FILL (PP.TT.w, 0, MAX(T->t, T->clms)+1, 1);
457
+
458
+ // if ( PP.problem & SSPC_PMI ){ PP.II.frq_lb = exp (PP.II.frq_lb) / T->t; } // shift the threshold
459
+
460
+ // delivery
461
+ //TRSACT_print (&PP.TT, NULL, PP.II.perm);
462
+ print_mes (&PP.TT, "separated at %d\n", PP.TT.sep);
463
+ QUEUE_delivery (PP.TT.OQ, NULL, NULL, T->v, &PP.TT.OQ[T->clms], T->t, T->clms);
464
+ if ( PP.problem & SSPC_POLISH ){
465
+ FLOOP (i, 0, PP.TT.T.clms) PP.occ_t[i] = PP.TT.OQ[i].t;
466
+ MQUE_ALLOC (PP.OQ, PP.TT.T.clms, PP.occ_t, PP.TT.occ_unit, 1, EXIT);
467
+ }
468
+
469
+ if ( !ERROR_MES && PP.TT.T.clms>1 ){
470
+ if ( PP.problem & SSPC_MATRIX ){ SSPCmat (&PP); }
471
+ else SSPC (&PP);
472
+ print_mes (&PP.TT, LONGF " pairs are found\n", PP.II.sc[2]);
473
+ }
474
+
475
+ ITEMSET_merge_counters (&PP.II);
476
+ internal_params.l1 = PP.II.solutions;
477
+
478
+ // PROBLEM_end (&PP);
479
+ return (ERROR_MES?1:0);
480
+ }
481
+
482
+ /*******************************************************************************/
483
#ifndef _NO_MAIN_
#define _NO_MAIN_
/* Stand-alone entry point; compiled only if no other file has defined
 * _NO_MAIN_ before.  The exit status is the result of SSPC_main(). */
int main (int argc, char *argv[]){
  int status = SSPC_main (argc, argv);
  return (status);
}
#endif
489
+ /*******************************************************************************/
490
+
491
+ #endif