nysol-take 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/mbiclique.rb +317 -0
- data/bin/mbipolish.rb +362 -0
- data/bin/mccomp.rb +235 -0
- data/bin/mclique.rb +295 -0
- data/bin/mclique2g.rb +105 -0
- data/bin/mcliqueInfo.rb +203 -0
- data/bin/mfriends.rb +202 -0
- data/bin/mgdiff.rb +252 -0
- data/bin/mhifriend.rb +456 -0
- data/bin/mhipolish.rb +465 -0
- data/bin/mitemset.rb +168 -0
- data/bin/mpal.rb +410 -0
- data/bin/mpolishing.rb +399 -0
- data/bin/msequence.rb +165 -0
- data/bin/mtra2g.rb +476 -0
- data/bin/mtra2gc.rb +360 -0
- data/ext/grhfilrun/extconf.rb +12 -0
- data/ext/grhfilrun/grhfilrun.c +85 -0
- data/ext/grhfilrun/src/_sspc.c +358 -0
- data/ext/grhfilrun/src/aheap.c +545 -0
- data/ext/grhfilrun/src/aheap.h +251 -0
- data/ext/grhfilrun/src/base.c +92 -0
- data/ext/grhfilrun/src/base.h +59 -0
- data/ext/grhfilrun/src/fstar.c +497 -0
- data/ext/grhfilrun/src/fstar.h +80 -0
- data/ext/grhfilrun/src/grhfil.c +214 -0
- data/ext/grhfilrun/src/itemset.c +713 -0
- data/ext/grhfilrun/src/itemset.h +170 -0
- data/ext/grhfilrun/src/problem.c +415 -0
- data/ext/grhfilrun/src/problem.h +179 -0
- data/ext/grhfilrun/src/queue.c +533 -0
- data/ext/grhfilrun/src/queue.h +182 -0
- data/ext/grhfilrun/src/sample.c +19 -0
- data/ext/grhfilrun/src/sspc.c +597 -0
- data/ext/grhfilrun/src/sspc2.c +491 -0
- data/ext/grhfilrun/src/stdlib2.c +1482 -0
- data/ext/grhfilrun/src/stdlib2.h +892 -0
- data/ext/grhfilrun/src/trsact.c +817 -0
- data/ext/grhfilrun/src/trsact.h +160 -0
- data/ext/grhfilrun/src/vec.c +745 -0
- data/ext/grhfilrun/src/vec.h +172 -0
- data/ext/lcmrun/extconf.rb +20 -0
- data/ext/lcmrun/lcmrun.cpp +99 -0
- data/ext/lcmrun/src/aheap.c +216 -0
- data/ext/lcmrun/src/aheap.h +111 -0
- data/ext/lcmrun/src/base.c +92 -0
- data/ext/lcmrun/src/base.h +59 -0
- data/ext/lcmrun/src/itemset.c +496 -0
- data/ext/lcmrun/src/itemset.h +157 -0
- data/ext/lcmrun/src/lcm.c +427 -0
- data/ext/lcmrun/src/problem.c +349 -0
- data/ext/lcmrun/src/problem.h +177 -0
- data/ext/lcmrun/src/queue.c +528 -0
- data/ext/lcmrun/src/queue.h +176 -0
- data/ext/lcmrun/src/sgraph.c +359 -0
- data/ext/lcmrun/src/sgraph.h +173 -0
- data/ext/lcmrun/src/stdlib2.c +1282 -0
- data/ext/lcmrun/src/stdlib2.h +823 -0
- data/ext/lcmrun/src/trsact.c +747 -0
- data/ext/lcmrun/src/trsact.h +159 -0
- data/ext/lcmrun/src/vec.c +731 -0
- data/ext/lcmrun/src/vec.h +171 -0
- data/ext/lcmseq0run/extconf.rb +20 -0
- data/ext/lcmseq0run/lcmseq0run.cpp +59 -0
- data/ext/lcmseq0run/src/aheap.c +216 -0
- data/ext/lcmseq0run/src/aheap.h +111 -0
- data/ext/lcmseq0run/src/base.c +92 -0
- data/ext/lcmseq0run/src/base.h +59 -0
- data/ext/lcmseq0run/src/itemset.c +518 -0
- data/ext/lcmseq0run/src/itemset.h +157 -0
- data/ext/lcmseq0run/src/itemset_zero.c +522 -0
- data/ext/lcmseq0run/src/lcm_seq.c +446 -0
- data/ext/lcmseq0run/src/lcm_seq_zero.c +446 -0
- data/ext/lcmseq0run/src/problem.c +439 -0
- data/ext/lcmseq0run/src/problem.h +179 -0
- data/ext/lcmseq0run/src/problem_zero.c +439 -0
- data/ext/lcmseq0run/src/queue.c +533 -0
- data/ext/lcmseq0run/src/queue.h +182 -0
- data/ext/lcmseq0run/src/stdlib2.c +1350 -0
- data/ext/lcmseq0run/src/stdlib2.h +864 -0
- data/ext/lcmseq0run/src/trsact.c +747 -0
- data/ext/lcmseq0run/src/trsact.h +159 -0
- data/ext/lcmseq0run/src/vec.c +779 -0
- data/ext/lcmseq0run/src/vec.h +172 -0
- data/ext/lcmseqrun/extconf.rb +20 -0
- data/ext/lcmseqrun/lcmseqrun.cpp +101 -0
- data/ext/lcmseqrun/src/aheap.c +216 -0
- data/ext/lcmseqrun/src/aheap.h +111 -0
- data/ext/lcmseqrun/src/base.c +92 -0
- data/ext/lcmseqrun/src/base.h +59 -0
- data/ext/lcmseqrun/src/itemset.c +518 -0
- data/ext/lcmseqrun/src/itemset.h +157 -0
- data/ext/lcmseqrun/src/itemset_zero.c +522 -0
- data/ext/lcmseqrun/src/lcm_seq.c +447 -0
- data/ext/lcmseqrun/src/lcm_seq_zero.c +446 -0
- data/ext/lcmseqrun/src/problem.c +439 -0
- data/ext/lcmseqrun/src/problem.h +179 -0
- data/ext/lcmseqrun/src/problem_zero.c +439 -0
- data/ext/lcmseqrun/src/queue.c +533 -0
- data/ext/lcmseqrun/src/queue.h +182 -0
- data/ext/lcmseqrun/src/stdlib2.c +1350 -0
- data/ext/lcmseqrun/src/stdlib2.h +864 -0
- data/ext/lcmseqrun/src/trsact.c +747 -0
- data/ext/lcmseqrun/src/trsact.h +159 -0
- data/ext/lcmseqrun/src/vec.c +779 -0
- data/ext/lcmseqrun/src/vec.h +172 -0
- data/ext/lcmtransrun/extconf.rb +18 -0
- data/ext/lcmtransrun/lcmtransrun.cpp +264 -0
- data/ext/macerun/extconf.rb +20 -0
- data/ext/macerun/macerun.cpp +57 -0
- data/ext/macerun/src/aheap.c +217 -0
- data/ext/macerun/src/aheap.h +112 -0
- data/ext/macerun/src/itemset.c +491 -0
- data/ext/macerun/src/itemset.h +158 -0
- data/ext/macerun/src/mace.c +503 -0
- data/ext/macerun/src/problem.c +346 -0
- data/ext/macerun/src/problem.h +174 -0
- data/ext/macerun/src/queue.c +529 -0
- data/ext/macerun/src/queue.h +177 -0
- data/ext/macerun/src/sgraph.c +360 -0
- data/ext/macerun/src/sgraph.h +174 -0
- data/ext/macerun/src/stdlib2.c +993 -0
- data/ext/macerun/src/stdlib2.h +811 -0
- data/ext/macerun/src/vec.c +634 -0
- data/ext/macerun/src/vec.h +170 -0
- data/ext/sspcrun/extconf.rb +20 -0
- data/ext/sspcrun/src/_sspc.c +358 -0
- data/ext/sspcrun/src/aheap.c +545 -0
- data/ext/sspcrun/src/aheap.h +251 -0
- data/ext/sspcrun/src/base.c +92 -0
- data/ext/sspcrun/src/base.h +59 -0
- data/ext/sspcrun/src/fstar.c +496 -0
- data/ext/sspcrun/src/fstar.h +80 -0
- data/ext/sspcrun/src/grhfil.c +213 -0
- data/ext/sspcrun/src/itemset.c +713 -0
- data/ext/sspcrun/src/itemset.h +170 -0
- data/ext/sspcrun/src/problem.c +415 -0
- data/ext/sspcrun/src/problem.h +179 -0
- data/ext/sspcrun/src/queue.c +533 -0
- data/ext/sspcrun/src/queue.h +182 -0
- data/ext/sspcrun/src/sample.c +19 -0
- data/ext/sspcrun/src/sspc.c +598 -0
- data/ext/sspcrun/src/sspc2.c +491 -0
- data/ext/sspcrun/src/stdlib2.c +1482 -0
- data/ext/sspcrun/src/stdlib2.h +892 -0
- data/ext/sspcrun/src/trsact.c +817 -0
- data/ext/sspcrun/src/trsact.h +160 -0
- data/ext/sspcrun/src/vec.c +745 -0
- data/ext/sspcrun/src/vec.h +172 -0
- data/ext/sspcrun/sspcrun.cpp +54 -0
- data/lib/nysol/enumLcmEp.rb +338 -0
- data/lib/nysol/enumLcmEsp.rb +284 -0
- data/lib/nysol/enumLcmIs.rb +275 -0
- data/lib/nysol/enumLcmSeq.rb +143 -0
- data/lib/nysol/items.rb +201 -0
- data/lib/nysol/seqDB.rb +256 -0
- data/lib/nysol/take.rb +39 -0
- data/lib/nysol/taxonomy.rb +113 -0
- data/lib/nysol/traDB.rb +257 -0
- metadata +239 -0
@@ -0,0 +1,170 @@
|
|
1
|
+
/* library for sparse vector */
|
2
|
+
/* Takeaki Uno 27/Dec/2008 */
|
3
|
+
|
4
|
+
#ifndef _vec_h_
|
5
|
+
#define _vec_h_
|
6
|
+
|
7
|
+
#define STDLIB2_USE_MATH
|
8
|
+
|
9
|
+
#include"math.h"
|
10
|
+
#include"queue.h"
|
11
|
+
|
12
|
+
#ifndef SVEC_VAL
|
13
|
+
#ifdef SVEC_VAL_INT
|
14
|
+
#define SVEC_VAL int
|
15
|
+
#define SVEC_VAL2 LONG
|
16
|
+
#define SVEC_VAL_END INTHUGE
|
17
|
+
#define SVEC_VAL2_END LONGHUGE
|
18
|
+
#define SVEC_VALF "%d"
|
19
|
+
#else
|
20
|
+
#define SVEC_VAL double
|
21
|
+
#define SVEC_VAL2 double
|
22
|
+
#define SVEC_VAL_END DOUBLEHUGE
|
23
|
+
#define SVEC_VAL2_END DOUBLEHUGE
|
24
|
+
#define SVEC_VALF "%f"
|
25
|
+
#endif
|
26
|
+
#endif
|
27
|
+
|
28
|
+
#define VEC_LOAD_BIN 16777216 // (1<<24) read binary file
|
29
|
+
#define VEC_LOAD_BIN2 33554432 // (1<<25) read binary file with 2 bytes for each number
|
30
|
+
#define VEC_LOAD_BIN4 67108864 // (1<<26) read binary file with 4 bytes for each number
|
31
|
+
#define VEC_LOAD_CENTERIZE 134217728 // (1<<27) read binary file, and subtract the half (128) from each number
|
32
|
+
#define VEC_NORMALIZE 268435456 // (1<<28) NOTE(review): the original comment was a copy of VEC_LOAD_CENTERIZE's; presumably this requests normalization of the loaded vectors -- confirm against vec.c
|
33
|
+
|
34
|
+
/* matrix */
|
35
|
+
typedef struct {
|
36
|
+
unsigned char type; // mark to identify type of the structure
|
37
|
+
char *fname; // input file name
|
38
|
+
int flag; // flag
|
39
|
+
|
40
|
+
VEC *v;
|
41
|
+
VEC_ID end;
|
42
|
+
VEC_ID t;
|
43
|
+
VEC_VAL *buf, *buf2;
|
44
|
+
VEC_ID clms;
|
45
|
+
size_t eles;
|
46
|
+
} MAT;
|
47
|
+
|
48
|
+
/* sparse vector, element */
|
49
|
+
typedef struct {
|
50
|
+
QUEUE_ID i;
|
51
|
+
SVEC_VAL a;
|
52
|
+
} SVEC_ELE;
|
53
|
+
|
54
|
+
/* sparse vector, vector */
|
55
|
+
typedef struct {
|
56
|
+
unsigned char type; // mark to identify type of the structure
|
57
|
+
SVEC_ELE *v;
|
58
|
+
VEC_ID end;
|
59
|
+
VEC_ID t;
|
60
|
+
} SVEC;
|
61
|
+
|
62
|
+
/* sparse vector, matrix */
|
63
|
+
typedef struct {
|
64
|
+
unsigned char type; // mark to identify type of the structure
|
65
|
+
char *fname; // input file name
|
66
|
+
int flag; // flag
|
67
|
+
|
68
|
+
SVEC *v;
|
69
|
+
VEC_ID end;
|
70
|
+
VEC_ID t;
|
71
|
+
SVEC_ELE *buf, *buf2;
|
72
|
+
VEC_ID clms;
|
73
|
+
size_t eles, ele_end;
|
74
|
+
} SMAT;
|
75
|
+
|
76
|
+
/* set family */
|
77
|
+
typedef struct {
|
78
|
+
unsigned char type; // mark to identify type of the structure
|
79
|
+
char *fname; // input file name
|
80
|
+
int flag; // flag
|
81
|
+
|
82
|
+
QUEUE *v;
|
83
|
+
VEC_ID end;
|
84
|
+
VEC_ID t;
|
85
|
+
QUEUE_INT *buf, *buf2;
|
86
|
+
VEC_ID clms;
|
87
|
+
size_t eles, ele_end;
|
88
|
+
WEIGHT *cw, *rw, **w, *wbuf;
|
89
|
+
int unit;
|
90
|
+
char *wfname, *cwfname, *rwfname; // weight file name
|
91
|
+
} SETFAMILY;
|
92
|
+
|
93
|
+
#define INIT_SETFAMILY_ {TYPE_SETFAMILY,NULL,0,NULL,0,0,NULL,NULL,0,0,0,NULL,NULL,NULL,NULL,sizeof(QUEUE_INT),NULL,NULL,NULL}
|
94
|
+
|
95
|
+
extern MAT INIT_MAT;
|
96
|
+
extern SVEC INIT_SVEC;
|
97
|
+
extern SMAT INIT_SMAT;
|
98
|
+
extern SETFAMILY INIT_SETFAMILY;
|
99
|
+
|
100
|
+
QSORT_TYPE_HEADER (SVEC_VAL, SVEC_VAL)
|
101
|
+
QSORT_TYPE_HEADER (SVEC_VAL2, SVEC_VAL2)
|
102
|
+
|
103
|
+
// f = sum of (V)[e] over the entries e = (U).v[k], k in [0,(U).t) (assuming FLOOP(i,a,b) iterates i over [a,b)); the loop counter common_QUEUE_ID is not declared here and must be visible at the point of use
#define ARY_QUEUE_INPRO(f,U,V) do{(f)=0;FLOOP(common_QUEUE_ID, 0, (QUEUE_ID)(U).t)(f)+=(V)[(U).v[common_QUEUE_ID]];}while(0)
|
104
|
+
// f = sum of (double)(U).v[k].a * (V)[(U).v[k].i] for k in [0,(U).t), i.e. the inner product of sparse vector U with array V; the loop counter common_VEC_ID must be visible at the point of use
#define ARY_SVEC_INPRO(f,U,V) do{(f)=0;FLOOP(common_VEC_ID, 0, (VEC_ID)(U).t)(f)+=((double)(U).v[common_VEC_ID].a)*(V)[(U).v[common_VEC_ID].i];}while(0)
|
105
|
+
|
106
|
+
/* terminate routine for VEC */
|
107
|
+
void VEC_end (VEC *V);
|
108
|
+
void MAT_end (MAT *M);
|
109
|
+
void SVEC_end (SVEC *V);
|
110
|
+
void SMAT_end (SMAT *M);
|
111
|
+
void SETFAMILY_end (SETFAMILY *M);
|
112
|
+
|
113
|
+
/* allocate memory according to rows and rowt */
|
114
|
+
void VEC_alloc (VEC *V, VEC_ID clms);
|
115
|
+
void MAT_alloc (MAT *M, VEC_ID rows, VEC_ID clms);
|
116
|
+
void SVEC_alloc (SVEC *V, VEC_ID end);
|
117
|
+
void SMAT_alloc (SMAT *M, VEC_ID rows, VEC_ID *rowt, VEC_ID clms, size_t eles);
|
118
|
+
void SETFAMILY_alloc (SETFAMILY *M, VEC_ID rows, VEC_ID *rowt, VEC_ID clms, size_t eles);
|
119
|
+
void SETFAMILY_alloc_weight (SETFAMILY *M);
|
120
|
+
|
121
|
+
/* count/read the number in file for MAT */
|
122
|
+
/* if *rows>0, only read count the numbers in a row, for the first scan. */
|
123
|
+
void MAT_load_bin (MAT *M, FILE2 *fp, int unit);
|
124
|
+
void MAT_file_load (MAT *M, FILE2 *fp);
|
125
|
+
void MAT_load (MAT *M);
|
126
|
+
void SMAT_load (SMAT *M);
|
127
|
+
void SETFAMILY_load (SETFAMILY *M);
|
128
|
+
void SETFAMILY_load_weight (SETFAMILY *M);
|
129
|
+
void SETFAMILY_load_row_weight (SETFAMILY *M);
|
130
|
+
void SETFAMILY_load_column_weight (SETFAMILY *M);
|
131
|
+
|
132
|
+
void MAT_print (FILE *fp, MAT *M);
|
133
|
+
void SVEC_print (FILE *fp, SVEC *M);
|
134
|
+
void SMAT_print (FILE *fp, SMAT *M);
|
135
|
+
void SETFAMILY_print (FILE *fp, SETFAMILY *M);
|
136
|
+
void SETFAMILY_print_weight (FILE *fp, SETFAMILY *M);
|
137
|
+
|
138
|
+
|
139
|
+
/* norm, normalization **************************/
|
140
|
+
double SVEC_norm (SVEC *V);
|
141
|
+
void SVEC_normalize (SVEC *V);
|
142
|
+
|
143
|
+
/* inner product **************************/
|
144
|
+
SVEC_VAL2 SVEC_inpro (SVEC *V1, SVEC *V2);
|
145
|
+
|
146
|
+
/** Euclidean distance routines *********************************/
|
147
|
+
double VEC_eucdist (VEC *V1, VEC *V2);
|
148
|
+
double SVEC_eucdist (SVEC *V1, SVEC *V2);
|
149
|
+
double VEC_SVEC_eucdist (VEC *V1, SVEC *V2);
|
150
|
+
double QUEUE_eucdist (QUEUE *Q1, QUEUE *Q2);
|
151
|
+
double VEC_QUEUE_eucdist (VEC *V, QUEUE *Q);
|
152
|
+
|
153
|
+
void VEC_rand_gaussian (VEC *V);
|
154
|
+
|
155
|
+
double VEC_linfdist (VEC *V1, VEC *V2);
|
156
|
+
|
157
|
+
/* compute the inner product, Euclidean distance for multi vector */
|
158
|
+
double MVEC_norm (void *V);
|
159
|
+
double MVEC_inpro (void *V, void *U);
|
160
|
+
double MVEC_double_inpro (void *V, double *p);
|
161
|
+
double MVEC_eucdist (void *V, void *U);
|
162
|
+
|
163
|
+
/* compute the inner product, euclidean distance for i,jth vector */
|
164
|
+
double MMAT_inpro_ij (void *M, int i, int j);
|
165
|
+
double MMAT_double_inpro_i (void *M, int i, double *p);
|
166
|
+
double MMAT_eucdist_ij (void *M, int i, int j);
|
167
|
+
double MMAT_norm_i (void *M, int i);
|
168
|
+
|
169
|
+
|
170
|
+
#endif
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# Build configuration (mkmf) for the nysol/sspcrun native extension.
require "rubygems"
require "mkmf"

# The extension is linked against kgmod3; stop right away when it is missing.
if !have_library("kgmod3")
  puts "need libkgmod."
  puts "refer https://github.com/nysol/mcmd"
  exit 1
end

src_root = "$(srcdir)"

# One option string shared by the C, preprocessor and C++ flag variables.
# Each variable gets its own copy so later in-place edits cannot leak across.
build_flags = " -O3 -Wall -I. -I#{src_root}/src -DB_STATIC -D_NO_MAIN_ -DLINE -fPIC -Wno-error=format-security"
$CFLAGS   = build_flags.dup
$CPPFLAGS = build_flags.dup
$CXXFLAGS = build_flags.dup

$LOCAL_LIBS += " -lstdc++ -lkgmod3 -lm"

create_makefile("nysol/sspcrun")
|
20
|
+
|
@@ -0,0 +1,358 @@
|
|
1
|
+
/* SSPC: Similar Set Pair Comparison */
|
2
|
+
/* 2007/11/30 Takeaki Uno, e-mail:uno@nii.jp,
|
3
|
+
homepage: http://research.nii.ac.jp/~uno/index.html */
|
4
|
+
/* This program is available for only academic use, basically.
|
5
|
+
Anyone can modify this program, but he/she has to write down
|
6
|
+
the change of the modification on the top of the source code.
|
7
|
+
Neither contact nor appointment to Takeaki Uno is needed.
|
8
|
+
If one wants to re-distribute this code, do not forget to
|
9
|
+
refer the newest code, and show the link to homepage of
|
10
|
+
Takeaki Uno, to notify the news about this code for the users.
|
11
|
+
For the commercial use, please make a contact to Takeaki Uno. */
|
12
|
+
|
13
|
+
#ifndef _sspc_c_
|
14
|
+
#define _sspc_c_
|
15
|
+
|
16
|
+
#define WEIGHT_DOUBLE
|
17
|
+
|
18
|
+
#include"trsact.c"
|
19
|
+
#include"problem.c"
|
20
|
+
|
21
|
+
#define SSPC_INCLUSION 1 // selected by command letter 'i'
|
22
|
+
#define SSPC_SIMILARITY 2 // selected by command letter 'I'
|
23
|
+
#define SSPC_INTERSECTION 4 // selected by command letter 'T'
|
24
|
+
#define SSPC_RESEMBLANCE 8 // selected by command letter 'R'
|
25
|
+
#define SSPC_INNERPRODUCT 16 // selected by command letter 'C'
|
26
|
+
#define SSPC_MININT 32 // selected by command letter 's'
|
27
|
+
#define SSPC_MAXINT 64 // selected by command letter 'S'
|
28
|
+
#define SSPC_COUNT 128 // modifier '#': count the similar records instead of writing the pairs
|
29
|
+
#define SSPC_MATRIX 256 // modifier 'M': input is read as a matrix (PP->MM) and processed by SSPCmat
|
30
|
+
|
31
|
+
|
32
|
+
void SSPC_error (){
|
33
|
+
ERROR_MES = "command explanation";
|
34
|
+
print_err ("SSPC: [ISCfQq] [options] input-filename ratio/threshold [output-filename]\n\
|
35
|
+
%%:show progress, _:no message, +:write solutions in append mode\n\
|
36
|
+
#:count the number of similar records for each record\n\
|
37
|
+
i(inclusion): find pairs [ratio] of items (weighted sum) of one is included in the other (1st is included in 2nd)\n\
|
38
|
+
I(both-inclusion): find pairs s.t. the size (weight sum) of intersection is [ratio] of both\n\
|
39
|
+
S:set similarity measure to |A\\cap B| / max{|A|,|B|}\n\
|
40
|
+
s:set similarity measure to |A\\cap B| / min{|A|,|B|}\n\
|
41
|
+
T(intersection): find pairs having common [threshld] items\n\
|
42
|
+
R(resemblance): find pairs s.t. |A\\capB|/|A\\cupB| >= [threshld]\n\
|
43
|
+
C(cosign distance): find pairs s.t. inner product of their normalized vectors >= [threshld]\n\
|
44
|
+
f,Q:output ratio/size of pairs following/preceding to the pairs\n\
|
45
|
+
N:normalize the ID of latter sets, in -c mode\n\
|
46
|
+
t:transpose the database so that i-th transaction will be item i\n\
|
47
|
+
[options]\n\
|
48
|
+
-2 [num]:2nd input file name\n\
|
49
|
+
-K [num]:output [num] pairs of most large intersections\n\
|
50
|
+
-w [filename]:read item weights from [filename]\n\
|
51
|
+
-l,-u [num]:ignore transactions with size (weight sum) less/more than [num]\n\
|
52
|
+
-L,-U [num]: ignore items appearing less/more than [num]\n\
|
53
|
+
-c [num]:compare transactions of IDs less than num and the others (if 0 is given, automatically set to the boundary of the 1st and 2nd file)\n\
|
54
|
+
-b [num]:ignore pairs having no common item of at least [num]th frequency\n\
|
55
|
+
-B [num]:ignore pairs having no common item of frequency at most [num]\n\
|
56
|
+
-# [num]:stop after outputting [num] solutions\n\
|
57
|
+
-, [char]:give the separator of the numbers in the output\n\
|
58
|
+
-Q [filename]:replace the output numbers according to the permutation table given by [filename]\n\
|
59
|
+
# the 1st letter of input-filename cannot be '-'.\n\
|
60
|
+
# if the output file name is -, the solutions will be output to standard output.\n");
|
61
|
+
EXIT;
|
62
|
+
//items have to begin from 1\n");
|
63
|
+
}
|
64
|
+
|
65
|
+
// c:multi stream transaction mode, separated by an empty transaction
|
66
|
+
|
67
|
+
/***********************************************************************/
|
68
|
+
/* read parameters given by command line */
|
69
|
+
/***********************************************************************/
|
70
|
+
/* Parse the SSPC command line into the problem description PP.
     argv[1]              : command letters (measure, output and load modifiers)
     argv[2..]            : "-X value" option pairs
     then                 : input-filename, ratio/threshold, [output-filename]
   On a malformed command line SSPC_error() is called (which sets ERROR_MES)
   and the function returns without filling the remaining fields.

   Fix: the 'M' (matrix) modifier used to be OR-ed into PP->problem BEFORE
   the measure letter was stored with a plain assignment, so SSPC_MATRIX was
   always wiped out again.  It is now applied after the measure has been
   chosen, together with the other modifiers. */
void SSPC_read_param (int argc, char *argv[], PROBLEM *PP){
  int c = 1;
  ITEMSET *II = &PP->II;
  TRSACT *TT = &PP->TT;
  char *cmd;

  if ( argc < c+3 ){ SSPC_error (); return; }
  cmd = argv[c];

  // message / output modifiers
  if ( !strchr (cmd, '_') ){ II->flag |= SHOW_MESSAGE; TT->flag |= SHOW_MESSAGE; }
  if ( strchr (cmd, '%') ) II->flag |= SHOW_PROGRESS;
  if ( strchr (cmd, '+') ) II->flag |= ITEMSET_APPEND;
  if ( strchr (cmd, 'f') ) II->flag |= ITEMSET_FREQ;
  if ( strchr (cmd, 'Q') ) II->flag |= ITEMSET_PRE_FREQ;

  // exactly one measure is stored; the first matching letter in this order wins
  if ( strchr (cmd, 'i') ) PP->problem = SSPC_INCLUSION;
  else if ( strchr (cmd, 'I') ) PP->problem = SSPC_SIMILARITY;
  else if ( strchr (cmd, 'T') ) PP->problem = SSPC_INTERSECTION;
  else if ( strchr (cmd, 's') ) PP->problem = SSPC_MININT;
  else if ( strchr (cmd, 'S') ) PP->problem = SSPC_MAXINT;
  else if ( strchr (cmd, 'R') ) PP->problem = SSPC_RESEMBLANCE;
  else if ( strchr (cmd, 'C') ) PP->problem = SSPC_INNERPRODUCT;
  else error ("i, I, s, S, R, T or C command has to be specified", EXIT);

  // modifiers that are combined with the measure (must come after the assignment above)
  if ( strchr (cmd, 'M') ) PP->problem |= SSPC_MATRIX;
  if ( strchr (cmd, '#') ) PP->problem |= SSPC_COUNT;
  if ( strchr (cmd, 'N') ) PP->problem |= PROBLEM_NORMALIZE;
  // LOAD_TPOSE is requested unless 't' is given
  if ( !strchr (cmd, 't') ) TT->flag |= LOAD_TPOSE;
  c++;

  // options: every option consumes its own word and one value word.
  // argc >= c+2 holds on every entry of the loop, so argv[c+1] exists.
  while ( argv[c][0] == '-' ){
    switch (argv[c][1]){
      case 'K': II->topk.end = atoi(argv[c+1]); break;
      case 'L': TT->row_lb = atoi(argv[c+1]); break;
      case 'U': TT->row_ub = atoi(argv[c+1]); break;
      case 'l': TT->w_lb = atof(argv[c+1]); break;
      case 'u': TT->w_ub = atof(argv[c+1]); break;
      case 'w': PP->TT.wfname = argv[c+1]; break;
      case 'c': PP->dir = 1; TT->sep = atoi(argv[c+1]); break;
      case '2': PP->TT.fname2 = argv[c+1]; break;
      case 'b': PP->II.len_lb = atoi(argv[c+1]); break;
      case 'B': PP->II.len_ub = atoi(argv[c+1]); break;
      case '#': II->max_solutions = atoi(argv[c+1]); break;
      case ',': II->separator = argv[c+1][0]; break;
      case 'Q': PP->outperm_fname = argv[c+1]; break;
      default: goto NEXT;   // unknown option letter: treat the word as the input file name
    }
    c += 2;
    if ( argc<c+2 ){ SSPC_error (); return; }
  }

  NEXT:;
  // positional arguments
  if ( PP->problem & SSPC_MATRIX ) PP->MM.fname = argv[c];
  else PP->TT.fname = argv[c];
  // the ratio/threshold is only stored when no top-k size was given with -K
  if ( II->topk.end==0 ) II->frq_lb = atof(argv[c+1]);
  if ( argc>c+2 ) PP->output_fname = argv[c+2];
}
|
123
|
+
|
124
|
+
|
125
|
+
/*************************************************************************/
|
126
|
+
/* SSPC main routine */
|
127
|
+
/*************************************************************************/
|
128
|
+
/* Enumerate the pairs (i, *x) whose similarity reaches II->frq_lb.
   For every i the candidates *x are those put into TT->jump by
   TRSACT_delivery, with PP->occ_w[*x] holding the weight they share with i.
   Depending on PP->problem the pair is either written with
   ITEMSET_output_itemset or, in SSPC_COUNT mode, only counted and the count
   per i is written to II->multi_fp[0]. */
void SSPC (PROBLEM *PP){
  ITEMSET *II = &PP->II;
  TRSACT *TT = &PP->TT;
  QUEUE_ID i, j, first = PP->dir>0?TT->sep:1, hit, out_row=0;
  QUEUE_INT *x, **o = NULL, *pi, *pj, n_pairs;
  WEIGHT *w, lim_i, lim_x, common, denom;
  double norm_i = 0;
  int percent = 0;
  // measures that are evaluated as a single value compared with frq_lb
  int ratio_modes = SSPC_INTERSECTION | SSPC_RESEMBLANCE | SSPC_INNERPRODUCT | SSPC_MAXINT | SSPC_MININT;

  // initialization: w[] receives the (weighted) size of every column
  calloc2 (w, TT->T.clms*2, EXIT);
  if ( PP->problem & SSPC_INNERPRODUCT ){
    FLOOP (i, 0, TT->T.clms) TT->w[i] *= TT->w[i];
  }
  TRSACT_delivery (TT, &TT->jump, w, w+TT->T.clms, NULL, TT->T.clms);
  FLOOP (i, 0, PP->dir?TT->sep:1) TT->OQ[i].end = 0;
  II->itemset.t = 2;   // every solution is a pair

  // skipping items of large frequencies: hide the leading entries (< len_lb)
  // of each occurrence list; o[] remembers the original starting positions
  if ( TT->flag & LOAD_SIZSORT ){
    malloc2 (o, TT->T.clms, EXIT);
    FLOOP (i, 0, TT->T.clms){
      o[i] = TT->OQ[i].v;
      TT->OQ[i].v[TT->OQ[i].t] = INTHUGE;  // put end-mark at the last; also used in main loop
      j = 0;
      while ( TT->OQ[i].v[j] < PP->II.len_lb ) j++;
      TT->OQ[i].v += j;
      TT->OQ[i].t -= j;
    }
  }

  // main loop
  FLOOP (i, first, TT->T.clms){
    if ( (II->flag & SHOW_PROGRESS) && percent < i*100/TT->T.clms ){
      percent++;
      fprintf (stderr, "%d%%\n", percent);
    }
    n_pairs = 0;
    if ( (PP->problem&PROBLEM_NORMALIZE) && PP->dir>0 ) II->itemset.v[0] = i-TT->sep;
    else II->itemset.v[0] = i;
    if ( PP->problem & SSPC_INNERPRODUCT ) norm_i = sqrt (w[i]);
    TRSACT_delivery (TT, &TT->jump, PP->occ_w, PP->occ_pw, &TT->OQ[i], (PP->dir>0)?TT->sep:i);

    MQUE_FLOOP (TT->jump, x){
      II->itemset.v[1] = *x;
      common = PP->occ_w[*x];

      // add the weight of the common entries that were hidden above;
      // the INTHUGE end-mark stops the inner while loop
      if ( TT->flag & LOAD_SIZSORT ){
        for ( pi=o[i], pj=o[*x] ; *pi<PP->II.len_lb ; pi++ ){
          while ( *pj < *pi ) pj++;
          if ( *pi == *pj ) common += TT->w[*pi];
        }
      }

      if ( PP->problem & ratio_modes ){
        // NOTE(review): the "continue" below also skips the reset of
        // TT->OQ[*x].end at the bottom of this loop body -- same as before.
        if ( PP->problem & SSPC_INTERSECTION ) II->frq = common;
        else if ( PP->problem & SSPC_INNERPRODUCT ) II->frq = common / norm_i / sqrt(w[*x]);
        else if ( (PP->problem & SSPC_RESEMBLANCE) && (denom = w[i] +w[*x] -common) != 0 ) II->frq = common/denom;
        else if ( (PP->problem & SSPC_MAXINT) && (denom = MAX(w[i],w[*x])) != 0 ) II->frq = common/denom;
        else if ( (PP->problem & SSPC_MININT) && (denom = MIN(w[i],w[*x])) != 0 ) II->frq = common/denom;
        else continue;
        if ( II->frq >= II->frq_lb ){
          if ( PP->problem & SSPC_COUNT ) n_pairs++;
          else ITEMSET_output_itemset (II, NULL, 0);
        }
      } else {
        // thresholds scaled by the size of i and of *x
        lim_i = w[i]*II->frq_lb;
        lim_x = w[*x]*II->frq_lb;
        if ( PP->problem & SSPC_SIMILARITY ){
          hit = ( (common >= lim_i) && (common >= lim_x) );
          II->frq = MIN(common/w[i], common/w[*x]);
        } else if ( PP->problem & SSPC_INCLUSION ){
          if ( common >= lim_x ){
            // report the pair in the opposite direction first
            II->frq = common/w[*x];
            II->itemset.v[0] = *x;
            II->itemset.v[1] = i-PP->root;
            if ( PP->problem & SSPC_COUNT ) n_pairs++;
            else ITEMSET_output_itemset (II, NULL, 0);
            II->itemset.v[0] = i-PP->root;
            II->itemset.v[1] = *x;
          }
          hit = ( common >= lim_i );
          II->frq = common/w[i];
        } else continue;
        if ( hit ){
          if ( PP->problem & SSPC_COUNT ) n_pairs++;
          else ITEMSET_output_itemset (II, NULL, 0);
        }
      }
      TT->OQ[*x].end = 0;
    }
    TT->OQ[i].end = 0;

    if ( PP->problem & SSPC_COUNT ){
      // write empty lines until the output row reaches II->perm[i]
      while ( out_row<II->perm[i] ){
        FILE2_putc (&II->multi_fp[0], '\n');
        FILE2_flush (&II->multi_fp[0]);
        out_row++;
      }
      FILE2_print_int (&II->multi_fp[0], n_pairs, 0);
      FILE2_putc (&II->multi_fp[0], '\n');
      FILE2_flush (&II->multi_fp[0]);
      II->sc[2] += n_pairs;
      out_row++;
    }
  }

  // termination: restore the occurrence lists shortened during initialization
  if ( TT->flag & LOAD_SIZSORT ){
    FLOOP (i, 0, TT->T.clms){
      TT->OQ[i].t += TT->OQ[i].v - o[i];
      TT->OQ[i].v = o[i];
    }
  }
  mfree (w, o);
}
|
234
|
+
|
235
|
+
|
236
|
+
/*************************************************************************/
|
237
|
+
/* SSPC matrix version */
|
238
|
+
/*************************************************************************/
|
239
|
+
/* Matrix version of SSPC: compare the rows of the dense matrix PP->MM and
   report the pairs (i, j), j < i, whose cosine
       <row_i, row_j> / (|row_i| * |row_j|)
   is at least II->frq_lb.  In SSPC_COUNT mode only the number of partners of
   each row is written to II->multi_fp[0].

   Fixes against the previous version:
   - the squared norm of row 0 was read (as PP->occ_w[0]) but never computed,
     because norms were filled in only inside the main loop starting at row 1;
     all norms are now computed before the comparison starts;
   - the test compared  ip / (|a|^2 |b|^2)  with  frq_lb^2, which is not the
     cosine; the cosine itself is now compared with frq_lb (">=" as in SSPC);
   - II->frq_lb is no longer overwritten with its square;
   - II->frq is set to the similarity of the pair before it is output;
   - pairs involving an all-zero row are skipped instead of dividing by zero;
   - unused locals and the disabled code inherited from SSPC were removed.

   NOTE(review): with PP->dir>0 the inner loop runs over [0,begin) = [0,0),
   i.e. nothing is compared, and itemset.v[0] is shifted by MM->clms; this is
   unchanged and looks unfinished -- confirm the intended -c semantics.
   NOTE(review): PP->occ_w is assumed to have at least MM->t entries, as the
   previous version already required. */
void SSPCmat (PROBLEM *PP){
  ITEMSET *II = &PP->II;
  MAT *MM = &PP->MM;
  QUEUE_ID i, j, x, begin = PP->dir>0?0:1, ii=0;
  QUEUE_INT cnt;
  double ip, denom;

  II->itemset.t = 2;   // every solution is a pair

  // squared Euclidean norm of every row
  FLOOP (i, 0, MM->t){
    PP->occ_w[i] = 0;
    FLOOP (x, 0, MM->clms) PP->occ_w[i] += MM->v[i].v[x] * MM->v[i].v[x];
  }

  // main loop
  FLOOP (i, begin, MM->t){
    cnt = 0;
    II->itemset.v[0] = ((PP->problem&PROBLEM_NORMALIZE)&& PP->dir>0)? i-MM->clms: i;

    FLOOP (j, 0, PP->dir>0?begin:i){
      II->itemset.v[1] = j;

      ip = 0;
      FLOOP (x, 0, MM->clms) ip += MM->v[i].v[x] * MM->v[j].v[x];

      denom = sqrt ((double)PP->occ_w[i] * (double)PP->occ_w[j]);
      if ( denom == 0 ) continue;   // an all-zero row has no defined cosine

      II->frq = ip / denom;
      if ( II->frq >= II->frq_lb ){
        if ( PP->problem & SSPC_COUNT ) cnt++;
        else ITEMSET_output_itemset (II, NULL, 0);
      }
    }

    if ( PP->problem & SSPC_COUNT ){
      // write empty lines until the output row reaches II->perm[i]
      while ( ii<II->perm[i] ){
        FILE2_putc (&II->multi_fp[0], '\n');
        FILE2_flush (&II->multi_fp[0]);
        ii++;
      }
      FILE2_print_int (&II->multi_fp[0], cnt, 0);
      FILE2_putc (&II->multi_fp[0], '\n');
      FILE2_flush (&II->multi_fp[0]);
      II->sc[2] += cnt;
      ii++;
    }
  }
}
|
305
|
+
|
306
|
+
|
307
|
+
|
308
|
+
/*************************************************************************/
|
309
|
+
/* main function of SSPC */
|
310
|
+
/*************************************************************************/
|
311
|
+
int SSPC_main (int argc, char *argv[]){
|
312
|
+
PROBLEM PP;
|
313
|
+
SETFAMILY *T = &PP.TT.T;
|
314
|
+
QUEUE_ID i;
|
315
|
+
|
316
|
+
PROBLEM_init (&PP);
|
317
|
+
SSPC_read_param (argc, argv, &PP);
|
318
|
+
if ( ERROR_MES ) return (1);
|
319
|
+
|
320
|
+
PP.TT.flag |= LOAD_INCSORT + TRSACT_ALLOC_OCC;
|
321
|
+
if ( PP.II.len_ub<INTHUGE || PP.II.len_lb>0 ) PP.TT.flag |= LOAD_SIZSORT+LOAD_DECROWSORT;
|
322
|
+
PROBLEM_load (&PP);
|
323
|
+
if ( PP.II.len_ub < INTHUGE ){
|
324
|
+
FLOOP (i, 0, PP.TT.T.t) if ( PP.TT.T.v[i].t <= PP.II.len_ub ){ PP.II.len_lb = i; break; }
|
325
|
+
}
|
326
|
+
PROBLEM_alloc (&PP, T->clms, 0, 0, PP.TT.perm, PROBLEM_OCC_W +PROBLEM_OCC_T);
|
327
|
+
PP.TT.perm = NULL;
|
328
|
+
realloc2 (PP.TT.w, MAX(T->t, T->clms)+1, EXIT);
|
329
|
+
ARY_FILL (PP.TT.w, 0, MAX(T->t, T->clms)+1, 1);
|
330
|
+
|
331
|
+
// delivery
|
332
|
+
//TRSACT_print (&PP.TT, NULL, PP.II.perm);
|
333
|
+
print_mes (&PP.TT, "separated at %d\n", PP.TT.sep);
|
334
|
+
QUEUE_delivery (PP.TT.OQ, NULL, NULL, T->v, &PP.TT.OQ[T->clms], T->t, T->clms);
|
335
|
+
|
336
|
+
if ( !ERROR_MES && PP.TT.T.clms>1 ){
|
337
|
+
if ( PP.problem & SSPC_MATRIX ){ SSPCmat (&PP); }
|
338
|
+
else SSPC (&PP);
|
339
|
+
print_mes (&PP.TT, LONGF " pairs are found\n", PP.II.sc[2]);
|
340
|
+
}
|
341
|
+
|
342
|
+
ITEMSET_merge_counters (&PP.II);
|
343
|
+
internal_params.l1 = PP.II.solutions;
|
344
|
+
|
345
|
+
PROBLEM_end (&PP);
|
346
|
+
return (ERROR_MES?1:0);
|
347
|
+
}
|
348
|
+
|
349
|
+
/*******************************************************************************/
|
350
|
+
#ifndef _NO_MAIN_
|
351
|
+
#define _NO_MAIN_
|
352
|
+
/* stand-alone entry point: hand the command line to SSPC_main and use its
   result as the exit status */
int main (int argc, char *argv[]){
  int status = SSPC_main (argc, argv);
  return status;
}
|
355
|
+
#endif
|
356
|
+
/*******************************************************************************/
|
357
|
+
|
358
|
+
#endif
|