nysol-take 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/mbiclique.rb +317 -0
- data/bin/mbipolish.rb +362 -0
- data/bin/mccomp.rb +235 -0
- data/bin/mclique.rb +295 -0
- data/bin/mclique2g.rb +105 -0
- data/bin/mcliqueInfo.rb +203 -0
- data/bin/mfriends.rb +202 -0
- data/bin/mgdiff.rb +252 -0
- data/bin/mhifriend.rb +456 -0
- data/bin/mhipolish.rb +465 -0
- data/bin/mitemset.rb +168 -0
- data/bin/mpal.rb +410 -0
- data/bin/mpolishing.rb +399 -0
- data/bin/msequence.rb +165 -0
- data/bin/mtra2g.rb +476 -0
- data/bin/mtra2gc.rb +360 -0
- data/ext/grhfilrun/extconf.rb +12 -0
- data/ext/grhfilrun/grhfilrun.c +85 -0
- data/ext/grhfilrun/src/_sspc.c +358 -0
- data/ext/grhfilrun/src/aheap.c +545 -0
- data/ext/grhfilrun/src/aheap.h +251 -0
- data/ext/grhfilrun/src/base.c +92 -0
- data/ext/grhfilrun/src/base.h +59 -0
- data/ext/grhfilrun/src/fstar.c +497 -0
- data/ext/grhfilrun/src/fstar.h +80 -0
- data/ext/grhfilrun/src/grhfil.c +214 -0
- data/ext/grhfilrun/src/itemset.c +713 -0
- data/ext/grhfilrun/src/itemset.h +170 -0
- data/ext/grhfilrun/src/problem.c +415 -0
- data/ext/grhfilrun/src/problem.h +179 -0
- data/ext/grhfilrun/src/queue.c +533 -0
- data/ext/grhfilrun/src/queue.h +182 -0
- data/ext/grhfilrun/src/sample.c +19 -0
- data/ext/grhfilrun/src/sspc.c +597 -0
- data/ext/grhfilrun/src/sspc2.c +491 -0
- data/ext/grhfilrun/src/stdlib2.c +1482 -0
- data/ext/grhfilrun/src/stdlib2.h +892 -0
- data/ext/grhfilrun/src/trsact.c +817 -0
- data/ext/grhfilrun/src/trsact.h +160 -0
- data/ext/grhfilrun/src/vec.c +745 -0
- data/ext/grhfilrun/src/vec.h +172 -0
- data/ext/lcmrun/extconf.rb +20 -0
- data/ext/lcmrun/lcmrun.cpp +99 -0
- data/ext/lcmrun/src/aheap.c +216 -0
- data/ext/lcmrun/src/aheap.h +111 -0
- data/ext/lcmrun/src/base.c +92 -0
- data/ext/lcmrun/src/base.h +59 -0
- data/ext/lcmrun/src/itemset.c +496 -0
- data/ext/lcmrun/src/itemset.h +157 -0
- data/ext/lcmrun/src/lcm.c +427 -0
- data/ext/lcmrun/src/problem.c +349 -0
- data/ext/lcmrun/src/problem.h +177 -0
- data/ext/lcmrun/src/queue.c +528 -0
- data/ext/lcmrun/src/queue.h +176 -0
- data/ext/lcmrun/src/sgraph.c +359 -0
- data/ext/lcmrun/src/sgraph.h +173 -0
- data/ext/lcmrun/src/stdlib2.c +1282 -0
- data/ext/lcmrun/src/stdlib2.h +823 -0
- data/ext/lcmrun/src/trsact.c +747 -0
- data/ext/lcmrun/src/trsact.h +159 -0
- data/ext/lcmrun/src/vec.c +731 -0
- data/ext/lcmrun/src/vec.h +171 -0
- data/ext/lcmseq0run/extconf.rb +20 -0
- data/ext/lcmseq0run/lcmseq0run.cpp +59 -0
- data/ext/lcmseq0run/src/aheap.c +216 -0
- data/ext/lcmseq0run/src/aheap.h +111 -0
- data/ext/lcmseq0run/src/base.c +92 -0
- data/ext/lcmseq0run/src/base.h +59 -0
- data/ext/lcmseq0run/src/itemset.c +518 -0
- data/ext/lcmseq0run/src/itemset.h +157 -0
- data/ext/lcmseq0run/src/itemset_zero.c +522 -0
- data/ext/lcmseq0run/src/lcm_seq.c +446 -0
- data/ext/lcmseq0run/src/lcm_seq_zero.c +446 -0
- data/ext/lcmseq0run/src/problem.c +439 -0
- data/ext/lcmseq0run/src/problem.h +179 -0
- data/ext/lcmseq0run/src/problem_zero.c +439 -0
- data/ext/lcmseq0run/src/queue.c +533 -0
- data/ext/lcmseq0run/src/queue.h +182 -0
- data/ext/lcmseq0run/src/stdlib2.c +1350 -0
- data/ext/lcmseq0run/src/stdlib2.h +864 -0
- data/ext/lcmseq0run/src/trsact.c +747 -0
- data/ext/lcmseq0run/src/trsact.h +159 -0
- data/ext/lcmseq0run/src/vec.c +779 -0
- data/ext/lcmseq0run/src/vec.h +172 -0
- data/ext/lcmseqrun/extconf.rb +20 -0
- data/ext/lcmseqrun/lcmseqrun.cpp +101 -0
- data/ext/lcmseqrun/src/aheap.c +216 -0
- data/ext/lcmseqrun/src/aheap.h +111 -0
- data/ext/lcmseqrun/src/base.c +92 -0
- data/ext/lcmseqrun/src/base.h +59 -0
- data/ext/lcmseqrun/src/itemset.c +518 -0
- data/ext/lcmseqrun/src/itemset.h +157 -0
- data/ext/lcmseqrun/src/itemset_zero.c +522 -0
- data/ext/lcmseqrun/src/lcm_seq.c +447 -0
- data/ext/lcmseqrun/src/lcm_seq_zero.c +446 -0
- data/ext/lcmseqrun/src/problem.c +439 -0
- data/ext/lcmseqrun/src/problem.h +179 -0
- data/ext/lcmseqrun/src/problem_zero.c +439 -0
- data/ext/lcmseqrun/src/queue.c +533 -0
- data/ext/lcmseqrun/src/queue.h +182 -0
- data/ext/lcmseqrun/src/stdlib2.c +1350 -0
- data/ext/lcmseqrun/src/stdlib2.h +864 -0
- data/ext/lcmseqrun/src/trsact.c +747 -0
- data/ext/lcmseqrun/src/trsact.h +159 -0
- data/ext/lcmseqrun/src/vec.c +779 -0
- data/ext/lcmseqrun/src/vec.h +172 -0
- data/ext/lcmtransrun/extconf.rb +18 -0
- data/ext/lcmtransrun/lcmtransrun.cpp +264 -0
- data/ext/macerun/extconf.rb +20 -0
- data/ext/macerun/macerun.cpp +57 -0
- data/ext/macerun/src/aheap.c +217 -0
- data/ext/macerun/src/aheap.h +112 -0
- data/ext/macerun/src/itemset.c +491 -0
- data/ext/macerun/src/itemset.h +158 -0
- data/ext/macerun/src/mace.c +503 -0
- data/ext/macerun/src/problem.c +346 -0
- data/ext/macerun/src/problem.h +174 -0
- data/ext/macerun/src/queue.c +529 -0
- data/ext/macerun/src/queue.h +177 -0
- data/ext/macerun/src/sgraph.c +360 -0
- data/ext/macerun/src/sgraph.h +174 -0
- data/ext/macerun/src/stdlib2.c +993 -0
- data/ext/macerun/src/stdlib2.h +811 -0
- data/ext/macerun/src/vec.c +634 -0
- data/ext/macerun/src/vec.h +170 -0
- data/ext/sspcrun/extconf.rb +20 -0
- data/ext/sspcrun/src/_sspc.c +358 -0
- data/ext/sspcrun/src/aheap.c +545 -0
- data/ext/sspcrun/src/aheap.h +251 -0
- data/ext/sspcrun/src/base.c +92 -0
- data/ext/sspcrun/src/base.h +59 -0
- data/ext/sspcrun/src/fstar.c +496 -0
- data/ext/sspcrun/src/fstar.h +80 -0
- data/ext/sspcrun/src/grhfil.c +213 -0
- data/ext/sspcrun/src/itemset.c +713 -0
- data/ext/sspcrun/src/itemset.h +170 -0
- data/ext/sspcrun/src/problem.c +415 -0
- data/ext/sspcrun/src/problem.h +179 -0
- data/ext/sspcrun/src/queue.c +533 -0
- data/ext/sspcrun/src/queue.h +182 -0
- data/ext/sspcrun/src/sample.c +19 -0
- data/ext/sspcrun/src/sspc.c +598 -0
- data/ext/sspcrun/src/sspc2.c +491 -0
- data/ext/sspcrun/src/stdlib2.c +1482 -0
- data/ext/sspcrun/src/stdlib2.h +892 -0
- data/ext/sspcrun/src/trsact.c +817 -0
- data/ext/sspcrun/src/trsact.h +160 -0
- data/ext/sspcrun/src/vec.c +745 -0
- data/ext/sspcrun/src/vec.h +172 -0
- data/ext/sspcrun/sspcrun.cpp +54 -0
- data/lib/nysol/enumLcmEp.rb +338 -0
- data/lib/nysol/enumLcmEsp.rb +284 -0
- data/lib/nysol/enumLcmIs.rb +275 -0
- data/lib/nysol/enumLcmSeq.rb +143 -0
- data/lib/nysol/items.rb +201 -0
- data/lib/nysol/seqDB.rb +256 -0
- data/lib/nysol/take.rb +39 -0
- data/lib/nysol/taxonomy.rb +113 -0
- data/lib/nysol/traDB.rb +257 -0
- metadata +239 -0
@@ -0,0 +1,172 @@
|
|
1
|
+
/* library for sparse vector */
|
2
|
+
/* Takeaki Uno 27/Dec/2008 */
|
3
|
+
|
4
|
+
#ifndef _vec_h_
|
5
|
+
#define _vec_h_
|
6
|
+
|
7
|
+
//#define USE_MATH
|
8
|
+
|
9
|
+
#include"math.h"
|
10
|
+
#include"queue.h"
|
11
|
+
|
12
|
+
#ifndef SVEC_VAL
|
13
|
+
#ifdef SVEC_VAL_INT
|
14
|
+
#define SVEC_VAL int
|
15
|
+
#define SVEC_VAL2 LONG
|
16
|
+
#define SVEC_VAL_END INTHUGE
|
17
|
+
#define SVEC_VAL2_END LONGHUGE
|
18
|
+
#define SVEC_VALF "%d"
|
19
|
+
#else
|
20
|
+
#define SVEC_VAL double
|
21
|
+
#define SVEC_VAL2 double
|
22
|
+
#define SVEC_VAL_END DOUBLEHUGE
|
23
|
+
#define SVEC_VAL2_END DOUBLEHUGE
|
24
|
+
#define SVEC_VALF "%f"
|
25
|
+
#endif
|
26
|
+
#endif
|
27
|
+
|
28
|
+
#define VEC_LOAD_BIN 16777216 // read binary file
|
29
|
+
#define VEC_LOAD_BIN2 33554432 // read binary file with 2byte for each number
|
30
|
+
#define VEC_LOAD_BIN4 67108864 // read binary file with 4byte for each number
|
31
|
+
#define VEC_LOAD_CENTERIZE 134217728 // read binary file, and minus the half(128) from each number
|
32
|
+
#define VEC_NORMALIZE 268435456 // NOTE(review): the previous comment was a verbatim copy of VEC_LOAD_CENTERIZE's; presumably this flag requests normalization of the loaded vectors -- confirm against vec.c
|
33
|
+
|
34
|
+
/* matrix */
|
35
|
+
typedef struct {
|
36
|
+
unsigned char type; // mark to identify type of the structure
|
37
|
+
char *fname, *wfname; // input/weight file name
|
38
|
+
int flag; // flag
|
39
|
+
|
40
|
+
VEC *v;
|
41
|
+
VEC_ID end;
|
42
|
+
VEC_ID t;
|
43
|
+
VEC_VAL *buf, *buf2;
|
44
|
+
VEC_ID clms;
|
45
|
+
size_t eles;
|
46
|
+
VEC_VAL *buf_org, *buf2_org;
|
47
|
+
} MAT;
|
48
|
+
|
49
|
+
/* sparse vector, element */
|
50
|
+
typedef struct {
|
51
|
+
QUEUE_ID i;
|
52
|
+
SVEC_VAL a;
|
53
|
+
} SVEC_ELE;
|
54
|
+
|
55
|
+
/* sparse vector, vector */
|
56
|
+
typedef struct {
|
57
|
+
unsigned char type; // mark to identify type of the structure
|
58
|
+
SVEC_ELE *v;
|
59
|
+
VEC_ID end;
|
60
|
+
VEC_ID t;
|
61
|
+
} SVEC;
|
62
|
+
|
63
|
+
/* sparse vector, matrix */
|
64
|
+
typedef struct {
|
65
|
+
unsigned char type; // mark to identify type of the structure
|
66
|
+
char *fname, *wfname; // input/weight file name
|
67
|
+
int flag; // flag
|
68
|
+
|
69
|
+
SVEC *v;
|
70
|
+
VEC_ID end;
|
71
|
+
VEC_ID t;
|
72
|
+
SVEC_ELE *buf, *buf2;
|
73
|
+
VEC_ID clms;
|
74
|
+
size_t eles, ele_end;
|
75
|
+
} SMAT;
|
76
|
+
|
77
|
+
/* set family */
|
78
|
+
typedef struct {
|
79
|
+
unsigned char type; // mark to identify type of the structure
|
80
|
+
char *fname, *wfname; // input/weight file name
|
81
|
+
int flag; // flag
|
82
|
+
|
83
|
+
QUEUE *v;
|
84
|
+
VEC_ID end;
|
85
|
+
VEC_ID t;
|
86
|
+
QUEUE_INT *buf, *buf2;
|
87
|
+
VEC_ID clms;
|
88
|
+
size_t eles, ele_end;
|
89
|
+
WEIGHT *cw, *rw, **w, *wbuf;
|
90
|
+
int unit;
|
91
|
+
char *cwfname, *rwfname; // weight file name
|
92
|
+
PERM *rperm, *cperm; // row permutation
|
93
|
+
} SETFAMILY;
|
94
|
+
|
95
|
+
#define INIT_SETFAMILY_ {TYPE_SETFAMILY,NULL,NULL,0,NULL,0,0,NULL,NULL,0,0,0,NULL,NULL,NULL,NULL,sizeof(QUEUE_INT),NULL,NULL,NULL,NULL}
|
96
|
+
|
97
|
+
extern MAT INIT_MAT;
|
98
|
+
extern SVEC INIT_SVEC;
|
99
|
+
extern SMAT INIT_SMAT;
|
100
|
+
extern SETFAMILY INIT_SETFAMILY;
|
101
|
+
|
102
|
+
QSORT_TYPE_HEADER (SVEC_VAL, SVEC_VAL)
|
103
|
+
QSORT_TYPE_HEADER (SVEC_VAL2, SVEC_VAL2)
|
104
|
+
|
105
|
+
#define ARY_QUEUE_INPRO(f,U,V) do{(f)=0;FLOOP(common_QUEUE_ID, 0, (QUEUE_ID)(U).t)(f)+=(V)[(U).v[common_QUEUE_ID]];}while(0)
|
106
|
+
#define ARY_SVEC_INPRO(f,U,V) do{(f)=0;FLOOP(common_VEC_ID, 0, (VEC_ID)(U).t)(f)+=((double)(U).v[common_VEC_ID].a)*(V)[(U).v[common_VEC_ID].i];}while(0)
|
107
|
+
|
108
|
+
/* terminate routine for VEC */
|
109
|
+
void VEC_end (VEC *V);
|
110
|
+
void MAT_end (MAT *M);
|
111
|
+
void SVEC_end (SVEC *V);
|
112
|
+
void SMAT_end (SMAT *M);
|
113
|
+
void SETFAMILY_end (SETFAMILY *M);
|
114
|
+
|
115
|
+
/* allocate memory according to rows and rowt */
|
116
|
+
void VEC_alloc (VEC *V, VEC_ID clms);
|
117
|
+
void MAT_alloc (MAT *M, VEC_ID rows, VEC_ID clms);
|
118
|
+
void SVEC_alloc (SVEC *V, VEC_ID end);
|
119
|
+
void SMAT_alloc (SMAT *M, VEC_ID rows, VEC_ID *rowt, VEC_ID clms, size_t eles);
|
120
|
+
void SETFAMILY_alloc (SETFAMILY *M, VEC_ID rows, VEC_ID *rowt, VEC_ID clms, size_t eles);
|
121
|
+
void SETFAMILY_alloc_weight (SETFAMILY *M, QUEUE_ID *t);
|
122
|
+
|
123
|
+
/* count/read the number in file for MAT */
|
124
|
+
/* if *rows>0, only read count the numbers in a row, for the first scan. */
|
125
|
+
void MAT_load_bin (MAT *M, FILE2 *fp, int unit);
|
126
|
+
void MAT_file_load (MAT *M, FILE2 *fp);
|
127
|
+
void MAT_load (MAT *M);
|
128
|
+
void SMAT_load (SMAT *M);
|
129
|
+
void SETFAMILY_load (SETFAMILY *M);
|
130
|
+
void SETFAMILY_load_weight (SETFAMILY *M);
|
131
|
+
void SETFAMILY_load_row_weight (SETFAMILY *M);
|
132
|
+
void SETFAMILY_load_column_weight (SETFAMILY *M);
|
133
|
+
|
134
|
+
void MAT_print (FILE *fp, MAT *M);
|
135
|
+
void SVEC_print (FILE *fp, SVEC *M);
|
136
|
+
void SMAT_print (FILE *fp, SMAT *M);
|
137
|
+
void SETFAMILY_print (FILE *fp, SETFAMILY *M);
|
138
|
+
void SETFAMILY_print_weight (FILE *fp, SETFAMILY *M);
|
139
|
+
|
140
|
+
|
141
|
+
/* norm, normalization **************************/
|
142
|
+
double SVEC_norm (SVEC *V);
|
143
|
+
void SVEC_normalize (SVEC *V);
|
144
|
+
|
145
|
+
/* inner product **************************/
|
146
|
+
SVEC_VAL2 SVEC_inpro (SVEC *V1, SVEC *V2);
|
147
|
+
|
148
|
+
/** Euclidean distance routines *********************************/
|
149
|
+
double VEC_eucdist (VEC *V1, VEC *V2);
|
150
|
+
double SVEC_eucdist (SVEC *V1, SVEC *V2);
|
151
|
+
double VEC_SVEC_eucdist (VEC *V1, SVEC *V2);
|
152
|
+
double QUEUE_eucdist (QUEUE *Q1, QUEUE *Q2);
|
153
|
+
double VEC_QUEUE_eucdist (VEC *V, QUEUE *Q);
|
154
|
+
|
155
|
+
void VEC_rand_gaussian (VEC *V);
|
156
|
+
|
157
|
+
double VEC_linfdist (VEC *V1, VEC *V2);
|
158
|
+
|
159
|
+
/* compute the inner product, Euclidean distance for multi vector */
|
160
|
+
double MVEC_norm (void *V);
|
161
|
+
double MVEC_inpro (void *V, void *U);
|
162
|
+
double MVEC_double_inpro (void *V, double *p);
|
163
|
+
double MVEC_eucdist (void *V, void *U);
|
164
|
+
|
165
|
+
/* compute the inner product, euclidean distance for i,jth vector */
|
166
|
+
double MMAT_inpro_ij (void *M, int i, int j);
|
167
|
+
double MMAT_double_inpro_i (void *M, int i, double *p);
|
168
|
+
double MMAT_eucdist_ij (void *M, int i, int j);
|
169
|
+
double MMAT_norm_i (void *M, int i);
|
170
|
+
|
171
|
+
|
172
|
+
#endif
|
@@ -0,0 +1,54 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include <stdlib.h>
|
3
|
+
#include <unistd.h>
|
4
|
+
#include <fcntl.h>
|
5
|
+
#include <sys/stat.h>
|
6
|
+
#include <string>
|
7
|
+
#include <ruby.h>
|
8
|
+
#include "src/sspc.c"
|
9
|
+
#include <kgMethod.h>
|
10
|
+
|
11
|
+
extern "C" {
|
12
|
+
void Init_sspcrun(void);
|
13
|
+
}
|
14
|
+
|
15
|
+
// Ruby-callable entry point behind TAKE.run_sspc(args).
//
// argvV : Ruby String holding the sspc command-line arguments separated by
//         single spaces (the command name itself is NOT included).
// Returns Qtrue after SSPC_main has run.
// Raises  TypeError / ArgumentError (via StringValueCStr) when argvV is not a
//         String or contains an embedded NUL, and RuntimeError when the argv
//         array cannot be allocated.
VALUE sspcrun(VALUE self,VALUE argvV){

	// StringValueCStr validates the argument type and guarantees a
	// NUL-terminated buffer (RSTRING_PTR alone does neither). It is called
	// before any C++ object is constructed so that a raise (longjmp) here
	// does not jump over destructors.
	const char* rawArgs = StringValueCStr(argvV);

	// Private, mutable copy of the argument string; the tokens returned by
	// splitToken are kept alive by argstr until SSPC_main returns.
	string argstr(rawArgs);
	vector<char *> opts = kglib::splitToken(const_cast<char*>(argstr.c_str()), ' ',true);

	// The array of argument pointers is owned by an auto variable.
	kglib::kgAutoPtr2<char*> argv;
	char** vv = NULL;
	try{
		argv.set(new char*[opts.size()+1]);
		vv = argv.get();
	}catch(...){
		vv = NULL;
	}
	// Raise outside of the catch handler: rb_raise never returns, and leaving
	// an active C++ exception handler through longjmp is not well defined.
	if(vv==NULL){
		rb_raise(rb_eRuntimeError,"memory allocation error");
	}

	// Element 0 of the argv array is the command name.
	vv[0]=const_cast<char*>("sspc");

	size_t vvSize;
	for(vvSize=0; vvSize<opts.size(); vvSize++){
		vv[vvSize+1] = opts.at(vvSize);
	}
	vvSize+=1; // account for the command name in slot 0

	SSPC_main(vvSize,vv);
	return Qtrue;
}
|
42
|
+
|
43
|
+
|
44
|
+
// -----------------------------------------------------------------------------
|
45
|
+
// ruby Mcsvin クラス init
|
46
|
+
// -----------------------------------------------------------------------------
|
47
|
+
void Init_sspcrun(void)
|
48
|
+
{
|
49
|
+
// モジュール定義:MCMD::xxxxの部分
|
50
|
+
VALUE mtake=rb_define_module("TAKE");
|
51
|
+
rb_define_module_function(mtake,"run_sspc" , (VALUE (*)(...))sspcrun,1);
|
52
|
+
}
|
53
|
+
|
54
|
+
|
@@ -0,0 +1,338 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require "rubygems"
|
3
|
+
require "nysol/mcmd"
|
4
|
+
require "nysol/take"
|
5
|
+
|
6
|
+
require "nysol/traDB.rb"
|
7
|
+
|
8
|
+
module TAKE
|
9
|
+
|
10
|
+
#========================================================================
|
11
|
+
# 列挙関数:lcm 利用DB:TraDB
|
12
|
+
#========================================================================
|
13
|
+
# Emerging-pattern enumeration built on LCM, using a TraDB as the input database.
#
# For every class in the database the class is treated as "pos" and all other
# classes as "neg"; LCM is run with a per-class weight file and the results are
# merged into one pattern file (pFile) and one tid/pattern file (tFile).
class LcmEp
  attr_reader :size # number of enumerated patterns
  attr_reader :pFile
  attr_reader :tFile

  @@intMax=2147483646
  #@@intMax=100

  # Weight given to each pos transaction (equation (10) in the manual).
  # Integer division: @@intMax / posCnt.
  def calOmega(posCnt)
    return @@intMax/posCnt
  end

  # Minimum support handed to LCM (equation (9) in the manual).
  #
  # minPos : minimum number of pos transactions (beta)
  # minGR  : minimum growth rate
  # posCnt : number of pos transactions
  # negCnt : number of neg transactions
  #
  # Returns an Integer >= 1. The value is truncated, and anything below 1
  # (including -Infinity / NaN, which arise when negCnt or minGR is zero)
  # is clamped to 1 instead of raising FloatDomainError in Float#to_i.
  def calSigma(minPos,minGR,posCnt,negCnt)
    omegaF=@@intMax.to_f/posCnt.to_f
    beta=minPos
    w=posCnt.to_f/negCnt.to_f
    raw=beta*(omegaF-w/minGR)
    return 1 if raw.nan? || raw<1
    return raw.to_i # truncation
  end

  # Removes from the ZDD `pat` every pattern that contains an item together
  # with its own taxonomy parent. Returns `pat` unchanged when `items` has no
  # taxonomy.
  def reduceTaxo(pat,items)
    # Nothing to do (and no temporary workspace needed) without a taxonomy.
    return pat if items.taxonomy.nil?

    tf=MCMD::Mtemp.new
    xxrt = tf.file
    taxo=items.taxonomy
    f=""
    f << "mtrafld f=#{taxo.itemFN},#{taxo.taxoFN} -valOnly a=__fld i=#{taxo.file} o=#{xxrt}"
    system(f)

    # Contents of xxrt: one "child parent" item pair per row, e.g.
    # __fld
    # A X
    # B X
    # C Y
    # D Z
    # E Z
    # F Z
    # oyako accumulates those parent/child item sets.
    oyako=ZDD.constant(0)
    MCMD::Mcsvin.new("i=#{xxrt}"){|csv|
      csv.each{|fldVal|
        # Use the field value directly; do not overwrite the `items` parameter.
        oyako=oyako+ZDD.itemset(fldVal["__fld"])
      }
    }

    # Drop the patterns that include any of the parent/child item sets.
    pat=pat.restrict(oyako).iif(0,pat)

    return pat
  end

  # db : input transaction database. Builds one weight file per class and the
  # integer-item transaction file consumed by LCM.
  def initialize(db)
    @temp=MCMD::Mtemp.new
    @db = db # input database
    @file=@temp.file
    items=@db.items

    # Weight files.
    # The pos and neg transactions are written together into a single
    # integer-item transaction file (@file). For each class:
    #  1. the class is pos, every other class is neg;
    #  2. neg rows get weight -1, pos rows get the value from calOmega;
    #  3. one weight per transaction row is written (single-column data).
    @weightFile = Hash.new
    @posWeight  = Hash.new
    @sigma      = Hash.new
    @db.clsNameRecSize.each {|cName,posSize|
      @weightFile[cName] = @temp.file
      @posWeight[cName]=calOmega(posSize)

      f=""
      f << "mcut -nfno f=#{@db.clsFN} i=#{@db.cFile} |"
      f << "mchgstr -nfn f=0 c=#{cName}:#{@posWeight[cName]} O=-1 o=#{@weightFile[cName]}"
      system(f)
    }

    # Convert items from symbols to numbers.
    f=""
    f << "msortf f=#{@db.itemFN} i=#{@db.file} |"
    f << "mjoin k=#{@db.itemFN} K=#{items.itemFN} m=#{items.file} f=#{items.idFN} |"
    f << "mcut f=#{@db.idFN},#{items.idFN} |"
    f << "msortf f=#{@db.idFN} |"
    f << "mtra k=#{@db.idFN} f=#{items.idFN} |"
    f << "mcut f=#{items.idFN} -nfno o=#{@file}"
    system(f)
  end

  # Runs the enumeration with the given parameters.
  #
  # eArgs keys read here: "type", "minGR", "minProb", "uniform",
  # "minCnt" / "minSup" (one of them is required), "maxCnt" / "maxSup",
  # "minLen", "maxLen". The Cnt/Sup values may be scalars or Hashes keyed by
  # class name, e.g.
  #   s=0.05            s=c1:0.05,c2:0.06
  #   S=10              S=c1:10,c2:15
  #
  # Sets @pFile, @tFile and @size.
  # Raises ArgumentError when neither "minCnt" nor "minSup" is given.
  def enumerate(eArgs)
    if eArgs["minCnt"].nil? && eArgs["minSup"].nil?
      raise ArgumentError, "either minCnt or minSup must be specified"
    end

    pFiles=[]
    tFiles=[]
    tf=MCMD::Mtemp.new
    @db.clsNameRecSize.each{|cName,posSize|
      negSize=@db.size-posSize

      # minGR
      if eArgs["minGR"] then
        @minGR=eArgs["minGR"]
      else
        minProb=0.5
        minProb=eArgs["minProb"] if eArgs["minProb"]
        if eArgs["uniform"] then
          @minGR = (minProb/(1-minProb)) * (@db.clsSize-1) # equation (4) in the manual
        else
          @minGR = (minProb/(1-minProb)) * (negSize.to_f/posSize.to_f) # equation (4) in the manual
        end
      end

      # Minimum support / minimum support count for this class.
      if eArgs["minCnt"] then
        @minPos = classValue(eArgs["minCnt"],cName)
      else
        @minPos = (classValue(eArgs["minSup"],cName) * posSize.to_f + 0.99).to_i
      end

      # Maximum support / maximum support count for this class.
      if eArgs["maxCnt"] then
        @maxPos = classValue(eArgs["maxCnt"],cName)
      elsif eArgs["maxSup"]
        @maxPos = (classValue(eArgs["maxSup"],cName) * posSize.to_f + 0.99).to_i
      else
        @maxPos = posSize.to_f
      end

      @sigma[cName] = calSigma(@minPos,@minGR,posSize,negSize)

      # LCM parameters and execution.
      lcmout = tf.file # lcm output file
      # LCM does not create its output file when no frequent pattern exists,
      # so create an empty one beforehand.
      system("touch #{lcmout}")

      run=""
      run << "#{eArgs["type"]}IA"
      # NOTE(review): @maxCnt is never assigned anywhere in this class, so -U is
      # never emitted and @maxPos computed above is not passed to LCM. Confirm
      # the intended (weighted) upper bound before wiring @maxPos in here.
      run << " -U #{@maxCnt}"         if @maxCnt # upper bound
      run << " -l #{eArgs['minLen']}" if eArgs["minLen"] # lower bound of pattern size
      run << " -u #{eArgs['maxLen']}" if eArgs['maxLen'] # upper bound of pattern size
      run << " -w #{@weightFile[cName]} #{@file} #{@sigma[cName]} #{lcmout}"

      # run lcm
      MCMD::msgLog("#{run}")
      TAKE::run_lcm(run)

      # Compute the support of each pattern and write it out as CSV.
      MCMD::msgLog("output patterns to CSV file ...")
      pFiles << @temp.file

      transle = @temp.file
      TAKE::run_lcmtrans(lcmout,"e",transle) # pattern,countP,countN,size,pid

      f=""
      f << "mdelnull f=pattern i=#{transle} |"
      f << "mcal c='round(${countN},1)' a=neg |"
      f << "mcal c='round(${countP}/#{@posWeight[cName]},1)' a=pos |"
      f << "mdelnull f=pattern |"
      f << "msetstr v=#{cName} a=class |"
      f << "msetstr v=#{posSize} a=posTotal |"
      f << "msetstr v=#{@minGR} a=minGR |"
      # Carry this class's own minPos with each row so that the final
      # selection does not use the threshold of whichever class came last.
      f << "msetstr v=#{@minPos} a=minPos |"
      f << "mcut f=class,pid,pattern,size,pos,neg,posTotal,minGR,minPos o=#{pFiles.last}"
      system(f)

      s = MCMD::mrecount("i=#{pFiles.last}") # number of enumerated patterns
      MCMD::msgLog("the number of contrast patterns on class `#{cName}' enumerated is #{s}")

      # Write out the patterns occurring in each transaction.
      MCMD::msgLog("output tid-patterns ...")
      tFiles << @temp.file

      xxw= tf.file
      f=""
      f << "mcut f=#{@db.idFN} i=#{@db.file} |"
      f << "muniq k=#{@db.idFN} |"
      f << "mnumber S=0 a=__tid -q |"
      f << "msortf f=__tid o=#{xxw};"
      system(f)

      translt = @temp.file
      TAKE::run_lcmtrans(lcmout,"t",translt) # __tid,pid

      f=""
      f << "msortf f=__tid i=#{translt} |"
      f << "mjoin k=__tid m=#{xxw} f=#{@db.idFN} |"
      f << "msetstr v=#{cName} a=class |"
      f << "mcut f=#{@db.idFN},class,pid o=#{tFiles.last}"
      system(f)
    }

    # Merge the per-class pattern and tid-pid files into the final outputs.
    @pFile = @temp.file
    @tFile = @temp.file

    # Concatenate the pattern files and assign a pid unique across classes (ppid).
    xxpCat = tf.file
    f=""
    f << "mcat i=#{pFiles.join(",")} |"
    f << "msortf f=class,pid |"
    f << "mnumber s=class,pid S=0 a=ppid o=#{xxpCat}"
    system(f)

    # Compute the pattern statistics.
    items=@db.items
    f=""
    f << "mcut f=class,ppid:pid,pattern,size,pos,neg,posTotal,minGR,minPos i=#{xxpCat} |"
    f << "msetstr v=#{@db.size} a=total |" # total number of transactions
    f << "mcal c='${total}-${posTotal}' a=negTotal |" # total number of neg transactions
    f << "mcal c='${pos}/${posTotal}' a=support |" # support
    f << "mcal c='if(${neg}==0,1.797693135e+308,(${pos}/${posTotal})/(${neg}/${negTotal}))' a=growthRate |"

    if eArgs["uniform"] then
      f << "mcal c='(${pos}/${posTotal})/(${pos}/${posTotal}+(#{@db.clsSize}-1)*${neg}/${negTotal})' a=postProb |"
    else
      f << "mcal c='${pos}/(${pos}+${neg})' a=postProb |"
    end
    f << "msel c='${pos}>=${minPos}&&${growthRate}>=${minGR}' |" # selection by per-class minPos and minGR
    f << "mvreplace vf=pattern m=#{items.file} K=#{items.idFN} f=#{items.itemFN} |"
    f << "mcut f=class,pid,pattern,size,pos,neg,posTotal,negTotal,total,support,growthRate,postProb |"
    f << "mvsort vf=pattern |"
    f << "msortf f=class%nr,postProb%nr,pos%nr o=#{@pFile}"
    system(f)

    # Remove redundant patterns that contain an item together with its taxonomy.
    if items.taxonomy then
      MCMD::msgLog("reducing redundant rules in terms of taxonomy ...")
      zdd=ZDD.constant(0)
      MCMD::Mcsvin.new("i=#{@pFile}"){|csv|
        csv.each{|fldVal|
          pat=fldVal['pattern']
          zdd=zdd+ZDD.itemset(pat)
        }
      }
      zdd=reduceTaxo(zdd,@db.items)

      xxp1=tf.file
      xxp2=tf.file
      xxp3=tf.file
      zdd.csvout(xxp1)

      f=""
      f << "mcut -nfni f=1:pattern i=#{xxp1} |"
      f << "mvsort vf=pattern |"
      f << "msortf f=pattern o=#{xxp2}"
      system(f)

      f=""
      f << "msortf f=pattern i=#{@pFile} |"
      f << "mcommon k=pattern m=#{xxp2} |"
      f << "msortf f=class%nr,postProb%nr,pos%nr o=#{xxp3}"
      system(f)
      system "mv #{xxp3} #{@pFile}"
    end

    # Master used to keep only the transactions containing an enumerated pattern.
    xxp4=tf.file
    f=""
    f << "mcut f=class,pid i=#{@pFile} |"
    f << "msortf f=class,pid o=#{xxp4}"
    system(f)

    # Compute the tid-pid file.
    f=""
    f << "mcat i=#{tFiles.join(",")} |"
    f << "msortf f=class,pid |"
    f << "mjoin k=class,pid m=#{xxpCat} f=ppid |" # join the cross-class pid (ppid)
    f << "msortf f=class,ppid |"
    f << "mcommon k=class,ppid K=class,pid m=#{xxp4} |" # keep enumerated patterns only
    f << "mcut f=#{@db.idFN},class,ppid:pid |"
    f << "msortf f=#{@db.idFN},class,pid o=#{@tFile}"
    system(f)

    @size = MCMD::mrecount("i=#{@pFile}") # number of enumerated patterns
    MCMD::msgLog("the number of emerging patterns enumerated is #{@size}")
  end

  # Moves the result files into outpath as patterns.csv and tid_pats.csv.
  def output(outpath)
    system "mv #{@pFile} #{outpath}/patterns.csv"
    system "mv #{@tFile} #{outpath}/tid_pats.csv"
  end

  private

  # Resolves a parameter that is either a scalar or a Hash keyed by class name.
  def classValue(value,cName)
    value.is_a?(Hash) ? value[cName] : value
  end
end
|
337
|
+
|
338
|
+
end #module
|