nysol-take 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (161) hide show
  1. checksums.yaml +7 -0
  2. data/bin/mbiclique.rb +317 -0
  3. data/bin/mbipolish.rb +362 -0
  4. data/bin/mccomp.rb +235 -0
  5. data/bin/mclique.rb +295 -0
  6. data/bin/mclique2g.rb +105 -0
  7. data/bin/mcliqueInfo.rb +203 -0
  8. data/bin/mfriends.rb +202 -0
  9. data/bin/mgdiff.rb +252 -0
  10. data/bin/mhifriend.rb +456 -0
  11. data/bin/mhipolish.rb +465 -0
  12. data/bin/mitemset.rb +168 -0
  13. data/bin/mpal.rb +410 -0
  14. data/bin/mpolishing.rb +399 -0
  15. data/bin/msequence.rb +165 -0
  16. data/bin/mtra2g.rb +476 -0
  17. data/bin/mtra2gc.rb +360 -0
  18. data/ext/grhfilrun/extconf.rb +12 -0
  19. data/ext/grhfilrun/grhfilrun.c +85 -0
  20. data/ext/grhfilrun/src/_sspc.c +358 -0
  21. data/ext/grhfilrun/src/aheap.c +545 -0
  22. data/ext/grhfilrun/src/aheap.h +251 -0
  23. data/ext/grhfilrun/src/base.c +92 -0
  24. data/ext/grhfilrun/src/base.h +59 -0
  25. data/ext/grhfilrun/src/fstar.c +497 -0
  26. data/ext/grhfilrun/src/fstar.h +80 -0
  27. data/ext/grhfilrun/src/grhfil.c +214 -0
  28. data/ext/grhfilrun/src/itemset.c +713 -0
  29. data/ext/grhfilrun/src/itemset.h +170 -0
  30. data/ext/grhfilrun/src/problem.c +415 -0
  31. data/ext/grhfilrun/src/problem.h +179 -0
  32. data/ext/grhfilrun/src/queue.c +533 -0
  33. data/ext/grhfilrun/src/queue.h +182 -0
  34. data/ext/grhfilrun/src/sample.c +19 -0
  35. data/ext/grhfilrun/src/sspc.c +597 -0
  36. data/ext/grhfilrun/src/sspc2.c +491 -0
  37. data/ext/grhfilrun/src/stdlib2.c +1482 -0
  38. data/ext/grhfilrun/src/stdlib2.h +892 -0
  39. data/ext/grhfilrun/src/trsact.c +817 -0
  40. data/ext/grhfilrun/src/trsact.h +160 -0
  41. data/ext/grhfilrun/src/vec.c +745 -0
  42. data/ext/grhfilrun/src/vec.h +172 -0
  43. data/ext/lcmrun/extconf.rb +20 -0
  44. data/ext/lcmrun/lcmrun.cpp +99 -0
  45. data/ext/lcmrun/src/aheap.c +216 -0
  46. data/ext/lcmrun/src/aheap.h +111 -0
  47. data/ext/lcmrun/src/base.c +92 -0
  48. data/ext/lcmrun/src/base.h +59 -0
  49. data/ext/lcmrun/src/itemset.c +496 -0
  50. data/ext/lcmrun/src/itemset.h +157 -0
  51. data/ext/lcmrun/src/lcm.c +427 -0
  52. data/ext/lcmrun/src/problem.c +349 -0
  53. data/ext/lcmrun/src/problem.h +177 -0
  54. data/ext/lcmrun/src/queue.c +528 -0
  55. data/ext/lcmrun/src/queue.h +176 -0
  56. data/ext/lcmrun/src/sgraph.c +359 -0
  57. data/ext/lcmrun/src/sgraph.h +173 -0
  58. data/ext/lcmrun/src/stdlib2.c +1282 -0
  59. data/ext/lcmrun/src/stdlib2.h +823 -0
  60. data/ext/lcmrun/src/trsact.c +747 -0
  61. data/ext/lcmrun/src/trsact.h +159 -0
  62. data/ext/lcmrun/src/vec.c +731 -0
  63. data/ext/lcmrun/src/vec.h +171 -0
  64. data/ext/lcmseq0run/extconf.rb +20 -0
  65. data/ext/lcmseq0run/lcmseq0run.cpp +59 -0
  66. data/ext/lcmseq0run/src/aheap.c +216 -0
  67. data/ext/lcmseq0run/src/aheap.h +111 -0
  68. data/ext/lcmseq0run/src/base.c +92 -0
  69. data/ext/lcmseq0run/src/base.h +59 -0
  70. data/ext/lcmseq0run/src/itemset.c +518 -0
  71. data/ext/lcmseq0run/src/itemset.h +157 -0
  72. data/ext/lcmseq0run/src/itemset_zero.c +522 -0
  73. data/ext/lcmseq0run/src/lcm_seq.c +446 -0
  74. data/ext/lcmseq0run/src/lcm_seq_zero.c +446 -0
  75. data/ext/lcmseq0run/src/problem.c +439 -0
  76. data/ext/lcmseq0run/src/problem.h +179 -0
  77. data/ext/lcmseq0run/src/problem_zero.c +439 -0
  78. data/ext/lcmseq0run/src/queue.c +533 -0
  79. data/ext/lcmseq0run/src/queue.h +182 -0
  80. data/ext/lcmseq0run/src/stdlib2.c +1350 -0
  81. data/ext/lcmseq0run/src/stdlib2.h +864 -0
  82. data/ext/lcmseq0run/src/trsact.c +747 -0
  83. data/ext/lcmseq0run/src/trsact.h +159 -0
  84. data/ext/lcmseq0run/src/vec.c +779 -0
  85. data/ext/lcmseq0run/src/vec.h +172 -0
  86. data/ext/lcmseqrun/extconf.rb +20 -0
  87. data/ext/lcmseqrun/lcmseqrun.cpp +101 -0
  88. data/ext/lcmseqrun/src/aheap.c +216 -0
  89. data/ext/lcmseqrun/src/aheap.h +111 -0
  90. data/ext/lcmseqrun/src/base.c +92 -0
  91. data/ext/lcmseqrun/src/base.h +59 -0
  92. data/ext/lcmseqrun/src/itemset.c +518 -0
  93. data/ext/lcmseqrun/src/itemset.h +157 -0
  94. data/ext/lcmseqrun/src/itemset_zero.c +522 -0
  95. data/ext/lcmseqrun/src/lcm_seq.c +447 -0
  96. data/ext/lcmseqrun/src/lcm_seq_zero.c +446 -0
  97. data/ext/lcmseqrun/src/problem.c +439 -0
  98. data/ext/lcmseqrun/src/problem.h +179 -0
  99. data/ext/lcmseqrun/src/problem_zero.c +439 -0
  100. data/ext/lcmseqrun/src/queue.c +533 -0
  101. data/ext/lcmseqrun/src/queue.h +182 -0
  102. data/ext/lcmseqrun/src/stdlib2.c +1350 -0
  103. data/ext/lcmseqrun/src/stdlib2.h +864 -0
  104. data/ext/lcmseqrun/src/trsact.c +747 -0
  105. data/ext/lcmseqrun/src/trsact.h +159 -0
  106. data/ext/lcmseqrun/src/vec.c +779 -0
  107. data/ext/lcmseqrun/src/vec.h +172 -0
  108. data/ext/lcmtransrun/extconf.rb +18 -0
  109. data/ext/lcmtransrun/lcmtransrun.cpp +264 -0
  110. data/ext/macerun/extconf.rb +20 -0
  111. data/ext/macerun/macerun.cpp +57 -0
  112. data/ext/macerun/src/aheap.c +217 -0
  113. data/ext/macerun/src/aheap.h +112 -0
  114. data/ext/macerun/src/itemset.c +491 -0
  115. data/ext/macerun/src/itemset.h +158 -0
  116. data/ext/macerun/src/mace.c +503 -0
  117. data/ext/macerun/src/problem.c +346 -0
  118. data/ext/macerun/src/problem.h +174 -0
  119. data/ext/macerun/src/queue.c +529 -0
  120. data/ext/macerun/src/queue.h +177 -0
  121. data/ext/macerun/src/sgraph.c +360 -0
  122. data/ext/macerun/src/sgraph.h +174 -0
  123. data/ext/macerun/src/stdlib2.c +993 -0
  124. data/ext/macerun/src/stdlib2.h +811 -0
  125. data/ext/macerun/src/vec.c +634 -0
  126. data/ext/macerun/src/vec.h +170 -0
  127. data/ext/sspcrun/extconf.rb +20 -0
  128. data/ext/sspcrun/src/_sspc.c +358 -0
  129. data/ext/sspcrun/src/aheap.c +545 -0
  130. data/ext/sspcrun/src/aheap.h +251 -0
  131. data/ext/sspcrun/src/base.c +92 -0
  132. data/ext/sspcrun/src/base.h +59 -0
  133. data/ext/sspcrun/src/fstar.c +496 -0
  134. data/ext/sspcrun/src/fstar.h +80 -0
  135. data/ext/sspcrun/src/grhfil.c +213 -0
  136. data/ext/sspcrun/src/itemset.c +713 -0
  137. data/ext/sspcrun/src/itemset.h +170 -0
  138. data/ext/sspcrun/src/problem.c +415 -0
  139. data/ext/sspcrun/src/problem.h +179 -0
  140. data/ext/sspcrun/src/queue.c +533 -0
  141. data/ext/sspcrun/src/queue.h +182 -0
  142. data/ext/sspcrun/src/sample.c +19 -0
  143. data/ext/sspcrun/src/sspc.c +598 -0
  144. data/ext/sspcrun/src/sspc2.c +491 -0
  145. data/ext/sspcrun/src/stdlib2.c +1482 -0
  146. data/ext/sspcrun/src/stdlib2.h +892 -0
  147. data/ext/sspcrun/src/trsact.c +817 -0
  148. data/ext/sspcrun/src/trsact.h +160 -0
  149. data/ext/sspcrun/src/vec.c +745 -0
  150. data/ext/sspcrun/src/vec.h +172 -0
  151. data/ext/sspcrun/sspcrun.cpp +54 -0
  152. data/lib/nysol/enumLcmEp.rb +338 -0
  153. data/lib/nysol/enumLcmEsp.rb +284 -0
  154. data/lib/nysol/enumLcmIs.rb +275 -0
  155. data/lib/nysol/enumLcmSeq.rb +143 -0
  156. data/lib/nysol/items.rb +201 -0
  157. data/lib/nysol/seqDB.rb +256 -0
  158. data/lib/nysol/take.rb +39 -0
  159. data/lib/nysol/taxonomy.rb +113 -0
  160. data/lib/nysol/traDB.rb +257 -0
  161. metadata +239 -0
@@ -0,0 +1,172 @@
1
+ /* library for sparse vector */
2
+ /* Takeaki Uno 27/Dec/2008 */
3
+
4
+ #ifndef _vec_h_
5
+ #define _vec_h_
6
+
7
+ //#define USE_MATH
8
+
9
+ #include"math.h"
10
+ #include"queue.h"
11
+
12
+ #ifndef SVEC_VAL
13
+ #ifdef SVEC_VAL_INT
14
+ #define SVEC_VAL int
15
+ #define SVEC_VAL2 LONG
16
+ #define SVEC_VAL_END INTHUGE
17
+ #define SVEC_VAL2_END LONGHUGE
18
+ #define SVEC_VALF "%d"
19
+ #else
20
+ #define SVEC_VAL double
21
+ #define SVEC_VAL2 double
22
+ #define SVEC_VAL_END DOUBLEHUGE
23
+ #define SVEC_VAL2_END DOUBLEHUGE
24
+ #define SVEC_VALF "%f"
25
+ #endif
26
+ #endif
27
+
28
+ #define VEC_LOAD_BIN 16777216 // bit 24 (1<<24): read binary file
29
+ #define VEC_LOAD_BIN2 33554432 // bit 25 (1<<25): read binary file with 2 bytes for each number
30
+ #define VEC_LOAD_BIN4 67108864 // bit 26 (1<<26): read binary file with 4 bytes for each number
31
+ #define VEC_LOAD_CENTERIZE 134217728 // bit 27 (1<<27): read binary file, and subtract the half (128) from each number
32
+ #define VEC_NORMALIZE 268435456 // bit 28 (1<<28): NOTE(review): the original comment was a verbatim copy of VEC_LOAD_CENTERIZE's; presumably this flag requests normalization of the loaded vectors -- confirm against vec.c
33
+
34
+ /* matrix */
35
+ typedef struct {
36
+ unsigned char type; // mark to identify type of the structure
37
+ char *fname, *wfname; // input/weight file name
38
+ int flag; // flag
39
+
40
+ VEC *v;
41
+ VEC_ID end;
42
+ VEC_ID t;
43
+ VEC_VAL *buf, *buf2;
44
+ VEC_ID clms;
45
+ size_t eles;
46
+ VEC_VAL *buf_org, *buf2_org;
47
+ } MAT;
48
+
49
+ /* sparse vector, element */
50
+ typedef struct {
51
+ QUEUE_ID i;
52
+ SVEC_VAL a;
53
+ } SVEC_ELE;
54
+
55
+ /* sparse vector, vector */
56
+ typedef struct {
57
+ unsigned char type; // mark to identify type of the structure
58
+ SVEC_ELE *v;
59
+ VEC_ID end;
60
+ VEC_ID t;
61
+ } SVEC;
62
+
63
+ /* sparse vector, matrix */
64
+ typedef struct {
65
+ unsigned char type; // mark to identify type of the structure
66
+ char *fname, *wfname; // input/weight file name
67
+ int flag; // flag
68
+
69
+ SVEC *v;
70
+ VEC_ID end;
71
+ VEC_ID t;
72
+ SVEC_ELE *buf, *buf2;
73
+ VEC_ID clms;
74
+ size_t eles, ele_end;
75
+ } SMAT;
76
+
77
+ /* set family */
78
+ typedef struct {
79
+ unsigned char type; // mark to identify type of the structure (INIT_SETFAMILY_ sets it to TYPE_SETFAMILY)
80
+ char *fname, *wfname; // input/weight file name
81
+ int flag; // flag
82
+
83
+ QUEUE *v;
84
+ VEC_ID end;
85
+ VEC_ID t;
86
+ QUEUE_INT *buf, *buf2;
87
+ VEC_ID clms;
88
+ size_t eles, ele_end;
89
+ WEIGHT *cw, *rw, **w, *wbuf;
90
+ int unit; // INIT_SETFAMILY_ initialises this to sizeof(QUEUE_INT)
91
+ char *cwfname, *rwfname; // weight file names (presumably column / row weights, cf. SETFAMILY_load_column_weight / SETFAMILY_load_row_weight -- confirm)
92
+ PERM *rperm, *cperm; // permutations (presumably row / column, judging by the field names -- confirm)
93
+ } SETFAMILY;
94
+
95
+ #define INIT_SETFAMILY_ {TYPE_SETFAMILY,NULL,NULL,0,NULL,0,0,NULL,NULL,0,0,0,NULL,NULL,NULL,NULL,sizeof(QUEUE_INT),NULL,NULL,NULL,NULL}
96
+
97
+ extern MAT INIT_MAT;
98
+ extern SVEC INIT_SVEC;
99
+ extern SMAT INIT_SMAT;
100
+ extern SETFAMILY INIT_SETFAMILY;
101
+
102
+ QSORT_TYPE_HEADER (SVEC_VAL, SVEC_VAL)
103
+ QSORT_TYPE_HEADER (SVEC_VAL2, SVEC_VAL2)
104
+
105
+ #define ARY_QUEUE_INPRO(f,U,V) do{(f)=0;FLOOP(common_QUEUE_ID, 0, (QUEUE_ID)(U).t)(f)+=(V)[(U).v[common_QUEUE_ID]];}while(0)
106
+ #define ARY_SVEC_INPRO(f,U,V) do{(f)=0;FLOOP(common_VEC_ID, 0, (VEC_ID)(U).t)(f)+=((double)(U).v[common_VEC_ID].a)*(V)[(U).v[common_VEC_ID].i];}while(0)
107
+
108
+ /* terminate routine for VEC */
109
+ void VEC_end (VEC *V);
110
+ void MAT_end (MAT *M);
111
+ void SVEC_end (SVEC *V);
112
+ void SMAT_end (SMAT *M);
113
+ void SETFAMILY_end (SETFAMILY *M);
114
+
115
+ /* allocate memory according to rows and rowt */
116
+ void VEC_alloc (VEC *V, VEC_ID clms);
117
+ void MAT_alloc (MAT *M, VEC_ID rows, VEC_ID clms);
118
+ void SVEC_alloc (SVEC *V, VEC_ID end);
119
+ void SMAT_alloc (SMAT *M, VEC_ID rows, VEC_ID *rowt, VEC_ID clms, size_t eles);
120
+ void SETFAMILY_alloc (SETFAMILY *M, VEC_ID rows, VEC_ID *rowt, VEC_ID clms, size_t eles);
121
+ void SETFAMILY_alloc_weight (SETFAMILY *M, QUEUE_ID *t);
122
+
123
+ /* count/read the number in file for MAT */
124
+ /* if *rows>0, only read count the numbers in a row, for the first scan. */
125
+ void MAT_load_bin (MAT *M, FILE2 *fp, int unit);
126
+ void MAT_file_load (MAT *M, FILE2 *fp);
127
+ void MAT_load (MAT *M);
128
+ void SMAT_load (SMAT *M);
129
+ void SETFAMILY_load (SETFAMILY *M);
130
+ void SETFAMILY_load_weight (SETFAMILY *M);
131
+ void SETFAMILY_load_row_weight (SETFAMILY *M);
132
+ void SETFAMILY_load_column_weight (SETFAMILY *M);
133
+
134
+ void MAT_print (FILE *fp, MAT *M);
135
+ void SVEC_print (FILE *fp, SVEC *M);
136
+ void SMAT_print (FILE *fp, SMAT *M);
137
+ void SETFAMILY_print (FILE *fp, SETFAMILY *M);
138
+ void SETFAMILY_print_weight (FILE *fp, SETFAMILY *M);
139
+
140
+
141
+ /* norm, normalization **************************/
142
+ double SVEC_norm (SVEC *V);
143
+ void SVEC_normalize (SVEC *V);
144
+
145
+ /* inner product **************************/
146
+ SVEC_VAL2 SVEC_inpro (SVEC *V1, SVEC *V2);
147
+
148
+ /** Euclidean distance routines *********************************/
149
+ double VEC_eucdist (VEC *V1, VEC *V2);
150
+ double SVEC_eucdist (SVEC *V1, SVEC *V2);
151
+ double VEC_SVEC_eucdist (VEC *V1, SVEC *V2);
152
+ double QUEUE_eucdist (QUEUE *Q1, QUEUE *Q2);
153
+ double VEC_QUEUE_eucdist (VEC *V, QUEUE *Q);
154
+
155
+ void VEC_rand_gaussian (VEC *V);
156
+
157
+ double VEC_linfdist (VEC *V1, VEC *V2);
158
+
159
+ /* compute the inner product, Euclidean distance for multi vector */
160
+ double MVEC_norm (void *V);
161
+ double MVEC_inpro (void *V, void *U);
162
+ double MVEC_double_inpro (void *V, double *p);
163
+ double MVEC_eucdist (void *V, void *U);
164
+
165
+ /* compute the inner product, euclidean distance for i,jth vector */
166
+ double MMAT_inpro_ij (void *M, int i, int j);
167
+ double MMAT_double_inpro_i (void *M, int i, double *p);
168
+ double MMAT_eucdist_ij (void *M, int i, int j);
169
+ double MMAT_norm_i (void *M, int i);
170
+
171
+
172
+ #endif
@@ -0,0 +1,54 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <unistd.h>
4
+ #include <fcntl.h>
5
+ #include <sys/stat.h>
6
+ #include <string>
7
+ #include <ruby.h>
8
+ #include "src/sspc.c"
9
+ #include <kgMethod.h>
10
+
11
+ extern "C" {
12
+ void Init_sspcrun(void);
13
+ }
14
+
15
+ VALUE sspcrun(VALUE self,VALUE argvV){
16
+
17
+ string argstr=RSTRING_PTR(argvV);
18
+ vector<char *> opts = kglib::splitToken(const_cast<char*>(argstr.c_str()), ' ',true);
19
+
20
+ // 引数文字列へのポインタの領域はここでauto変数に確保する
21
+ kglib::kgAutoPtr2<char*> argv;
22
+ char** vv;
23
+ try{
24
+ argv.set(new char*[opts.size()+1]);
25
+ vv = argv.get();
26
+ }catch(...){
27
+ rb_raise(rb_eRuntimeError,"memory allocation error");
28
+ }
29
+
30
+ // vv配列0番目はコマンド名
31
+ vv[0]=const_cast<char*>("sspc");
32
+
33
+ size_t vvSize;
34
+ for(vvSize=0; vvSize<opts.size(); vvSize++){
35
+ vv[vvSize+1] = opts.at(vvSize);
36
+ }
37
+ vvSize+=1;
38
+
39
+ SSPC_main(vvSize,vv);
40
+ return Qtrue;
41
+ }
42
+
43
+
44
+ // -----------------------------------------------------------------------------
45
+ // Ruby extension entry point: registers the module function TAKE.run_sspc
46
+ // -----------------------------------------------------------------------------
47
+ void Init_sspcrun(void)
48
+ {
49
+ // Module definition: the "TAKE" part of TAKE.run_sspc (one argument, handled by sspcrun)
50
+ VALUE mtake=rb_define_module("TAKE");
51
+ rb_define_module_function(mtake,"run_sspc" , (VALUE (*)(...))sspcrun,1);
52
+ }
53
+
54
+
@@ -0,0 +1,338 @@
1
+ #!/usr/bin/env ruby
2
+ require "rubygems"
3
+ require "nysol/mcmd"
4
+ require "nysol/take"
5
+
6
+ require "nysol/traDB.rb"
7
+
8
+ module TAKE
9
+
10
#========================================================================
# Enumeration of emerging (contrast) patterns with LCM.
# Database used: TraDB.
#
# For every class of the database the transactions of that class are
# treated as positive and all others as negative; LCM is run on one merged
# transaction file with a per-class weight file.
#========================================================================
class LcmEp
  attr_reader :size  # number of enumerated patterns (set by #enumerate)
  attr_reader :pFile # pattern CSV produced by #enumerate
  attr_reader :tFile # transaction-id / pattern-id CSV produced by #enumerate

  # Largest total weight handed to LCM.
  @@intMax=2147483646

  # Weight of one positive transaction (formula (10) of the manual).
  # Integer division: the result is truncated.
  #
  # posCnt:: number of positive transactions
  def calOmega(posCnt)
    @@intMax/posCnt
  end

  # Minimum support handed to LCM (formula (9) of the manual).
  #
  # minPos:: minimum number of positive transactions
  # minGR::  minimum growth rate
  # posCnt:: number of positive transactions
  # negCnt:: number of negative transactions
  #
  # Returns an Integer >= 1 (the value is truncated).
  def calSigma(minPos,minGR,posCnt,negCnt)
    omegaF=@@intMax.to_f/posCnt.to_f
    w=posCnt.to_f/negCnt.to_f
    raw=minPos*(omegaF-w/minGR)

    # negCnt==0 makes w infinite, so raw becomes -Infinity or NaN and to_i
    # would raise FloatDomainError; treat it like any other non-positive
    # value. Values in (0,1) truncate to 0 and are lifted to 1 as well.
    return 1 if raw.nan? || raw<1

    raw.to_i
  end

  # Removes from +pat+ (a ZDD of patterns) every pattern that contains an
  # item together with its own parent in the taxonomy.
  #
  # pat::   ZDD of patterns
  # items:: item dictionary; +pat+ is returned untouched when it has no taxonomy
  def reduceTaxo(pat,items)
    taxo=items.taxonomy
    return pat if taxo.nil?

    tf=MCMD::Mtemp.new
    xxrt=tf.file
    f=""
    f << "mtrafld f=#{taxo.itemFN},#{taxo.taxoFN} -valOnly a=__fld i=#{taxo.file} o=#{xxrt}"
    system(f)

    # xxrt holds one parent-child item set per row, for example:
    #   __fld
    #   A X
    #   B X
    #   C Y
    # oyako ("parent and child") collects all of them.
    oyako=ZDD.constant(0)
    MCMD::Mcsvin.new("i=#{xxrt}"){|csv|
      csv.each{|fldVal|
        oyako=oyako+ZDD.itemset(fldVal["__fld"])
      }
    }

    # Drop every pattern that includes one of the parent-child item sets.
    pat.restrict(oyako).iif(0,pat)
  end

  # Prepares the LCM input files.
  #
  # db:: input database; read through clsNameRecSize, clsFN, cFile, itemFN,
  #      idFN, file and items
  def initialize(db)
    @temp=MCMD::Mtemp.new
    @db = db # input database
    @file=@temp.file
    items=@db.items

    # Weight files, one per class:
    #  1. the class is the positive side, every other class the negative side;
    #  2. a negative transaction weighs -1, a positive one calOmega(posSize);
    #  3. one weight per transaction row is written (single column).
    @weightFile = Hash.new
    @posWeight = Hash.new
    @sigma = Hash.new
    @db.clsNameRecSize.each {|cName,posSize|
      @weightFile[cName] = @temp.file
      @posWeight[cName]=calOmega(posSize)

      f=""
      f << "mcut -nfno f=#{@db.clsFN} i=#{@db.cFile} |"
      f << "mchgstr -nfn f=0 c=#{cName}:#{@posWeight[cName]} O=-1 o=#{@weightFile[cName]}"
      system(f)
    }

    # Convert the items from symbols to numbers and write the transaction file.
    f=""
    f << "msortf f=#{@db.itemFN} i=#{@db.file} |"
    f << "mjoin k=#{@db.itemFN} K=#{items.itemFN} m=#{items.file} f=#{items.idFN} |"
    f << "mcut f=#{@db.idFN},#{items.idFN} |"
    f << "msortf f=#{@db.idFN} |"
    f << "mtra k=#{@db.idFN} f=#{items.idFN} |"
    f << "mcut f=#{items.idFN} -nfno o=#{@file}"
    system(f)
  end

  # Runs the enumeration for every class and merges the results into
  # #pFile (patterns) and #tFile (transaction-id / pattern-id pairs).
  #
  # eArgs:: Hash of parameters. Keys read here: "type", "minGR", "minProb",
  #         "uniform", "minCnt", "minSup", "maxCnt", "maxSup", "minLen",
  #         "maxLen". "minCnt", "minSup", "maxCnt" and "maxSup" may be a
  #         single value or a Hash keyed by class name.
  #
  # Raises ArgumentError when no minimum support can be determined for a class.
  def enumerate(eArgs)

    pFiles=[]
    tFiles=[]
    tf=MCMD::Mtemp.new

    # Table from transaction number (__tid) to transaction id. It depends
    # only on the database, so it is built once for all classes.
    xxw= tf.file
    f=""
    f << "mcut f=#{@db.idFN} i=#{@db.file} |"
    f << "muniq k=#{@db.idFN} |"
    f << "mnumber S=0 a=__tid -q |"
    f << "msortf f=__tid o=#{xxw};"
    system(f)

    @db.clsNameRecSize.each{|cName,posSize|
      negSize=@db.size-posSize

      @minGR  = minGrowthRate(eArgs,posSize,negSize)
      @minPos = minPosCount(eArgs,cName,posSize)
      @maxPos = maxPosCount(eArgs,cName,posSize)

      @sigma[cName] = calSigma(@minPos,@minGR,posSize,negSize)

      # LCM writes no output file when there is no frequent pattern, so an
      # empty one is created beforehand.
      lcmout = tf.file
      system("touch #{lcmout}")

      run=""
      run << "#{eArgs["type"]}IA"
      # NOTE(review): @maxCnt is never assigned in this class, so -U is never
      # passed and @maxPos stays unused. Left as is; confirm the intended
      # (weighted) upper bound before wiring it in.
      run << " -U #{@maxCnt}" if @maxCnt
      run << " -l #{eArgs['minLen']}" if eArgs["minLen"] # lower bound of pattern size
      run << " -u #{eArgs['maxLen']}" if eArgs['maxLen'] # upper bound of pattern size
      run << " -w #{@weightFile[cName]} #{@file} #{@sigma[cName]} #{lcmout}"

      # run LCM
      MCMD::msgLog("#{run}")
      TAKE::run_lcm(run)

      # Compute the pattern supports and write them as CSV.
      MCMD::msgLog("output patterns to CSV file ...")
      pFiles << @temp.file

      transle = @temp.file
      TAKE::run_lcmtrans(lcmout,"e",transle)

      # transle columns: pattern,countP,countN,size,pid
      # minPos is stored per row so that the final filter uses the threshold
      # of the row's own class.
      f=""
      f << "mdelnull f=pattern i=#{transle} |"
      f << "mcal c='round(${countN},1)' a=neg |"
      f << "mcal c='round(${countP}/#{@posWeight[cName]},1)' a=pos |"
      f << "mdelnull f=pattern |"
      f << "msetstr v=#{cName} a=class |"
      f << "msetstr v=#{posSize} a=posTotal |"
      f << "msetstr v=#{@minGR} a=minGR |"
      f << "msetstr v=#{@minPos} a=minPos |"
      f << "mcut f=class,pid,pattern,size,pos,neg,posTotal,minGR,minPos o=#{pFiles.last}"
      system(f)

      s = MCMD::mrecount("i=#{pFiles.last}") # number of patterns for this class
      MCMD::msgLog("the number of contrast patterns on class `#{cName}' enumerated is #{s}")

      # Write the patterns occurring in each transaction.
      MCMD::msgLog("output tid-patterns ...")
      tFiles << @temp.file

      translt = @temp.file
      TAKE::run_lcmtrans(lcmout,"t",translt)

      # translt columns: __tid,pid
      f=""
      f << "msortf f=__tid i=#{translt} |"
      f << "mjoin k=__tid m=#{xxw} f=#{@db.idFN} |"
      f << "msetstr v=#{cName} a=class |"
      f << "mcut f=#{@db.idFN},class,pid o=#{tFiles.last}"
      system(f)
    }

    # Merge the per-class pattern and tid-pid files into the final output.
    @pFile = @temp.file
    @tFile = @temp.file

    # Concatenate the pattern files and number the patterns over all classes.
    xxpCat = tf.file
    f=""
    f << "mcat i=#{pFiles.join(",")} |"
    f << "msortf f=class,pid |"
    f << "mnumber s=class,pid S=0 a=ppid o=#{xxpCat}"
    system(f)

    # Compute the pattern statistics.
    items=@db.items
    f=""
    f << "mcut f=class,ppid:pid,pattern,size,pos,neg,posTotal,minGR,minPos i=#{xxpCat} |"
    f << "msetstr v=#{@db.size} a=total |" # total number of transactions
    f << "mcal c='${total}-${posTotal}' a=negTotal |" # number of negative transactions
    f << "mcal c='${pos}/${posTotal}' a=support |" # support
    f << "mcal c='if(${neg}==0,1.797693135e+308,(${pos}/${posTotal})/(${neg}/${negTotal}))' a=growthRate |"

    if eArgs["uniform"] then
      f << "mcal c='(${pos}/${posTotal})/(${pos}/${posTotal}+(#{@db.clsSize}-1)*${neg}/${negTotal})' a=postProb |"
    else
      f << "mcal c='${pos}/(${pos}+${neg})' a=postProb |"
    end
    f << "msel c='${pos}>=${minPos}&&${growthRate}>=${minGR}' |" # selection by per-class minimum support and minGR
    f << "mvreplace vf=pattern m=#{items.file} K=#{items.idFN} f=#{items.itemFN} |"
    f << "mcut f=class,pid,pattern,size,pos,neg,posTotal,negTotal,total,support,growthRate,postProb |"
    f << "mvsort vf=pattern |"
    f << "msortf f=class%nr,postProb%nr,pos%nr o=#{@pFile}"
    system(f)

    # Remove redundant patterns that contain an item together with its taxonomy parent.
    if items.taxonomy then
      MCMD::msgLog("reducing redundant rules in terms of taxonomy ...")
      zdd=ZDD.constant(0)
      MCMD::Mcsvin.new("i=#{@pFile}"){|csv|
        csv.each{|fldVal|
          pat=fldVal['pattern']
          zdd=zdd+ZDD.itemset(pat)
        }
      }
      zdd=reduceTaxo(zdd,@db.items)

      xxp1=tf.file
      xxp2=tf.file
      xxp3=tf.file
      zdd.csvout(xxp1)

      f=""
      f << "mcut -nfni f=1:pattern i=#{xxp1} |"
      f << "mvsort vf=pattern |"
      f << "msortf f=pattern o=#{xxp2}"
      system(f)

      f=""
      f << "msortf f=pattern i=#{@pFile} |"
      f << "mcommon k=pattern m=#{xxp2} |"
      f << "msortf f=class%nr,postProb%nr,pos%nr o=#{xxp3}"
      system(f)
      system "mv #{xxp3} #{@pFile}"
    end

    # Master used to keep only transactions that contain an enumerated pattern.
    xxp4=tf.file
    f=""
    f << "mcut f=class,pid i=#{@pFile} |"
    f << "msortf f=class,pid o=#{xxp4}"
    system(f)

    # Build the tid-pid file.
    f=""
    f << "mcat i=#{tFiles.join(",")} |"
    f << "msortf f=class,pid |"
    f << "mjoin k=class,pid m=#{xxpCat} f=ppid |" # join the pid unified over all classes (ppid)
    f << "msortf f=class,ppid |"
    f << "mcommon k=class,ppid K=class,pid m=#{xxp4} |" # keep enumerated patterns only
    f << "mcut f=#{@db.idFN},class,ppid:pid |"
    f << "msortf f=#{@db.idFN},class,pid o=#{@tFile}"
    system(f)

    @size = MCMD::mrecount("i=#{@pFile}") # number of enumerated patterns
    MCMD::msgLog("the number of emerging patterns enumerated is #{@size}")
  end

  # Moves the result files of #enumerate into the directory +outpath+ as
  # patterns.csv and tid_pats.csv.
  def output(outpath)
    system "mv #{@pFile} #{outpath}/patterns.csv"
    system "mv #{@tFile} #{outpath}/tid_pats.csv"
  end

  private

  # A parameter may be one value for all classes or a Hash keyed by class name.
  def classValue(arg,cName)
    arg.is_a?(Hash) ? arg[cName] : arg
  end

  # Converts a support ratio into a transaction count
  # (0.99 is added before truncating, as in the original formula).
  def supToCnt(sup,posSize)
    (sup * posSize.to_f + 0.99).to_i
  end

  # Minimum growth rate: eArgs["minGR"] when given, otherwise derived from
  # eArgs["minProb"] (default 0.5) by formula (4) of the manual.
  def minGrowthRate(eArgs,posSize,negSize)
    return eArgs["minGR"] if eArgs["minGR"]

    minProb=eArgs["minProb"] || 0.5
    odds=minProb/(1-minProb)
    if eArgs["uniform"] then
      odds * (@db.clsSize-1)
    else
      odds * (negSize.to_f/posSize.to_f)
    end
  end

  # Minimum number of positive transactions for class cName, taken from
  # "minCnt" or, failing that, computed from "minSup". Examples of the
  # accepted forms: 0.05, {"c1"=>0.05,"c2"=>0.06}, 10, {"c1"=>10,"c2"=>15}.
  def minPosCount(eArgs,cName,posSize)
    cnt=nil
    if eArgs["minCnt"] then
      cnt=classValue(eArgs["minCnt"],cName)
    elsif eArgs["minSup"] then
      sup=classValue(eArgs["minSup"],cName)
      cnt=supToCnt(sup,posSize) if sup
    end
    raise ArgumentError, "minCnt or minSup must be given for class `#{cName}'" if cnt.nil?
    cnt
  end

  # Maximum number of positive transactions for class cName, taken from
  # "maxCnt", else computed from "maxSup", else the class size.
  def maxPosCount(eArgs,cName,posSize)
    if eArgs["maxCnt"] then
      classValue(eArgs["maxCnt"],cName)
    elsif eArgs["maxSup"] then
      supToCnt(classValue(eArgs["maxSup"],cName),posSize)
    else
      posSize.to_f
    end
  end
end
337
+
338
+ end #module