nysol-take 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (161) hide show
  1. checksums.yaml +7 -0
  2. data/bin/mbiclique.rb +317 -0
  3. data/bin/mbipolish.rb +362 -0
  4. data/bin/mccomp.rb +235 -0
  5. data/bin/mclique.rb +295 -0
  6. data/bin/mclique2g.rb +105 -0
  7. data/bin/mcliqueInfo.rb +203 -0
  8. data/bin/mfriends.rb +202 -0
  9. data/bin/mgdiff.rb +252 -0
  10. data/bin/mhifriend.rb +456 -0
  11. data/bin/mhipolish.rb +465 -0
  12. data/bin/mitemset.rb +168 -0
  13. data/bin/mpal.rb +410 -0
  14. data/bin/mpolishing.rb +399 -0
  15. data/bin/msequence.rb +165 -0
  16. data/bin/mtra2g.rb +476 -0
  17. data/bin/mtra2gc.rb +360 -0
  18. data/ext/grhfilrun/extconf.rb +12 -0
  19. data/ext/grhfilrun/grhfilrun.c +85 -0
  20. data/ext/grhfilrun/src/_sspc.c +358 -0
  21. data/ext/grhfilrun/src/aheap.c +545 -0
  22. data/ext/grhfilrun/src/aheap.h +251 -0
  23. data/ext/grhfilrun/src/base.c +92 -0
  24. data/ext/grhfilrun/src/base.h +59 -0
  25. data/ext/grhfilrun/src/fstar.c +497 -0
  26. data/ext/grhfilrun/src/fstar.h +80 -0
  27. data/ext/grhfilrun/src/grhfil.c +214 -0
  28. data/ext/grhfilrun/src/itemset.c +713 -0
  29. data/ext/grhfilrun/src/itemset.h +170 -0
  30. data/ext/grhfilrun/src/problem.c +415 -0
  31. data/ext/grhfilrun/src/problem.h +179 -0
  32. data/ext/grhfilrun/src/queue.c +533 -0
  33. data/ext/grhfilrun/src/queue.h +182 -0
  34. data/ext/grhfilrun/src/sample.c +19 -0
  35. data/ext/grhfilrun/src/sspc.c +597 -0
  36. data/ext/grhfilrun/src/sspc2.c +491 -0
  37. data/ext/grhfilrun/src/stdlib2.c +1482 -0
  38. data/ext/grhfilrun/src/stdlib2.h +892 -0
  39. data/ext/grhfilrun/src/trsact.c +817 -0
  40. data/ext/grhfilrun/src/trsact.h +160 -0
  41. data/ext/grhfilrun/src/vec.c +745 -0
  42. data/ext/grhfilrun/src/vec.h +172 -0
  43. data/ext/lcmrun/extconf.rb +20 -0
  44. data/ext/lcmrun/lcmrun.cpp +99 -0
  45. data/ext/lcmrun/src/aheap.c +216 -0
  46. data/ext/lcmrun/src/aheap.h +111 -0
  47. data/ext/lcmrun/src/base.c +92 -0
  48. data/ext/lcmrun/src/base.h +59 -0
  49. data/ext/lcmrun/src/itemset.c +496 -0
  50. data/ext/lcmrun/src/itemset.h +157 -0
  51. data/ext/lcmrun/src/lcm.c +427 -0
  52. data/ext/lcmrun/src/problem.c +349 -0
  53. data/ext/lcmrun/src/problem.h +177 -0
  54. data/ext/lcmrun/src/queue.c +528 -0
  55. data/ext/lcmrun/src/queue.h +176 -0
  56. data/ext/lcmrun/src/sgraph.c +359 -0
  57. data/ext/lcmrun/src/sgraph.h +173 -0
  58. data/ext/lcmrun/src/stdlib2.c +1282 -0
  59. data/ext/lcmrun/src/stdlib2.h +823 -0
  60. data/ext/lcmrun/src/trsact.c +747 -0
  61. data/ext/lcmrun/src/trsact.h +159 -0
  62. data/ext/lcmrun/src/vec.c +731 -0
  63. data/ext/lcmrun/src/vec.h +171 -0
  64. data/ext/lcmseq0run/extconf.rb +20 -0
  65. data/ext/lcmseq0run/lcmseq0run.cpp +59 -0
  66. data/ext/lcmseq0run/src/aheap.c +216 -0
  67. data/ext/lcmseq0run/src/aheap.h +111 -0
  68. data/ext/lcmseq0run/src/base.c +92 -0
  69. data/ext/lcmseq0run/src/base.h +59 -0
  70. data/ext/lcmseq0run/src/itemset.c +518 -0
  71. data/ext/lcmseq0run/src/itemset.h +157 -0
  72. data/ext/lcmseq0run/src/itemset_zero.c +522 -0
  73. data/ext/lcmseq0run/src/lcm_seq.c +446 -0
  74. data/ext/lcmseq0run/src/lcm_seq_zero.c +446 -0
  75. data/ext/lcmseq0run/src/problem.c +439 -0
  76. data/ext/lcmseq0run/src/problem.h +179 -0
  77. data/ext/lcmseq0run/src/problem_zero.c +439 -0
  78. data/ext/lcmseq0run/src/queue.c +533 -0
  79. data/ext/lcmseq0run/src/queue.h +182 -0
  80. data/ext/lcmseq0run/src/stdlib2.c +1350 -0
  81. data/ext/lcmseq0run/src/stdlib2.h +864 -0
  82. data/ext/lcmseq0run/src/trsact.c +747 -0
  83. data/ext/lcmseq0run/src/trsact.h +159 -0
  84. data/ext/lcmseq0run/src/vec.c +779 -0
  85. data/ext/lcmseq0run/src/vec.h +172 -0
  86. data/ext/lcmseqrun/extconf.rb +20 -0
  87. data/ext/lcmseqrun/lcmseqrun.cpp +101 -0
  88. data/ext/lcmseqrun/src/aheap.c +216 -0
  89. data/ext/lcmseqrun/src/aheap.h +111 -0
  90. data/ext/lcmseqrun/src/base.c +92 -0
  91. data/ext/lcmseqrun/src/base.h +59 -0
  92. data/ext/lcmseqrun/src/itemset.c +518 -0
  93. data/ext/lcmseqrun/src/itemset.h +157 -0
  94. data/ext/lcmseqrun/src/itemset_zero.c +522 -0
  95. data/ext/lcmseqrun/src/lcm_seq.c +447 -0
  96. data/ext/lcmseqrun/src/lcm_seq_zero.c +446 -0
  97. data/ext/lcmseqrun/src/problem.c +439 -0
  98. data/ext/lcmseqrun/src/problem.h +179 -0
  99. data/ext/lcmseqrun/src/problem_zero.c +439 -0
  100. data/ext/lcmseqrun/src/queue.c +533 -0
  101. data/ext/lcmseqrun/src/queue.h +182 -0
  102. data/ext/lcmseqrun/src/stdlib2.c +1350 -0
  103. data/ext/lcmseqrun/src/stdlib2.h +864 -0
  104. data/ext/lcmseqrun/src/trsact.c +747 -0
  105. data/ext/lcmseqrun/src/trsact.h +159 -0
  106. data/ext/lcmseqrun/src/vec.c +779 -0
  107. data/ext/lcmseqrun/src/vec.h +172 -0
  108. data/ext/lcmtransrun/extconf.rb +18 -0
  109. data/ext/lcmtransrun/lcmtransrun.cpp +264 -0
  110. data/ext/macerun/extconf.rb +20 -0
  111. data/ext/macerun/macerun.cpp +57 -0
  112. data/ext/macerun/src/aheap.c +217 -0
  113. data/ext/macerun/src/aheap.h +112 -0
  114. data/ext/macerun/src/itemset.c +491 -0
  115. data/ext/macerun/src/itemset.h +158 -0
  116. data/ext/macerun/src/mace.c +503 -0
  117. data/ext/macerun/src/problem.c +346 -0
  118. data/ext/macerun/src/problem.h +174 -0
  119. data/ext/macerun/src/queue.c +529 -0
  120. data/ext/macerun/src/queue.h +177 -0
  121. data/ext/macerun/src/sgraph.c +360 -0
  122. data/ext/macerun/src/sgraph.h +174 -0
  123. data/ext/macerun/src/stdlib2.c +993 -0
  124. data/ext/macerun/src/stdlib2.h +811 -0
  125. data/ext/macerun/src/vec.c +634 -0
  126. data/ext/macerun/src/vec.h +170 -0
  127. data/ext/sspcrun/extconf.rb +20 -0
  128. data/ext/sspcrun/src/_sspc.c +358 -0
  129. data/ext/sspcrun/src/aheap.c +545 -0
  130. data/ext/sspcrun/src/aheap.h +251 -0
  131. data/ext/sspcrun/src/base.c +92 -0
  132. data/ext/sspcrun/src/base.h +59 -0
  133. data/ext/sspcrun/src/fstar.c +496 -0
  134. data/ext/sspcrun/src/fstar.h +80 -0
  135. data/ext/sspcrun/src/grhfil.c +213 -0
  136. data/ext/sspcrun/src/itemset.c +713 -0
  137. data/ext/sspcrun/src/itemset.h +170 -0
  138. data/ext/sspcrun/src/problem.c +415 -0
  139. data/ext/sspcrun/src/problem.h +179 -0
  140. data/ext/sspcrun/src/queue.c +533 -0
  141. data/ext/sspcrun/src/queue.h +182 -0
  142. data/ext/sspcrun/src/sample.c +19 -0
  143. data/ext/sspcrun/src/sspc.c +598 -0
  144. data/ext/sspcrun/src/sspc2.c +491 -0
  145. data/ext/sspcrun/src/stdlib2.c +1482 -0
  146. data/ext/sspcrun/src/stdlib2.h +892 -0
  147. data/ext/sspcrun/src/trsact.c +817 -0
  148. data/ext/sspcrun/src/trsact.h +160 -0
  149. data/ext/sspcrun/src/vec.c +745 -0
  150. data/ext/sspcrun/src/vec.h +172 -0
  151. data/ext/sspcrun/sspcrun.cpp +54 -0
  152. data/lib/nysol/enumLcmEp.rb +338 -0
  153. data/lib/nysol/enumLcmEsp.rb +284 -0
  154. data/lib/nysol/enumLcmIs.rb +275 -0
  155. data/lib/nysol/enumLcmSeq.rb +143 -0
  156. data/lib/nysol/items.rb +201 -0
  157. data/lib/nysol/seqDB.rb +256 -0
  158. data/lib/nysol/take.rb +39 -0
  159. data/lib/nysol/taxonomy.rb +113 -0
  160. data/lib/nysol/traDB.rb +257 -0
  161. metadata +239 -0
data/bin/mhifriend.rb ADDED
@@ -0,0 +1,456 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require "rubygems"
5
+ require "nysol/mcmd"
6
+
7
# Version history
#   1.0  initial development: 2016/12/26

# Name of the running script with any leading directory path stripped.
$cmd = $0.sub(%r{.*/}, "")
# Version string shown by the help and version output.
$version = "1.0"
10
+
11
# Prints the usage text to standard error and terminates the process.
#
# Fixes relative to the previous text:
# * the example command line no longer appends "hifriend.rb" to the script name;
# * the usage line matches the keywords the script accepts (no class=, and
#   no= is listed as mandatory like eo=);
# * -num is documented;
# * the similarity list is announced as four entries, matching the list;
# * the node / edge output schemas are attached to the right file;
# * the example file names agree with the files shown by `cat`.
def help
  STDERR.puts <<EOF
----------------------------
#{$cmd} version #{$version}
----------------------------
概要) hierarchical friend: トランザクションデータにfriendによるpolishを階層的に適用する。

書式) #{$cmd} i= tid= item= no= eo= s=|S= [-node_support] [-num] [rank=] [sim=] [maxLevel=] [T=] [--help]
i= : トランザクションデータファイル【必須】
tid= : トランザクションID項目名【必須】
item= : アイテム項目名【必須】
no= : 出力ファイル(節点)【必須】
eo= : 出力ファイル(辺:節点ペア)【必須】
s= : 最小支持度(全トランザクション数に対する割合による指定): 0以上1以下の実数
S= : 最小支持度(トランザクション数による指定): 1以上の整数
-node_support : 節点にもs=,S=の条件を適用する。指定しなければ全てのitemを節点として出力する。
-num : アイテム項目が数値の場合に指定する。
以上のパラメータ mtra2gc.rbのパラメータであり、詳細は同コマンドヘルプを参照のこと。

rank= : 枝を張る条件で、双方向類似枝の上位何個までを選択するか(デフォルト:3)
sim= : rank=で利用する類似度を指定する。(デフォルト:S)
指定できる類似度は以下の4つのいずれか一つ。
S:Support, J: Jaccard, P:normalized PMI, C:Confidence

maxLevel= : 階層化の回数上限(デフォルト:0,収束するまで)


その他
T= : ワークディレクトリ(default:/tmp)
--help : ヘルプの表示

入力ファイル形式)
トランザクションIDとアイテムの2項目によるトランザクションデータ。

出力形式)
節点ファイル(no=): cluster,node,support,frequency,total
枝ファイル(eo=): cluster%0,node1%1,node2%2,support(sim=で指定した類似度)

例)
$ cat tra1.csv
id,item
1,a
1,b
1,d
1,e
2,a
2,b
2,e
3,a
3,d
3,e
6,b
6,d
7,d
7,e
4,c
4,f
4,b
5,c
5,f
5,e
8,g
8,h
9,g
9,h
0,i
a,j
a,c
a,a

$ #{$cmd} i=tra1.csv no=node.csv eo=edge.csv tid=id item=item sim=S S=2 rank=3
$ cat edge.csv
cluster%0,node1%1,node2%2,support
#1_1,a,b,0.1818181818
#1_1,a,d,0.1818181818
#1_1,a,e,0.2727272727
#1_1,b,d,0.1818181818
#1_1,b,e,0.1818181818
#1_1,d,e,0.2727272727
#1_2,c,f,0.1818181818
#1_3,g,h,0.1818181818
#2_1,#1_1,#1_2,0.2727272727

$ cat node.csv
cluster%0,node%1,support,frequency,total
,i,0.09090909091,1,11
,j,0.09090909091,1,11
#1_1,a,0.3636363636,4,11
#1_1,b,0.3636363636,4,11
#1_1,d,0.3636363636,4,11
#1_1,e,0.4545454545,5,11
#1_2,c,0.2727272727,3,11
#1_2,f,0.1818181818,2,11
#1_3,g,0.1818181818,2,11
#1_3,h,0.1818181818,2,11
#2_1,#1_1,0.7272727273,8,11
#2_1,#1_2,0.2727272727,3,11

# Copyright(c) NYSOL 2012- All Rights Reserved.
EOF
  exit
end
113
+
114
# Writes "version <$version>" to standard error, then terminates the process.
def ver
  STDERR.puts("version #{$version}")
  exit
end
118
+
119
# Show the usage text when called without arguments or with --help,
# and the version with --version (both helpers terminate the process).
help if ARGV.empty? || ARGV[0] == "--help"
ver if ARGV[0] == "--version"

# Command-line keywords handed to Margs: the first list is presumably the set
# of accepted keywords and the second the mandatory ones (confirm against
# MCMD::Margs). T= and -mcmdenv are read just below, so they are listed here
# too; previously they were missing from the list.
accepted_keys = "i=,tid=,item=,no=,eo=,s=,S=,-node_support,rank=,sim=,maxLevel=,T=,-num,-verbose,-mcmdenv"
required_keys = "i=,tid=,item=,eo=,no="
args = MCMD::Margs.new(ARGV, accepted_keys, required_keys)

# Restrict mcmd messages to warnings and errors unless -mcmdenv is given.
ENV["KG_VerboseLevel"] = "2" unless args.bool("-mcmdenv")

# Work-file directory: exported without a trailing slash when T= is given.
work_dir = args.str("T=")
ENV["KG_TmpPath"] = work_dir.sub(%r{/$}, "") if work_dir

traFile = args.str("i=")      # transaction data file
idFN    = args.str("tid=")    # transaction ID field name
itemFN  = args.str("item=")   # item field name
numtp   = args.bool("-num")   # -num flag (items handled as numbers later on)

onFile = args.file("no=", "w")  # node output file
oeFile = args.file("eo=", "w")  # edge output file

# Parameters forwarded to mtra2gc.rb.
sp1 = args.str("s=")
sp2 = args.str("S=")
node_support = args.bool("-node_support")
143
+
144
# friend parameters
# Similarity code given by sim=. The help text documents S as the default, so
# fall back to it when sim= is omitted (previously an omitted sim= ended in
# the error branch below).
sim = args.str("sim=") || "S"

# Maps the one-letter similarity code to the field name used downstream.
sim_field_names = {
  "S" => "support",
  "J" => "jaccard",
  "P" => "PMI",
  "C" => "confidence",
}
simStr = sim_field_names[sim]
unless simStr
  MCMD::errorLog("sim= takes S, J, P or C")
  raise ArgumentError, "sim= takes S, J, P or C"
end

# rank= is forwarded to the polishing step only when given.
rank = args.str("rank=")

# Upper bound on the number of hierarchy levels; 0 means no bound.
maxLevel = args.int("maxLevel=", 0)
161
+
162
+
163
# Builds the similarity graph from a transaction file by running mtra2gc.rb.
#
# traFile     - input transaction file (passed as i=)
# paramTra2gc - extra option string inserted after the command name
# onFile      - node output path (passed as no=)
# oeFile      - edge output path (passed as eo=)
#
# Returns whatever Kernel#system returns for the command.
#
# Sample edge output recorded by the original author:
#   node1%0,node2%1,support,frequency,frequency1,frequency2,total,lift,jaccard,PMI
#   a,b,0.1818181818,2,4,4,11,1.375,0.3333333333,0.1868039815
#   a,e,0.2727272727,3,4,5,11,1.65,0.5,0.385424341
#   g,h,0.1818181818,2,2,2,11,5.5,1,1
def runTra2gc(traFile,paramTra2gc,onFile,oeFile)
  pieces = ["mtra2gc.rb", paramTra2gc, "i=#{traFile}", "no=#{onFile}", "eo=#{oeFile}"]
  system(pieces.join(" "))
end
177
+
178
# Runs mfriends.rb on the similarity graph to produce the polished graph.
#
# simgN, simgE     - node / edge files of the similarity graph (ni=, ei=)
# paramPolish      - extra option string inserted before the file arguments
# polishN, polishE - node / edge output paths (no=, eo=)
#
# Returns whatever Kernel#system returns for the command.
#
# Sample polished edge output recorded by the original author:
#   node1%0,node2%1,jaccard
#   a,b,0.3333333333
#   a,e,0.5
#   g,h,1
def runPolish(simgN,simgE,paramPolish,polishN,polishE)
  options = "-udout -directed ef=node1,node2 nf=node #{paramPolish}"
  files   = "ni=#{simgN} ei=#{simgE} eo=#{polishE} no=#{polishN}"
  system("mfriends.rb #{options} #{files}")
end
192
+
193
# Clusters the polished graph by running mccomp.rb.
#
# niFile, eiFile - node / edge input files (ni=, ei=)
# paramCluster   - extra option string inserted before the file arguments
# oFile          - output path (o=)
#
# Returns whatever Kernel#system returns for the command.
#
# Sample output recorded by the original author:
#   id%0,node,size
#   1,a,4
#   1,b,4
#   2,c,2
#   4,i,1
def runClustering(niFile,eiFile,paramCluster,oFile)
  command = format(
    "mccomp.rb nf=node ef=node1,node2 %s ni=%s ei=%s o=%s",
    paramCluster, niFile, eiFile, oFile
  )
  system(command)
end
208
+
209
# Writes a node-to-cluster master file and rewrites the transaction items
# through it.
#
# traFile     - transaction file whose items are replaced
# idFN        - transaction ID field name
# itemFN      - item field name
# level       - hierarchy level, embedded in generated cluster labels ("#<level>_<id>")
# clusterFile - clustering result; read with the fields id and node
# mFile       - output: node-to-cluster master
# oFile       - output: converted transaction file
# maxNo       - nil for labelled clusters; otherwise the number that the
#               numeric cluster numbers are added to
#
# Returns nil when maxNo was nil, otherwise the integer read back from the
# max of the cluster field of the numbered master.
#
# Per the original author's note, a cluster that holds a single node keeps
# the original item as its cluster value.
def runConvert(traFile,idFN,itemFN,level,clusterFile,mFile,oFile,maxNo)
  scratch = MCMD::Mtemp.new
  freq_file   = scratch.file
  rest_file   = scratch.file
  number_file = scratch.file
  keep_file   = scratch.file

  # Count the rows per cluster id (needed by both numbering schemes).
  system("mcount k=id a=freq i=#{clusterFile} o=#{freq_file}")

  if maxNo
    # Numeric scheme: cluster = running number + maxNo; rows diverted by
    # mselnum's u= output keep their node value as the cluster.
    master_cmd = [
      "mjoin k=id m=#{freq_file} f=freq i=#{clusterFile} |",
      "mselnum c='(1,]' f=freq u=#{rest_file} |",
      "mnumber k=id -B S=1 a=num |",
      "mcal c='${num}+#{maxNo}' a=cluster o=#{number_file};",
      "mcal c='$s{node}' a=cluster i=#{rest_file} o=#{keep_file};",
      "mcat f=node,freq,cluster i=#{number_file},#{keep_file} o=#{mFile}",
    ].join
    system(master_cmd)

    # Read back the largest cluster number assigned so far.
    maxNo = `mstats c=max f=cluster i=#{number_file}|mcut f=cluster -nfno `.chomp.to_i
  else
    # Label scheme: freq==1 keeps the node name, otherwise "#<level>_<id>".
    master_cmd = [
      "mjoin k=id m=#{freq_file} f=freq i=#{clusterFile} |",
      "mcal c='if(${freq}==1,$s{node},\"##{level}_\"+$s{id})' a=cluster o=#{mFile}",
    ].join
    system(master_cmd)
  end

  # Replace each transaction item by its cluster (items without a master row
  # stay as they are) and drop duplicate id/item pairs.
  convert_cmd = [
    "mjoin k=#{itemFN} K=node m=#{mFile} f=cluster i=#{traFile} -n |",
    "mcal c='if(isnull($s{cluster}),$s{#{itemFN}},$s{cluster})' a=newItem |",
    "mcut f=#{itemFN},cluster -r |",
    "mfldname f=newItem:#{itemFN} |",
    "muniq k=#{idFN},#{itemFN} o=#{oFile}",
  ].join
  system(convert_cmd)

  maxNo
end
252
+
253
# Saves the node information of one level to the history directory.
#
# polishN - polished node file; only its node field is used
# simgN   - similarity-graph node file supplying support, frequency, total
# ncMap   - node-to-cluster master supplying the cluster field
# oFile   - output path
#
# Returns whatever Kernel#system returns for the pipeline.
def runSaveNode(polishN,simgN,ncMap,oFile)
  pipeline = [
    "mcut f=node i=#{polishN} |",
    "mjoin k=node m=#{ncMap} f=cluster |",
    "mjoin k=node m=#{simgN} f=support,frequency,total o=#{oFile}",
  ]
  system(pipeline.join)
end
262
+
263
# Saves the edge information of one level to the history directory.
#
# polishE - polished edge file (fields node1, node2 and the similarity field)
# simgE   - accepted for symmetry with runSaveNode; not used by the pipeline
# ncMap   - node-to-cluster master; the cluster is looked up by node1
# simStr  - name of the similarity field to carry over
# oFile   - output path
#
# Returns whatever Kernel#system returns for the pipeline.
def runSaveEdge(polishE,simgE,ncMap,simStr,oFile)
  pipeline = [
    "mcut f=node1,node2,#{simStr} i=#{polishE} |",
    "mjoin k=node1 K=node m=#{ncMap} f=cluster |",
    "mcut f=node1,node2,cluster,#{simStr} o=#{oFile}",
  ]
  system(pipeline.join)
end
270
+
271
##########################################
# Returns the maximum of the itemFN field of iFile as an Integer.
#
# The value is taken from the output of an mstats | mcut pipeline and
# converted with String#to_i, so output that does not start with a number
# yields 0.
def getMaxNo(iFile,itemFN)
  pipeline = "mstats c=max f=#{itemFN} i=#{iFile} |mcut f=#{itemFN} -nfno "
  `#{pipeline}`.chomp.to_i
end
281
+
282
# Writes the final node file from the per-level node files in iPath.
#
# iPath      - history directory; every file matching "node*" is concatenated
# lastItemNo - nil for labelled clusters; otherwise the largest original item
#              number, used to recognise cluster nodes (node > lastItemNo)
# oFile      - output path
#
# Per the original author's note, cluster rows of isolated nodes are removed
# at every level: rows with node == cluster where the node also appears in
# another row, or where the node is itself a cluster. For the remaining rows
# an empty cluster value is written when node == cluster.
#
# Sample intermediate rows recorded by the original author:
#   node%0,support,frequency,total,cluster
#   #1_1,0.7272727273,8,11,#2_1
#   #1_3,0.1818181818,2,11,#1_3
#   i,0.09090909091,1,11,i
def outputNode(iPath,lastItemNo,oFile)
  scratch = MCMD::Mtemp.new
  merged_file = scratch.file
  count_file  = scratch.file

  # Merge all levels and drop duplicate cluster/node pairs.
  system("mcat i=#{iPath}/node* |" + "muniq k=cluster,node o=#{merged_file}")

  # Count how often each node occurs.
  system "mcut f=node i=#{merged_file} | mcount k=node a=freq o=#{count_file}"

  # Cluster nodes are recognised by number when lastItemNo is given,
  # otherwise by a leading "#".
  drop_rule =
    if lastItemNo
      "msel c='$s{node}==$s{cluster} && (${freq}>1 || ${node}>#{lastItemNo})' -r |"
    else
      "msel c='$s{node}==$s{cluster} && (${freq}>1 || left($s{node},1)==\"#\")' -r |"
    end

  final_cmd = [
    "mjoin k=node m=#{count_file} i=#{merged_file} |",
    drop_rule,
    "mcal c='if($s{node}==$s{cluster},\"\",$s{cluster})' a=newClust|",
    "mcut f=newClust:cluster,node,support,frequency,total |",
    "msortf f=cluster,node o=#{oFile}",
  ].join
  system(final_cmd)
end
312
+
313
# Writes the final edge file from the per-level edge files in iPath.
#
# iPath  - history directory; every file matching "edge*" is concatenated
# simStr - name of the similarity field to keep
# oFile  - output path
#
# Returns whatever Kernel#system returns for the pipeline.
#
# Sample input rows recorded by the original author:
#   node1%0,node2%1,jaccard,cluster
#   a,b,0.3333333333,#1_1
#   a,e,0.5,#1_1
def outputEdge(iPath,simStr,oFile)
  pipeline = [
    "mcat i=#{iPath}/edge* |",
    "mcut f=cluster,node1,node2,#{simStr} |",
    "msortf f=cluster,node1,node2 o=#{oFile}",
  ]
  system(pipeline.join)
end
325
+
326
# Hierarchical polishing driver: repeats graph construction, polishing,
# clustering and item conversion level by level.
#
# traFile      - original transaction file (copied to a work file first)
# idFN, itemFN - transaction ID / item field names
# simStr       - similarity field name carried into the saved edge files
# paramTra2gc, paramPolish, paramCluster
#              - option strings passed through to the respective steps
# maxLevel     - maximum number of levels; 0 means no limit
# oPath        - directory receiving node_<level> and edge_<level> files
# maxNo        - passed to runConvert and replaced by its return value
#
# The loop stops when the level limit is exceeded or when the polished edge
# file of the current level has no records. Returns nil.
def hiPolish(traFile,idFN,itemFN,simStr,paramTra2gc,paramPolish,paramCluster,maxLevel,oPath,maxNo)
  scratch = MCMD::Mtemp.new
  cur_tra   = scratch.file   # transactions fed to the current level
  next_tra  = scratch.file   # transactions converted for the next level
  simg_node = scratch.file   # similarity graph: nodes
  simg_edge = scratch.file   # similarity graph: edges
  pol_node  = scratch.file   # polished graph: nodes
  pol_edge  = scratch.file   # polished graph: edges
  clusters  = scratch.file   # clustering result
  nc_map    = scratch.file   # node-to-cluster master

  system "cp #{traFile} #{cur_tra}"

  level = 1
  # Keep going while no limit is set or the limit has not been passed.
  while maxLevel == 0 || level <= maxLevel
    # Build the similarity graph.
    runTra2gc(cur_tra, paramTra2gc, simg_node, simg_edge)

    # Polish it.
    runPolish(simg_node, simg_edge, paramPolish, pol_node, pol_edge)

    # Stop condition: no polished edges left.
    break if MCMD::mrecount("i=#{pol_edge}") == 0

    # Clustering (connected components etc.).
    runClustering(pol_node, pol_edge, paramCluster, clusters)

    # Replace the transaction items by cluster items.
    maxNo = runConvert(cur_tra, idFN, itemFN, level, clusters, nc_map, next_tra, maxNo)

    # Save this level's nodes and edges.
    runSaveNode(pol_node, simg_node, nc_map, "#{oPath}/node_#{level}")
    runSaveEdge(pol_edge, simg_edge, nc_map, simStr, "#{oPath}/edge_#{level}")

    level += 1
    system "cp #{next_tra} #{cur_tra}"
  end
end
411
+
412
### Parameters for mtra2gc
tra2gc_opts = []
tra2gc_opts << "tid=#{idFN}" if idFN
tra2gc_opts << "item=#{itemFN}" if itemFN
tra2gc_opts << "s=#{sp1}" if sp1
tra2gc_opts << "S=#{sp2}" if sp2
#####################
# sim=C th=0 enumerates the confidence in both directions. The output doubles
# in size, but (per the original author) this pays off because mfriends is
# run with -directed.
tra2gc_opts << "sim=C"
tra2gc_opts << "th=0"
#####################
tra2gc_opts << "-node_support" if node_support
tra2gc_opts << "-num" if numtp
# Every option is preceded by a single space, including the first one.
paramTra2gc = tra2gc_opts.map { |opt| " #{opt}" }.join

### Parameters for polishing
paramPolish = " sim=#{simStr}"
paramPolish << " rank=#{rank}" if rank

### Parameters for clustering (none at present)
paramCluster = ""
435
# Work directory that keeps the per-level history of the merging process.
temp = MCMD::Mtemp.new
xxhis = temp.file
MCMD::mkDir(xxhis, true)

# With -num, fetch the largest numeric item up front.
#   maxNo      - handed to hiPolish, which passes it on to the conversion step
#   lastItemNo - largest item number in the original transactions
maxNo = numtp ? getMaxNo(traFile, itemFN) : nil
lastItemNo = maxNo

# Run the hierarchical polishing.
hiPolish(traFile, idFN, itemFN, simStr, paramTra2gc, paramPolish, paramCluster, maxLevel, xxhis, maxNo)

# Write the final node and edge files.
outputNode(xxhis, lastItemNo, onFile)
outputEdge(xxhis, simStr, oeFile)

# end message
MCMD::endLog(args.cmdline)
456
+