nysol-mining 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/mbopt.rb +522 -0
- data/bin/mburst.rb +716 -0
- data/bin/mgfeatures.rb +340 -0
- data/bin/mglmnet.rb +843 -0
- data/bin/mgnfeatures.rb +369 -0
- data/bin/mgpmetis.rb +449 -0
- data/bin/midxmine.rb +484 -0
- data/bin/mnb.rb +631 -0
- data/bin/mnetsimile.rb +572 -0
- data/bin/mnewman.rb +345 -0
- data/bin/msketchsort.rb +243 -0
- data/bin/msm.rb +172 -0
- data/ext/sketchsortrun/Main.cpp +161 -0
- data/ext/sketchsortrun/Main.hpp +24 -0
- data/ext/sketchsortrun/SketchSort.cpp +526 -0
- data/ext/sketchsortrun/SketchSort.hpp +138 -0
- data/ext/sketchsortrun/extconf.rb +26 -0
- data/ext/sketchsortrun/sketchsortrun.cpp +56 -0
- data/lib/nysol/mining.rb +24 -0
- metadata +89 -0
@@ -0,0 +1,138 @@
|
|
1
|
+
/*
|
2
|
+
* SketchSort.hpp
|
3
|
+
* Copyright (c) 2011 Yasuo Tabei All Rights Reserved.
|
4
|
+
*
|
5
|
+
* Permission is hereby granted, free of charge, to any person
|
6
|
+
* obtaining a copy of this software and associated documentation
|
7
|
+
* files (the "Software"), to deal in the Software without
|
8
|
+
* restriction, including without limitation the rights to use,
|
9
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
* copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
|
11
|
+
* conditions:
|
12
|
+
*
|
13
|
+
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
14
|
+
*
|
15
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
18
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
19
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
20
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
21
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
*/
|
23
|
+
|
24
|
+
#ifndef SKETCHSORT_HPP
|
25
|
+
#define SKETCHSORT_HPP
|
26
|
+
|
27
|
+
// for boost
|
28
|
+
#include <boost/pool/pool.hpp>
|
29
|
+
#include <boost/pool/object_pool.hpp>
|
30
|
+
#include <boost/numeric/ublas/matrix.hpp>
|
31
|
+
#include <boost/numeric/ublas/triangular.hpp>
|
32
|
+
#include <boost/numeric/ublas/symmetric.hpp>
|
33
|
+
#include <boost/numeric/ublas/hermitian.hpp>
|
34
|
+
#include <boost/numeric/ublas/matrix_sparse.hpp>
|
35
|
+
#include <boost/numeric/ublas/vector.hpp>
|
36
|
+
#include <boost/numeric/ublas/vector_sparse.hpp>
|
37
|
+
|
38
|
+
#include <iostream>
|
39
|
+
#include <string>
|
40
|
+
#include <vector>
|
41
|
+
#include <list>
|
42
|
+
#include <map>
|
43
|
+
#include <cmath>
|
44
|
+
#include <cstring>
|
45
|
+
#include <iterator>
|
46
|
+
#include <fstream>
|
47
|
+
#include <strstream>
|
48
|
+
#include <sstream>
|
49
|
+
#include <cstdio>
|
50
|
+
#include <cstdlib>
|
51
|
+
#include <set>
|
52
|
+
#include <stdint.h>
|
53
|
+
#include <time.h>
|
54
|
+
|
55
|
+
#include <boost/random.hpp>
|
56
|
+
|
57
|
+
using namespace boost::numeric::ublas; // boost::numeric::ublas
|
58
|
+
using namespace boost::numeric; // boost::numeric
|
59
|
+
|
60
|
+
// Fallback definitions for the POSIX math constants, which ISO C++ does not
// require <cmath> to provide. Each constant carries its own guard so that a
// platform supplying only some of them still gets the missing ones, and none
// is ever redefined.
#ifndef M_PI
#define M_PI 3.14159265358979323846 /* pi */
#endif
#ifndef M_PI_2
#define M_PI_2 1.57079632679489661923 /* pi/2 */
#endif
#ifndef M_PI_4
#define M_PI_4 0.78539816339744830962 /* pi/4 */
#endif
|
65
|
+
|
66
|
+
// A pair of integer bounds. The params struct keeps arrays of these in its
// `pos` and `pchunks` members.
// NOTE(review): whether `end` is inclusive or one-past-the-end is not visible
// in this header — confirm against SketchSort.cpp.
struct pstat {
  int start;  // first bound of the range
  int end;    // second bound of the range
};
|
70
|
+
|
71
|
+
struct params {
|
72
|
+
unsigned int numblocks;
|
73
|
+
unsigned int numchunks;
|
74
|
+
unsigned int projectDim;
|
75
|
+
unsigned int chunk_dist;
|
76
|
+
unsigned int chunks;
|
77
|
+
unsigned int num_seq;
|
78
|
+
unsigned int seq_len;
|
79
|
+
unsigned int chunk_len;
|
80
|
+
unsigned int start_chunk;
|
81
|
+
unsigned int end_chunk;
|
82
|
+
unsigned int cchunk;
|
83
|
+
unsigned int *counter;
|
84
|
+
pstat *pos;
|
85
|
+
pstat *pchunks;
|
86
|
+
float cosDist;
|
87
|
+
std::vector<unsigned int> ids;
|
88
|
+
std::vector<int> blocks;
|
89
|
+
std::ostream *os;
|
90
|
+
};
|
91
|
+
|
92
|
+
class SketchSort {
|
93
|
+
boost::pool<> *p;
|
94
|
+
std::vector<boost::numeric::ublas::vector<float> > fvs;
|
95
|
+
std::vector<int> tws;
|
96
|
+
std::vector<float> norms;
|
97
|
+
uint8_t num_char;
|
98
|
+
unsigned int dim;
|
99
|
+
//unsigned int tw;
|
100
|
+
|
101
|
+
uint64_t numSort;
|
102
|
+
uint64_t numHamDist;
|
103
|
+
uint64_t numCosDist;
|
104
|
+
|
105
|
+
void readFeature(const char *fname, unsigned int _windowsize);
|
106
|
+
void centeringData();
|
107
|
+
void preComputeNorms();
|
108
|
+
int projectVectors(unsigned int projectDim, std::vector<uint8_t*> &sig, unsigned int seed, params ¶m);
|
109
|
+
void report(std::vector<uint8_t*> &sig, int l, int r, params ¶m, unsigned int _windowsize);
|
110
|
+
void sort(std::vector<uint8_t*> &sig, int spos, int epos, int l, int r, params ¶m);
|
111
|
+
void radixsort(std::vector<uint8_t*> &sig, int spos, int epos, int l, int r, params ¶m);
|
112
|
+
void insertionSort(std::vector<uint8_t*> &sig, int spos, int epos, int l, int r, params ¶m);
|
113
|
+
bool calc_chunk_hamdist(uint8_t *seq1, uint8_t *seq2, const params ¶m);
|
114
|
+
bool calc_hamdist(uint8_t *seq1, uint8_t *seq2, const params ¶m);
|
115
|
+
bool check_canonical(uint8_t *seq1, uint8_t *seq2, const params ¶m);
|
116
|
+
bool check_chunk_canonical(uint8_t *seq1, uint8_t *seq2, const params ¶m);
|
117
|
+
float checkCos(unsigned int id1, unsigned int id2);
|
118
|
+
double calcMissingEdgeRatio(params ¶m);
|
119
|
+
void classify(std::vector<uint8_t*> &sig, int spos, int epos, int l, int r, int bpos, params ¶m, unsigned int _windowsize);
|
120
|
+
void multi_classification(std::vector<uint8_t*> &sig, int maxind, int l, int r, params ¶m, unsigned int _windowsize);
|
121
|
+
void refinement();
|
122
|
+
void insertKnnList(unsigned int from_id, unsigned int to_id, float cosDist);
|
123
|
+
void decideParameters(float _missingratio, params ¶m);
|
124
|
+
public:
|
125
|
+
SketchSort() {};
|
126
|
+
void run(const char *fname, const char *oname,
|
127
|
+
unsigned int _numblocks,
|
128
|
+
unsigned int _dist,
|
129
|
+
float _cosDist,
|
130
|
+
unsigned int _numchunks,
|
131
|
+
bool _autoFlag,
|
132
|
+
float _missingratio,
|
133
|
+
bool _centering,
|
134
|
+
unsigned int _windowsize,
|
135
|
+
unsigned int _seed);
|
136
|
+
};
|
137
|
+
|
138
|
+
#endif // SKETCHSORT_HPP
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Build configuration for the sketchsortrun native extension. The gem metadata
# lists this script under `extensions`, and it generates the Makefile via mkmf.
require "rubygems"
require "mkmf"

# Both libraries are mandatory: stop (exit status 1, message on stderr) before
# any Makefile is generated if either one cannot be linked.
abort("need boost_system.") unless have_library("boost_system")

unless have_library("kgmod3")
  abort("need libkgmod.\nrefer https://github.com/nysol/mcmd")
end

# One flag set shared by the C, preprocessor and C++ flag variables.
build_flags = " -O3 -DNDEBUG -D_NO_MAIN_ -Wno-deprecated -pedantic -ansi -finline-functions -foptimize-sibling-calls -Wcast-qual -Wwrite-strings -Wsign-promo -Wcast-align -Wno-long-long -fexpensive-optimizations -funroll-all-loops -ffast-math -fomit-frame-pointer -pipe -I./"

# Each variable gets its own copy so that an in-place append to one of them
# later on cannot leak into the other two.
$CFLAGS   = build_flags.dup
$CPPFLAGS = build_flags.dup
$CXXFLAGS = build_flags.dup

$LOCAL_LIBS += " -lstdc++ -lkgmod3 -lm -lboost_system"

create_makefile("nysol/sketchsortrun")
|
26
|
+
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include <stdlib.h>
|
3
|
+
#include <unistd.h>
|
4
|
+
#include <fcntl.h>
|
5
|
+
#include <sys/stat.h>
|
6
|
+
#include <string>
|
7
|
+
#include <ruby.h>
|
8
|
+
|
9
|
+
#include "Main.hpp"
|
10
|
+
|
11
|
+
|
12
|
+
#include <kgMethod.h>
|
13
|
+
|
14
|
+
extern "C" {
|
15
|
+
void Init_sketchsortrun(void);
|
16
|
+
}
|
17
|
+
|
18
|
+
|
19
|
+
VALUE lcmrun(VALUE self,VALUE argvV){
|
20
|
+
|
21
|
+
string argstr=RSTRING_PTR(argvV);
|
22
|
+
vector<char *> opts = kglib::splitToken(const_cast<char*>(argstr.c_str()), ' ',true);
|
23
|
+
|
24
|
+
// 引数文字列へのポインタの領域はここでauto変数に確保する
|
25
|
+
kglib::kgAutoPtr2<char*> argv;
|
26
|
+
char** vv;
|
27
|
+
try{
|
28
|
+
argv.set(new char*[opts.size()+1]);
|
29
|
+
vv = argv.get();
|
30
|
+
}catch(...){
|
31
|
+
rb_raise(rb_eRuntimeError,"memory allocation error");
|
32
|
+
}
|
33
|
+
|
34
|
+
// vv配列0番目はコマンド名
|
35
|
+
vv[0]=const_cast<char*>("lcm");
|
36
|
+
|
37
|
+
size_t vvSize;
|
38
|
+
for(vvSize=0; vvSize<opts.size(); vvSize++){
|
39
|
+
vv[vvSize+1] = opts.at(vvSize);
|
40
|
+
}
|
41
|
+
vvSize+=1;
|
42
|
+
int rtn = sketchsort_main (vvSize,vv);
|
43
|
+
return INT2NUM(rtn);
|
44
|
+
}
|
45
|
+
|
46
|
+
// -----------------------------------------------------------------------------
|
47
|
+
// ruby Mcsvin クラス init
|
48
|
+
// -----------------------------------------------------------------------------
|
49
|
+
void Init_sketchsortrun(void)
|
50
|
+
{
|
51
|
+
// モジュール定義:MCMD::xxxxの部分
|
52
|
+
VALUE mtake=rb_define_module("NYSOL_MINING");
|
53
|
+
rb_define_module_function(mtake,"run_sketchsort" , (VALUE (*)(...))lcmrun,1);
|
54
|
+
}
|
55
|
+
|
56
|
+
|
data/lib/nysol/mining.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#/* ////////// LICENSE INFO ////////////////////
|
3
|
+
#
|
4
|
+
# * Copyright (C) 2013 by NYSOL CORPORATION
|
5
|
+
# *
|
6
|
+
# * Unless you have received this program directly from NYSOL pursuant
|
7
|
+
# * to the terms of a commercial license agreement with NYSOL, then
|
8
|
+
# * this program is licensed to you under the terms of the GNU Affero General
|
9
|
+
# * Public License (AGPL) as published by the Free Software Foundation,
|
10
|
+
# * either version 3 of the License, or (at your option) any later version.
|
11
|
+
# *
|
12
|
+
# * This program is distributed in the hope that it will be useful, but
|
13
|
+
# * WITHOUT ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF
|
14
|
+
# * NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
|
15
|
+
# *
|
16
|
+
# * Please refer to the AGPL (http://www.gnu.org/licenses/agpl-3.0.txt)
|
17
|
+
# * for more details.
|
18
|
+
#
|
19
|
+
# ////////// LICENSE INFO ////////////////////*/
|
20
|
+
|
21
|
+
|
22
|
+
require "nysol/sketchsortrun"
|
23
|
+
|
24
|
+
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: nysol-mining
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 3.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- NYSOL
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-07-07 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nysol
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 3.0.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 3.0.0
|
27
|
+
description: |2
|
28
|
+
nysol Mining tools
|
29
|
+
email: info@nysol.jp
|
30
|
+
executables:
|
31
|
+
- mburst.rb
|
32
|
+
- mgpmetis.rb
|
33
|
+
- mnb.rb
|
34
|
+
- mbopt.rb
|
35
|
+
- mnetsimile.rb
|
36
|
+
- mgfeatures.rb
|
37
|
+
- msketchsort.rb
|
38
|
+
- mnewman.rb
|
39
|
+
- mgnfeatures.rb
|
40
|
+
- mglmnet.rb
|
41
|
+
- msm.rb
|
42
|
+
- midxmine.rb
|
43
|
+
extensions:
|
44
|
+
- ext/sketchsortrun/extconf.rb
|
45
|
+
extra_rdoc_files: []
|
46
|
+
files:
|
47
|
+
- ext/sketchsortrun/extconf.rb
|
48
|
+
- ext/sketchsortrun/Main.cpp
|
49
|
+
- ext/sketchsortrun/Main.hpp
|
50
|
+
- ext/sketchsortrun/SketchSort.cpp
|
51
|
+
- ext/sketchsortrun/SketchSort.hpp
|
52
|
+
- ext/sketchsortrun/sketchsortrun.cpp
|
53
|
+
- lib/nysol/mining.rb
|
54
|
+
- bin/mnb.rb
|
55
|
+
- bin/mbopt.rb
|
56
|
+
- bin/mburst.rb
|
57
|
+
- bin/mgpmetis.rb
|
58
|
+
- bin/mnetsimile.rb
|
59
|
+
- bin/mgfeatures.rb
|
60
|
+
- bin/msketchsort.rb
|
61
|
+
- bin/mnewman.rb
|
62
|
+
- bin/mgnfeatures.rb
|
63
|
+
- bin/msm.rb
|
64
|
+
- bin/mglmnet.rb
|
65
|
+
- bin/midxmine.rb
|
66
|
+
homepage: http://www.nysol.jp/
|
67
|
+
licenses: []
|
68
|
+
metadata: {}
|
69
|
+
post_install_message:
|
70
|
+
rdoc_options: []
|
71
|
+
require_paths:
|
72
|
+
- lib
|
73
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
requirements: []
|
84
|
+
rubyforge_project:
|
85
|
+
rubygems_version: 2.0.14
|
86
|
+
signing_key:
|
87
|
+
specification_version: 4
|
88
|
+
summary: nysol mining tools
|
89
|
+
test_files: []
|