nysol-mining 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/mbopt.rb +522 -0
- data/bin/mburst.rb +716 -0
- data/bin/mgfeatures.rb +340 -0
- data/bin/mglmnet.rb +843 -0
- data/bin/mgnfeatures.rb +369 -0
- data/bin/mgpmetis.rb +449 -0
- data/bin/midxmine.rb +484 -0
- data/bin/mnb.rb +631 -0
- data/bin/mnetsimile.rb +572 -0
- data/bin/mnewman.rb +345 -0
- data/bin/msketchsort.rb +243 -0
- data/bin/msm.rb +172 -0
- data/ext/sketchsortrun/Main.cpp +161 -0
- data/ext/sketchsortrun/Main.hpp +24 -0
- data/ext/sketchsortrun/SketchSort.cpp +526 -0
- data/ext/sketchsortrun/SketchSort.hpp +138 -0
- data/ext/sketchsortrun/extconf.rb +26 -0
- data/ext/sketchsortrun/sketchsortrun.cpp +56 -0
- data/lib/nysol/mining.rb +24 -0
- metadata +89 -0
@@ -0,0 +1,138 @@
|
|
1
|
+
/*
|
2
|
+
* SketchSort.hpp
|
3
|
+
* Copyright (c) 2011 Yasuo Tabei All Rights Reserved.
|
4
|
+
*
|
5
|
+
* Permission is hereby granted, free of charge, to any person
|
6
|
+
* obtaining a copy of this software and associated documentation
|
7
|
+
* files (the "Software"), to deal in the Software without
|
8
|
+
* restriction, including without limitation the rights to use,
|
9
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
* copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
|
11
|
+
* conditions:
|
12
|
+
*
|
13
|
+
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
14
|
+
*
|
15
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE and NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
18
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
19
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
20
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
21
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
*/
|
23
|
+
|
24
|
+
#ifndef SKETCHSORT_HPP
|
25
|
+
#define SKETCHSORT_HPP
|
26
|
+
|
27
|
+
// for boost
|
28
|
+
#include <boost/pool/pool.hpp>
|
29
|
+
#include <boost/pool/object_pool.hpp>
|
30
|
+
#include <boost/numeric/ublas/matrix.hpp>
|
31
|
+
#include <boost/numeric/ublas/triangular.hpp>
|
32
|
+
#include <boost/numeric/ublas/symmetric.hpp>
|
33
|
+
#include <boost/numeric/ublas/hermitian.hpp>
|
34
|
+
#include <boost/numeric/ublas/matrix_sparse.hpp>
|
35
|
+
#include <boost/numeric/ublas/vector.hpp>
|
36
|
+
#include <boost/numeric/ublas/vector_sparse.hpp>
|
37
|
+
|
38
|
+
#include <iostream>
|
39
|
+
#include <string>
|
40
|
+
#include <vector>
|
41
|
+
#include <list>
|
42
|
+
#include <map>
|
43
|
+
#include <cmath>
|
44
|
+
#include <cstring>
|
45
|
+
#include <iterator>
|
46
|
+
#include <fstream>
|
47
|
+
#include <strstream>
|
48
|
+
#include <sstream>
|
49
|
+
#include <cstdio>
|
50
|
+
#include <cstdlib>
|
51
|
+
#include <set>
|
52
|
+
#include <stdint.h>
|
53
|
+
#include <time.h>
|
54
|
+
|
55
|
+
#include <boost/random.hpp>
|
56
|
+
|
57
|
+
using namespace boost::numeric::ublas; // boost::numeric::ublas
|
58
|
+
using namespace boost::numeric; // boost::numeric
|
59
|
+
|
60
|
+
// Fallback definitions for the math constants M_PI, M_PI_2 and M_PI_4.
// These names are not required by the C++ standard, so a toolchain may
// provide all, some, or none of them. Each macro is guarded on its own so
// that a partially-provided set is neither left incomplete nor redefined.
#ifndef M_PI
#define M_PI 3.14159265358979323846 /* pi */
#endif

#ifndef M_PI_2
#define M_PI_2 1.57079632679489661923 /* pi/2 */
#endif

#ifndef M_PI_4
#define M_PI_4 0.78539816339744830962 /* pi/4 */
#endif
|
65
|
+
|
66
|
+
// Plain pair of integer positions. params stores pointers to this type
// (pos, pchunks).
// NOTE(review): whether `end` is inclusive or exclusive is not established
// in this header - confirm against SketchSort.cpp before relying on it.
struct pstat {
  int start;  // first position of the range
  int end;    // last position of the range (inclusivity: see note above)
};
|
70
|
+
|
71
|
+
struct params {
|
72
|
+
unsigned int numblocks;
|
73
|
+
unsigned int numchunks;
|
74
|
+
unsigned int projectDim;
|
75
|
+
unsigned int chunk_dist;
|
76
|
+
unsigned int chunks;
|
77
|
+
unsigned int num_seq;
|
78
|
+
unsigned int seq_len;
|
79
|
+
unsigned int chunk_len;
|
80
|
+
unsigned int start_chunk;
|
81
|
+
unsigned int end_chunk;
|
82
|
+
unsigned int cchunk;
|
83
|
+
unsigned int *counter;
|
84
|
+
pstat *pos;
|
85
|
+
pstat *pchunks;
|
86
|
+
float cosDist;
|
87
|
+
std::vector<unsigned int> ids;
|
88
|
+
std::vector<int> blocks;
|
89
|
+
std::ostream *os;
|
90
|
+
};
|
91
|
+
|
92
|
+
class SketchSort {
|
93
|
+
boost::pool<> *p;
|
94
|
+
std::vector<boost::numeric::ublas::vector<float> > fvs;
|
95
|
+
std::vector<int> tws;
|
96
|
+
std::vector<float> norms;
|
97
|
+
uint8_t num_char;
|
98
|
+
unsigned int dim;
|
99
|
+
//unsigned int tw;
|
100
|
+
|
101
|
+
uint64_t numSort;
|
102
|
+
uint64_t numHamDist;
|
103
|
+
uint64_t numCosDist;
|
104
|
+
|
105
|
+
void readFeature(const char *fname, unsigned int _windowsize);
|
106
|
+
void centeringData();
|
107
|
+
void preComputeNorms();
|
108
|
+
int projectVectors(unsigned int projectDim, std::vector<uint8_t*> &sig, unsigned int seed, params ¶m);
|
109
|
+
void report(std::vector<uint8_t*> &sig, int l, int r, params ¶m, unsigned int _windowsize);
|
110
|
+
void sort(std::vector<uint8_t*> &sig, int spos, int epos, int l, int r, params ¶m);
|
111
|
+
void radixsort(std::vector<uint8_t*> &sig, int spos, int epos, int l, int r, params ¶m);
|
112
|
+
void insertionSort(std::vector<uint8_t*> &sig, int spos, int epos, int l, int r, params ¶m);
|
113
|
+
bool calc_chunk_hamdist(uint8_t *seq1, uint8_t *seq2, const params ¶m);
|
114
|
+
bool calc_hamdist(uint8_t *seq1, uint8_t *seq2, const params ¶m);
|
115
|
+
bool check_canonical(uint8_t *seq1, uint8_t *seq2, const params ¶m);
|
116
|
+
bool check_chunk_canonical(uint8_t *seq1, uint8_t *seq2, const params ¶m);
|
117
|
+
float checkCos(unsigned int id1, unsigned int id2);
|
118
|
+
double calcMissingEdgeRatio(params ¶m);
|
119
|
+
void classify(std::vector<uint8_t*> &sig, int spos, int epos, int l, int r, int bpos, params ¶m, unsigned int _windowsize);
|
120
|
+
void multi_classification(std::vector<uint8_t*> &sig, int maxind, int l, int r, params ¶m, unsigned int _windowsize);
|
121
|
+
void refinement();
|
122
|
+
void insertKnnList(unsigned int from_id, unsigned int to_id, float cosDist);
|
123
|
+
void decideParameters(float _missingratio, params ¶m);
|
124
|
+
public:
|
125
|
+
SketchSort() {};
|
126
|
+
void run(const char *fname, const char *oname,
|
127
|
+
unsigned int _numblocks,
|
128
|
+
unsigned int _dist,
|
129
|
+
float _cosDist,
|
130
|
+
unsigned int _numchunks,
|
131
|
+
bool _autoFlag,
|
132
|
+
float _missingratio,
|
133
|
+
bool _centering,
|
134
|
+
unsigned int _windowsize,
|
135
|
+
unsigned int _seed);
|
136
|
+
};
|
137
|
+
|
138
|
+
#endif // SKETCHSORT_HPP
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Build configuration for the nysol/sketchsortrun native extension.
#
# Checks that the two required shared libraries can be linked, sets the
# compiler flags, and writes the Makefile. If a library is missing, a short
# message is printed and the script exits with status 1.
require "rubygems"
require "mkmf"

unless have_library("boost_system")
  puts("need boost_system.")
  exit 1
end

unless have_library("kgmod3")
  puts("need libkgmod.")
  puts("refer https://github.com/nysol/mcmd")
  exit 1
end

# The same flag set is used for the C, preprocessor and C++ flag variables.
# It is written once and each global receives its own copy, so a later
# in-place modification of one variable cannot leak into the others.
build_flags = " -O3 -DNDEBUG -D_NO_MAIN_ -Wno-deprecated -pedantic -ansi -finline-functions -foptimize-sibling-calls -Wcast-qual -Wwrite-strings -Wsign-promo -Wcast-align -Wno-long-long -fexpensive-optimizations -funroll-all-loops -ffast-math -fomit-frame-pointer -pipe -I./"

$CFLAGS   = build_flags.dup
$CPPFLAGS = build_flags.dup
$CXXFLAGS = build_flags.dup

$LOCAL_LIBS += " -lstdc++ -lkgmod3 -lm -lboost_system"

create_makefile("nysol/sketchsortrun")
|
26
|
+
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include <stdlib.h>
|
3
|
+
#include <unistd.h>
|
4
|
+
#include <fcntl.h>
|
5
|
+
#include <sys/stat.h>
|
6
|
+
#include <string>
|
7
|
+
#include <ruby.h>
|
8
|
+
|
9
|
+
#include "Main.hpp"
|
10
|
+
|
11
|
+
|
12
|
+
#include <kgMethod.h>
|
13
|
+
|
14
|
+
// Forward declaration of the extension's init function with C linkage, so
// its symbol name is not C++-mangled.
extern "C" void Init_sketchsortrun(void);
|
17
|
+
|
18
|
+
|
19
|
+
// Ruby-callable function (registered in Init_sketchsortrun as
// NYSOL_MINING.run_sketchsort).
//
//   self  - receiver (unused)
//   argvV - Ruby value holding the whole command line as one string; it is
//           coerced with StringValue, so a non-String without to_str raises
//           TypeError instead of being read as raw memory.
//
// The string is split on ' ' by kglib::splitToken, an argv-style array is
// built with a fixed command name in slot 0, and the array is passed to
// sketchsort_main. Returns sketchsort_main's int result as a Ruby Integer.
// Raises RuntimeError("memory allocation error") if the pointer array
// cannot be allocated.
VALUE lcmrun(VALUE self,VALUE argvV){

	// Must happen before any C++ object is constructed: StringValue may raise,
	// and Ruby exceptions unwind with longjmp, which skips C++ destructors.
	StringValue(argvV);

	bool allocFailed = false;
	int rtn = 0;

	{
		// All C++ objects live inside this scope so they are destroyed before
		// a possible rb_raise below.
		std::string argstr(RSTRING_PTR(argvV), RSTRING_LEN(argvV));

		// NOTE(review): the token pointers are used while argstr is alive, so
		// splitToken presumably tokenizes the buffer in place - confirm in kglib.
		std::vector<char *> opts = kglib::splitToken(const_cast<char*>(argstr.c_str()), ' ',true);

		// The storage for the argument-string pointers is held by an automatic
		// (scope-bound) variable.
		kglib::kgAutoPtr2<char*> argv;
		char** vv = 0;
		try{
			argv.set(new char*[opts.size()+1]);
			vv = argv.get();
		}catch(...){
			// Only record the failure here; raising from inside the handler
			// would jump out while the objects above are still alive.
			allocFailed = true;
		}

		if(!allocFailed){
			// Element 0 of vv is the command name.
			// NOTE(review): the literal is "lcm" although this wraps sketchsort;
			// kept unchanged because sketchsort_main may print argv[0].
			vv[0]=const_cast<char*>("lcm");

			for(size_t i=0; i<opts.size(); i++){
				vv[i+1] = opts.at(i);
			}
			size_t vvSize = opts.size()+1;
			rtn = sketchsort_main (vvSize,vv);
		}
	}

	if(allocFailed){
		rb_raise(rb_eRuntimeError,"memory allocation error");
	}
	return INT2NUM(rtn);
}
|
45
|
+
|
46
|
+
// -----------------------------------------------------------------------------
|
47
|
+
// ruby Mcsvin クラス init
|
48
|
+
// -----------------------------------------------------------------------------
|
49
|
+
void Init_sketchsortrun(void)
|
50
|
+
{
|
51
|
+
// モジュール定義:MCMD::xxxxの部分
|
52
|
+
VALUE mtake=rb_define_module("NYSOL_MINING");
|
53
|
+
rb_define_module_function(mtake,"run_sketchsort" , (VALUE (*)(...))lcmrun,1);
|
54
|
+
}
|
55
|
+
|
56
|
+
|
data/lib/nysol/mining.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#/* ////////// LICENSE INFO ////////////////////
|
3
|
+
#
|
4
|
+
# * Copyright (C) 2013 by NYSOL CORPORATION
|
5
|
+
# *
|
6
|
+
# * Unless you have received this program directly from NYSOL pursuant
|
7
|
+
# * to the terms of a commercial license agreement with NYSOL, then
|
8
|
+
# * this program is licensed to you under the terms of the GNU Affero General
|
9
|
+
# * Public License (AGPL) as published by the Free Software Foundation,
|
10
|
+
# * either version 3 of the License, or (at your option) any later version.
|
11
|
+
# *
|
12
|
+
# * This program is distributed in the hope that it will be useful, but
|
13
|
+
# * WITHOUT ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF
|
14
|
+
# * NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
|
15
|
+
# *
|
16
|
+
# * Please refer to the AGPL (http://www.gnu.org/licenses/agpl-3.0.txt)
|
17
|
+
# * for more details.
|
18
|
+
#
|
19
|
+
# ////////// LICENSE INFO ////////////////////*/
|
20
|
+
|
21
|
+
|
22
|
+
require "nysol/sketchsortrun"
|
23
|
+
|
24
|
+
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: nysol-mining
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 3.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- NYSOL
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-07-07 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nysol
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 3.0.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 3.0.0
|
27
|
+
description: |2
|
28
|
+
nysol Mining tools
|
29
|
+
email: info@nysol.jp
|
30
|
+
executables:
|
31
|
+
- mburst.rb
|
32
|
+
- mgpmetis.rb
|
33
|
+
- mnb.rb
|
34
|
+
- mbopt.rb
|
35
|
+
- mnetsimile.rb
|
36
|
+
- mgfeatures.rb
|
37
|
+
- msketchsort.rb
|
38
|
+
- mnewman.rb
|
39
|
+
- mgnfeatures.rb
|
40
|
+
- mglmnet.rb
|
41
|
+
- msm.rb
|
42
|
+
- midxmine.rb
|
43
|
+
extensions:
|
44
|
+
- ext/sketchsortrun/extconf.rb
|
45
|
+
extra_rdoc_files: []
|
46
|
+
files:
|
47
|
+
- ext/sketchsortrun/extconf.rb
|
48
|
+
- ext/sketchsortrun/Main.cpp
|
49
|
+
- ext/sketchsortrun/Main.hpp
|
50
|
+
- ext/sketchsortrun/SketchSort.cpp
|
51
|
+
- ext/sketchsortrun/SketchSort.hpp
|
52
|
+
- ext/sketchsortrun/sketchsortrun.cpp
|
53
|
+
- lib/nysol/mining.rb
|
54
|
+
- bin/mnb.rb
|
55
|
+
- bin/mbopt.rb
|
56
|
+
- bin/mburst.rb
|
57
|
+
- bin/mgpmetis.rb
|
58
|
+
- bin/mnetsimile.rb
|
59
|
+
- bin/mgfeatures.rb
|
60
|
+
- bin/msketchsort.rb
|
61
|
+
- bin/mnewman.rb
|
62
|
+
- bin/mgnfeatures.rb
|
63
|
+
- bin/msm.rb
|
64
|
+
- bin/mglmnet.rb
|
65
|
+
- bin/midxmine.rb
|
66
|
+
homepage: http://www.nysol.jp/
|
67
|
+
licenses: []
|
68
|
+
metadata: {}
|
69
|
+
post_install_message:
|
70
|
+
rdoc_options: []
|
71
|
+
require_paths:
|
72
|
+
- lib
|
73
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
requirements: []
|
84
|
+
rubyforge_project:
|
85
|
+
rubygems_version: 2.0.14
|
86
|
+
signing_key:
|
87
|
+
specification_version: 4
|
88
|
+
summary: nysol mining tools
|
89
|
+
test_files: []
|