nysol-mining 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,138 @@
1
+ /*
2
+ * SketchSort.hpp
3
+ * Copyright (c) 2011 Yasuo Tabei All Rights Reserved.
4
+ *
5
+ * Permission is hereby granted, free of charge, to any person
6
+ * obtaining a copy of this software and associated documentation
7
+ * files (the "Software"), to deal in the Software without
8
+ * restriction, including without limitation the rights to use,
9
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ * copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
11
+ * conditions:
12
+ *
13
+ * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
14
+ *
15
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21
+ * OTHER DEALINGS IN THE SOFTWARE.
22
+ */
23
+
24
+ #ifndef SKETCHSORT_HPP
25
+ #define SKETCHSORT_HPP
26
+
27
+ // for boost
28
+ #include <boost/pool/pool.hpp>
29
+ #include <boost/pool/object_pool.hpp>
30
+ #include <boost/numeric/ublas/matrix.hpp>
31
+ #include <boost/numeric/ublas/triangular.hpp>
32
+ #include <boost/numeric/ublas/symmetric.hpp>
33
+ #include <boost/numeric/ublas/hermitian.hpp>
34
+ #include <boost/numeric/ublas/matrix_sparse.hpp>
35
+ #include <boost/numeric/ublas/vector.hpp>
36
+ #include <boost/numeric/ublas/vector_sparse.hpp>
37
+
38
+ #include <iostream>
39
+ #include <string>
40
+ #include <vector>
41
+ #include <list>
42
+ #include <map>
43
+ #include <cmath>
44
+ #include <cstring>
45
+ #include <iterator>
46
+ #include <fstream>
47
+ #include <strstream>
48
+ #include <sstream>
49
+ #include <cstdio>
50
+ #include <cstdlib>
51
+ #include <set>
52
+ #include <stdint.h>
53
+ #include <time.h>
54
+
55
+ #include <boost/random.hpp>
56
+
57
+ using namespace boost::numeric::ublas; // boost::numeric::ublas
58
+ using namespace boost::numeric; // boost::numeric
59
+
60
+ #ifndef M_PI // fallback constants, defined only when M_PI is not already defined; M_PI_2 and M_PI_4 share this one guard
61
+ #define M_PI 3.14159265358979323846/* pi */
62
+ #define M_PI_2 1.57079632679489661923/* pi/2 */
63
+ #define M_PI_4 0.78539816339744830962/* pi/4 */
64
+ #endif
65
+
66
+ struct pstat { // pair of int positions; NOTE(review): presumably the bounds of a range - confirm against SketchSort.cpp
67
+ int start;
68
+ int end;
69
+ };
70
+
71
+ struct params { // parameter/state bundle handed by reference to the private SketchSort methods
72
+ unsigned int numblocks;
73
+ unsigned int numchunks;
74
+ unsigned int projectDim;
75
+ unsigned int chunk_dist;
76
+ unsigned int chunks;
77
+ unsigned int num_seq;
78
+ unsigned int seq_len;
79
+ unsigned int chunk_len;
80
+ unsigned int start_chunk;
81
+ unsigned int end_chunk;
82
+ unsigned int cchunk;
83
+ unsigned int *counter; // raw pointer; allocation and ownership are not visible in this header
84
+ pstat *pos; // raw pointer; allocation and ownership are not visible in this header
85
+ pstat *pchunks; // raw pointer; allocation and ownership are not visible in this header
86
+ float cosDist; // NOTE(review): presumably the cosine-distance threshold (cf. the _cosDist argument of SketchSort::run) - confirm
87
+ std::vector<unsigned int> ids;
88
+ std::vector<int> blocks;
89
+ std::ostream *os; // NOTE(review): presumably the stream results are written to - confirm; ownership not visible here
90
+ };
91
+
92
+ class SketchSort { // run() is the only public entry point; everything else is private state and helper methods
93
+ boost::pool<> *p; // raw pool pointer (null until assigned); allocation/ownership not visible in this header
94
+ std::vector<boost::numeric::ublas::vector<float> > fvs; // NOTE(review): presumably the input feature vectors - confirm in SketchSort.cpp
95
+ std::vector<int> tws;
96
+ std::vector<float> norms; // NOTE(review): presumably filled by preComputeNorms() - confirm
97
+ uint8_t num_char;
98
+ unsigned int dim;
99
+ //unsigned int tw;
100
+
101
+ uint64_t numSort; // the three num* counters start at zero (see constructor)
102
+ uint64_t numHamDist;
103
+ uint64_t numCosDist;
104
+
105
+ void readFeature(const char *fname, unsigned int _windowsize);
106
+ void centeringData();
107
+ void preComputeNorms();
108
+ int projectVectors(unsigned int projectDim, std::vector<uint8_t*> &sig, unsigned int seed, params &param);
109
+ void report(std::vector<uint8_t*> &sig, int l, int r, params &param, unsigned int _windowsize);
110
+ void sort(std::vector<uint8_t*> &sig, int spos, int epos, int l, int r, params &param);
111
+ void radixsort(std::vector<uint8_t*> &sig, int spos, int epos, int l, int r, params &param);
112
+ void insertionSort(std::vector<uint8_t*> &sig, int spos, int epos, int l, int r, params &param);
113
+ bool calc_chunk_hamdist(uint8_t *seq1, uint8_t *seq2, const params &param);
114
+ bool calc_hamdist(uint8_t *seq1, uint8_t *seq2, const params &param);
115
+ bool check_canonical(uint8_t *seq1, uint8_t *seq2, const params &param);
116
+ bool check_chunk_canonical(uint8_t *seq1, uint8_t *seq2, const params &param);
117
+ float checkCos(unsigned int id1, unsigned int id2);
118
+ double calcMissingEdgeRatio(params &param);
119
+ void classify(std::vector<uint8_t*> &sig, int spos, int epos, int l, int r, int bpos, params &param, unsigned int _windowsize);
120
+ void multi_classification(std::vector<uint8_t*> &sig, int maxind, int l, int r, params &param, unsigned int _windowsize);
121
+ void refinement();
122
+ void insertKnnList(unsigned int from_id, unsigned int to_id, float cosDist);
123
+ void decideParameters(float _missingratio, params &param);
124
+ public:
125
+ SketchSort() : p(NULL), num_char(0), dim(0), numSort(0), numHamDist(0), numCosDist(0) {} // no member is left indeterminate; the vectors default-construct empty
126
+ void run(const char *fname, const char *oname,
127
+ unsigned int _numblocks,
128
+ unsigned int _dist,
129
+ float _cosDist,
130
+ unsigned int _numchunks,
131
+ bool _autoFlag,
132
+ float _missingratio,
133
+ bool _centering,
134
+ unsigned int _windowsize,
135
+ unsigned int _seed);
136
+ };
137
+
138
+ #endif // SKETCHSORT_HPP
@@ -0,0 +1,26 @@
1
+ require "rubygems"
2
+ require "mkmf"
3
+
4
+ unless have_library("boost_system")
5
+ puts("need boost_system.")
6
+ exit 1
7
+ end
8
+
9
+ unless have_library("kgmod3")
10
+ puts("need libkgmod.")
11
+ puts("refer https://github.com/nysol/mcmd")
12
+ exit 1
13
+ end
14
+
15
+
16
+
17
+ cp = "$(srcdir)"
18
+
19
+ $CFLAGS = " -O3 -DNDEBUG -D_NO_MAIN_ -Wno-deprecated -pedantic -ansi -finline-functions -foptimize-sibling-calls -Wcast-qual -Wwrite-strings -Wsign-promo -Wcast-align -Wno-long-long -fexpensive-optimizations -funroll-all-loops -ffast-math -fomit-frame-pointer -pipe -I./"
20
+ $CPPFLAGS = " -O3 -DNDEBUG -D_NO_MAIN_ -Wno-deprecated -pedantic -ansi -finline-functions -foptimize-sibling-calls -Wcast-qual -Wwrite-strings -Wsign-promo -Wcast-align -Wno-long-long -fexpensive-optimizations -funroll-all-loops -ffast-math -fomit-frame-pointer -pipe -I./"
21
+ $CXXFLAGS = " -O3 -DNDEBUG -D_NO_MAIN_ -Wno-deprecated -pedantic -ansi -finline-functions -foptimize-sibling-calls -Wcast-qual -Wwrite-strings -Wsign-promo -Wcast-align -Wno-long-long -fexpensive-optimizations -funroll-all-loops -ffast-math -fomit-frame-pointer -pipe -I./"
22
+
23
+ $LOCAL_LIBS += " -lstdc++ -lkgmod3 -lm -lboost_system"
24
+
25
+ create_makefile("nysol/sketchsortrun")
26
+
@@ -0,0 +1,56 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <unistd.h>
4
+ #include <fcntl.h>
5
+ #include <sys/stat.h>
6
+ #include <string>
7
+ #include <ruby.h>
8
+
9
+ #include "Main.hpp"
10
+
11
+
12
+ #include <kgMethod.h>
13
+
14
+ extern "C" { // C linkage, so the Init_ symbol name is not C++-mangled
15
+ void Init_sketchsortrun(void);
16
+ }
17
+
18
+
19
+ VALUE lcmrun(VALUE self,VALUE argvV){
20
+ // Ruby entry point: tokenizes argvV via kglib::splitToken (delimiter ' '), builds a C argv array, calls sketchsort_main and returns its int result as a Ruby Integer.
21
+ // StringValueCStr (unlike RSTRING_PTR) type-checks argvV and yields a NUL-terminated buffer; if it raises, no C++ object has been constructed yet.
22
+ string argstr=StringValueCStr(argvV);
23
+ vector<char *> opts = kglib::splitToken(const_cast<char*>(argstr.c_str()), ' ',true);
24
+
25
+ // The array of pointers to the argument strings is held by this automatic (stack) variable
26
+ kglib::kgAutoPtr2<char*> argv;
27
+ char** vv = NULL; // never indeterminate, even on the failure path
28
+ try{
29
+ argv.set(new char*[opts.size()+1]);
30
+ vv = argv.get();
31
+ }catch(...){
32
+ rb_raise(rb_eRuntimeError,"memory allocation error"); // rb_raise does not return
33
+ }
34
+
35
+ // Slot 0 of vv is the command name
36
+ vv[0]=const_cast<char*>("lcm");
37
+ for(size_t i=0; i<opts.size(); i++){
38
+ vv[i+1] = opts.at(i);
39
+ }
40
+
41
+ // argc = command name + options; the cast makes the size_t -> int narrowing explicit
42
+ int rtn = sketchsort_main (static_cast<int>(opts.size()+1),vv);
43
+ return INT2NUM(rtn);
44
+ }
45
+
46
+ // -----------------------------------------------------------------------------
47
+ // Ruby extension initializer for sketchsortrun
48
+ // -----------------------------------------------------------------------------
49
+ void Init_sketchsortrun(void)
50
+ {
51
+ // Module definition: create the NYSOL_MINING module and register lcmrun as its one-argument module function run_sketchsort
52
+ VALUE mtake=rb_define_module("NYSOL_MINING");
53
+ rb_define_module_function(mtake,"run_sketchsort" , (VALUE (*)(...))lcmrun,1);
54
+ }
55
+
56
+
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env ruby
2
+ #/* ////////// LICENSE INFO ////////////////////
3
+ #
4
+ # * Copyright (C) 2013 by NYSOL CORPORATION
5
+ # *
6
+ # * Unless you have received this program directly from NYSOL pursuant
7
+ # * to the terms of a commercial license agreement with NYSOL, then
8
+ # * this program is licensed to you under the terms of the GNU Affero General
9
+ # * Public License (AGPL) as published by the Free Software Foundation,
10
+ # * either version 3 of the License, or (at your option) any later version.
11
+ # *
12
+ # * This program is distributed in the hope that it will be useful, but
13
+ # * WITHOUT ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF
14
+ # * NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
15
+ # *
16
+ # * Please refer to the AGPL (http://www.gnu.org/licenses/agpl-3.0.txt)
17
+ # * for more details.
18
+ #
19
+ # ////////// LICENSE INFO ////////////////////*/
20
+
21
+
22
+ require "nysol/sketchsortrun"
23
+
24
+
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: nysol-mining
3
+ version: !ruby/object:Gem::Version
4
+ version: 3.0.0
5
+ platform: ruby
6
+ authors:
7
+ - NYSOL
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-07-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nysol
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 3.0.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 3.0.0
27
+ description: |2
28
+ nysol Mining tools
29
+ email: info@nysol.jp
30
+ executables:
31
+ - mburst.rb
32
+ - mgpmetis.rb
33
+ - mnb.rb
34
+ - mbopt.rb
35
+ - mnetsimile.rb
36
+ - mgfeatures.rb
37
+ - msketchsort.rb
38
+ - mnewman.rb
39
+ - mgnfeatures.rb
40
+ - mglmnet.rb
41
+ - msm.rb
42
+ - midxmine.rb
43
+ extensions:
44
+ - ext/sketchsortrun/extconf.rb
45
+ extra_rdoc_files: []
46
+ files:
47
+ - ext/sketchsortrun/extconf.rb
48
+ - ext/sketchsortrun/Main.cpp
49
+ - ext/sketchsortrun/Main.hpp
50
+ - ext/sketchsortrun/SketchSort.cpp
51
+ - ext/sketchsortrun/SketchSort.hpp
52
+ - ext/sketchsortrun/sketchsortrun.cpp
53
+ - lib/nysol/mining.rb
54
+ - bin/mnb.rb
55
+ - bin/mbopt.rb
56
+ - bin/mburst.rb
57
+ - bin/mgpmetis.rb
58
+ - bin/mnetsimile.rb
59
+ - bin/mgfeatures.rb
60
+ - bin/msketchsort.rb
61
+ - bin/mnewman.rb
62
+ - bin/mgnfeatures.rb
63
+ - bin/msm.rb
64
+ - bin/mglmnet.rb
65
+ - bin/midxmine.rb
66
+ homepage: http://www.nysol.jp/
67
+ licenses: []
68
+ metadata: {}
69
+ post_install_message:
70
+ rdoc_options: []
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ required_rubygems_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ requirements: []
84
+ rubyforge_project:
85
+ rubygems_version: 2.0.14
86
+ signing_key:
87
+ specification_version: 4
88
+ summary: nysol mining tools
89
+ test_files: []