rocker 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +19 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +57 -0
- data/Rakefile +25 -0
- data/ext/rocker/auc_info.h +51 -0
- data/ext/rocker/constants.h +39 -0
- data/ext/rocker/extconf.rb +15 -0
- data/ext/rocker/fetcher.h +64 -0
- data/ext/rocker/line_input_iterator.h +114 -0
- data/ext/rocker/rocker.cpp +113 -0
- data/ext/rocker/rocker.h +227 -0
- data/ext/rocker/updater.h +86 -0
- data/lib/rocker.rb +8 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_rocker.rb +11 -0
- data/test/test_rocker_extn.rb +21 -0
- metadata +122 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
PostInstall.txt
|
4
|
+
README.rdoc
|
5
|
+
Rakefile
|
6
|
+
lib/rocker.rb
|
7
|
+
script/console
|
8
|
+
script/destroy
|
9
|
+
script/generate
|
10
|
+
test/test_helper.rb
|
11
|
+
test/test_rocker.rb
|
12
|
+
ext/rocker/extconf.rb
|
13
|
+
ext/rocker/auc_info.h
|
14
|
+
ext/rocker/constants.h
|
15
|
+
ext/rocker/fetcher.h
|
16
|
+
ext/rocker/updater.h
|
17
|
+
ext/rocker/line_input_iterator.h
|
18
|
+
ext/rocker/rocker.h
|
19
|
+
ext/rocker/rocker.cpp
|
data/PostInstall.txt
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
= rocker
|
2
|
+
|
3
|
+
* http://github.com/MarcotteLabGit/rocker
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
This gem is very fast C++ code for calculating AUCs on results of cross-validation.
|
8
|
+
|
9
|
+
It is specific to the crossval database schema, which has not been released yet.
|
10
|
+
|
11
|
+
Chances are you will not find this very useful unless you are the author.
|
12
|
+
|
13
|
+
It is in gem form to ensure that each lab machine can compile its own arch-specific
|
14
|
+
version.
|
15
|
+
|
16
|
+
== FEATURES/PROBLEMS:
|
17
|
+
|
18
|
+
* There is no real reason this couldn't work for other schemas, but an adapter
|
19
|
+
would have to be generated. I have no motivation to do this, but if you are
|
20
|
+
interested, please feel free to get in touch.
|
21
|
+
|
22
|
+
== SYNOPSIS:
|
23
|
+
|
24
|
+
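  require 'rocker'

  # Arguments: PostgreSQL connection string, matrix id, experiment id.
  # Run from the directory containing the per-phenotype result files.
  rocker = Rocker.new("dbname=crossval_development user=youruser password=yourpass", 1, 167)
  puts rocker.mean_auc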
|
25
|
+
|
26
|
+
== REQUIREMENTS:
|
27
|
+
|
28
|
+
* crossval
|
29
|
+
|
30
|
+
== INSTALL:
|
31
|
+
|
32
|
+
* sudo gem install rocker, most likely, but I haven't tried it yet.
|
33
|
+
|
34
|
+
== LICENSE:
|
35
|
+
|
36
|
+
(The MIT License)
|
37
|
+
|
38
|
+
Copyright (c) 2010 John Woods
|
39
|
+
|
40
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
41
|
+
a copy of this software and associated documentation files (the
|
42
|
+
'Software'), to deal in the Software without restriction, including
|
43
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
44
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
45
|
+
permit persons to whom the Software is furnished to do so, subject to
|
46
|
+
the following conditions:
|
47
|
+
|
48
|
+
The above copyright notice and this permission notice shall be
|
49
|
+
included in all copies or substantial portions of the Software.
|
50
|
+
|
51
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
52
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
53
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
54
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
55
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
56
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
57
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'rubygems'
gem 'hoe', '>= 2.1.0'
require 'hoe'
require 'fileutils'
require './lib/rocker'

Hoe.plugin :newgem
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
$hoe = Hoe.spec 'rocker' do
  self.developer 'John Woods', 'john.woods@marcottelab.org'

  # Use the *contents* of PostInstall.txt as the post-install message.
  # Assigning the bare file name made RubyGems print the literal text
  # "PostInstall.txt" after installation.
  post_install_file = File.join(File.dirname(__FILE__), 'PostInstall.txt')
  if File.exist?(post_install_file)
    self.post_install_message = File.read(post_install_file)
  end
  # self.extra_deps = [['activesupport','>= 2.0.2']]

end

require 'newgem/tasks'
Dir['tasks/**/*.rake'].each { |t| load t }

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
# task :default => [:spec, :features]
|
@@ -0,0 +1,51 @@
|
|
1
|
+
#ifndef AUC_INFO_H
|
2
|
+
# define AUC_INFO_H
|
3
|
+
|
4
|
+
#include <string>
|
5
|
+
#include <sstream>
|
6
|
+
#include <iostream>
|
7
|
+
|
8
|
+
typedef unsigned int uint;
|
9
|
+
|
10
|
+
|
11
|
+
using std::ostringstream;
|
12
|
+
using std::string;
|
13
|
+
using std::ostream;
|
14
|
+
|
15
|
+
const string AUC_COLUMNS = "(experiment_id, column, auc, true_positives, false_positives, true_negatives, false_negatives)";
|
16
|
+
|
17
|
+
// One row of ROC results: the area under the curve plus the four
// confusion-matrix counters. Knows how to render itself as part of a SQL
// INSERT ... VALUES list.
class auc_info {
public:
  double auc; // area under the ROC curve
  uint tp;    // true positives
  uint fp;    // false positives
  uint tn;    // true negatives
  uint fn;    // false negatives

  // Constructor. Every field defaults to zero.
  auc_info(double area_under_curve = 0, uint true_positives = 0, uint false_positives = 0, uint true_negatives = 0, uint false_negatives = 0)
  : auc(area_under_curve), tp(true_positives), fp(false_positives), tn(true_negatives), fn(false_negatives) { }

  // No user-declared destructor: the class owns no resources, so the
  // compiler-generated copy/assignment/destruction are all correct.

  // Convert to a portion of a SQL insertion (string): "auc, tp, fp, tn, fn".
  std::string to_s() const {
    std::ostringstream s;
    // The stream default of 6 significant digits silently truncates the AUC.
    // 17 digits is enough for an IEEE-754 double to survive a text round trip;
    // short values such as 0.5 still print as "0.5".
    s.precision(17);
    s << auc << ", " << tp << ", " << fp << ", " << tn << ", " << fn;
    return s.str();
  }

  // Full parenthesised VALUES tuple: "(experiment_id, j, auc, tp, fp, tn, fn)".
  std::string entry(uint experiment_id, uint j) const {
    std::ostringstream s;
    s << '(' << experiment_id << ", " << j << ", " << to_s() << ')';
    return s.str();
  }
};
|
44
|
+
|
45
|
+
// Probably not necessary.
|
46
|
+
std::ostream& operator<<(std::ostream& out, const auc_info& rhs) {
|
47
|
+
out << rhs.to_s();
|
48
|
+
return out;
|
49
|
+
}
|
50
|
+
|
51
|
+
#endif
|
@@ -0,0 +1,39 @@
|
|
1
|
+
// DATABASE CONSTANTS AND INCLUDES
|
2
|
+
#include <string>
|
3
|
+
#include <sstream>
|
4
|
+
#include <pqxx/connection.hxx>
|
5
|
+
#include <pqxx/transaction.hxx>
|
6
|
+
|
7
|
+
|
8
|
+
// DATABASE CONSTANTS
|
9
|
+
const std::string DBNAME = "crossval_development";
|
10
|
+
const std::string USER = "jwoods";
|
11
|
+
const std::string PASSWORD = "youwish1";
|
12
|
+
const std::string READ_TRANSACTION = "ReadTransaction";
|
13
|
+
const std::string WRITE_TRANSACTION = "WriteTransaction";
|
14
|
+
|
15
|
+
// RESULTS CONSTANTS
|
16
|
+
const std::string ROCKER_VERSION = "0.0.1";
|
17
|
+
|
18
|
+
class database_string {
|
19
|
+
public:
|
20
|
+
database_string(std::string dbn = DBNAME, std::string u = USER, std::string p = PASSWORD) : dbname(dbn), user(u), password(p) { }
|
21
|
+
~database_string() { }
|
22
|
+
|
23
|
+
std::string dbname;
|
24
|
+
std::string user;
|
25
|
+
std::string password;
|
26
|
+
|
27
|
+
std::string operator()() {
|
28
|
+
std::ostringstream arg;
|
29
|
+
arg << "dbname=" << dbname << " user=" << user << " password=" << password;
|
30
|
+
return arg.str();
|
31
|
+
}
|
32
|
+
};
|
33
|
+
|
34
|
+
|
35
|
+
// USED TO GENERATE DBARG CONSTANT:
|
36
|
+
// Convenience wrapper: build a connection string from its three parts.
// Marked inline because it is defined in a header; a non-inline definition
// here causes multiple-definition link errors as soon as two translation
// units include this file.
inline std::string make_db_argument(const std::string& dbname, const std::string& user, const std::string& password) {
  return database_string(dbname, user, password)();
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# Build configuration for the rocker C++ extension (uses Rice's mkmf wrapper).
require 'rubygems'
require 'mkmf-rice'

dir_config("rocker")
dir_config("boost")

have_library("stdc++")
have_library("pqxx")
# have_library("boost")
have_library("boost_filesystem")

# Define RUBY_19 only on Ruby 1.9.x. The pattern is anchored and the dot is
# escaped: the old /1.9/ also matched versions such as "2.1.9" (and any
# character in place of the dot).
if RUBY_VERSION =~ /\A1\.9(\.|\z)/
  $CPPFLAGS += " -DRUBY_19"
end

create_makefile('rocker')
|
@@ -0,0 +1,64 @@
|
|
1
|
+
#include <iostream>
|
2
|
+
#include <set>
|
3
|
+
#include <vector>
|
4
|
+
#include <string>
|
5
|
+
#include <sstream>
|
6
|
+
#include <pqxx/transactor.hxx>
|
7
|
+
#include <pqxx/result.hxx>
|
8
|
+
|
9
|
+
using std::cout;
|
10
|
+
using std::cerr;
|
11
|
+
using std::endl;
|
12
|
+
using std::set;
|
13
|
+
using std::vector;
|
14
|
+
using std::string;
|
15
|
+
using std::ostringstream;
|
16
|
+
using pqxx::transactor;
|
17
|
+
using pqxx::result;
|
18
|
+
|
19
|
+
typedef unsigned int uint;
|
20
|
+
|
21
|
+
|
22
|
+
// Transactor that loads, for one matrix, the set of gene ids recorded for each
// column. After operator() runs, known_correct[j] holds the `i` values of all
// 'Cell' entries whose column is j.
class Fetcher : public transactor <> {
public:
  // Ids start at zero instead of being left uninitialised.
  Fetcher() : transactor<>("Fetcher"), matrix_id(0), experiment_id(0) {}

  uint matrix_id;
  uint experiment_id;
  vector< set<uint> > known_correct;
  string query;

  // Run the query inside transaction T and rebuild known_correct.
  // On a SQL error the problem is reported on stderr and known_correct is
  // left unchanged.
  void operator()(argument_type &T) {
    query = make_known_correct_query();

    try {
      const result R = T.exec(query);

      // Start with one slot per row, as before, but grow on demand below.
      vector< set<uint> > known(R.size());

      // Get the row and add it to the results set
      for (result::const_iterator it = R.begin(); it != R.end(); ++it) {
        uint i = 0;
        uint j = 0;
        (*it)[1].to(j); // Get column
        (*it)[2].to(i); // Get gene

        // j is a column id, not a row number, so it can be >= the row count;
        // indexing without this check wrote past the end of the vector.
        if (j >= known.size()) known.resize(static_cast<size_t>(j) + 1);
        known[j].insert(i);
      }

      known_correct.swap(known);

    } catch (const pqxx::sql_error& e) { // by reference: no copy, no slicing
      cerr << "SQL error in Fetcher transactor." << endl;
      cerr << "Query: " << e.query() << endl;
      cerr << "Error: " << e.what() << endl;
    }
  }

protected:
  // SQL selecting (id, j, i) for every 'Cell' entry of matrix_id.
  string make_known_correct_query() const {
    ostringstream q;
    q << "SELECT id, j, i FROM entries WHERE matrix_id = " << matrix_id
      << " AND type = 'Cell' ORDER BY j,i;";
    return q.str();
  }
};
|
@@ -0,0 +1,114 @@
|
|
1
|
+
|
2
|
+
#include <iterator>
|
3
|
+
#include <istream>
|
4
|
+
#include <iostream>
|
5
|
+
#include <sstream>
|
6
|
+
#include <string>
|
7
|
+
#include <cassert>
|
8
|
+
#include <utility>
|
9
|
+
// #include <boost/lexical_cast.hpp>
|
10
|
+
|
11
|
+
// using boost::lexical_cast;
|
12
|
+
|
13
|
+
// Input iterator that yields a stream one line at a time.
//
// A default-constructed iterator is the end sentinel. An iterator built from
// a stream reads the first line immediately, so it can be dereferenced right
// away and compares equal to the sentinel when the stream is empty.
template <class StringT = std::string>
class LineInputIterator
{
public:
  // Iterator traits, spelled out as member typedefs (std::iterator, which
  // used to supply them, is deprecated in C++17).
  typedef std::input_iterator_tag iterator_category;
  typedef StringT value_type;
  typedef std::ptrdiff_t difference_type;
  typedef const StringT* pointer;
  typedef const StringT& reference;

  typedef typename StringT::value_type char_type;
  typedef typename StringT::traits_type traits_type;
  typedef std::basic_istream<char_type, traits_type> istream_type;

  // End-of-input sentinel.
  LineInputIterator() : is(NULL) { }

  // Attach to a stream and perform the priming read. Without it the first
  // dereference returned an empty string and an empty stream produced one
  // bogus element.
  LineInputIterator(istream_type& input): is(&input) {
    ++*this;
  }

  const StringT& operator*() const { return value; }
  const StringT* operator->() const { return &value; }

  // Read the next line; become the end sentinel when the stream is exhausted.
  // Incrementing the end sentinel is a programming error (asserted).
  LineInputIterator<StringT>& operator++() {
    assert(is != NULL);
    if (is && !std::getline(*is, value)) {
      is = NULL;
    }
    return *this;
  }

  LineInputIterator<StringT> operator++(int) {
    LineInputIterator<StringT> prev(*this);
    ++*this;
    return prev;
  }

  // Two iterators are equal when they read from the same stream, or are both
  // at end of input.
  bool operator!=(const LineInputIterator<StringT>& other) const {
    return is != other.is;
  }

  bool operator==(const LineInputIterator<StringT>& other) const {
    return !(*this != other);
  }

protected:
  istream_type* is; // NULL once the input is exhausted
  StringT value;    // most recently read line
};
|
54
|
+
|
55
|
+
|
56
|
+
template <typename GeneT = unsigned int, typename ScoreT = double>
|
57
|
+
class GeneScoreIterator : public LineInputIterator<std::string> {
|
58
|
+
public:
|
59
|
+
typedef typename std::pair<GeneT,ScoreT> pair_type;
|
60
|
+
typedef typename std::string::value_type char_type;
|
61
|
+
typedef typename std::string::traits_type traits_type;
|
62
|
+
typedef std::basic_istream<char_type, traits_type> istream_type;
|
63
|
+
|
64
|
+
GeneScoreIterator() : is(NULL) { }
|
65
|
+
GeneScoreIterator(istream_type& is): is(&is) {
|
66
|
+
++*this; // Priming read.
|
67
|
+
}
|
68
|
+
|
69
|
+
GeneScoreIterator<GeneT,ScoreT>& operator++() {
|
70
|
+
assert(is != NULL);
|
71
|
+
|
72
|
+
if (is) {
|
73
|
+
if (std::getline(*is, value)) {
|
74
|
+
|
75
|
+
// Cast the contents of the string
|
76
|
+
std::istringstream in(value, std::istringstream::in);
|
77
|
+
in >> value_gene_score.first;
|
78
|
+
in >> value_gene_score.second;
|
79
|
+
} else {
|
80
|
+
is = NULL;
|
81
|
+
}
|
82
|
+
}
|
83
|
+
return *this;
|
84
|
+
}
|
85
|
+
|
86
|
+
GeneScoreIterator<GeneT,ScoreT> operator++(int) {
|
87
|
+
GeneScoreIterator<std::string> prev(*this);
|
88
|
+
++*this;
|
89
|
+
return prev;
|
90
|
+
}
|
91
|
+
|
92
|
+
// De-reference
|
93
|
+
const pair_type operator*() const {
|
94
|
+
return value_gene_score;
|
95
|
+
}
|
96
|
+
const pair_type* operator->() const {
|
97
|
+
return &value_gene_score;
|
98
|
+
}
|
99
|
+
|
100
|
+
bool operator!=(const GeneScoreIterator<GeneT,ScoreT>& other) const {
|
101
|
+
return is != other.is;
|
102
|
+
}
|
103
|
+
|
104
|
+
bool operator==(const GeneScoreIterator<GeneT,ScoreT>& other) const {
|
105
|
+
return !(*this != other);
|
106
|
+
}
|
107
|
+
|
108
|
+
const std::string test_value() const { return value; }
|
109
|
+
|
110
|
+
protected:
|
111
|
+
istream_type* is;
|
112
|
+
std::string value;
|
113
|
+
pair_type value_gene_score;
|
114
|
+
};
|
@@ -0,0 +1,113 @@
|
|
1
|
+
/* Rocker gem C++ extension
|
2
|
+
* Part of crossval, in the Phenolog project
|
3
|
+
* (C) John O. Woods, The Marcotte Lab, 2010
|
4
|
+
*
|
5
|
+
* Requires boost, boost_filesystem, rice (Ruby in C++ Extension), and of course
|
6
|
+
* Ruby (1.8). Use at your own risk if you're not a lab member!
|
7
|
+
*
|
8
|
+
* To compile, run irb or script/console and do:
|
9
|
+
* require 'extconf'
|
10
|
+
*
|
11
|
+
* Then leave the console, and type:
|
12
|
+
* make
|
13
|
+
*
|
14
|
+
* You will then load the module from within the ruby shell using:
|
15
|
+
* require 'rocker'
|
16
|
+
*
|
17
|
+
* Instantiate using:
|
18
|
+
 *  Rocker.new("dbname=crossval_production user=youruser password=yourpass", 1, 167)
|
19
|
+
*
|
20
|
+
* (That connects to the specified PostgreSQL database and autoloads matrix 1
|
21
|
+
* data. Updates will be made to experiment 167. This is just an example.)
|
22
|
+
*/
|
23
|
+
|
24
|
+
// g++ -I/usr/include -I/usr/local/include -L/usr/lib -L/usr/local/lib -lpqxx -lboost_filesystem rocker.cpp -o rocker
|
25
|
+
#include <rice/Data_Type.hpp>
|
26
|
+
#include <rice/Constructor.hpp>
|
27
|
+
|
28
|
+
#include <iostream>
|
29
|
+
#include <fstream>
|
30
|
+
#include <cstdlib>
|
31
|
+
#include <string>
|
32
|
+
using std::cout;
|
33
|
+
using std::endl;
|
34
|
+
using namespace Rice;
|
35
|
+
|
36
|
+
#include "rocker.h"
|
37
|
+
|
38
|
+
//#include "line_input_iterator.h"
|
39
|
+
|
40
|
+
typedef LineInputIterator<std::string> line_input_iterator;
|
41
|
+
|
42
|
+
|
43
|
+
extern "C"
|
44
|
+
void Init_rocker() {
|
45
|
+
|
46
|
+
// Expose Rocker class to Ruby
|
47
|
+
//database_string dbarg(DBNAME, USER, PASSWORD);
|
48
|
+
Data_Type<Rocker> rb_cRocker =
|
49
|
+
define_class<Rocker>("Rocker")
|
50
|
+
.define_constructor(Constructor<Rocker,std::string,uint,uint>())
|
51
|
+
.define_method("process_results", &Rocker::process_results)
|
52
|
+
.define_method("fetch", &Rocker::fetch, (Arg("j")))
|
53
|
+
.define_method("calculate_statistic",
|
54
|
+
&Rocker::calculate_statistic,
|
55
|
+
(Arg("j"), Arg("threshold") = (double)(0.0)))
|
56
|
+
.define_method("read_candidates", &Rocker::read_candidates, (Arg("j")))
|
57
|
+
.define_method("mean_auc", &Rocker::mean_auc);
|
58
|
+
|
59
|
+
}
|
60
|
+
|
61
|
+
extern "C"
|
62
|
+
void Init_DatabaseString() {
|
63
|
+
Data_Type<database_string> rb_cDatabaseString =
|
64
|
+
define_class<database_string>("DatabaseString")
|
65
|
+
.define_constructor(Constructor<database_string,std::string,std::string,std::string>())
|
66
|
+
.define_method("to_s", &database_string::operator());
|
67
|
+
}
|
68
|
+
|
69
|
+
// Uncomment to test -- probably unnecessary now that the Ruby extensions are
|
70
|
+
// built.
|
71
|
+
/*
|
72
|
+
int main(int argc, char* argv[]) {
|
73
|
+
database_string dbarg(DBNAME, USER, PASSWORD);
|
74
|
+
Rocker rocker(dbarg(), 1, 167);
|
75
|
+
|
76
|
+
uint j = 0, m = 0, x = 0;
|
77
|
+
cout << "Args: " << argc << endl;
|
78
|
+
if (argc < 4) {
|
79
|
+
cerr << "Please provide matrix id, experiment id, and column as arguments." << endl;
|
80
|
+
return EXIT_FAILURE;
|
81
|
+
} else {
|
82
|
+
// Read command line arguments.
|
83
|
+
m = atoi(argv[1]);
|
84
|
+
x = atoi(argv[2]);
|
85
|
+
j = atoi(argv[3]);
|
86
|
+
}
|
87
|
+
|
88
|
+
set<uint> gene_ids = rocker.fetch(42);
|
89
|
+
for (set<uint>::const_iterator i = gene_ids.begin(); i != gene_ids.end(); ++i)
|
90
|
+
cout << *i << endl;
|
91
|
+
|
92
|
+
cout << "---" << endl;
|
93
|
+
gene_score_list candidates = rocker.read_candidates(42);
|
94
|
+
for (gene_score_list::iterator i = candidates.begin(); i != candidates.end(); ++i)
|
95
|
+
cout << "hi:\t" << i->first << '\t' << i->second << endl;
|
96
|
+
|
97
|
+
cout << "---" << endl;
|
98
|
+
auc_info test = rocker.calculate_statistic(42);
|
99
|
+
cout << "auc\t=" << test.auc << endl;
|
100
|
+
cout << " tp\t=" << test.tp << endl;
|
101
|
+
cout << " fp\t=" << test.fp << endl;
|
102
|
+
cout << " tn\t=" << test.tn << endl;
|
103
|
+
cout << " fn\t=" << test.fn << endl;
|
104
|
+
|
105
|
+
cerr << "m=" << m << endl;
|
106
|
+
cerr << "x=" << x << endl;
|
107
|
+
cerr << "j=" << j << endl;
|
108
|
+
|
109
|
+
rocker.process_results();
|
110
|
+
|
111
|
+
return EXIT_SUCCESS;
|
112
|
+
}
|
113
|
+
*/
|
data/ext/rocker/rocker.h
ADDED
@@ -0,0 +1,227 @@
|
|
1
|
+
#include "constants.h"
|
2
|
+
#include "line_input_iterator.h"
|
3
|
+
#include "fetcher.h"
|
4
|
+
#include "updater.h"
|
5
|
+
#include "auc_info.h"
|
6
|
+
|
7
|
+
#include <limits>
#include <list>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <boost/filesystem.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/lexical_cast.hpp>
|
14
|
+
|
15
|
+
using boost::filesystem::exists;
|
16
|
+
using boost::filesystem::ifstream;
|
17
|
+
using std::string;
|
18
|
+
using std::list;
|
19
|
+
using std::ostringstream;
|
20
|
+
|
21
|
+
// typedef unordered_map<uint, double> gene_score_map;
|
22
|
+
typedef list<std::pair<uint,double> > gene_score_list;
|
23
|
+
typedef GeneScoreIterator<unsigned int,double> gene_score_iterator;
|
24
|
+
|
25
|
+
|
26
|
+
string path_to_s(const boost::filesystem::path& p) {
|
27
|
+
return p.string().substr(2);
|
28
|
+
}
|
29
|
+
|
30
|
+
// Interpret a file name as a phenotype id.
// On success stores the id in `n` and returns true. Otherwise reports the
// name on stderr and returns false, leaving `n` untouched.
// Marked inline because it is defined in a header.
inline bool path_to_uint(const boost::filesystem::path& p, uint& n) {
  const std::string s = path_to_s(p);

  // Require plain decimal digits. lexical_cast to an unsigned type accepts a
  // leading sign and wraps negative input, so "-1" used to be read as a huge
  // valid id.
  const bool all_digits =
    !s.empty() && s.find_first_not_of("0123456789") == std::string::npos;

  if (all_digits) {
    try {
      n = boost::lexical_cast<uint>(s); // still throws if the value overflows
      return true;
    } catch (const boost::bad_lexical_cast&) {
      // fall through to the shared error report
    }
  }

  std::cerr << "Unable to read phenotype file '" << s << "': not numeric" << std::endl;
  return false;
}
|
43
|
+
|
44
|
+
|
45
|
+
class Rocker {
|
46
|
+
public:
|
47
|
+
|
48
|
+
// Connect to the database and create the read transaction
|
49
|
+
Rocker(string dbarg, uint m_id, uint e_id) : c(dbarg), mean_auc_(0.0) {
|
50
|
+
// Make sure the fetcher knows which matrix to restrict queries to.
|
51
|
+
fetcher.matrix_id = m_id;
|
52
|
+
|
53
|
+
// Set up a transaction
|
54
|
+
action = new pqxx::transaction<>(c, READ_TRANSACTION);
|
55
|
+
|
56
|
+
fetcher(*action); // Perform the fetch.
|
57
|
+
|
58
|
+
delete action;
|
59
|
+
|
60
|
+
updater.experiment_id = e_id;
|
61
|
+
updater.aucs = process_results();
|
62
|
+
|
63
|
+
action = new pqxx::transaction<>(c, WRITE_TRANSACTION);
|
64
|
+
|
65
|
+
updater(*action);
|
66
|
+
|
67
|
+
delete action;
|
68
|
+
}
|
69
|
+
|
70
|
+
|
71
|
+
//
|
72
|
+
~Rocker() { }
|
73
|
+
|
74
|
+
|
75
|
+
// Return the mean AUC calculated -- requires that process_results was called,
|
76
|
+
// which happens in the constructor, so it's okay.
|
77
|
+
double mean_auc() { return mean_auc_; }
|
78
|
+
|
79
|
+
// Go through the results directory
|
80
|
+
map<uint,auc_info> process_results() {
|
81
|
+
using namespace boost::filesystem;
|
82
|
+
map<uint, auc_info> rocs;
|
83
|
+
|
84
|
+
double temp_auc_accum = 0.0; // Keep track of AUCs so we can get a mean
|
85
|
+
size_t divide_by = 1;
|
86
|
+
|
87
|
+
// Look at all files in the directory
|
88
|
+
for (basic_directory_iterator<path> jit(path(".")); jit != directory_iterator(); ++jit) {
|
89
|
+
uint j = 0;
|
90
|
+
if (path_to_uint(jit->path(), j)) {
|
91
|
+
// Read the file and calculate AUCs.
|
92
|
+
rocs[j] = calculate_statistic(j);
|
93
|
+
temp_auc_accum += rocs[j].auc;
|
94
|
+
++divide_by;
|
95
|
+
|
96
|
+
cout << "AUC: " << rocs[j].auc << endl;
|
97
|
+
}
|
98
|
+
}
|
99
|
+
|
100
|
+
// Calculate the mean AUC
|
101
|
+
if (divide_by > 0)
|
102
|
+
mean_auc_ = temp_auc_accum / (double)(divide_by);
|
103
|
+
else
|
104
|
+
mean_auc_ = 0;
|
105
|
+
|
106
|
+
return rocs;
|
107
|
+
}
|
108
|
+
|
109
|
+
|
110
|
+
// Get genes with a specific phenotype association (phenotype id = j).
|
111
|
+
set<uint> fetch(uint j) const {
|
112
|
+
return fetcher.known_correct[j];
|
113
|
+
}
|
114
|
+
|
115
|
+
|
116
|
+
// For some phenotype j, determine AUC, fp, tp, fn, tn, etc.
|
117
|
+
auc_info calculate_statistic(uint j, double threshold = 0.0) const {
|
118
|
+
set<uint> known_correct = fetch(j);
|
119
|
+
gene_score_list candidates = read_candidates(j);
|
120
|
+
//cerr << "Size of known_correct: " << known_correct.size() << endl;
|
121
|
+
|
122
|
+
// Attempted transcription of code from Ruby into C++, after having taken
|
123
|
+
// it from Python the first time.
|
124
|
+
// No guarantees!
|
125
|
+
vector<size_t> t;
|
126
|
+
t.reserve(candidates.size()+1); t.push_back(0);
|
127
|
+
vector<size_t> f = t;
|
128
|
+
|
129
|
+
auc_info result;
|
130
|
+
|
131
|
+
for (gene_score_list::const_iterator i = candidates.begin(); i != candidates.end(); ++i) {
|
132
|
+
if (known_correct.find(i->first) != known_correct.end()) {
|
133
|
+
t.push_back( *(t.rbegin()) + 1 );
|
134
|
+
f.push_back( *(f.rbegin()) );
|
135
|
+
|
136
|
+
// Update true positives / false negatives
|
137
|
+
if (i->second > threshold) result.tp++;
|
138
|
+
else result.fn++;
|
139
|
+
|
140
|
+
} else {
|
141
|
+
t.push_back( *(t.rbegin()) );
|
142
|
+
f.push_back( *(f.rbegin()) + 1 );
|
143
|
+
|
144
|
+
// Update false positives / true negatives
|
145
|
+
if (i->second > threshold) result.fp++;
|
146
|
+
else result.tn++;
|
147
|
+
}
|
148
|
+
|
149
|
+
}
|
150
|
+
|
151
|
+
vector<double> tpl; tpl.reserve(candidates.size()+1);
|
152
|
+
// vector<double> fpl = tpl;
|
153
|
+
size_t last_f = 0;
|
154
|
+
for (size_t i = 0; i < t.size(); ++i) {
|
155
|
+
if (f[i] > last_f) {
|
156
|
+
tpl.push_back(t[i]);
|
157
|
+
// fpl.push_back(f[i]);
|
158
|
+
last_f = f[i];
|
159
|
+
}
|
160
|
+
}
|
161
|
+
|
162
|
+
size_t last_t = *(t.rbegin());
|
163
|
+
double sum = 0.0;
|
164
|
+
// Divide each by the last item in that array
|
165
|
+
// Also keep track of the sum for calculating the final AUC value
|
166
|
+
for (size_t i = 0; i < tpl.size(); ++i) {
|
167
|
+
// tpl[i] /= (double)(last_t);
|
168
|
+
// fpl[i] /= (double)(last_f);
|
169
|
+
sum += tpl[i];
|
170
|
+
}
|
171
|
+
|
172
|
+
result.auc = (sum / (double)(last_t)) / (double)(tpl.size());
|
173
|
+
if (tpl.size() == 0) result.auc = 0; // prevent NaN return.
|
174
|
+
|
175
|
+
return result;
|
176
|
+
}
|
177
|
+
|
178
|
+
|
179
|
+
// Assume we're in the correct directory and read the correct phenotype file
|
180
|
+
// First two lines are comment.
|
181
|
+
// Assumes the files are pre-sorted by sortall.pl (by column 2 descending).
|
182
|
+
// column 1 is the gene, column 2 is the prediction score (higher is better).
|
183
|
+
gene_score_list read_candidates(uint j) const {
|
184
|
+
ostringstream fn; fn << j;
|
185
|
+
boost::filesystem::path filepath(fn.str());
|
186
|
+
//cerr << "Opening file: '" << filepath << "'" << endl;
|
187
|
+
|
188
|
+
if (!exists(filepath)) {
|
189
|
+
//cerr << "Error: File '" << filepath << "' does not exist." << endl;
|
190
|
+
throw;
|
191
|
+
}
|
192
|
+
|
193
|
+
gene_score_list res;
|
194
|
+
|
195
|
+
// Open a filestream
|
196
|
+
ifstream fin(filepath);
|
197
|
+
|
198
|
+
// Ignore two header lines
|
199
|
+
fin.ignore(500, '\n');
|
200
|
+
fin.ignore(500, '\n');
|
201
|
+
//cout << "Next character is: '" << fin.peek() << "'" << endl;
|
202
|
+
|
203
|
+
// Iterate through the gene-score pairs in the file
|
204
|
+
for (gene_score_iterator gsit(fin); gsit != gene_score_iterator(); ++gsit) {
|
205
|
+
//cerr << "Adding " << gsit->first << '\t' << gsit->second << endl;
|
206
|
+
res.push_back(*gsit);
|
207
|
+
}
|
208
|
+
|
209
|
+
fin.close();
|
210
|
+
|
211
|
+
return res;
|
212
|
+
}
|
213
|
+
|
214
|
+
|
215
|
+
protected:
|
216
|
+
|
217
|
+
uint matrix_id;
|
218
|
+
uint experiment_id;
|
219
|
+
uint current_j;
|
220
|
+
pqxx::connection c;
|
221
|
+
pqxx::transaction<>* action;
|
222
|
+
|
223
|
+
double mean_auc_;
|
224
|
+
|
225
|
+
Fetcher fetcher;
|
226
|
+
Updater updater;
|
227
|
+
};
|
@@ -0,0 +1,86 @@
|
|
1
|
+
#include "auc_info.h"
|
2
|
+
|
3
|
+
#include <iostream>
|
4
|
+
#include <set>
|
5
|
+
#include <vector>
|
6
|
+
#include <string>
|
7
|
+
#include <list>
|
8
|
+
#include <map>
|
9
|
+
#include <sstream>
|
10
|
+
#include <pqxx/transactor.hxx>
|
11
|
+
#include <pqxx/result.hxx>
|
12
|
+
#include <boost/algorithm/string/join.hpp>
|
13
|
+
|
14
|
+
using std::cout;
|
15
|
+
using std::cerr;
|
16
|
+
using std::endl;
|
17
|
+
using std::set;
|
18
|
+
using std::vector;
|
19
|
+
using std::string;
|
20
|
+
using std::list;
|
21
|
+
using std::map;
|
22
|
+
using std::ostringstream;
|
23
|
+
using pqxx::transactor;
|
24
|
+
using pqxx::result;
|
25
|
+
using boost::algorithm::join;
|
26
|
+
|
27
|
+
typedef unsigned int uint;
|
28
|
+
|
29
|
+
|
30
|
+
//std::string join(const SequenceT<std::string>& strings, std::string join_str = "") {
|
31
|
+
// std::ostringstream o;
|
32
|
+
//
|
33
|
+
// SequenceT<string>::const_iterator i = strings.begin();
|
34
|
+
// o << *i;
|
35
|
+
// ++i;
|
36
|
+
//
|
37
|
+
// for (; i != strings.end(); ++i) {
|
38
|
+
// o << join_str << *i;
|
39
|
+
// }
|
40
|
+
//
|
41
|
+
// return o.str();
|
42
|
+
//}
|
43
|
+
|
44
|
+
|
45
|
+
|
46
|
+
// Transactor that turns the computed AUCs into a single multi-row
// INSERT INTO rocs statement.
//
// NOTE: executing the statement is still commented out below, so operator()
// currently only prints the query to stdout.
class Updater : public transactor <> {
public:
  // experiment_id starts at zero instead of being left uninitialised.
  Updater() : transactor<>("Updater"), experiment_id(0) {}

  uint experiment_id;
  map<uint,auc_info> aucs; // phenotype id -> statistics to store
  string query;

  void operator()(argument_type &T) {
    (void)T; // unused while the exec below stays disabled

    if (aucs.empty()) {
      cerr << "No updates necessary." << endl;
      return;
    }

    query = make_known_correct_query();

    try {
      //result R = T.exec(query);
      cout << "Query:" << endl;
      cout << query << endl;
    } catch (const pqxx::sql_error& e) { // by reference: no copy, no slicing
      // Message used to say "Fetcher" (copy-paste), which pointed at the
      // wrong transactor.
      cerr << "SQL error in Updater transactor." << endl;
      cerr << "Query: " << e.query() << endl;
      cerr << "Error: " << e.what() << endl;
    }
  }

protected:
  // Build the INSERT statement: one "(experiment_id, column, ...)" tuple per
  // entry of aucs, separated by ",\n". (The name is historical.)
  string make_known_correct_query() const {
    ostringstream q;
    q << "INSERT INTO rocs " << AUC_COLUMNS << " VALUES \n";
    for (map<uint,auc_info>::const_iterator i = aucs.begin(); i != aucs.end(); ++i) {
      if (i != aucs.begin()) q << ",\n";
      q << i->second.entry(experiment_id, i->first);
    }
    q << ';';
    return q.str();
  }
};
|
data/lib/rocker.rb
ADDED
data/script/console
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
#!/usr/bin/env ruby
# File: script/console
# Starts irb with tab completion and the rocker library preloaded.

# Windows builds of Ruby ship irb as a batch file. (?:...) is a non-capturing
# group; the previous "(:?" was a typo that matched an optional colon.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.dirname(__FILE__) + '/../lib/rocker.rb'}"
puts "Loading rocker gem"
exec "#{irb} #{libs} --simple-prompt"
|
data/script/destroy
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs RubiGen's "destroy" script for this gem.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try a plain require first; fall back to loading RubyGems when rubigen is
# not already on the load path.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Destroy.new.run(ARGV)
|
data/script/generate
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs RubiGen's "generate" script for this gem.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try a plain require first; fall back to loading RubyGems when rubigen is
# not already on the load path.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Generate.new.run(ARGV)
|
data/test/test_helper.rb
ADDED
data/test/test_rocker.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
|
3
|
+
$:.unshift File.dirname(__FILE__) + "/../ext/rocker"
|
4
|
+
require "rocker.so"
|
5
|
+
|
6
|
+
# Build the PostgreSQL connection string handed to Rocker.new.
# The defaults point at the development database.
def database_string(dbn = "crossval_development", u = "jwoods", p = "youwish1")
  return "dbname=#{dbn} user=#{u} password=#{p}"
end
|
9
|
+
|
10
|
+
# Smoke tests for the compiled extension. These need a reachable crossval
# database and must run from a directory of phenotype result files.
class TestRockerExtn < Test::Unit::TestCase
  # The constructor does the whole fetch/process/update cycle; afterwards the
  # mean AUC must be a real number within [0, 1]. Previously the value was
  # only printed, so the test could never fail on a bad result.
  def test_working
    t = Rocker.new(database_string, 1, 167)
    x = t.mean_auc
    puts "Mean AUC was #{x}"
    assert_kind_of Float, x
    assert(x >= 0.0 && x <= 1.0, "mean AUC #{x} is outside [0, 1]")
  end

  # The version constant must exist and be non-empty.
  def test_version_string
    puts Rocker::VERSION
    assert(!Rocker::VERSION.to_s.empty?, "Rocker::VERSION is empty")
  end

end
|
metadata
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rocker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 4
|
9
|
+
version: 0.0.4
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- John Woods
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-04-23 00:00:00 -05:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: rubyforge
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 2
|
29
|
+
- 0
|
30
|
+
- 4
|
31
|
+
version: 2.0.4
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
name: hoe
|
36
|
+
prerelease: false
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 2
|
43
|
+
- 6
|
44
|
+
- 0
|
45
|
+
version: 2.6.0
|
46
|
+
type: :development
|
47
|
+
version_requirements: *id002
|
48
|
+
description: |-
|
49
|
+
This gem is very fast C++ code for calculating AUCs on results of cross-validation.
|
50
|
+
|
51
|
+
It is specific to the crossval database schema, which has not been released yet.
|
52
|
+
|
53
|
+
Chances are you will not find this very useful unless you are the author.
|
54
|
+
|
55
|
+
It is in gem form to ensure that each lab machine can compile its own arch-specific
|
56
|
+
version.
|
57
|
+
email:
|
58
|
+
- john.woods@marcottelab.org
|
59
|
+
executables: []
|
60
|
+
|
61
|
+
extensions:
|
62
|
+
- ext/rocker/extconf.rb
|
63
|
+
extra_rdoc_files:
|
64
|
+
- History.txt
|
65
|
+
- Manifest.txt
|
66
|
+
- PostInstall.txt
|
67
|
+
files:
|
68
|
+
- History.txt
|
69
|
+
- Manifest.txt
|
70
|
+
- PostInstall.txt
|
71
|
+
- README.rdoc
|
72
|
+
- Rakefile
|
73
|
+
- lib/rocker.rb
|
74
|
+
- script/console
|
75
|
+
- script/destroy
|
76
|
+
- script/generate
|
77
|
+
- test/test_helper.rb
|
78
|
+
- test/test_rocker.rb
|
79
|
+
- ext/rocker/extconf.rb
|
80
|
+
- ext/rocker/auc_info.h
|
81
|
+
- ext/rocker/constants.h
|
82
|
+
- ext/rocker/fetcher.h
|
83
|
+
- ext/rocker/updater.h
|
84
|
+
- ext/rocker/line_input_iterator.h
|
85
|
+
- ext/rocker/rocker.h
|
86
|
+
- ext/rocker/rocker.cpp
|
87
|
+
has_rdoc: true
|
88
|
+
homepage: http://github.com/MarcotteLabGit/rocker
|
89
|
+
licenses: []
|
90
|
+
|
91
|
+
post_install_message: PostInstall.txt
|
92
|
+
rdoc_options:
|
93
|
+
- --main
|
94
|
+
- README.rdoc
|
95
|
+
require_paths:
|
96
|
+
- lib
|
97
|
+
- ext/rocker
|
98
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
segments:
|
103
|
+
- 0
|
104
|
+
version: "0"
|
105
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
segments:
|
110
|
+
- 0
|
111
|
+
version: "0"
|
112
|
+
requirements: []
|
113
|
+
|
114
|
+
rubyforge_project: rocker
|
115
|
+
rubygems_version: 1.3.6
|
116
|
+
signing_key:
|
117
|
+
specification_version: 3
|
118
|
+
summary: This gem is very fast C++ code for calculating AUCs on results of cross-validation
|
119
|
+
test_files:
|
120
|
+
- test/test_helper.rb
|
121
|
+
- test/test_rocker.rb
|
122
|
+
- test/test_rocker_extn.rb
|