rocker 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +19 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +57 -0
- data/Rakefile +25 -0
- data/ext/rocker/auc_info.h +51 -0
- data/ext/rocker/constants.h +39 -0
- data/ext/rocker/extconf.rb +15 -0
- data/ext/rocker/fetcher.h +64 -0
- data/ext/rocker/line_input_iterator.h +114 -0
- data/ext/rocker/rocker.cpp +113 -0
- data/ext/rocker/rocker.h +227 -0
- data/ext/rocker/updater.h +86 -0
- data/lib/rocker.rb +8 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_rocker.rb +11 -0
- data/test/test_rocker_extn.rb +21 -0
- metadata +122 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
PostInstall.txt
|
4
|
+
README.rdoc
|
5
|
+
Rakefile
|
6
|
+
lib/rocker.rb
|
7
|
+
script/console
|
8
|
+
script/destroy
|
9
|
+
script/generate
|
10
|
+
test/test_helper.rb
|
11
|
+
test/test_rocker.rb
|
12
|
+
ext/rocker/extconf.rb
|
13
|
+
ext/rocker/auc_info.h
|
14
|
+
ext/rocker/constants.h
|
15
|
+
ext/rocker/fetcher.h
|
16
|
+
ext/rocker/updater.h
|
17
|
+
ext/rocker/line_input_iterator.h
|
18
|
+
ext/rocker/rocker.h
|
19
|
+
ext/rocker/rocker.cpp
|
data/PostInstall.txt
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
= rocker
|
2
|
+
|
3
|
+
* http://github.com/MarcotteLabGit/rocker
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
This gem is very fast C++ code for calculating AUCs on results of cross-validation.
|
8
|
+
|
9
|
+
It is specific to the crossval database schema, which has not been released yet.
|
10
|
+
|
11
|
+
Chances are you will not find this very useful unless you are the author.
|
12
|
+
|
13
|
+
It is in gem form to ensure that each lab machine can compile its own arch-specific
|
14
|
+
version.
|
15
|
+
|
16
|
+
== FEATURES/PROBLEMS:
|
17
|
+
|
18
|
+
* There is no real reason this couldn't work for other schemas, but an adapter
|
19
|
+
would have to be generated. I have no motivation to do this, but if you are
|
20
|
+
interested, please feel free to get in touch.
|
21
|
+
|
22
|
+
== SYNOPSIS:
|
23
|
+
|
24
|
+
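  require 'rocker'
  # Connect to the database, load matrix 1, and compute AUCs for experiment 167.
  rocker = Rocker.new("dbname=crossval_development user=youruser password=yourpass", 1, 167)
  puts rocker.mean_auc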
|
25
|
+
|
26
|
+
== REQUIREMENTS:
|
27
|
+
|
28
|
+
* crossval
|
29
|
+
|
30
|
+
== INSTALL:
|
31
|
+
|
32
|
+
* sudo gem install rocker, most likely, but I haven't tried it yet.
|
33
|
+
|
34
|
+
== LICENSE:
|
35
|
+
|
36
|
+
(The MIT License)
|
37
|
+
|
38
|
+
Copyright (c) 2010 John O. Woods, The Marcotte Lab
|
39
|
+
|
40
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
41
|
+
a copy of this software and associated documentation files (the
|
42
|
+
'Software'), to deal in the Software without restriction, including
|
43
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
44
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
45
|
+
permit persons to whom the Software is furnished to do so, subject to
|
46
|
+
the following conditions:
|
47
|
+
|
48
|
+
The above copyright notice and this permission notice shall be
|
49
|
+
included in all copies or substantial portions of the Software.
|
50
|
+
|
51
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
52
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
53
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
54
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
55
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
56
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
57
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'rubygems'
gem 'hoe', '>= 2.1.0'
require 'hoe'
require 'fileutils'
require './lib/rocker'

Hoe.plugin :newgem
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
$hoe = Hoe.spec 'rocker' do
  self.developer 'John Woods', 'john.woods@marcottelab.org'
  # The gem specification stores this value verbatim, so hand it the text of
  # PostInstall.txt rather than the file name itself.
  # TODO remove if post-install message not required
  self.post_install_message = File.read(File.join(File.dirname(__FILE__), 'PostInstall.txt'))
  # self.extra_deps         = [['activesupport','>= 2.0.2']]
end

require 'newgem/tasks'
Dir['tasks/**/*.rake'].each { |t| load t }

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
# task :default => [:spec, :features]
|
@@ -0,0 +1,51 @@
|
|
1
|
+
#ifndef AUC_INFO_H
|
2
|
+
# define AUC_INFO_H
|
3
|
+
|
4
|
+
#include <string>
|
5
|
+
#include <sstream>
|
6
|
+
#include <iostream>
|
7
|
+
|
8
|
+
typedef unsigned int uint;
|
9
|
+
|
10
|
+
|
11
|
+
using std::ostringstream;
|
12
|
+
using std::string;
|
13
|
+
using std::ostream;
|
14
|
+
|
15
|
+
const string AUC_COLUMNS = "(experiment_id, column, auc, true_positives, false_positives, true_negatives, false_negatives)";
|
16
|
+
|
17
|
+
// Plain value type bundling the statistics computed for one column:
// the area under the curve plus the four confusion-matrix counts.
class auc_info {
public:
  double auc;  // area under curve
  uint   tp;   // true positives
  uint   fp;   // false positives
  uint   tn;   // true negatives
  uint   fn;   // false negatives

  // Every argument defaults to zero, so auc_info() is an all-zero record.
  auc_info(double area_under_curve = 0,
           uint true_positives = 0,
           uint false_positives = 0,
           uint true_negatives = 0,
           uint false_negatives = 0)
  : auc(area_under_curve),
    tp(true_positives),
    fp(false_positives),
    tn(true_negatives),
    fn(false_negatives)
  { }

  ~auc_info() { }

  // Render the five values as a comma-separated fragment of a SQL VALUES
  // tuple, in the order auc, tp, fp, tn, fn.
  std::string to_s() const {
    std::ostringstream fields;
    fields << auc;
    fields << ", " << tp;
    fields << ", " << fp;
    fields << ", " << tn;
    fields << ", " << fn;
    return fields.str();
  }

  // Render a complete parenthesised VALUES tuple, prefixing the statistics
  // with the experiment id and the column index j.
  std::string entry(uint experiment_id, uint j) const {
    std::ostringstream row;
    row << '(' << experiment_id << ", " << j << ", ";
    row << to_s();
    row << ')';
    return row.str();
  }
};
|
44
|
+
|
45
|
+
// Probably not necessary.
|
46
|
+
std::ostream& operator<<(std::ostream& out, const auc_info& rhs) {
|
47
|
+
out << rhs.to_s();
|
48
|
+
return out;
|
49
|
+
}
|
50
|
+
|
51
|
+
#endif
|
@@ -0,0 +1,39 @@
|
|
1
|
+
// DATABASE CONSTANTS AND INCLUDES
|
2
|
+
#include <string>
|
3
|
+
#include <sstream>
|
4
|
+
#include <pqxx/connection.hxx>
|
5
|
+
#include <pqxx/transaction.hxx>
|
6
|
+
|
7
|
+
|
8
|
+
// DATABASE CONSTANTS
|
9
|
+
// DBNAME, USER and PASSWORD are the default constructor arguments of
// database_string; READ_TRANSACTION and WRITE_TRANSACTION are the names given
// to the two pqxx transactions opened by the Rocker constructor.
// NOTE(review): real-looking credentials are committed here in plain text --
// confirm they are throwaway development values.
// NOTE(review): ROCKER_VERSION reads "0.0.1" while the gem is released as
// 0.0.4 -- confirm whether the two are meant to track each other.
const std::string DBNAME = "crossval_development";
|
10
|
+
const std::string USER = "jwoods";
|
11
|
+
const std::string PASSWORD = "youwish1";
|
12
|
+
const std::string READ_TRANSACTION = "ReadTransaction";
|
13
|
+
const std::string WRITE_TRANSACTION = "WriteTransaction";
|
14
|
+
|
15
|
+
// RESULTS CONSTANTS
|
16
|
+
const std::string ROCKER_VERSION = "0.0.1";
|
17
|
+
|
18
|
+
class database_string {
|
19
|
+
public:
|
20
|
+
database_string(std::string dbn = DBNAME, std::string u = USER, std::string p = PASSWORD) : dbname(dbn), user(u), password(p) { }
|
21
|
+
~database_string() { }
|
22
|
+
|
23
|
+
std::string dbname;
|
24
|
+
std::string user;
|
25
|
+
std::string password;
|
26
|
+
|
27
|
+
std::string operator()() {
|
28
|
+
std::ostringstream arg;
|
29
|
+
arg << "dbname=" << dbname << " user=" << user << " password=" << password;
|
30
|
+
return arg.str();
|
31
|
+
}
|
32
|
+
};
|
33
|
+
|
34
|
+
|
35
|
+
// USED TO GENERATE DBARG CONSTANT:
|
36
|
+
// Convenience wrapper: format the three values as a connection string by
// building a temporary database_string and invoking it.
std::string make_db_argument(const std::string& dbname, const std::string& user, const std::string& password) {
  return database_string(dbname, user, password)();
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'rubygems'
require 'mkmf-rice'

dir_config("rocker")
dir_config("boost")

have_library("stdc++")
have_library("pqxx")
# have_library("boost")
have_library("boost_filesystem")

# Define RUBY_19 only on the 1.9 series. The pattern is anchored and the dots
# are escaped so that versions merely containing "1.9" (e.g. "2.1.9") do not
# match.
if RUBY_VERSION =~ /\A1\.9\./
  $CPPFLAGS += " -DRUBY_19"
end

create_makefile('rocker')
|
@@ -0,0 +1,64 @@
|
|
1
|
+
#include <iostream>
|
2
|
+
#include <set>
|
3
|
+
#include <vector>
|
4
|
+
#include <string>
|
5
|
+
#include <sstream>
|
6
|
+
#include <pqxx/transactor.hxx>
|
7
|
+
#include <pqxx/result.hxx>
|
8
|
+
|
9
|
+
using std::cout;
|
10
|
+
using std::cerr;
|
11
|
+
using std::endl;
|
12
|
+
using std::set;
|
13
|
+
using std::vector;
|
14
|
+
using std::string;
|
15
|
+
using std::ostringstream;
|
16
|
+
using pqxx::transactor;
|
17
|
+
using pqxx::result;
|
18
|
+
|
19
|
+
typedef unsigned int uint;
|
20
|
+
|
21
|
+
|
22
|
+
// Transactor that loads the 'Cell' entries of one matrix and groups the gene
// ids (column "i" of the query) by column index (column "j" of the query).
// Set matrix_id before invoking; the grouped result is left in known_correct
// and the SQL that was run in query.
class Fetcher : public transactor <> {
public:
  Fetcher() : transactor<>("Fetcher"), matrix_id(0), experiment_id(0) {}

  uint matrix_id;
  uint experiment_id;
  vector< set<uint> > known_correct;  // known_correct[j] = set of i values seen for column j
  string query;

  // Run the query inside transaction T and rebuild known_correct.
  // SQL errors are reported on stderr and leave known_correct unchanged.
  void operator()(argument_type &T) {
    query = make_known_correct_query();

    try {
      result R = T.exec(query);

      // Start with one slot per row, as before, but grow on demand: the
      // vector is indexed by column number j, which is not bounded by the
      // number of rows returned.
      vector< set<uint> > known(R.size());

      for (result::const_iterator it = R.begin(); it != R.end(); ++it) {
        uint i = 0;
        uint j = 0;
        (*it)[1].to(j); // Get column
        (*it)[2].to(i); // Get gene
        if (j >= known.size()) known.resize(j + 1);
        known[j].insert(i);
      }

      known_correct.swap(known);

    } catch (const pqxx::sql_error& e) {  // by reference: no copy, no slicing
      cerr << "SQL error in Fetcher transactor." << endl;
      cerr << "Query: " << e.query() << endl;
      cerr << "Error: " << e.what() << endl;
    }
  }

protected:
  // Build the SELECT restricted to the current matrix_id.
  string make_known_correct_query() const {
    ostringstream q;
    q << "SELECT id, j, i FROM entries WHERE matrix_id = " << matrix_id
      << " AND type = 'Cell' ORDER BY j,i;";
    return q.str();
  }
};
|
@@ -0,0 +1,114 @@
|
|
1
|
+
|
2
|
+
#include <iterator>
|
3
|
+
#include <istream>
|
4
|
+
#include <iostream>
|
5
|
+
#include <sstream>
|
6
|
+
#include <string>
|
7
|
+
#include <cassert>
|
8
|
+
#include <utility>
|
9
|
+
// #include <boost/lexical_cast.hpp>
|
10
|
+
|
11
|
+
// using boost::lexical_cast;
|
12
|
+
|
13
|
+
// Input iterator that yields a stream one line at a time.
// A default-constructed iterator is the end sentinel; an iterator built from
// a stream holds no line until it is incremented for the first time, and
// becomes equal to the sentinel once std::getline fails.
template <class StringT = std::string>
class LineInputIterator :
  public std::iterator<std::input_iterator_tag, StringT, std::ptrdiff_t, const StringT*, const StringT&>
{
public:
  typedef typename StringT::value_type char_type;
  typedef typename StringT::traits_type traits_type;
  typedef std::basic_istream<char_type, traits_type> istream_type;

  LineInputIterator() : is(NULL) { }
  LineInputIterator(istream_type& source) : is(&source) { }

  // Access the most recently read line.
  const StringT& operator*() const { return value; }
  const StringT* operator->() const { return &value; }

  // Read the next line; drop the stream pointer when no line could be read.
  LineInputIterator<StringT>& operator++() {
    assert(is != NULL);
    if (is == NULL) return *this;
    if (!std::getline(*is, value)) is = NULL;
    return *this;
  }

  // Post-increment: advance, returning a copy of the state before the read.
  LineInputIterator<StringT> operator++(int) {
    LineInputIterator<StringT> before(*this);
    operator++();
    return before;
  }

  // Two iterators compare equal when they refer to the same stream (or are
  // both exhausted / default-constructed).
  bool operator==(const LineInputIterator<StringT>& other) const {
    return is == other.is;
  }

  bool operator!=(const LineInputIterator<StringT>& other) const {
    return !(*this == other);
  }

protected:
  istream_type* is;
  StringT value;
};
|
54
|
+
|
55
|
+
|
56
|
+
template <typename GeneT = unsigned int, typename ScoreT = double>
|
57
|
+
class GeneScoreIterator : public LineInputIterator<std::string> {
|
58
|
+
public:
|
59
|
+
typedef typename std::pair<GeneT,ScoreT> pair_type;
|
60
|
+
typedef typename std::string::value_type char_type;
|
61
|
+
typedef typename std::string::traits_type traits_type;
|
62
|
+
typedef std::basic_istream<char_type, traits_type> istream_type;
|
63
|
+
|
64
|
+
GeneScoreIterator() : is(NULL) { }
|
65
|
+
GeneScoreIterator(istream_type& is): is(&is) {
|
66
|
+
++*this; // Priming read.
|
67
|
+
}
|
68
|
+
|
69
|
+
GeneScoreIterator<GeneT,ScoreT>& operator++() {
|
70
|
+
assert(is != NULL);
|
71
|
+
|
72
|
+
if (is) {
|
73
|
+
if (std::getline(*is, value)) {
|
74
|
+
|
75
|
+
// Cast the contents of the string
|
76
|
+
std::istringstream in(value, std::istringstream::in);
|
77
|
+
in >> value_gene_score.first;
|
78
|
+
in >> value_gene_score.second;
|
79
|
+
} else {
|
80
|
+
is = NULL;
|
81
|
+
}
|
82
|
+
}
|
83
|
+
return *this;
|
84
|
+
}
|
85
|
+
|
86
|
+
GeneScoreIterator<GeneT,ScoreT> operator++(int) {
|
87
|
+
GeneScoreIterator<std::string> prev(*this);
|
88
|
+
++*this;
|
89
|
+
return prev;
|
90
|
+
}
|
91
|
+
|
92
|
+
// De-reference
|
93
|
+
const pair_type operator*() const {
|
94
|
+
return value_gene_score;
|
95
|
+
}
|
96
|
+
const pair_type* operator->() const {
|
97
|
+
return &value_gene_score;
|
98
|
+
}
|
99
|
+
|
100
|
+
bool operator!=(const GeneScoreIterator<GeneT,ScoreT>& other) const {
|
101
|
+
return is != other.is;
|
102
|
+
}
|
103
|
+
|
104
|
+
bool operator==(const GeneScoreIterator<GeneT,ScoreT>& other) const {
|
105
|
+
return !(*this != other);
|
106
|
+
}
|
107
|
+
|
108
|
+
const std::string test_value() const { return value; }
|
109
|
+
|
110
|
+
protected:
|
111
|
+
istream_type* is;
|
112
|
+
std::string value;
|
113
|
+
pair_type value_gene_score;
|
114
|
+
};
|
@@ -0,0 +1,113 @@
|
|
1
|
+
/* Rocker gem C++ extension
|
2
|
+
* Part of crossval, in the Phenolog project
|
3
|
+
* (C) John O. Woods, The Marcotte Lab, 2010
|
4
|
+
*
|
5
|
+
* Requires boost, boost_filesystem, rice (Ruby in C++ Extension), and of course
|
6
|
+
* Ruby (1.8). Use at your own risk if you're not a lab member!
|
7
|
+
*
|
8
|
+
* To compile, run irb or script/console and do:
|
9
|
+
* require 'extconf'
|
10
|
+
*
|
11
|
+
* Then leave the console, and type:
|
12
|
+
* make
|
13
|
+
*
|
14
|
+
* You will then load the module from within the ruby shell using:
|
15
|
+
* require 'rocker'
|
16
|
+
*
|
17
|
+
* Instantiate using:
|
18
|
+
 *     Rocker.new("dbname=crossval_production user=youruser password=yourpass", 1, 167)
|
19
|
+
*
|
20
|
+
* (That connects to the specified PostgreSQL database and autoloads matrix 1
|
21
|
+
* data. Updates will be made to experiment 167. This is just an example.)
|
22
|
+
*/
|
23
|
+
|
24
|
+
// g++ -I/usr/include -I/usr/local/include -L/usr/lib -L/usr/local/lib -lpqxx -lboost_filesystem rocker.cpp -o rocker
|
25
|
+
#include <rice/Data_Type.hpp>
|
26
|
+
#include <rice/Constructor.hpp>
|
27
|
+
|
28
|
+
#include <iostream>
|
29
|
+
#include <fstream>
|
30
|
+
#include <cstdlib>
|
31
|
+
#include <string>
|
32
|
+
using std::cout;
|
33
|
+
using std::endl;
|
34
|
+
using namespace Rice;
|
35
|
+
|
36
|
+
#include "rocker.h"
|
37
|
+
|
38
|
+
//#include "line_input_iterator.h"
|
39
|
+
|
40
|
+
typedef LineInputIterator<std::string> line_input_iterator;
|
41
|
+
|
42
|
+
|
43
|
+
extern "C"
|
44
|
+
void Init_rocker() {
|
45
|
+
|
46
|
+
// Expose Rocker class to Ruby
|
47
|
+
//database_string dbarg(DBNAME, USER, PASSWORD);
|
48
|
+
Data_Type<Rocker> rb_cRocker =
|
49
|
+
define_class<Rocker>("Rocker")
|
50
|
+
.define_constructor(Constructor<Rocker,std::string,uint,uint>())
|
51
|
+
.define_method("process_results", &Rocker::process_results)
|
52
|
+
.define_method("fetch", &Rocker::fetch, (Arg("j")))
|
53
|
+
.define_method("calculate_statistic",
|
54
|
+
&Rocker::calculate_statistic,
|
55
|
+
(Arg("j"), Arg("threshold") = (double)(0.0)))
|
56
|
+
.define_method("read_candidates", &Rocker::read_candidates, (Arg("j")))
|
57
|
+
.define_method("mean_auc", &Rocker::mean_auc);
|
58
|
+
|
59
|
+
}
|
60
|
+
|
61
|
+
extern "C"
|
62
|
+
void Init_DatabaseString() {
|
63
|
+
Data_Type<database_string> rb_cDatabaseString =
|
64
|
+
define_class<database_string>("DatabaseString")
|
65
|
+
.define_constructor(Constructor<database_string,std::string,std::string,std::string>())
|
66
|
+
.define_method("to_s", &database_string::operator());
|
67
|
+
}
|
68
|
+
|
69
|
+
// Uncomment to test -- probably unnecessary now that the Ruby extensions are
|
70
|
+
// built.
|
71
|
+
/*
|
72
|
+
int main(int argc, char* argv[]) {
|
73
|
+
database_string dbarg(DBNAME, USER, PASSWORD);
|
74
|
+
Rocker rocker(dbarg(), 1, 167);
|
75
|
+
|
76
|
+
uint j = 0, m = 0, x = 0;
|
77
|
+
cout << "Args: " << argc << endl;
|
78
|
+
if (argc < 4) {
|
79
|
+
cerr << "Please provide matrix id, experiment id, and column as arguments." << endl;
|
80
|
+
return EXIT_FAILURE;
|
81
|
+
} else {
|
82
|
+
// Read command line arguments.
|
83
|
+
m = atoi(argv[1]);
|
84
|
+
x = atoi(argv[2]);
|
85
|
+
j = atoi(argv[3]);
|
86
|
+
}
|
87
|
+
|
88
|
+
set<uint> gene_ids = rocker.fetch(42);
|
89
|
+
for (set<uint>::const_iterator i = gene_ids.begin(); i != gene_ids.end(); ++i)
|
90
|
+
cout << *i << endl;
|
91
|
+
|
92
|
+
cout << "---" << endl;
|
93
|
+
gene_score_list candidates = rocker.read_candidates(42);
|
94
|
+
for (gene_score_list::iterator i = candidates.begin(); i != candidates.end(); ++i)
|
95
|
+
cout << "hi:\t" << i->first << '\t' << i->second << endl;
|
96
|
+
|
97
|
+
cout << "---" << endl;
|
98
|
+
auc_info test = rocker.calculate_statistic(42);
|
99
|
+
cout << "auc\t=" << test.auc << endl;
|
100
|
+
cout << " tp\t=" << test.tp << endl;
|
101
|
+
cout << " fp\t=" << test.fp << endl;
|
102
|
+
cout << " tn\t=" << test.tn << endl;
|
103
|
+
cout << " fn\t=" << test.fn << endl;
|
104
|
+
|
105
|
+
cerr << "m=" << m << endl;
|
106
|
+
cerr << "x=" << x << endl;
|
107
|
+
cerr << "j=" << j << endl;
|
108
|
+
|
109
|
+
rocker.process_results();
|
110
|
+
|
111
|
+
return EXIT_SUCCESS;
|
112
|
+
}
|
113
|
+
*/
|
data/ext/rocker/rocker.h
ADDED
@@ -0,0 +1,227 @@
|
|
1
|
+
#include "constants.h"
#include "line_input_iterator.h"
#include "fetcher.h"
#include "updater.h"
#include "auc_info.h"

#include <limits>
#include <list>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <boost/filesystem.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/lexical_cast.hpp>
|
14
|
+
|
15
|
+
using boost::filesystem::exists;
|
16
|
+
using boost::filesystem::ifstream;
|
17
|
+
using std::string;
|
18
|
+
using std::list;
|
19
|
+
using std::ostringstream;
|
20
|
+
|
21
|
+
// typedef unordered_map<uint, double> gene_score_map;
|
22
|
+
typedef list<std::pair<uint,double> > gene_score_list;
|
23
|
+
typedef GeneScoreIterator<unsigned int,double> gene_score_iterator;
|
24
|
+
|
25
|
+
|
26
|
+
// Return the path's string form with its first two characters removed
// (presumably the "./" prefix of entries found by iterating path(".") --
// TODO confirm).
string path_to_s(const boost::filesystem::path& p) {
  const string full = p.string();
  return full.substr(2);
}
|
29
|
+
|
30
|
+
// Interpret the name produced by path_to_s(p) as an unsigned integer.
// On success stores the value in n and returns true; otherwise reports the
// offending name on stderr and returns false.
bool path_to_uint(const boost::filesystem::path& p, uint& n) {
  const string s = path_to_s(p);

  try {
    n = boost::lexical_cast<uint>(s);
    return true;
  } catch (boost::bad_lexical_cast &) {
    cerr << "Unable to read phenotype file '" << s << "': not numeric" << endl;
  }

  return false;
}
|
43
|
+
|
44
|
+
|
45
|
+
class Rocker {
|
46
|
+
public:
|
47
|
+
|
48
|
+
// Connect to the database and create the read transaction
|
49
|
+
Rocker(string dbarg, uint m_id, uint e_id) : c(dbarg), mean_auc_(0.0) {
|
50
|
+
// Make sure the fetcher knows which matrix to restrict queries to.
|
51
|
+
fetcher.matrix_id = m_id;
|
52
|
+
|
53
|
+
// Set up a transaction
|
54
|
+
action = new pqxx::transaction<>(c, READ_TRANSACTION);
|
55
|
+
|
56
|
+
fetcher(*action); // Perform the fetch.
|
57
|
+
|
58
|
+
delete action;
|
59
|
+
|
60
|
+
updater.experiment_id = e_id;
|
61
|
+
updater.aucs = process_results();
|
62
|
+
|
63
|
+
action = new pqxx::transaction<>(c, WRITE_TRANSACTION);
|
64
|
+
|
65
|
+
updater(*action);
|
66
|
+
|
67
|
+
delete action;
|
68
|
+
}
|
69
|
+
|
70
|
+
|
71
|
+
//
|
72
|
+
~Rocker() { }
|
73
|
+
|
74
|
+
|
75
|
+
// Return the mean AUC calculated -- requires that process_results was called,
|
76
|
+
// which happens in the constructor, so it's okay.
|
77
|
+
double mean_auc() { return mean_auc_; }
|
78
|
+
|
79
|
+
// Go through the results directory
|
80
|
+
map<uint,auc_info> process_results() {
|
81
|
+
using namespace boost::filesystem;
|
82
|
+
map<uint, auc_info> rocs;
|
83
|
+
|
84
|
+
double temp_auc_accum = 0.0; // Keep track of AUCs so we can get a mean
|
85
|
+
size_t divide_by = 1;
|
86
|
+
|
87
|
+
// Look at all files in the directory
|
88
|
+
for (basic_directory_iterator<path> jit(path(".")); jit != directory_iterator(); ++jit) {
|
89
|
+
uint j = 0;
|
90
|
+
if (path_to_uint(jit->path(), j)) {
|
91
|
+
// Read the file and calculate AUCs.
|
92
|
+
rocs[j] = calculate_statistic(j);
|
93
|
+
temp_auc_accum += rocs[j].auc;
|
94
|
+
++divide_by;
|
95
|
+
|
96
|
+
cout << "AUC: " << rocs[j].auc << endl;
|
97
|
+
}
|
98
|
+
}
|
99
|
+
|
100
|
+
// Calculate the mean AUC
|
101
|
+
if (divide_by > 0)
|
102
|
+
mean_auc_ = temp_auc_accum / (double)(divide_by);
|
103
|
+
else
|
104
|
+
mean_auc_ = 0;
|
105
|
+
|
106
|
+
return rocs;
|
107
|
+
}
|
108
|
+
|
109
|
+
|
110
|
+
// Get genes with a specific phenotype association (phenotype id = j).
|
111
|
+
set<uint> fetch(uint j) const {
|
112
|
+
return fetcher.known_correct[j];
|
113
|
+
}
|
114
|
+
|
115
|
+
|
116
|
+
// For some phenotype j, determine AUC, fp, tp, fn, tn, etc.
|
117
|
+
auc_info calculate_statistic(uint j, double threshold = 0.0) const {
|
118
|
+
set<uint> known_correct = fetch(j);
|
119
|
+
gene_score_list candidates = read_candidates(j);
|
120
|
+
//cerr << "Size of known_correct: " << known_correct.size() << endl;
|
121
|
+
|
122
|
+
// Attempted transcription of code from Ruby into C++, after having taken
|
123
|
+
// it from Python the first time.
|
124
|
+
// No guarantees!
|
125
|
+
vector<size_t> t;
|
126
|
+
t.reserve(candidates.size()+1); t.push_back(0);
|
127
|
+
vector<size_t> f = t;
|
128
|
+
|
129
|
+
auc_info result;
|
130
|
+
|
131
|
+
for (gene_score_list::const_iterator i = candidates.begin(); i != candidates.end(); ++i) {
|
132
|
+
if (known_correct.find(i->first) != known_correct.end()) {
|
133
|
+
t.push_back( *(t.rbegin()) + 1 );
|
134
|
+
f.push_back( *(f.rbegin()) );
|
135
|
+
|
136
|
+
// Update true positives / false negatives
|
137
|
+
if (i->second > threshold) result.tp++;
|
138
|
+
else result.fn++;
|
139
|
+
|
140
|
+
} else {
|
141
|
+
t.push_back( *(t.rbegin()) );
|
142
|
+
f.push_back( *(f.rbegin()) + 1 );
|
143
|
+
|
144
|
+
// Update false positives / true negatives
|
145
|
+
if (i->second > threshold) result.fp++;
|
146
|
+
else result.tn++;
|
147
|
+
}
|
148
|
+
|
149
|
+
}
|
150
|
+
|
151
|
+
vector<double> tpl; tpl.reserve(candidates.size()+1);
|
152
|
+
// vector<double> fpl = tpl;
|
153
|
+
size_t last_f = 0;
|
154
|
+
for (size_t i = 0; i < t.size(); ++i) {
|
155
|
+
if (f[i] > last_f) {
|
156
|
+
tpl.push_back(t[i]);
|
157
|
+
// fpl.push_back(f[i]);
|
158
|
+
last_f = f[i];
|
159
|
+
}
|
160
|
+
}
|
161
|
+
|
162
|
+
size_t last_t = *(t.rbegin());
|
163
|
+
double sum = 0.0;
|
164
|
+
// Divide each by the last item in that array
|
165
|
+
// Also keep track of the sum for calculating the final AUC value
|
166
|
+
for (size_t i = 0; i < tpl.size(); ++i) {
|
167
|
+
// tpl[i] /= (double)(last_t);
|
168
|
+
// fpl[i] /= (double)(last_f);
|
169
|
+
sum += tpl[i];
|
170
|
+
}
|
171
|
+
|
172
|
+
result.auc = (sum / (double)(last_t)) / (double)(tpl.size());
|
173
|
+
if (tpl.size() == 0) result.auc = 0; // prevent NaN return.
|
174
|
+
|
175
|
+
return result;
|
176
|
+
}
|
177
|
+
|
178
|
+
|
179
|
+
// Assume we're in the correct directory and read the correct phenotype file
|
180
|
+
// First two lines are comment.
|
181
|
+
// Assumes the files are pre-sorted by sortall.pl (by column 2 descending).
|
182
|
+
// column 1 is the gene, column 2 is the prediction score (higher is better).
|
183
|
+
gene_score_list read_candidates(uint j) const {
|
184
|
+
ostringstream fn; fn << j;
|
185
|
+
boost::filesystem::path filepath(fn.str());
|
186
|
+
//cerr << "Opening file: '" << filepath << "'" << endl;
|
187
|
+
|
188
|
+
if (!exists(filepath)) {
|
189
|
+
//cerr << "Error: File '" << filepath << "' does not exist." << endl;
|
190
|
+
throw;
|
191
|
+
}
|
192
|
+
|
193
|
+
gene_score_list res;
|
194
|
+
|
195
|
+
// Open a filestream
|
196
|
+
ifstream fin(filepath);
|
197
|
+
|
198
|
+
// Ignore two header lines
|
199
|
+
fin.ignore(500, '\n');
|
200
|
+
fin.ignore(500, '\n');
|
201
|
+
//cout << "Next character is: '" << fin.peek() << "'" << endl;
|
202
|
+
|
203
|
+
// Iterate through the gene-score pairs in the file
|
204
|
+
for (gene_score_iterator gsit(fin); gsit != gene_score_iterator(); ++gsit) {
|
205
|
+
//cerr << "Adding " << gsit->first << '\t' << gsit->second << endl;
|
206
|
+
res.push_back(*gsit);
|
207
|
+
}
|
208
|
+
|
209
|
+
fin.close();
|
210
|
+
|
211
|
+
return res;
|
212
|
+
}
|
213
|
+
|
214
|
+
|
215
|
+
protected:
|
216
|
+
|
217
|
+
uint matrix_id;
|
218
|
+
uint experiment_id;
|
219
|
+
uint current_j;
|
220
|
+
pqxx::connection c;
|
221
|
+
pqxx::transaction<>* action;
|
222
|
+
|
223
|
+
double mean_auc_;
|
224
|
+
|
225
|
+
Fetcher fetcher;
|
226
|
+
Updater updater;
|
227
|
+
};
|
@@ -0,0 +1,86 @@
|
|
1
|
+
#include "auc_info.h"
|
2
|
+
|
3
|
+
#include <iostream>
|
4
|
+
#include <set>
|
5
|
+
#include <vector>
|
6
|
+
#include <string>
|
7
|
+
#include <list>
|
8
|
+
#include <map>
|
9
|
+
#include <sstream>
|
10
|
+
#include <pqxx/transactor.hxx>
|
11
|
+
#include <pqxx/result.hxx>
|
12
|
+
#include <boost/algorithm/string/join.hpp>
|
13
|
+
|
14
|
+
using std::cout;
|
15
|
+
using std::cerr;
|
16
|
+
using std::endl;
|
17
|
+
using std::set;
|
18
|
+
using std::vector;
|
19
|
+
using std::string;
|
20
|
+
using std::list;
|
21
|
+
using std::map;
|
22
|
+
using std::ostringstream;
|
23
|
+
using pqxx::transactor;
|
24
|
+
using pqxx::result;
|
25
|
+
using boost::algorithm::join;
|
26
|
+
|
27
|
+
typedef unsigned int uint;
|
28
|
+
|
29
|
+
|
30
|
+
//std::string join(const SequenceT<std::string>& strings, std::string join_str = "") {
|
31
|
+
// std::ostringstream o;
|
32
|
+
//
|
33
|
+
// SequenceT<string>::const_iterator i = strings.begin();
|
34
|
+
// o << *i;
|
35
|
+
// ++i;
|
36
|
+
//
|
37
|
+
// for (; i != strings.end(); ++i) {
|
38
|
+
// o << join_str << *i;
|
39
|
+
// }
|
40
|
+
//
|
41
|
+
// return o.str();
|
42
|
+
//}
|
43
|
+
|
44
|
+
|
45
|
+
|
46
|
+
// Transactor that turns the collected AUC statistics into a single
// multi-row INSERT for the rocs table. Set experiment_id and aucs before
// invoking; the generated SQL is left in query.
class Updater : public transactor <> {
public:
  Updater() : transactor<>("Updater"), experiment_id(0) {}

  uint experiment_id;
  map<uint,auc_info> aucs;  // statistics keyed by column number
  string query;

  // Build the INSERT statement and print it to stdout.
  // NOTE(review): the exec call is commented out, so nothing is written to
  // the database yet -- confirm whether this dry-run behaviour is intended.
  void operator()(argument_type &T) {
    if (aucs.empty()) {
      cerr << "No updates necessary." << endl;
      return;
    }

    query = make_known_correct_query();

    try {
      //R = T.exec(query);
      cout << "Query:" << endl;
      cout << query << endl;
    } catch (const pqxx::sql_error& e) {  // by reference: no copy, no slicing
      cerr << "SQL error in Updater transactor." << endl;
      cerr << "Query: " << e.query() << endl;
      cerr << "Error: " << e.what() << endl;
    }
  }

protected:
  // Build "INSERT INTO rocs <columns> VALUES (...),\n(...);" with one tuple
  // per entry of aucs.
  string make_known_correct_query() const {
    ostringstream q;
    q << "INSERT INTO rocs " << AUC_COLUMNS << " VALUES \n";
    list<string> insertions;
    for (map<uint,auc_info>::const_iterator i = aucs.begin(); i != aucs.end(); ++i) {
      insertions.push_back( i->second.entry(experiment_id, i->first) );
    }
    q << join(insertions, ",\n") << ';';
    return q.str();
  }
};
|
data/lib/rocker.rb
ADDED
data/script/console
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
#!/usr/bin/env ruby
# File: script/console
# Starts irb with tab completion and lib/rocker.rb preloaded.

# Windows builds of Ruby ship irb as a batch file.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs =  " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.dirname(__FILE__) + '/../lib/rocker.rb'}"
puts "Loading rocker gem"
exec "#{irb} #{libs} --simple-prompt"
|
data/script/destroy
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs the RubiGen "destroy" script against this gem's root directory.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Fall back to loading rubygems first when rubigen is not directly requirable.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

help_flags = %w[--help -h]
ARGV.shift if help_flags.include?(ARGV[0])

RubiGen::Base.use_component_sources!([:rubygems, :newgem, :newgem_theme, :test_unit])
RubiGen::Scripts::Destroy.new.run(ARGV)
|
data/script/generate
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs the RubiGen "generate" script against this gem's root directory.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Fall back to loading rubygems first when rubigen is not directly requirable.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

help_flags = %w[--help -h]
ARGV.shift if help_flags.include?(ARGV[0])

RubiGen::Base.use_component_sources!([:rubygems, :newgem, :newgem_theme, :test_unit])
RubiGen::Scripts::Generate.new.run(ARGV)
|
data/test/test_helper.rb
ADDED
data/test/test_rocker.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
|
3
|
+
$:.unshift File.dirname(__FILE__) + "/../ext/rocker"
|
4
|
+
require "rocker.so"
|
5
|
+
|
6
|
+
# Builds the connection string handed to Rocker.new:
# "dbname=<dbn> user=<u> password=<p>".
def database_string(dbn = "crossval_development", u = "jwoods", p = "youwish1")
  ["dbname=#{dbn}", "user=#{u}", "password=#{p}"].join(" ")
end
|
9
|
+
|
10
|
+
# Smoke tests for the compiled extension: they only print values, so they
# fail solely when one of the calls raises.
class TestRockerExtn < Test::Unit::TestCase
  # Construct a Rocker for matrix 1 / experiment 167 and print its mean AUC.
  def test_working
    rocker = Rocker.new(database_string, 1, 167)
    mean = rocker.mean_auc
    puts "Mean AUC was #{mean}"
  end

  # Print the version constant.
  def test_version_string
    version = Rocker::VERSION
    puts version
  end
end
|
metadata
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rocker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 4
|
9
|
+
version: 0.0.4
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- John Woods
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-04-23 00:00:00 -05:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: rubyforge
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 2
|
29
|
+
- 0
|
30
|
+
- 4
|
31
|
+
version: 2.0.4
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
name: hoe
|
36
|
+
prerelease: false
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 2
|
43
|
+
- 6
|
44
|
+
- 0
|
45
|
+
version: 2.6.0
|
46
|
+
type: :development
|
47
|
+
version_requirements: *id002
|
48
|
+
description: |-
|
49
|
+
This gem is very fast C++ code for calculating AUCs on results of cross-validation.
|
50
|
+
|
51
|
+
It is specific to the crossval database schema, which has not been released yet.
|
52
|
+
|
53
|
+
Chances are you will not find this very useful unless you are the author.
|
54
|
+
|
55
|
+
It is in gem form to ensure that each lab machine can compile its own arch-specific
|
56
|
+
version.
|
57
|
+
email:
|
58
|
+
- john.woods@marcottelab.org
|
59
|
+
executables: []
|
60
|
+
|
61
|
+
extensions:
|
62
|
+
- ext/rocker/extconf.rb
|
63
|
+
extra_rdoc_files:
|
64
|
+
- History.txt
|
65
|
+
- Manifest.txt
|
66
|
+
- PostInstall.txt
|
67
|
+
files:
|
68
|
+
- History.txt
|
69
|
+
- Manifest.txt
|
70
|
+
- PostInstall.txt
|
71
|
+
- README.rdoc
|
72
|
+
- Rakefile
|
73
|
+
- lib/rocker.rb
|
74
|
+
- script/console
|
75
|
+
- script/destroy
|
76
|
+
- script/generate
|
77
|
+
- test/test_helper.rb
|
78
|
+
- test/test_rocker.rb
|
79
|
+
- ext/rocker/extconf.rb
|
80
|
+
- ext/rocker/auc_info.h
|
81
|
+
- ext/rocker/constants.h
|
82
|
+
- ext/rocker/fetcher.h
|
83
|
+
- ext/rocker/updater.h
|
84
|
+
- ext/rocker/line_input_iterator.h
|
85
|
+
- ext/rocker/rocker.h
|
86
|
+
- ext/rocker/rocker.cpp
|
87
|
+
has_rdoc: true
|
88
|
+
homepage: http://github.com/MarcotteLabGit/rocker
|
89
|
+
licenses: []
|
90
|
+
|
91
|
+
post_install_message: PostInstall.txt
|
92
|
+
rdoc_options:
|
93
|
+
- --main
|
94
|
+
- README.rdoc
|
95
|
+
require_paths:
|
96
|
+
- lib
|
97
|
+
- ext/rocker
|
98
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
segments:
|
103
|
+
- 0
|
104
|
+
version: "0"
|
105
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
segments:
|
110
|
+
- 0
|
111
|
+
version: "0"
|
112
|
+
requirements: []
|
113
|
+
|
114
|
+
rubyforge_project: rocker
|
115
|
+
rubygems_version: 1.3.6
|
116
|
+
signing_key:
|
117
|
+
specification_version: 3
|
118
|
+
summary: This gem is very fast C++ code for calculating AUCs on results of cross-validation
|
119
|
+
test_files:
|
120
|
+
- test/test_helper.rb
|
121
|
+
- test/test_rocker.rb
|
122
|
+
- test/test_rocker_extn.rb
|