rocker 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt ADDED
@@ -0,0 +1,4 @@
1
+ === 0.0.1 2010-04-22
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
data/Manifest.txt ADDED
@@ -0,0 +1,19 @@
1
+ History.txt
2
+ Manifest.txt
3
+ PostInstall.txt
4
+ README.rdoc
5
+ Rakefile
6
+ lib/rocker.rb
7
+ script/console
8
+ script/destroy
9
+ script/generate
10
+ test/test_helper.rb
11
+ test/test_rocker.rb
12
+ ext/rocker/extconf.rb
13
+ ext/rocker/auc_info.h
14
+ ext/rocker/constants.h
15
+ ext/rocker/fetcher.h
16
+ ext/rocker/updater.h
17
+ ext/rocker/line_input_iterator.h
18
+ ext/rocker/rocker.h
19
+ ext/rocker/rocker.cpp
data/PostInstall.txt ADDED
@@ -0,0 +1,7 @@
1
+
2
+ For more information on rocker, see http://rocker.rubyforge.org
3
+
4
+ NOTE: Change this information in PostInstall.txt
5
+ You can also delete it if you don't want it.
6
+
7
+
data/README.rdoc ADDED
@@ -0,0 +1,57 @@
1
+ = rocker
2
+
3
+ * http://github.com/MarcotteLabGit/rocker
4
+
5
+ == DESCRIPTION:
6
+
7
+ This gem is very fast C++ code for calculating AUCs on results of cross-validation.
8
+
9
+ It is specific to the crossval database schema, which has not been released yet.
10
+
11
+ Chances are you will not find this very useful unless you are the author.
12
+
13
+ It is in gem form to ensure that each lab machine can compile its own arch-specific
14
+ version.
15
+
16
+ == FEATURES/PROBLEMS:
17
+
18
+ * There is no real reason this couldn't work for other schemas, but an adapter
19
+ would have to be generated. I have no motivation to do this, but if you are
20
+ interested, please feel free to get in touch.
21
+
22
+ == SYNOPSIS:
23
+
24
+   require 'rocker'; puts Rocker.new("dbname=crossval_production user=youruser password=yourpass", 1, 167).mean_auc
25
+
26
+ == REQUIREMENTS:
27
+
28
+ * crossval
29
+
30
+ == INSTALL:
31
+
32
+ * sudo gem install rocker, most likely, but I haven't tried it yet.
33
+
34
+ == LICENSE:
35
+
36
+ (The MIT License)
37
+
38
+ Copyright (c) 2010 John O. Woods
39
+
40
+ Permission is hereby granted, free of charge, to any person obtaining
41
+ a copy of this software and associated documentation files (the
42
+ 'Software'), to deal in the Software without restriction, including
43
+ without limitation the rights to use, copy, modify, merge, publish,
44
+ distribute, sublicense, and/or sell copies of the Software, and to
45
+ permit persons to whom the Software is furnished to do so, subject to
46
+ the following conditions:
47
+
48
+ The above copyright notice and this permission notice shall be
49
+ included in all copies or substantial portions of the Software.
50
+
51
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
52
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
53
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
54
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
55
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
56
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
57
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,25 @@
1
# Rake build script for the rocker gem. Hoe (with the newgem plugin) generates
# the packaging and release tasks from the specification below.
require 'rubygems'
gem 'hoe', '>= 2.1.0'
require 'hoe'
require 'fileutils'
require './lib/rocker'

Hoe.plugin :newgem
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
$hoe = Hoe.spec 'rocker' do
  self.developer 'John Woods', 'john.woods@marcottelab.org'
  # Use the contents of PostInstall.txt as the message; assigning the bare
  # file name made the installer print the literal text "PostInstall.txt".
  # TODO remove if post-install message not required
  self.post_install_message = File.read(File.join(File.dirname(__FILE__), 'PostInstall.txt'))
  # self.extra_deps = [['activesupport','>= 2.0.2']]
end

require 'newgem/tasks'
Dir['tasks/**/*.rake'].each { |t| load t }

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
# task :default => [:spec, :features]
@@ -0,0 +1,51 @@
1
+ #ifndef AUC_INFO_H
2
+ # define AUC_INFO_H
3
+
4
+ #include <string>
5
+ #include <sstream>
6
+ #include <iostream>
7
+
8
+ typedef unsigned int uint;
9
+
10
+
11
+ using std::ostringstream;
12
+ using std::string;
13
+ using std::ostream;
14
+
15
+ const string AUC_COLUMNS = "(experiment_id, column, auc, true_positives, false_positives, true_negatives, false_negatives)";
16
+
17
// Result of scoring one column: the area under the curve together with the
// confusion-matrix counts taken at the caller's threshold.
class auc_info {
public:
  double auc;       // area under the curve
  unsigned int tp;  // true positives
  unsigned int fp;  // false positives
  unsigned int tn;  // true negatives
  unsigned int fn;  // false negatives

  // Every argument defaults to zero, so a default-constructed object is an
  // all-zero record.
  auc_info(double area_under_curve = 0, unsigned int true_positives = 0, unsigned int false_positives = 0,
           unsigned int true_negatives = 0, unsigned int false_negatives = 0)
  : auc(area_under_curve), tp(true_positives), fp(false_positives), tn(true_negatives), fn(false_negatives) { }

  // Convert to a portion of a SQL insertion (string):
  // "auc, tp, fp, tn, fn", formatted with the default stream settings.
  std::string to_s() const {
    std::ostringstream s;
    s << auc << ", " << tp << ", " << fp << ", " << tn << ", " << fn;
    return s.str();
  }

  // One parenthesised VALUES tuple: "(experiment_id, j, auc, tp, fp, tn, fn)".
  std::string entry(unsigned int experiment_id, unsigned int j) const {
    std::ostringstream s;
    s << '(' << experiment_id << ", " << j << ", " << to_s() << ')';
    return s.str();
  }
};

// Streams the same text as to_s(). Declared inline because this definition
// lives in a header: without it, every translation unit including the header
// would emit its own external definition and the link would fail.
inline std::ostream& operator<<(std::ostream& out, const auc_info& rhs) {
  out << rhs.to_s();
  return out;
}
50
+
51
+ #endif
@@ -0,0 +1,39 @@
1
+ // DATABASE CONSTANTS AND INCLUDES
2
+ #include <string>
3
+ #include <sstream>
4
+ #include <pqxx/connection.hxx>
5
+ #include <pqxx/transaction.hxx>
6
+
7
+
8
// DATABASE CONSTANTS
// NOTE(review): these defaults compile real-looking credentials into the
// extension; consider supplying them from the caller or the environment.
const std::string DBNAME = "crossval_development";
const std::string USER = "jwoods";
const std::string PASSWORD = "youwish1";
const std::string READ_TRANSACTION = "ReadTransaction";
const std::string WRITE_TRANSACTION = "WriteTransaction";

// RESULTS CONSTANTS
const std::string ROCKER_VERSION = "0.0.1";

// Holds the three pieces of a connection string and renders them on demand.
class database_string {
public:
  // Each argument falls back to the corresponding constant above.
  database_string(const std::string& dbn = DBNAME, const std::string& u = USER, const std::string& p = PASSWORD)
  : dbname(dbn), user(u), password(p) { }

  std::string dbname;
  std::string user;
  std::string password;

  // Renders "dbname=<dbname> user=<user> password=<password>".
  // Values are inserted verbatim; no quoting or escaping is applied.
  // const so it can be called on const objects; it only reads the members.
  std::string operator()() const {
    std::ostringstream arg;
    arg << "dbname=" << dbname << " user=" << user << " password=" << password;
    return arg.str();
  }
};


// USED TO GENERATE DBARG CONSTANT:
// Convenience wrapper around database_string. Declared inline because it is
// defined in a header and would otherwise be defined once per including
// translation unit.
inline std::string make_db_argument(const std::string& dbname, const std::string& user, const std::string& password) {
  return database_string(dbname, user, password)();
}
@@ -0,0 +1,15 @@
1
# mkmf configuration for the rocker C++ extension (uses the Rice flavour of mkmf).
require 'rubygems'
require 'mkmf-rice'

dir_config("rocker")
dir_config("boost")

have_library("stdc++")
have_library("pqxx")
# have_library("boost")
have_library("boost_filesystem")

# Define RUBY_19 for the 1.9 series only. The pattern is anchored and the dot
# escaped so that a version such as "2.1.9" or "1.19" cannot match by accident.
if RUBY_VERSION =~ /\A1\.9/
  $CPPFLAGS += " -DRUBY_19"
end

create_makefile('rocker')
@@ -0,0 +1,64 @@
1
+ #include <iostream>
2
+ #include <set>
3
+ #include <vector>
4
+ #include <string>
5
+ #include <sstream>
6
+ #include <pqxx/transactor.hxx>
7
+ #include <pqxx/result.hxx>
8
+
9
+ using std::cout;
10
+ using std::cerr;
11
+ using std::endl;
12
+ using std::set;
13
+ using std::vector;
14
+ using std::string;
15
+ using std::ostringstream;
16
+ using pqxx::transactor;
17
+ using pqxx::result;
18
+
19
+ typedef unsigned int uint;
20
+
21
+
22
+ class Fetcher : public transactor <> {
23
+ public:
24
+ Fetcher() : transactor<>("Fetcher") {}
25
+
26
+ uint matrix_id;
27
+ uint experiment_id;
28
+ vector< set<uint> > known_correct;
29
+ string query;
30
+
31
+ void operator()(argument_type &T) {
32
+ result R;
33
+ query = make_known_correct_query().c_str();
34
+
35
+ try {
36
+ R = T.exec(query);
37
+
38
+ vector< set<uint> > known(R.size());
39
+
40
+ // Get the row and add it to the results set
41
+ for (result::const_iterator it = R.begin(); it != R.end(); ++it) {
42
+ uint i; uint j;
43
+ (*it)[1].to(j); // Get column
44
+ (*it)[2].to(i); // Get gene
45
+ known[j].insert(i);
46
+ }
47
+
48
+ known_correct = known;
49
+
50
+ } catch (pqxx::sql_error e) {
51
+ cerr << "SQL error in Fetcher transactor." << endl;
52
+ cerr << "Query: " << e.query() << endl;
53
+ cerr << "Error: " << e.what() << endl;
54
+ }
55
+ }
56
+
57
+ protected:
58
+ string make_known_correct_query() const {
59
+ ostringstream q;
60
+ q << "SELECT id, j, i FROM entries WHERE matrix_id = " << matrix_id
61
+ << " AND type = 'Cell' ORDER BY j,i;";
62
+ return q.str();
63
+ }
64
+ };
@@ -0,0 +1,114 @@
1
+
2
+ #include <iterator>
3
+ #include <istream>
4
+ #include <iostream>
5
+ #include <sstream>
6
+ #include <string>
7
+ #include <cassert>
8
+ #include <utility>
9
+ // #include <boost/lexical_cast.hpp>
10
+
11
+ // using boost::lexical_cast;
12
+
13
// Input iterator over the lines of a stream.
//
// A default-constructed iterator is the end sentinel. An iterator constructed
// from a stream reads the first line immediately, so it can be dereferenced
// right away; it becomes equal to the sentinel once a read fails.
// The iterator typedefs are spelled out instead of inheriting from
// std::iterator, which is deprecated in newer standards.
template <class StringT = std::string>
class LineInputIterator
{
public:
  typedef std::input_iterator_tag iterator_category;
  typedef StringT                 value_type;
  typedef std::ptrdiff_t          difference_type;
  typedef const StringT*          pointer;
  typedef const StringT&          reference;

  typedef typename StringT::value_type char_type;
  typedef typename StringT::traits_type traits_type;
  typedef std::basic_istream<char_type, traits_type> istream_type;

  LineInputIterator() : is(NULL) { }
  LineInputIterator(istream_type& input) : is(&input) {
    ++*this; // Priming read: without it the first dereference is an empty string.
  }

  const StringT& operator*() const { return value; }
  const StringT* operator->() const { return &value; }

  // Reads the next line; turns into the end sentinel when the read fails.
  LineInputIterator<StringT>& operator++() {
    assert(is != NULL);
    if (is && !std::getline(*is, value)) {
      is = NULL;
    }
    return *this;
  }

  LineInputIterator<StringT> operator++(int) {
    LineInputIterator<StringT> prev(*this);
    ++*this;
    return prev;
  }

  // Two iterators are equal when they refer to the same stream (or are both
  // end sentinels).
  bool operator!=(const LineInputIterator<StringT>& other) const {
    return is != other.is;
  }

  bool operator==(const LineInputIterator<StringT>& other) const {
    return !(*this != other);
  }

protected:
  istream_type* is;   // NULL marks the end sentinel
  StringT value;      // most recently read line
};


// Input iterator yielding (gene, score) pairs parsed from whitespace-separated
// lines of a stream.
//
// Reuses the stream pointer and line buffer of LineInputIterator<std::string>
// rather than shadowing them with a second copy. Lines from which a gene and a
// score cannot both be extracted (for example blank lines) are skipped instead
// of yielding stale values.
template <typename GeneT = unsigned int, typename ScoreT = double>
class GeneScoreIterator : public LineInputIterator<std::string> {
public:
  typedef std::pair<GeneT,ScoreT> pair_type;
  typedef std::string::value_type char_type;
  typedef std::string::traits_type traits_type;
  typedef std::basic_istream<char_type, traits_type> istream_type;

  // Describe what dereferencing really yields (the base describes strings).
  typedef pair_type        value_type;
  typedef const pair_type* pointer;
  typedef pair_type        reference;

  GeneScoreIterator() : value_gene_score() { }
  GeneScoreIterator(istream_type& input) : value_gene_score() {
    // The base is default-constructed on purpose: its stream constructor would
    // consume a line without parsing it.
    this->is = &input;
    ++*this; // Priming read.
  }

  // Advances to the next parsable line, or to the end sentinel.
  GeneScoreIterator<GeneT,ScoreT>& operator++() {
    assert(this->is != NULL);

    while (this->is) {
      if (!std::getline(*this->is, this->value)) {
        this->is = NULL;
        break;
      }

      // Cast the contents of the string
      std::istringstream fields(this->value, std::istringstream::in);
      pair_type parsed;
      if (fields >> parsed.first >> parsed.second) {
        value_gene_score = parsed;
        break;
      }
    }
    return *this;
  }

  GeneScoreIterator<GeneT,ScoreT> operator++(int) {
    // Must copy the same instantiation; GeneScoreIterator<std::string> is a
    // different type and did not compile once this function was used.
    GeneScoreIterator<GeneT,ScoreT> prev(*this);
    ++*this;
    return prev;
  }

  // De-reference
  const pair_type operator*() const {
    return value_gene_score;
  }
  const pair_type* operator->() const {
    return &value_gene_score;
  }

  bool operator!=(const GeneScoreIterator<GeneT,ScoreT>& other) const {
    return this->is != other.is;
  }

  bool operator==(const GeneScoreIterator<GeneT,ScoreT>& other) const {
    return !(*this != other);
  }

  // Raw text of the line the current pair was parsed from.
  const std::string test_value() const { return this->value; }

protected:
  pair_type value_gene_score; // most recently parsed pair
};
@@ -0,0 +1,113 @@
1
+ /* Rocker gem C++ extension
2
+ * Part of crossval, in the Phenolog project
3
+ * (C) John O. Woods, The Marcotte Lab, 2010
4
+ *
5
+ * Requires boost, boost_filesystem, rice (Ruby in C++ Extension), and of course
6
+ * Ruby (1.8). Use at your own risk if you're not a lab member!
7
+ *
8
+ * To compile, run irb or script/console and do:
9
+ * require 'extconf'
10
+ *
11
+ * Then leave the console, and type:
12
+ * make
13
+ *
14
+ * You will then load the module from within the ruby shell using:
15
+ * require 'rocker'
16
+ *
17
+ * Instantiate using:
18
+ * Rocker.new("dbname=crossval_production user=youruser password=yourpass", 1, 167)
19
+ *
20
+ * (That connects to the specified PostgreSQL database and autoloads matrix 1
21
+ * data. Updates will be made to experiment 167. This is just an example.)
22
+ */
23
+
24
+ // g++ -I/usr/include -I/usr/local/include -L/usr/lib -L/usr/local/lib -lpqxx -lboost_filesystem rocker.cpp -o rocker
25
+ #include <rice/Data_Type.hpp>
26
+ #include <rice/Constructor.hpp>
27
+
28
+ #include <iostream>
29
+ #include <fstream>
30
+ #include <cstdlib>
31
+ #include <string>
32
+ using std::cout;
33
+ using std::endl;
34
+ using namespace Rice;
35
+
36
+ #include "rocker.h"
37
+
38
+ //#include "line_input_iterator.h"
39
+
40
+ typedef LineInputIterator<std::string> line_input_iterator;
41
+
42
+
43
+ extern "C"
44
+ void Init_rocker() {
45
+
46
+ // Expose Rocker class to Ruby
47
+ //database_string dbarg(DBNAME, USER, PASSWORD);
48
+ Data_Type<Rocker> rb_cRocker =
49
+ define_class<Rocker>("Rocker")
50
+ .define_constructor(Constructor<Rocker,std::string,uint,uint>())
51
+ .define_method("process_results", &Rocker::process_results)
52
+ .define_method("fetch", &Rocker::fetch, (Arg("j")))
53
+ .define_method("calculate_statistic",
54
+ &Rocker::calculate_statistic,
55
+ (Arg("j"), Arg("threshold") = (double)(0.0)))
56
+ .define_method("read_candidates", &Rocker::read_candidates, (Arg("j")))
57
+ .define_method("mean_auc", &Rocker::mean_auc);
58
+
59
+ }
60
+
61
+ extern "C"
62
+ void Init_DatabaseString() {
63
+ Data_Type<database_string> rb_cDatabaseString =
64
+ define_class<database_string>("DatabaseString")
65
+ .define_constructor(Constructor<database_string,std::string,std::string,std::string>())
66
+ .define_method("to_s", &database_string::operator());
67
+ }
68
+
69
+ // Uncomment to test -- probably unnecessary now that the Ruby extensions are
70
+ // built.
71
+ /*
72
+ int main(int argc, char* argv[]) {
73
+ database_string dbarg(DBNAME, USER, PASSWORD);
74
+ Rocker rocker(dbarg(), 1, 167);
75
+
76
+ uint j = 0, m = 0, x = 0;
77
+ cout << "Args: " << argc << endl;
78
+ if (argc < 4) {
79
+ cerr << "Please provide matrix id, experiment id, and column as arguments." << endl;
80
+ return EXIT_FAILURE;
81
+ } else {
82
+ // Read command line arguments.
83
+ m = atoi(argv[1]);
84
+ x = atoi(argv[2]);
85
+ j = atoi(argv[3]);
86
+ }
87
+
88
+ set<uint> gene_ids = rocker.fetch(42);
89
+ for (set<uint>::const_iterator i = gene_ids.begin(); i != gene_ids.end(); ++i)
90
+ cout << *i << endl;
91
+
92
+ cout << "---" << endl;
93
+ gene_score_list candidates = rocker.read_candidates(42);
94
+ for (gene_score_list::iterator i = candidates.begin(); i != candidates.end(); ++i)
95
+ cout << "hi:\t" << i->first << '\t' << i->second << endl;
96
+
97
+ cout << "---" << endl;
98
+ auc_info test = rocker.calculate_statistic(42);
99
+ cout << "auc\t=" << test.auc << endl;
100
+ cout << " tp\t=" << test.tp << endl;
101
+ cout << " fp\t=" << test.fp << endl;
102
+ cout << " tn\t=" << test.tn << endl;
103
+ cout << " fn\t=" << test.fn << endl;
104
+
105
+ cerr << "m=" << m << endl;
106
+ cerr << "x=" << x << endl;
107
+ cerr << "j=" << j << endl;
108
+
109
+ rocker.process_results();
110
+
111
+ return EXIT_SUCCESS;
112
+ }
113
+ */
@@ -0,0 +1,227 @@
1
#include "constants.h"
#include "line_input_iterator.h"
#include "fetcher.h"
#include "updater.h"
#include "auc_info.h"

#include <limits>
#include <list>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <boost/filesystem.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/lexical_cast.hpp>
14
+
15
+ using boost::filesystem::exists;
16
+ using boost::filesystem::ifstream;
17
+ using std::string;
18
+ using std::list;
19
+ using std::ostringstream;
20
+
21
+ // typedef unordered_map<uint, double> gene_score_map;
22
+ typedef list<std::pair<uint,double> > gene_score_list;
23
+ typedef GeneScoreIterator<unsigned int,double> gene_score_iterator;
24
+
25
+
26
+ string path_to_s(const boost::filesystem::path& p) {
27
+ return p.string().substr(2);
28
+ }
29
+
30
+ bool path_to_uint(const boost::filesystem::path& p, uint& n) {
31
+ using boost::lexical_cast;
32
+ using boost::bad_lexical_cast;
33
+
34
+ string s = path_to_s(p);
35
+ try {
36
+ n = lexical_cast<uint>(s);
37
+ } catch(bad_lexical_cast &) {
38
+ cerr << "Unable to read phenotype file '" << s << "': not numeric" << endl;
39
+ return false;
40
+ }
41
+ return true;
42
+ }
43
+
44
+
45
+ class Rocker {
46
+ public:
47
+
48
+ // Connect to the database and create the read transaction
49
+ Rocker(string dbarg, uint m_id, uint e_id) : c(dbarg), mean_auc_(0.0) {
50
+ // Make sure the fetcher knows which matrix to restrict queries to.
51
+ fetcher.matrix_id = m_id;
52
+
53
+ // Set up a transaction
54
+ action = new pqxx::transaction<>(c, READ_TRANSACTION);
55
+
56
+ fetcher(*action); // Perform the fetch.
57
+
58
+ delete action;
59
+
60
+ updater.experiment_id = e_id;
61
+ updater.aucs = process_results();
62
+
63
+ action = new pqxx::transaction<>(c, WRITE_TRANSACTION);
64
+
65
+ updater(*action);
66
+
67
+ delete action;
68
+ }
69
+
70
+
71
+ //
72
+ ~Rocker() { }
73
+
74
+
75
+ // Return the mean AUC calculated -- requires that process_results was called,
76
+ // which happens in the constructor, so it's okay.
77
+ double mean_auc() { return mean_auc_; }
78
+
79
+ // Go through the results directory
80
+ map<uint,auc_info> process_results() {
81
+ using namespace boost::filesystem;
82
+ map<uint, auc_info> rocs;
83
+
84
+ double temp_auc_accum = 0.0; // Keep track of AUCs so we can get a mean
85
+ size_t divide_by = 1;
86
+
87
+ // Look at all files in the directory
88
+ for (basic_directory_iterator<path> jit(path(".")); jit != directory_iterator(); ++jit) {
89
+ uint j = 0;
90
+ if (path_to_uint(jit->path(), j)) {
91
+ // Read the file and calculate AUCs.
92
+ rocs[j] = calculate_statistic(j);
93
+ temp_auc_accum += rocs[j].auc;
94
+ ++divide_by;
95
+
96
+ cout << "AUC: " << rocs[j].auc << endl;
97
+ }
98
+ }
99
+
100
+ // Calculate the mean AUC
101
+ if (divide_by > 0)
102
+ mean_auc_ = temp_auc_accum / (double)(divide_by);
103
+ else
104
+ mean_auc_ = 0;
105
+
106
+ return rocs;
107
+ }
108
+
109
+
110
+ // Get genes with a specific phenotype association (phenotype id = j).
111
+ set<uint> fetch(uint j) const {
112
+ return fetcher.known_correct[j];
113
+ }
114
+
115
+
116
+ // For some phenotype j, determine AUC, fp, tp, fn, tn, etc.
117
+ auc_info calculate_statistic(uint j, double threshold = 0.0) const {
118
+ set<uint> known_correct = fetch(j);
119
+ gene_score_list candidates = read_candidates(j);
120
+ //cerr << "Size of known_correct: " << known_correct.size() << endl;
121
+
122
+ // Attempted transcription of code from Ruby into C++, after having taken
123
+ // it from Python the first time.
124
+ // No guarantees!
125
+ vector<size_t> t;
126
+ t.reserve(candidates.size()+1); t.push_back(0);
127
+ vector<size_t> f = t;
128
+
129
+ auc_info result;
130
+
131
+ for (gene_score_list::const_iterator i = candidates.begin(); i != candidates.end(); ++i) {
132
+ if (known_correct.find(i->first) != known_correct.end()) {
133
+ t.push_back( *(t.rbegin()) + 1 );
134
+ f.push_back( *(f.rbegin()) );
135
+
136
+ // Update true positives / false negatives
137
+ if (i->second > threshold) result.tp++;
138
+ else result.fn++;
139
+
140
+ } else {
141
+ t.push_back( *(t.rbegin()) );
142
+ f.push_back( *(f.rbegin()) + 1 );
143
+
144
+ // Update false positives / true negatives
145
+ if (i->second > threshold) result.fp++;
146
+ else result.tn++;
147
+ }
148
+
149
+ }
150
+
151
+ vector<double> tpl; tpl.reserve(candidates.size()+1);
152
+ // vector<double> fpl = tpl;
153
+ size_t last_f = 0;
154
+ for (size_t i = 0; i < t.size(); ++i) {
155
+ if (f[i] > last_f) {
156
+ tpl.push_back(t[i]);
157
+ // fpl.push_back(f[i]);
158
+ last_f = f[i];
159
+ }
160
+ }
161
+
162
+ size_t last_t = *(t.rbegin());
163
+ double sum = 0.0;
164
+ // Divide each by the last item in that array
165
+ // Also keep track of the sum for calculating the final AUC value
166
+ for (size_t i = 0; i < tpl.size(); ++i) {
167
+ // tpl[i] /= (double)(last_t);
168
+ // fpl[i] /= (double)(last_f);
169
+ sum += tpl[i];
170
+ }
171
+
172
+ result.auc = (sum / (double)(last_t)) / (double)(tpl.size());
173
+ if (tpl.size() == 0) result.auc = 0; // prevent NaN return.
174
+
175
+ return result;
176
+ }
177
+
178
+
179
+ // Assume we're in the correct directory and read the correct phenotype file
180
+ // First two lines are comment.
181
+ // Assumes the files are pre-sorted by sortall.pl (by column 2 descending).
182
+ // column 1 is the gene, column 2 is the prediction score (higher is better).
183
+ gene_score_list read_candidates(uint j) const {
184
+ ostringstream fn; fn << j;
185
+ boost::filesystem::path filepath(fn.str());
186
+ //cerr << "Opening file: '" << filepath << "'" << endl;
187
+
188
+ if (!exists(filepath)) {
189
+ //cerr << "Error: File '" << filepath << "' does not exist." << endl;
190
+ throw;
191
+ }
192
+
193
+ gene_score_list res;
194
+
195
+ // Open a filestream
196
+ ifstream fin(filepath);
197
+
198
+ // Ignore two header lines
199
+ fin.ignore(500, '\n');
200
+ fin.ignore(500, '\n');
201
+ //cout << "Next character is: '" << fin.peek() << "'" << endl;
202
+
203
+ // Iterate through the gene-score pairs in the file
204
+ for (gene_score_iterator gsit(fin); gsit != gene_score_iterator(); ++gsit) {
205
+ //cerr << "Adding " << gsit->first << '\t' << gsit->second << endl;
206
+ res.push_back(*gsit);
207
+ }
208
+
209
+ fin.close();
210
+
211
+ return res;
212
+ }
213
+
214
+
215
+ protected:
216
+
217
+ uint matrix_id;
218
+ uint experiment_id;
219
+ uint current_j;
220
+ pqxx::connection c;
221
+ pqxx::transaction<>* action;
222
+
223
+ double mean_auc_;
224
+
225
+ Fetcher fetcher;
226
+ Updater updater;
227
+ };
@@ -0,0 +1,86 @@
1
+ #include "auc_info.h"
2
+
3
+ #include <iostream>
4
+ #include <set>
5
+ #include <vector>
6
+ #include <string>
7
+ #include <list>
8
+ #include <map>
9
+ #include <sstream>
10
+ #include <pqxx/transactor.hxx>
11
+ #include <pqxx/result.hxx>
12
+ #include <boost/algorithm/string/join.hpp>
13
+
14
+ using std::cout;
15
+ using std::cerr;
16
+ using std::endl;
17
+ using std::set;
18
+ using std::vector;
19
+ using std::string;
20
+ using std::list;
21
+ using std::map;
22
+ using std::ostringstream;
23
+ using pqxx::transactor;
24
+ using pqxx::result;
25
+ using boost::algorithm::join;
26
+
27
+ typedef unsigned int uint;
28
+
29
+
30
+ //std::string join(const SequenceT<std::string>& strings, std::string join_str = "") {
31
+ // std::ostringstream o;
32
+ //
33
+ // SequenceT<string>::const_iterator i = strings.begin();
34
+ // o << *i;
35
+ // ++i;
36
+ //
37
+ // for (; i != strings.end(); ++i) {
38
+ // o << join_str << *i;
39
+ // }
40
+ //
41
+ // return o.str();
42
+ //}
43
+
44
+
45
+
46
+ class Updater : public transactor <> {
47
+ public:
48
+ Updater() : transactor<>("Updater") {}
49
+
50
+ uint experiment_id;
51
+ map<uint,auc_info> aucs;
52
+ string query;
53
+
54
+ void operator()(argument_type &T) {
55
+ result R;
56
+
57
+ if (aucs.size() == 0) {
58
+ cerr << "No updates necessary." << endl;
59
+ return;
60
+ }
61
+
62
+ query = make_known_correct_query().c_str();
63
+
64
+ try {
65
+ //R = T.exec(query);
66
+ cout << "Query:" << endl;
67
+ cout << query << endl;
68
+ } catch (pqxx::sql_error e) {
69
+ cerr << "SQL error in Fetcher transactor." << endl;
70
+ cerr << "Query: " << e.query() << endl;
71
+ cerr << "Error: " << e.what() << endl;
72
+ }
73
+ }
74
+
75
+ protected:
76
+ string make_known_correct_query() const {
77
+ ostringstream q;
78
+ q << "INSERT INTO rocs " << AUC_COLUMNS << " VALUES \n";
79
+ list<string> insertions;
80
+ for (map<uint,auc_info>::const_iterator i = aucs.begin(); i != aucs.end(); ++i) {
81
+ insertions.push_back( i->second.entry(experiment_id, i->first) );
82
+ }
83
+ q << join(insertions, ",\n") << ';';
84
+ return q.str();
85
+ }
86
+ };
data/lib/rocker.rb ADDED
@@ -0,0 +1,8 @@
1
# Put this directory on the load path unless it is already there, either as
# written or in expanded form.
here = File.dirname(__FILE__)
already_listed = $LOAD_PATH.include?(here) || $LOAD_PATH.include?(File.expand_path(here))
$LOAD_PATH.unshift(here) unless already_listed

# Load the compiled extension.
require 'ext/rocker/rocker.so'

# Reopen the class to attach the gem version.
class Rocker
  VERSION = '0.0.4'
end
+ end
data/script/console ADDED
@@ -0,0 +1,10 @@
1
#!/usr/bin/env ruby
# File: script/console
# Starts irb with completion and the rocker library preloaded.

# Windows builds ship irb as a batch file. "(?:" is the non-capturing group;
# the former "(:?" was a typo that merely allowed an optional colon.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.dirname(__FILE__) + '/../lib/rocker.rb'}"
puts "Loading rocker gem"
exec "#{irb} #{libs} --simple-prompt"
data/script/destroy ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs the RubiGen "destroy" script for this project.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try rubigen directly first; fall back to loading it through rubygems.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# Drop a leading help flag before handing the arguments to RubiGen.
ARGV.shift if %w[--help -h].include?(ARGV.first)
RubiGen::Base.use_component_sources!([:rubygems, :newgem, :newgem_theme, :test_unit])
RubiGen::Scripts::Destroy.new.run(ARGV)
data/script/generate ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs the RubiGen "generate" script for this project.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try rubigen directly first; fall back to loading it through rubygems.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# Drop a leading help flag before handing the arguments to RubiGen.
ARGV.shift if %w[--help -h].include?(ARGV.first)
RubiGen::Base.use_component_sources!([:rubygems, :newgem, :newgem_theme, :test_unit])
RubiGen::Scripts::Generate.new.run(ARGV)
@@ -0,0 +1,3 @@
1
+ require 'stringio'
2
+ require 'test/unit'
3
+ require File.dirname(__FILE__) + '/../lib/rocker'
@@ -0,0 +1,11 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+
3
# Placeholder suite: it only proves that the test harness itself loads.
class TestRocker < Test::Unit::TestCase
  # Nothing to prepare.
  def setup; end

  def test_truth
    assert(true)
  end
end
@@ -0,0 +1,21 @@
1
+ require "test/unit"
2
+
3
+ $:.unshift File.dirname(__FILE__) + "/../ext/rocker"
4
+ require "rocker.so"
5
+
6
# Builds the connection string passed to Rocker.new from a database name,
# a user and a password.
def database_string(dbn = "crossval_development", u = "jwoods", p = "youwish1")
  ["dbname=#{dbn}", "user=#{u}", "password=#{p}"].join(" ")
end
9
+
10
# Exercises the compiled extension against the development database.
class TestRockerExtn < Test::Unit::TestCase
  # Builds a Rocker for matrix 1 / experiment 167 and checks that a mean AUC
  # comes back as a Float. Without an assertion the test could never fail on a
  # wrong result.
  def test_working
    t = Rocker.new(database_string, 1, 167)
    x = t.mean_auc
    assert_kind_of Float, x
    puts "Mean AUC was #{x}"
  end

  # NOTE(review): Rocker::VERSION is assigned in lib/rocker.rb, while this file
  # requires rocker.so directly -- confirm the constant is defined here.
  def test_version_string
    puts Rocker::VERSION
  end

end
+ end
metadata ADDED
@@ -0,0 +1,122 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rocker
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 4
9
+ version: 0.0.4
10
+ platform: ruby
11
+ authors:
12
+ - John Woods
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-04-23 00:00:00 -05:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rubyforge
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 2
29
+ - 0
30
+ - 4
31
+ version: 2.0.4
32
+ type: :development
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: hoe
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ segments:
42
+ - 2
43
+ - 6
44
+ - 0
45
+ version: 2.6.0
46
+ type: :development
47
+ version_requirements: *id002
48
+ description: |-
49
+ This gem is very fast C++ code for calculating AUCs on results of cross-validation.
50
+
51
+ It is specific to the crossval database schema, which has not been released yet.
52
+
53
+ Chances are you will not find this very useful unless you are the author.
54
+
55
+ It is in gem form to ensure that each lab machine can compile its own arch-specific
56
+ version.
57
+ email:
58
+ - john.woods@marcottelab.org
59
+ executables: []
60
+
61
+ extensions:
62
+ - ext/rocker/extconf.rb
63
+ extra_rdoc_files:
64
+ - History.txt
65
+ - Manifest.txt
66
+ - PostInstall.txt
67
+ files:
68
+ - History.txt
69
+ - Manifest.txt
70
+ - PostInstall.txt
71
+ - README.rdoc
72
+ - Rakefile
73
+ - lib/rocker.rb
74
+ - script/console
75
+ - script/destroy
76
+ - script/generate
77
+ - test/test_helper.rb
78
+ - test/test_rocker.rb
79
+ - ext/rocker/extconf.rb
80
+ - ext/rocker/auc_info.h
81
+ - ext/rocker/constants.h
82
+ - ext/rocker/fetcher.h
83
+ - ext/rocker/updater.h
84
+ - ext/rocker/line_input_iterator.h
85
+ - ext/rocker/rocker.h
86
+ - ext/rocker/rocker.cpp
87
+ has_rdoc: true
88
+ homepage: http://github.com/MarcotteLabGit/rocker
89
+ licenses: []
90
+
91
+ post_install_message: PostInstall.txt
92
+ rdoc_options:
93
+ - --main
94
+ - README.rdoc
95
+ require_paths:
96
+ - lib
97
+ - ext/rocker
98
+ required_ruby_version: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ segments:
103
+ - 0
104
+ version: "0"
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ segments:
110
+ - 0
111
+ version: "0"
112
+ requirements: []
113
+
114
+ rubyforge_project: rocker
115
+ rubygems_version: 1.3.6
116
+ signing_key:
117
+ specification_version: 3
118
+ summary: This gem is very fast C++ code for calculating AUCs on results of cross-validation
119
+ test_files:
120
+ - test/test_helper.rb
121
+ - test/test_rocker.rb
122
+ - test/test_rocker_extn.rb