rocker 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt ADDED
@@ -0,0 +1,4 @@
1
+ === 0.0.1 2010-04-22
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
data/Manifest.txt ADDED
@@ -0,0 +1,19 @@
1
+ History.txt
2
+ Manifest.txt
3
+ PostInstall.txt
4
+ README.rdoc
5
+ Rakefile
6
+ lib/rocker.rb
7
+ script/console
8
+ script/destroy
9
+ script/generate
10
+ test/test_helper.rb
11
+ test/test_rocker.rb
12
+ ext/rocker/extconf.rb
13
+ ext/rocker/auc_info.h
14
+ ext/rocker/constants.h
15
+ ext/rocker/fetcher.h
16
+ ext/rocker/updater.h
17
+ ext/rocker/line_input_iterator.h
18
+ ext/rocker/rocker.h
19
+ ext/rocker/rocker.cpp
data/PostInstall.txt ADDED
@@ -0,0 +1,7 @@
1
+
2
+ For more information on rocker, see http://rocker.rubyforge.org
3
+
4
+ NOTE: Change this information in PostInstall.txt
5
+ You can also delete it if you don't want it.
6
+
7
+
data/README.rdoc ADDED
@@ -0,0 +1,57 @@
1
+ = rocker
2
+
3
+ * http://github.com/MarcotteLabGit/rocker
4
+
5
+ == DESCRIPTION:
6
+
7
+ This gem is very fast C++ code for calculating AUCs on results of cross-validation.
8
+
9
+ It is specific to the crossval database schema, which has not been released yet.
10
+
11
+ Chances are you will not find this very useful unless you are the author.
12
+
13
+ It is in gem form to ensure that each lab machine can compile its own arch-specific
14
+ version.
15
+
16
+ == FEATURES/PROBLEMS:
17
+
18
+ * There is no real reason this couldn't work for other schemas, but an adapter
19
+ would have to be generated. I have no motivation to do this, but if you are
20
+ interested, please feel free to get in touch.
21
+
22
+ == SYNOPSIS:
23
+
24
+ FIX (code sample of usage)
25
+
26
+ == REQUIREMENTS:
27
+
28
+ * crossval
29
+
30
+ == INSTALL:
31
+
32
+ * sudo gem install rocker, most likely, but I haven't tried it yet.
33
+
34
+ == LICENSE:
35
+
36
+ (The MIT License)
37
+
38
+ Copyright (c) 2010 John O. Woods, The Marcotte Lab
39
+
40
+ Permission is hereby granted, free of charge, to any person obtaining
41
+ a copy of this software and associated documentation files (the
42
+ 'Software'), to deal in the Software without restriction, including
43
+ without limitation the rights to use, copy, modify, merge, publish,
44
+ distribute, sublicense, and/or sell copies of the Software, and to
45
+ permit persons to whom the Software is furnished to do so, subject to
46
+ the following conditions:
47
+
48
+ The above copyright notice and this permission notice shall be
49
+ included in all copies or substantial portions of the Software.
50
+
51
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
52
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
53
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
54
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
55
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
56
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
57
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,25 @@
1
# Rake entry point for the rocker gem. Hoe (with the newgem plugin) generates
# the packaging tasks; run 'rake -T' from the gem root to list them.
require 'rubygems'
gem('hoe', '>= 2.1.0')
require 'hoe'
require 'fileutils'
require './lib/rocker'

Hoe.plugin(:newgem)
# Hoe.plugin :website
# Hoe.plugin :cucumberfeatures

# Declare the gem specification; Hoe derives all Rake tasks from it.
$hoe = Hoe.spec('rocker') do
  self.developer('John Woods', 'john.woods@marcottelab.org')
  # TODO remove if post-install message not required
  self.post_install_message = 'PostInstall.txt'
  # self.extra_deps = [['activesupport','>= 2.0.2']]
end

require 'newgem/tasks'

# Pull in any project-local task definitions.
Dir['tasks/**/*.rake'].each do |rake_file|
  load rake_file
end

# TODO - want other tests/tasks run by default? Add them to the list
# remove_task :default
# task :default => [:spec, :features]
@@ -0,0 +1,51 @@
1
+ #ifndef AUC_INFO_H
2
+ # define AUC_INFO_H
3
+
4
+ #include <string>
5
+ #include <sstream>
6
+ #include <iostream>
7
+
8
typedef unsigned int uint;


using std::ostringstream;
using std::string;
using std::ostream;

// Column list for INSERTs into the rocs table. The order matches what
// auc_info::entry() emits. `column` is a reserved word in PostgreSQL, so it
// must be double-quoted to be usable as an identifier.
const string AUC_COLUMNS = "(experiment_id, \"column\", auc, true_positives, false_positives, true_negatives, false_negatives)";

// Plain value type holding the ROC statistics computed for one column:
// the area under the curve plus the four confusion-matrix counts.
class auc_info {
public:
  double auc;
  uint tp;
  uint fp;
  uint tn;
  uint fn;

  // Constructor; every statistic defaults to zero.
  auc_info(double area_under_curve = 0, uint true_positives = 0, uint false_positives = 0, uint true_negatives = 0, uint false_negatives = 0)
  : auc(area_under_curve), tp(true_positives), fp(false_positives), tn(true_negatives), fn(false_negatives) { }

  // Convert to a portion of a SQL insertion (string): "auc, tp, fp, tn, fn".
  // NOTE(review): auc is written with the stream's default precision
  // (6 significant digits); raise it here if the table needs more.
  string to_s() const {
    ostringstream s;
    s << auc << ", " << tp << ", " << fp << ", " << tn << ", " << fn;
    return s.str();
  }

  // Build one parenthesised VALUES tuple in AUC_COLUMNS order:
  // "(experiment_id, j, auc, tp, fp, tn, fn)".
  string entry(uint experiment_id, uint j) const {
    ostringstream s;
    s << '(' << experiment_id << ", " << j << ", " << to_s() << ')';
    return s.str();
  }
};

// Stream the same text as to_s(). Declared inline because this function is
// defined in a header; a non-inline definition would be emitted by every
// translation unit that includes it.
inline std::ostream& operator<<(std::ostream& out, const auc_info& rhs) {
  out << rhs.to_s();
  return out;
}
50
+
51
+ #endif
@@ -0,0 +1,39 @@
1
+ // DATABASE CONSTANTS AND INCLUDES
2
+ #include <string>
3
+ #include <sstream>
4
+ #include <pqxx/connection.hxx>
5
+ #include <pqxx/transaction.hxx>
6
+
7
+
8
+ // DATABASE CONSTANTS
9
// NOTE(review): these defaults compile a user name and password into the
// extension. They are kept because they are the documented defaults of
// database_string, but callers should pass explicit values.
const std::string DBNAME = "crossval_development";
const std::string USER = "jwoods";
const std::string PASSWORD = "youwish1";
const std::string READ_TRANSACTION = "ReadTransaction";
const std::string WRITE_TRANSACTION = "WriteTransaction";

// RESULTS CONSTANTS
const std::string ROCKER_VERSION = "0.0.1";

// Holds the three connection settings and renders them as a
// "dbname=... user=... password=..." connection string.
class database_string {
public:
  database_string(std::string dbn = DBNAME, std::string u = USER, std::string p = PASSWORD) : dbname(dbn), user(u), password(p) { }

  std::string dbname;
  std::string user;
  std::string password;

  // Render the connection string. Simple values are emitted verbatim (so the
  // output for ordinary names is unchanged); values that are empty or contain
  // whitespace, a single quote or a backslash are single-quoted and escaped,
  // as the connection-string syntax requires.
  std::string operator()() const {
    std::ostringstream arg;
    arg << "dbname=" << conninfo_value(dbname)
        << " user=" << conninfo_value(user)
        << " password=" << conninfo_value(password);
    return arg.str();
  }

private:
  // Quote one value only when leaving it bare would change its meaning.
  static std::string conninfo_value(const std::string& raw) {
    bool needs_quotes = raw.empty();
    for (std::string::size_type k = 0; k < raw.size() && !needs_quotes; ++k) {
      const char ch = raw[k];
      needs_quotes = (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\'' || ch == '\\');
    }
    if (!needs_quotes) return raw;

    std::string quoted("'");
    for (std::string::size_type k = 0; k < raw.size(); ++k) {
      if (raw[k] == '\'' || raw[k] == '\\') quoted += '\\';  // escape ' and \ inside quotes
      quoted += raw[k];
    }
    quoted += '\'';
    return quoted;
  }
};


// USED TO GENERATE DBARG CONSTANT:
// Convenience wrapper around database_string. Inline because it is defined in
// a header.
inline std::string make_db_argument(const std::string& dbname, const std::string& user, const std::string& password) {
  return database_string(dbname, user, password)();
}
@@ -0,0 +1,15 @@
1
# Generates the Makefile for the rocker C++ extension (built with Rice).
require 'rubygems'
require 'mkmf-rice'

# Allow --with-rocker-dir / --with-boost-dir (and -include / -lib variants).
dir_config("rocker")
dir_config("boost")

have_library("stdc++")
have_library("pqxx")
# have_library("boost")
have_library("boost_filesystem")

# Define RUBY_19 only for the 1.9 series. The dots are escaped and the match
# is anchored so that versions such as "2.1.9" do not match by accident.
if RUBY_VERSION =~ /\A1\.9(\.|\z)/ then
  $CPPFLAGS += " -DRUBY_19"
end

create_makefile('rocker')
@@ -0,0 +1,64 @@
1
+ #include <iostream>
2
+ #include <set>
3
+ #include <vector>
4
+ #include <string>
5
+ #include <sstream>
6
+ #include <pqxx/transactor.hxx>
7
+ #include <pqxx/result.hxx>
8
+
9
+ using std::cout;
10
+ using std::cerr;
11
+ using std::endl;
12
+ using std::set;
13
+ using std::vector;
14
+ using std::string;
15
+ using std::ostringstream;
16
+ using pqxx::transactor;
17
+ using pqxx::result;
18
+
19
+ typedef unsigned int uint;
20
+
21
+
22
+ class Fetcher : public transactor <> {
23
+ public:
24
+ Fetcher() : transactor<>("Fetcher") {}
25
+
26
+ uint matrix_id;
27
+ uint experiment_id;
28
+ vector< set<uint> > known_correct;
29
+ string query;
30
+
31
+ void operator()(argument_type &T) {
32
+ result R;
33
+ query = make_known_correct_query().c_str();
34
+
35
+ try {
36
+ R = T.exec(query);
37
+
38
+ vector< set<uint> > known(R.size());
39
+
40
+ // Get the row and add it to the results set
41
+ for (result::const_iterator it = R.begin(); it != R.end(); ++it) {
42
+ uint i; uint j;
43
+ (*it)[1].to(j); // Get column
44
+ (*it)[2].to(i); // Get gene
45
+ known[j].insert(i);
46
+ }
47
+
48
+ known_correct = known;
49
+
50
+ } catch (pqxx::sql_error e) {
51
+ cerr << "SQL error in Fetcher transactor." << endl;
52
+ cerr << "Query: " << e.query() << endl;
53
+ cerr << "Error: " << e.what() << endl;
54
+ }
55
+ }
56
+
57
+ protected:
58
+ string make_known_correct_query() const {
59
+ ostringstream q;
60
+ q << "SELECT id, j, i FROM entries WHERE matrix_id = " << matrix_id
61
+ << " AND type = 'Cell' ORDER BY j,i;";
62
+ return q.str();
63
+ }
64
+ };
@@ -0,0 +1,114 @@
1
+
2
+ #include <iterator>
3
+ #include <istream>
4
+ #include <iostream>
5
+ #include <sstream>
6
+ #include <string>
7
+ #include <cassert>
8
+ #include <utility>
9
+ // #include <boost/lexical_cast.hpp>
10
+
11
+ // using boost::lexical_cast;
12
+
13
// Input iterator that walks an input stream one line at a time.
//
// A default-constructed iterator is the end-of-stream sentinel. An iterator
// built from a stream reads the first line immediately, so dereferencing it
// yields that line; when a read fails it becomes equal to the sentinel.
template <class StringT = std::string>
class LineInputIterator {
public:
  // Iterator traits, declared directly (std::iterator is deprecated).
  typedef std::input_iterator_tag iterator_category;
  typedef StringT                 value_type;
  typedef std::ptrdiff_t          difference_type;
  typedef const StringT*          pointer;
  typedef const StringT&          reference;

  typedef typename StringT::value_type char_type;
  typedef typename StringT::traits_type traits_type;
  typedef std::basic_istream<char_type, traits_type> istream_type;

  LineInputIterator() : is(NULL) { }
  LineInputIterator(istream_type& in) : is(&in) {
    ++*this;  // Priming read, so *it is the first line rather than an empty string.
  }

  const StringT& operator*() const { return value; }
  const StringT* operator->() const { return &value; }

  // Read the next line; turn into the end sentinel when the stream is exhausted.
  LineInputIterator<StringT>& operator++() {
    assert(is != NULL);
    if (is && !std::getline(*is, value)) {
      is = NULL;
    }
    return *this;
  }

  LineInputIterator<StringT> operator++(int) {
    LineInputIterator<StringT> prev(*this);
    ++*this;
    return prev;
  }

  // Two iterators are equal when they refer to the same stream (or are both
  // at the end).
  bool operator!=(const LineInputIterator<StringT>& other) const {
    return is != other.is;
  }

  bool operator==(const LineInputIterator<StringT>& other) const {
    return !(*this != other);
  }

protected:
  istream_type* is;   // NULL once the end of the stream has been reached
  StringT value;      // most recently read line
};


// Line iterator that parses each line as "<gene> <score>" and yields the pair.
//
// Lines that do not parse (blank lines, missing or non-numeric fields) are
// skipped, so a malformed line never re-emits the previous pair.
template <typename GeneT = unsigned int, typename ScoreT = double>
class GeneScoreIterator : public LineInputIterator<std::string> {
public:
  typedef std::pair<GeneT,ScoreT> pair_type;
  typedef std::string::value_type char_type;
  typedef std::string::traits_type traits_type;
  typedef std::basic_istream<char_type, traits_type> istream_type;

  // Traits describing what this iterator actually yields.
  typedef pair_type        value_type;
  typedef const pair_type* pointer;
  typedef const pair_type  reference;

  GeneScoreIterator() { }
  GeneScoreIterator(istream_type& in) {
    // The base is default-constructed on purpose: its own priming read would
    // consume a line without parsing it.
    is = &in;
    ++*this; // Priming read.
  }

  // Advance to the next line that parses as a gene/score pair.
  GeneScoreIterator<GeneT,ScoreT>& operator++() {
    assert(is != NULL);

    while (is) {
      if (!std::getline(*is, value)) {
        is = NULL;
        break;
      }

      // Parse into a temporary so a failed line cannot half-update the pair.
      std::istringstream fields(value);
      pair_type parsed;
      if (fields >> parsed.first >> parsed.second) {
        value_gene_score = parsed;
        break;
      }
    }
    return *this;
  }

  GeneScoreIterator<GeneT,ScoreT> operator++(int) {
    GeneScoreIterator<GeneT,ScoreT> prev(*this);
    ++*this;
    return prev;
  }

  // De-reference
  const pair_type operator*() const {
    return value_gene_score;
  }
  const pair_type* operator->() const {
    return &value_gene_score;
  }

  bool operator!=(const GeneScoreIterator<GeneT,ScoreT>& other) const {
    return is != other.is;
  }

  bool operator==(const GeneScoreIterator<GeneT,ScoreT>& other) const {
    return !(*this != other);
  }

  // Raw text of the line the current pair was parsed from.
  const std::string test_value() const { return value; }

protected:
  pair_type value_gene_score;  // pair parsed from the current line
};
@@ -0,0 +1,113 @@
1
+ /* Rocker gem C++ extension
2
+ * Part of crossval, in the Phenolog project
3
+ * (C) John O. Woods, The Marcotte Lab, 2010
4
+ *
5
+ * Requires boost, boost_filesystem, rice (Ruby in C++ Extension), and of course
6
+ * Ruby (1.8). Use at your own risk if you're not a lab member!
7
+ *
8
+ * To compile, run irb or script/console and do:
9
+ * require 'extconf'
10
+ *
11
+ * Then leave the console, and type:
12
+ * make
13
+ *
14
+ * You will then load the module from within the ruby shell using:
15
+ * require 'rocker'
16
+ *
17
+ * Instantiate using:
18
+ * Rocker.new("dbname=crossval_production user=youruser password=yourpass", 1, 167)
19
+ *
20
+ * (That connects to the specified PostgreSQL database and autoloads matrix 1
21
+ * data. Updates will be made to experiment 167. This is just an example.)
22
+ */
23
+
24
+ // g++ -I/usr/include -I/usr/local/include -L/usr/lib -L/usr/local/lib -lpqxx -lboost_filesystem rocker.cpp -o rocker
25
+ #include <rice/Data_Type.hpp>
26
+ #include <rice/Constructor.hpp>
27
+
28
+ #include <iostream>
29
+ #include <fstream>
30
+ #include <cstdlib>
31
+ #include <string>
32
+ using std::cout;
33
+ using std::endl;
34
+ using namespace Rice;
35
+
36
+ #include "rocker.h"
37
+
38
+ //#include "line_input_iterator.h"
39
+
40
+ typedef LineInputIterator<std::string> line_input_iterator;
41
+
42
+
43
+ extern "C"
44
+ void Init_rocker() {
45
+
46
+ // Expose Rocker class to Ruby
47
+ //database_string dbarg(DBNAME, USER, PASSWORD);
48
+ Data_Type<Rocker> rb_cRocker =
49
+ define_class<Rocker>("Rocker")
50
+ .define_constructor(Constructor<Rocker,std::string,uint,uint>())
51
+ .define_method("process_results", &Rocker::process_results)
52
+ .define_method("fetch", &Rocker::fetch, (Arg("j")))
53
+ .define_method("calculate_statistic",
54
+ &Rocker::calculate_statistic,
55
+ (Arg("j"), Arg("threshold") = (double)(0.0)))
56
+ .define_method("read_candidates", &Rocker::read_candidates, (Arg("j")))
57
+ .define_method("mean_auc", &Rocker::mean_auc);
58
+
59
+ }
60
+
61
+ extern "C"
62
+ void Init_DatabaseString() {
63
+ Data_Type<database_string> rb_cDatabaseString =
64
+ define_class<database_string>("DatabaseString")
65
+ .define_constructor(Constructor<database_string,std::string,std::string,std::string>())
66
+ .define_method("to_s", &database_string::operator());
67
+ }
68
+
69
+ // Uncomment to test -- probably unnecessary now that the Ruby extensions are
70
+ // built.
71
+ /*
72
+ int main(int argc, char* argv[]) {
73
+ database_string dbarg(DBNAME, USER, PASSWORD);
74
+ Rocker rocker(dbarg(), 1, 167);
75
+
76
+ uint j = 0, m = 0, x = 0;
77
+ cout << "Args: " << argc << endl;
78
+ if (argc < 4) {
79
+ cerr << "Please provide matrix id, experiment id, and column as arguments." << endl;
80
+ return EXIT_FAILURE;
81
+ } else {
82
+ // Read command line arguments.
83
+ m = atoi(argv[1]);
84
+ x = atoi(argv[2]);
85
+ j = atoi(argv[3]);
86
+ }
87
+
88
+ set<uint> gene_ids = rocker.fetch(42);
89
+ for (set<uint>::const_iterator i = gene_ids.begin(); i != gene_ids.end(); ++i)
90
+ cout << *i << endl;
91
+
92
+ cout << "---" << endl;
93
+ gene_score_list candidates = rocker.read_candidates(42);
94
+ for (gene_score_list::iterator i = candidates.begin(); i != candidates.end(); ++i)
95
+ cout << "hi:\t" << i->first << '\t' << i->second << endl;
96
+
97
+ cout << "---" << endl;
98
+ auc_info test = rocker.calculate_statistic(42);
99
+ cout << "auc\t=" << test.auc << endl;
100
+ cout << " tp\t=" << test.tp << endl;
101
+ cout << " fp\t=" << test.fp << endl;
102
+ cout << " tn\t=" << test.tn << endl;
103
+ cout << " fn\t=" << test.fn << endl;
104
+
105
+ cerr << "m=" << m << endl;
106
+ cerr << "x=" << x << endl;
107
+ cerr << "j=" << j << endl;
108
+
109
+ rocker.process_results();
110
+
111
+ return EXIT_SUCCESS;
112
+ }
113
+ */
@@ -0,0 +1,227 @@
1
+ #include "constants.h"
2
+ #include "line_input_iterator.h"
3
+ #include "fetcher.h"
4
+ #include "updater.h"
5
+ #include "auc_info.h"
6
+
7
+ #include <string>
8
+ #include <utility>
9
+ #include <sstream>
10
+ #include <list>
11
+ #include <boost/filesystem.hpp>
12
+ #include <boost/filesystem/fstream.hpp>
13
+ #include <boost/lexical_cast.hpp>
14
+
15
+ using boost::filesystem::exists;
16
+ using boost::filesystem::ifstream;
17
+ using std::string;
18
+ using std::list;
19
+ using std::ostringstream;
20
+
21
+ // typedef unordered_map<uint, double> gene_score_map;
22
+ typedef list<std::pair<uint,double> > gene_score_list;
23
+ typedef GeneScoreIterator<unsigned int,double> gene_score_iterator;
24
+
25
+
26
+ string path_to_s(const boost::filesystem::path& p) {
27
+ return p.string().substr(2);
28
+ }
29
+
30
+ bool path_to_uint(const boost::filesystem::path& p, uint& n) {
31
+ using boost::lexical_cast;
32
+ using boost::bad_lexical_cast;
33
+
34
+ string s = path_to_s(p);
35
+ try {
36
+ n = lexical_cast<uint>(s);
37
+ } catch(bad_lexical_cast &) {
38
+ cerr << "Unable to read phenotype file '" << s << "': not numeric" << endl;
39
+ return false;
40
+ }
41
+ return true;
42
+ }
43
+
44
+
45
+ class Rocker {
46
+ public:
47
+
48
+ // Connect to the database and create the read transaction
49
+ Rocker(string dbarg, uint m_id, uint e_id) : c(dbarg), mean_auc_(0.0) {
50
+ // Make sure the fetcher knows which matrix to restrict queries to.
51
+ fetcher.matrix_id = m_id;
52
+
53
+ // Set up a transaction
54
+ action = new pqxx::transaction<>(c, READ_TRANSACTION);
55
+
56
+ fetcher(*action); // Perform the fetch.
57
+
58
+ delete action;
59
+
60
+ updater.experiment_id = e_id;
61
+ updater.aucs = process_results();
62
+
63
+ action = new pqxx::transaction<>(c, WRITE_TRANSACTION);
64
+
65
+ updater(*action);
66
+
67
+ delete action;
68
+ }
69
+
70
+
71
+ //
72
+ ~Rocker() { }
73
+
74
+
75
+ // Return the mean AUC calculated -- requires that process_results was called,
76
+ // which happens in the constructor, so it's okay.
77
+ double mean_auc() { return mean_auc_; }
78
+
79
+ // Go through the results directory
80
+ map<uint,auc_info> process_results() {
81
+ using namespace boost::filesystem;
82
+ map<uint, auc_info> rocs;
83
+
84
+ double temp_auc_accum = 0.0; // Keep track of AUCs so we can get a mean
85
+ size_t divide_by = 1;
86
+
87
+ // Look at all files in the directory
88
+ for (basic_directory_iterator<path> jit(path(".")); jit != directory_iterator(); ++jit) {
89
+ uint j = 0;
90
+ if (path_to_uint(jit->path(), j)) {
91
+ // Read the file and calculate AUCs.
92
+ rocs[j] = calculate_statistic(j);
93
+ temp_auc_accum += rocs[j].auc;
94
+ ++divide_by;
95
+
96
+ cout << "AUC: " << rocs[j].auc << endl;
97
+ }
98
+ }
99
+
100
+ // Calculate the mean AUC
101
+ if (divide_by > 0)
102
+ mean_auc_ = temp_auc_accum / (double)(divide_by);
103
+ else
104
+ mean_auc_ = 0;
105
+
106
+ return rocs;
107
+ }
108
+
109
+
110
+ // Get genes with a specific phenotype association (phenotype id = j).
111
+ set<uint> fetch(uint j) const {
112
+ return fetcher.known_correct[j];
113
+ }
114
+
115
+
116
+ // For some phenotype j, determine AUC, fp, tp, fn, tn, etc.
117
+ auc_info calculate_statistic(uint j, double threshold = 0.0) const {
118
+ set<uint> known_correct = fetch(j);
119
+ gene_score_list candidates = read_candidates(j);
120
+ //cerr << "Size of known_correct: " << known_correct.size() << endl;
121
+
122
+ // Attempted transcription of code from Ruby into C++, after having taken
123
+ // it from Python the first time.
124
+ // No guarantees!
125
+ vector<size_t> t;
126
+ t.reserve(candidates.size()+1); t.push_back(0);
127
+ vector<size_t> f = t;
128
+
129
+ auc_info result;
130
+
131
+ for (gene_score_list::const_iterator i = candidates.begin(); i != candidates.end(); ++i) {
132
+ if (known_correct.find(i->first) != known_correct.end()) {
133
+ t.push_back( *(t.rbegin()) + 1 );
134
+ f.push_back( *(f.rbegin()) );
135
+
136
+ // Update true positives / false negatives
137
+ if (i->second > threshold) result.tp++;
138
+ else result.fn++;
139
+
140
+ } else {
141
+ t.push_back( *(t.rbegin()) );
142
+ f.push_back( *(f.rbegin()) + 1 );
143
+
144
+ // Update false positives / true negatives
145
+ if (i->second > threshold) result.fp++;
146
+ else result.tn++;
147
+ }
148
+
149
+ }
150
+
151
+ vector<double> tpl; tpl.reserve(candidates.size()+1);
152
+ // vector<double> fpl = tpl;
153
+ size_t last_f = 0;
154
+ for (size_t i = 0; i < t.size(); ++i) {
155
+ if (f[i] > last_f) {
156
+ tpl.push_back(t[i]);
157
+ // fpl.push_back(f[i]);
158
+ last_f = f[i];
159
+ }
160
+ }
161
+
162
+ size_t last_t = *(t.rbegin());
163
+ double sum = 0.0;
164
+ // Divide each by the last item in that array
165
+ // Also keep track of the sum for calculating the final AUC value
166
+ for (size_t i = 0; i < tpl.size(); ++i) {
167
+ // tpl[i] /= (double)(last_t);
168
+ // fpl[i] /= (double)(last_f);
169
+ sum += tpl[i];
170
+ }
171
+
172
+ result.auc = (sum / (double)(last_t)) / (double)(tpl.size());
173
+ if (tpl.size() == 0) result.auc = 0; // prevent NaN return.
174
+
175
+ return result;
176
+ }
177
+
178
+
179
+ // Assume we're in the correct directory and read the correct phenotype file
180
+ // First two lines are comment.
181
+ // Assumes the files are pre-sorted by sortall.pl (by column 2 descending).
182
+ // column 1 is the gene, column 2 is the prediction score (higher is better).
183
+ gene_score_list read_candidates(uint j) const {
184
+ ostringstream fn; fn << j;
185
+ boost::filesystem::path filepath(fn.str());
186
+ //cerr << "Opening file: '" << filepath << "'" << endl;
187
+
188
+ if (!exists(filepath)) {
189
+ //cerr << "Error: File '" << filepath << "' does not exist." << endl;
190
+ throw;
191
+ }
192
+
193
+ gene_score_list res;
194
+
195
+ // Open a filestream
196
+ ifstream fin(filepath);
197
+
198
+ // Ignore two header lines
199
+ fin.ignore(500, '\n');
200
+ fin.ignore(500, '\n');
201
+ //cout << "Next character is: '" << fin.peek() << "'" << endl;
202
+
203
+ // Iterate through the gene-score pairs in the file
204
+ for (gene_score_iterator gsit(fin); gsit != gene_score_iterator(); ++gsit) {
205
+ //cerr << "Adding " << gsit->first << '\t' << gsit->second << endl;
206
+ res.push_back(*gsit);
207
+ }
208
+
209
+ fin.close();
210
+
211
+ return res;
212
+ }
213
+
214
+
215
+ protected:
216
+
217
+ uint matrix_id;
218
+ uint experiment_id;
219
+ uint current_j;
220
+ pqxx::connection c;
221
+ pqxx::transaction<>* action;
222
+
223
+ double mean_auc_;
224
+
225
+ Fetcher fetcher;
226
+ Updater updater;
227
+ };
@@ -0,0 +1,86 @@
1
+ #include "auc_info.h"
2
+
3
+ #include <iostream>
4
+ #include <set>
5
+ #include <vector>
6
+ #include <string>
7
+ #include <list>
8
+ #include <map>
9
+ #include <sstream>
10
+ #include <pqxx/transactor.hxx>
11
+ #include <pqxx/result.hxx>
12
+ #include <boost/algorithm/string/join.hpp>
13
+
14
+ using std::cout;
15
+ using std::cerr;
16
+ using std::endl;
17
+ using std::set;
18
+ using std::vector;
19
+ using std::string;
20
+ using std::list;
21
+ using std::map;
22
+ using std::ostringstream;
23
+ using pqxx::transactor;
24
+ using pqxx::result;
25
+ using boost::algorithm::join;
26
+
27
+ typedef unsigned int uint;
28
+
29
+
30
+ //std::string join(const SequenceT<std::string>& strings, std::string join_str = "") {
31
+ // std::ostringstream o;
32
+ //
33
+ // SequenceT<string>::const_iterator i = strings.begin();
34
+ // o << *i;
35
+ // ++i;
36
+ //
37
+ // for (; i != strings.end(); ++i) {
38
+ // o << join_str << *i;
39
+ // }
40
+ //
41
+ // return o.str();
42
+ //}
43
+
44
+
45
+
46
+ class Updater : public transactor <> {
47
+ public:
48
+ Updater() : transactor<>("Updater") {}
49
+
50
+ uint experiment_id;
51
+ map<uint,auc_info> aucs;
52
+ string query;
53
+
54
+ void operator()(argument_type &T) {
55
+ result R;
56
+
57
+ if (aucs.size() == 0) {
58
+ cerr << "No updates necessary." << endl;
59
+ return;
60
+ }
61
+
62
+ query = make_known_correct_query().c_str();
63
+
64
+ try {
65
+ //R = T.exec(query);
66
+ cout << "Query:" << endl;
67
+ cout << query << endl;
68
+ } catch (pqxx::sql_error e) {
69
+ cerr << "SQL error in Fetcher transactor." << endl;
70
+ cerr << "Query: " << e.query() << endl;
71
+ cerr << "Error: " << e.what() << endl;
72
+ }
73
+ }
74
+
75
+ protected:
76
+ string make_known_correct_query() const {
77
+ ostringstream q;
78
+ q << "INSERT INTO rocs " << AUC_COLUMNS << " VALUES \n";
79
+ list<string> insertions;
80
+ for (map<uint,auc_info>::const_iterator i = aucs.begin(); i != aucs.end(); ++i) {
81
+ insertions.push_back( i->second.entry(experiment_id, i->first) );
82
+ }
83
+ q << join(insertions, ",\n") << ';';
84
+ return q.str();
85
+ }
86
+ };
data/lib/rocker.rb ADDED
@@ -0,0 +1,8 @@
1
# Put this file's directory at the front of the load path unless it is already
# listed, either as given or in expanded form.
here = File.dirname(__FILE__)
unless $:.include?(here) || $:.include?(File.expand_path(here))
  $:.unshift(here)
end

# Load the compiled extension, which defines the Rocker class.
require 'ext/rocker/rocker.so'

# Reopen the extension's class to attach the gem version.
class Rocker
  VERSION = '0.0.4'
end
data/script/console ADDED
@@ -0,0 +1,10 @@
1
#!/usr/bin/env ruby
# File: script/console
# Starts irb with completion and the rocker gem preloaded.

# Windows Ruby builds ship irb as a batch file. `(?:...)` is a non-capturing
# group; the previous `(:?...)` merely allowed an optional leading colon.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.dirname(__FILE__) + '/../lib/rocker.rb'}"
puts "Loading rocker gem"
exec "#{irb} #{libs} --simple-prompt"
data/script/destroy ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs the RubiGen "destroy" script against this gem's root directory.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try a plain require first; fall back to loading RubyGems when that fails.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# Drop a leading help flag before handing the arguments over.
ARGV.shift if %w[--help -h].include?(ARGV[0])

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Destroy.new.run(ARGV)
data/script/generate ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs the RubiGen "generate" script against this gem's root directory.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try a plain require first; fall back to loading RubyGems when that fails.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# Drop a leading help flag before handing the arguments over.
ARGV.shift if %w[--help -h].include?(ARGV[0])

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Generate.new.run(ARGV)
@@ -0,0 +1,3 @@
1
# Shared setup for the test suite: standard test libraries plus the gem itself.
require 'stringio'
require 'test/unit'
require File.join(File.dirname(__FILE__), '..', 'lib', 'rocker')
@@ -0,0 +1,11 @@
1
require File.join(File.dirname(__FILE__), 'test_helper.rb')

# Placeholder suite: only checks that the test harness itself runs.
class TestRocker < Test::Unit::TestCase

  # No fixtures are needed yet.
  def setup; end

  def test_truth
    assert(true)
  end
end
@@ -0,0 +1,21 @@
1
require "test/unit"

# Make the compiled extension in ext/rocker loadable by its bare file name.
$:.unshift File.dirname(__FILE__) + "/../ext/rocker"
require "rocker.so"

# Build the connection string passed to Rocker.new.
def database_string dbn = "crossval_development", u = "jwoods", p = "youwish1"
  ["dbname=#{dbn}", "user=#{u}", "password=#{p}"].join(" ")
end

# Smoke tests for the compiled extension; they print values rather than assert.
class TestRockerExtn < Test::Unit::TestCase
  # Constructing a Rocker runs the whole pipeline; report the resulting mean.
  def test_working
    rocker = Rocker.new(database_string, 1, 167)
    mean = rocker.mean_auc
    puts "Mean AUC was #{mean}"
  end

  def test_version_string
    puts Rocker::VERSION
  end

end
metadata ADDED
@@ -0,0 +1,122 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rocker
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 4
9
+ version: 0.0.4
10
+ platform: ruby
11
+ authors:
12
+ - John Woods
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-04-23 00:00:00 -05:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rubyforge
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 2
29
+ - 0
30
+ - 4
31
+ version: 2.0.4
32
+ type: :development
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: hoe
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ segments:
42
+ - 2
43
+ - 6
44
+ - 0
45
+ version: 2.6.0
46
+ type: :development
47
+ version_requirements: *id002
48
+ description: |-
49
+ This gem is very fast C++ code for calculating AUCs on results of cross-validation.
50
+
51
+ It is specific to the crossval database schema, which has not been released yet.
52
+
53
+ Chances are you will not find this very useful unless you are the author.
54
+
55
+ It is in gem form to ensure that each lab machine can compile its own arch-specific
56
+ version.
57
+ email:
58
+ - john.woods@marcottelab.org
59
+ executables: []
60
+
61
+ extensions:
62
+ - ext/rocker/extconf.rb
63
+ extra_rdoc_files:
64
+ - History.txt
65
+ - Manifest.txt
66
+ - PostInstall.txt
67
+ files:
68
+ - History.txt
69
+ - Manifest.txt
70
+ - PostInstall.txt
71
+ - README.rdoc
72
+ - Rakefile
73
+ - lib/rocker.rb
74
+ - script/console
75
+ - script/destroy
76
+ - script/generate
77
+ - test/test_helper.rb
78
+ - test/test_rocker.rb
79
+ - ext/rocker/extconf.rb
80
+ - ext/rocker/auc_info.h
81
+ - ext/rocker/constants.h
82
+ - ext/rocker/fetcher.h
83
+ - ext/rocker/updater.h
84
+ - ext/rocker/line_input_iterator.h
85
+ - ext/rocker/rocker.h
86
+ - ext/rocker/rocker.cpp
87
+ has_rdoc: true
88
+ homepage: http://github.com/MarcotteLabGit/rocker
89
+ licenses: []
90
+
91
+ post_install_message: PostInstall.txt
92
+ rdoc_options:
93
+ - --main
94
+ - README.rdoc
95
+ require_paths:
96
+ - lib
97
+ - ext/rocker
98
+ required_ruby_version: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ segments:
103
+ - 0
104
+ version: "0"
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ segments:
110
+ - 0
111
+ version: "0"
112
+ requirements: []
113
+
114
+ rubyforge_project: rocker
115
+ rubygems_version: 1.3.6
116
+ signing_key:
117
+ specification_version: 3
118
+ summary: This gem is very fast C++ code for calculating AUCs on results of cross-validation
119
+ test_files:
120
+ - test/test_helper.rb
121
+ - test/test_rocker.rb
122
+ - test/test_rocker_extn.rb