bscampp 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,179 @@
1
+ #include <iostream>
2
+ #include <fstream>
3
+ #include <vector>
4
+ #include <string>
5
+ #include <omp.h>
6
+
7
+
8
// Distance stored in a result slot that no reference sequence has claimed yet.
static const int UNSET_DISTANCE = 999999999;

// Names and sequences parsed from one FASTA file; names[i] labels seqs[i].
struct fasta_records {
    std::vector<std::string> names;
    std::vector<std::string> seqs;
};

// Convert one numeric command line argument with std::stoi.
// On failure a message naming `label` is written to stderr and false is
// returned, so a bad argument never escapes as an uncaught exception.
static bool parse_int_arg( const char *text, const char *label, int &value ){
    try {
        value = std::stoi(text);
        return true;
    } catch( const std::exception & ){
        std::cerr << "Invalid " << label << " '" << text << "'. Bailing out." << std::endl;
        return false;
    }
}

// Read all FASTA records from `in`.
//   expected_records  only a capacity hint; the vectors grow as needed, so a
//                     file holding more records than announced is still safe.
// Parsing rules:
//   - a line starting with '>' opens a record, the rest of the line is its name
//   - an empty line closes the current record
//   - a sequence line containing a space discards the current record
//   - sequence lines of one record are concatenated
static fasta_records read_fasta( std::istream &in, int expected_records ){
    fasta_records out;
    if( expected_records > 0 ){
        out.names.reserve(expected_records);
        out.seqs.reserve(expected_records);
    }

    std::string line, name, content;
    // Test the stream itself rather than .good(): a last line without a
    // trailing newline sets eofbit but is still a successful read.
    while( std::getline( in, line ) ){
        if( line.empty() || line[0] == '>' ){ // Identifier marker
            if( !name.empty() ){ // Store the record that just ended
                out.names.push_back(name);
                out.seqs.push_back(content);
                name.clear();
            }
            if( !line.empty() ){
                name = line.substr(1);
            }
            content.clear();
        } else if( !name.empty() ){
            if( line.find(' ') != std::string::npos ){ // Invalid sequence--no spaces allowed
                name.clear();
                content.clear();
            } else {
                content += line;
            }
        }
    }

    if( !name.empty() ){ // Store the record that was still open at end of file
        out.names.push_back(name);
        out.seqs.push_back(content);
    }
    return out;
}

// Position of the first largest entry of `dist` (0 when `dist` has one entry).
static int index_of_largest( const std::vector<int> &dist ){
    int largest = 0;
    for( int i = 1; i < static_cast<int>(dist.size()); i++ ){
        if( dist[i] > dist[largest] ){
            largest = i;
        }
    }
    return largest;
}

// Rank the reference sequences against one query.
// Only the query positions that are not '-' are compared. A reference that
// is too short to have a character at such a position counts as a mismatch
// there instead of being read out of bounds.
//   best_dist / best_index  result slots (same non-zero length); on return
//                           they hold the kept distances and the matching
//                           indices into `refs`, in no particular order.
// Returns the number of non-gap positions of the query.
static int rank_references( const std::string &query,
                            const std::vector<std::string> &refs,
                            std::vector<int> &best_dist,
                            std::vector<int> &best_index ){
    std::vector<std::string::size_type> residue_pos;
    residue_pos.reserve(query.size());
    for( std::string::size_type i = 0; i < query.size(); i++ ){
        if( query[i] != '-' ){
            residue_pos.push_back(i);
        }
    }

    int worst = 0; // slot currently holding the largest kept distance
    for( int c1 = 0; c1 < static_cast<int>(refs.size()); c1++ ){
        const std::string &ref = refs[c1];
        int mismatches = 0;
        for( std::string::size_type pos : residue_pos ){
            if( pos >= ref.size() || ref[pos] != query[pos] ){
                mismatches++;
                if( mismatches > best_dist[worst] ){
                    break; // already worse than every kept candidate
                }
            }
        }
        if( mismatches <= best_dist[worst] ){
            // Replace the worst kept candidate, then find the new worst one.
            best_dist[worst] = mismatches;
            best_index[worst] = c1;
            worst = index_of_largest(best_dist);
        }
    }
    return static_cast<int>(residue_pos.size());
}

// Entry point.
// For every query sequence, find the reference sequences closest by Hamming
// distance over the query's non-gap positions, and write one line per query:
//   query_name:non_gap_positions,ref_name:distance,ref_name:distance,...
// Queries are processed by an OpenMP parallel loop, so the order of the
// output lines is not fixed.
// Returns 0 on success and -1 on a usage, argument or file error.
int main( int argc, char **argv ){
    if( argc <= 6 ){
        std::cerr << "Usage: "<<argv[0]<<" [ref infile] [nbr ref records] [query infile] [nbr query records] [outfile] [nbr of leaves returned]" << std::endl;
        return -1;
    }

    // Parse every numeric argument once, before any parallel work starts.
    int ref_hint = 0;
    int query_hint = 0;
    int size = 0;
    if( !parse_int_arg(argv[2], "number of reference records", ref_hint)
        || !parse_int_arg(argv[4], "number of query records", query_hint)
        || !parse_int_arg(argv[6], "number of leaves returned", size) ){
        return -1;
    }
    if( size < 1 ){
        std::cerr << "The number of leaves returned must be at least 1. Bailing out." << std::endl;
        return -1;
    }

    // read in the reference sequences first
    std::ifstream input_q(argv[1]);
    if(!input_q.good()){
        std::cerr << "Error opening '"<<argv[1]<<"'. Bailing out." << std::endl;
        return -1;
    }
    const fasta_records refs = read_fasta(input_q, ref_hint);

    // read in query sequences second
    std::ifstream input(argv[3]);
    if(!input.good()){
        std::cerr << "Error opening '"<<argv[3]<<"'. Bailing out." << std::endl;
        return -1;
    }
    const fasta_records queries = read_fasta(input, query_hint);

    std::ofstream outFile(argv[5]);
    if(!outFile.good()){
        std::cerr << "Error opening '"<<argv[5]<<"'. Bailing out." << std::endl;
        return -1;
    }

    const int count1 = static_cast<int>(refs.names.size());
    const int count2 = static_cast<int>(queries.names.size());

    #pragma omp parallel for
    for (int c2=0; c2<count2; c2++){ //query seq array
        std::vector<int> best_homolog(size, UNSET_DISTANCE);
        std::vector<int> best_homolog_index(size, 0);
        const int hom_nbr = rank_references(queries.seqs[c2], refs.seqs,
                                            best_homolog, best_homolog_index);

        // Build the whole line outside the critical section so that threads
        // only serialize on the actual write.
        std::string record = queries.names[c2];
        record += ":";
        record += std::to_string(hom_nbr);
        for (int i=0; i<size; i++){
            record += ",";
            // Slots that were never filled keep index 0; with no reference
            // records at all there is no name to print for them.
            if (best_homolog_index[i] < count1){
                record += refs.names[best_homolog_index[i]];
            }
            record += ":";
            record += std::to_string(best_homolog[i]);
        }

        #pragma omp critical
        {
            outFile << record << '\n';
        }
    }

    outFile.close();
    if(outFile.fail()){
        std::cerr << "Error writing '"<<argv[5]<<"'. Bailing out." << std::endl;
        return -1;
    }
    return 0;
}
@@ -0,0 +1,161 @@
1
+ #include <iostream>
2
+ #include <fstream>
3
+ #include <vector>
4
+ #include <string>
5
+ #include <omp.h>
6
+
7
+
8
// Distance stored in a result slot that no reference sequence has claimed yet.
static const int UNSET_DISTANCE = 999999999;

// Names and sequences parsed from one FASTA file; names[i] labels seqs[i].
struct fasta_records {
    std::vector<std::string> names;
    std::vector<std::string> seqs;
};

// Convert one numeric command line argument with std::stoi.
// On failure a message naming `label` is written to stderr and false is
// returned, so a bad argument never escapes as an uncaught exception.
static bool parse_int_arg( const char *text, const char *label, int &value ){
    try {
        value = std::stoi(text);
        return true;
    } catch( const std::exception & ){
        std::cerr << "Invalid " << label << " '" << text << "'. Bailing out." << std::endl;
        return false;
    }
}

// Read all FASTA records from `in`.
//   expected_records  only a capacity hint; the vectors grow as needed, so a
//                     file holding more records than announced is still safe.
// Parsing rules:
//   - a line starting with '>' opens a record, the rest of the line is its name
//   - an empty line closes the current record
//   - a sequence line containing a space discards the current record
//   - sequence lines of one record are concatenated
static fasta_records read_fasta( std::istream &in, int expected_records ){
    fasta_records out;
    if( expected_records > 0 ){
        out.names.reserve(expected_records);
        out.seqs.reserve(expected_records);
    }

    std::string line, name, content;
    // Test the stream itself rather than .good(): a last line without a
    // trailing newline sets eofbit but is still a successful read.
    while( std::getline( in, line ) ){
        if( line.empty() || line[0] == '>' ){ // Identifier marker
            if( !name.empty() ){ // Store the record that just ended
                out.names.push_back(name);
                out.seqs.push_back(content);
                name.clear();
            }
            if( !line.empty() ){
                name = line.substr(1);
            }
            content.clear();
        } else if( !name.empty() ){
            if( line.find(' ') != std::string::npos ){ // Invalid sequence--no spaces allowed
                name.clear();
                content.clear();
            } else {
                content += line;
            }
        }
    }

    if( !name.empty() ){ // Store the record that was still open at end of file
        out.names.push_back(name);
        out.seqs.push_back(content);
    }
    return out;
}

// Position of the first largest entry of `dist` (0 when `dist` has one entry).
static int index_of_largest( const std::vector<int> &dist ){
    int largest = 0;
    for( int i = 1; i < static_cast<int>(dist.size()); i++ ){
        if( dist[i] > dist[largest] ){
            largest = i;
        }
    }
    return largest;
}

// Rank the reference sequences against one query by Hamming distance.
// Each reference is compared over its own length. A position the query is
// too short to have counts as a mismatch instead of being read out of bounds.
//   best_dist / best_index  result slots (same non-zero length); on return
//                           they hold the kept distances and the matching
//                           indices into `refs`, in no particular order.
static void rank_references( const std::string &query,
                             const std::vector<std::string> &refs,
                             std::vector<int> &best_dist,
                             std::vector<int> &best_index ){
    int worst = 0; // slot currently holding the largest kept distance
    for( int c1 = 0; c1 < static_cast<int>(refs.size()); c1++ ){
        const std::string &ref = refs[c1];
        int mismatches = 0;
        for( std::string::size_type i = 0; i < ref.size(); i++ ){
            if( i >= query.size() || ref[i] != query[i] ){
                mismatches++;
                if( mismatches > best_dist[worst] ){
                    break; // already worse than every kept candidate
                }
            }
        }
        if( mismatches <= best_dist[worst] ){
            // Replace the worst kept candidate, then find the new worst one.
            best_dist[worst] = mismatches;
            best_index[worst] = c1;
            worst = index_of_largest(best_dist);
        }
    }
}

// Entry point.
// For every query sequence, find the reference sequences closest by Hamming
// distance and write one line per query:
//   query_name,ref_name:distance,ref_name:distance,...
// Queries are processed by an OpenMP parallel loop, so the order of the
// output lines is not fixed.
// Returns 0 on success and -1 on a usage, argument or file error.
int main( int argc, char **argv ){
    if( argc <= 6 ){
        std::cerr << "Usage: "<<argv[0]<<" [ref infile] [nbr ref records] [query infile] [nbr query records] [outfile] [nbr of leaves returned]" << std::endl;
        return -1;
    }

    // Parse every numeric argument once, before any parallel work starts.
    int ref_hint = 0;
    int query_hint = 0;
    int size = 0;
    if( !parse_int_arg(argv[2], "number of reference records", ref_hint)
        || !parse_int_arg(argv[4], "number of query records", query_hint)
        || !parse_int_arg(argv[6], "number of leaves returned", size) ){
        return -1;
    }
    if( size < 1 ){
        std::cerr << "The number of leaves returned must be at least 1. Bailing out." << std::endl;
        return -1;
    }

    // reference sequences
    std::ifstream input_q(argv[1]);
    if(!input_q.good()){
        std::cerr << "Error opening '"<<argv[1]<<"'. Bailing out." << std::endl;
        return -1;
    }
    const fasta_records refs = read_fasta(input_q, ref_hint);

    // query sequences
    std::ifstream input(argv[3]);
    if(!input.good()){
        std::cerr << "Error opening '"<<argv[3]<<"'. Bailing out." << std::endl;
        return -1;
    }
    const fasta_records queries = read_fasta(input, query_hint);

    std::ofstream outFile(argv[5]);
    if(!outFile.good()){
        std::cerr << "Error opening '"<<argv[5]<<"'. Bailing out." << std::endl;
        return -1;
    }

    const int count1 = static_cast<int>(refs.names.size());
    const int count2 = static_cast<int>(queries.names.size());

    #pragma omp parallel for
    for (int c2=0; c2<count2; c2++){ //query seq array
        std::vector<int> best_hamming(size, UNSET_DISTANCE);
        std::vector<int> best_index(size, 0);
        rank_references(queries.seqs[c2], refs.seqs, best_hamming, best_index);

        // Build the whole line outside the critical section so that threads
        // only serialize on the actual write.
        std::string record = queries.names[c2];
        for (int i=0; i<size; i++){
            record += ",";
            // Slots that were never filled keep index 0; with no reference
            // records at all there is no name to print for them.
            if (best_index[i] < count1){
                record += refs.names[best_index[i]];
            }
            record += ":";
            record += std::to_string(best_hamming[i]);
        }

        #pragma omp critical
        {
            outFile << record << '\n';
        }
    }

    outFile.close();
    if(outFile.fail()){
        std::cerr << "Error writing '"<<argv[5]<<"'. Bailing out." << std::endl;
        return -1;
    }
    return 0;
}
bscampp/tools/pplacer ADDED
Binary file