scbi_fqbin 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.DS_Store +0 -0
- data/.gitignore +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/{README.rdoc → README.md} +0 -0
- data/Rakefile +8 -28
- data/lib/scbi_fqbin.rb +3 -5
- data/lib/scbi_fqbin/fastabin.rb +411 -0
- data/lib/scbi_fqbin/fastq_file_c.rb +373 -0
- data/lib/scbi_fqbin/fbin_file.rb +1 -1
- data/lib/scbi_fqbin/t.rb +9 -0
- data/lib/scbi_fqbin/t2.rb +12 -0
- data/lib/scbi_fqbin/version.rb +3 -0
- data/lib_fqbin_src.zip +0 -0
- data/lib_fqbin_src/Makefile +66 -0
- data/lib_fqbin_src/fq +0 -0
- data/lib_fqbin_src/fq.c +165 -0
- data/lib_fqbin_src/hash_fqbin +0 -0
- data/lib_fqbin_src/hash_fqbin.c +212 -0
- data/lib_fqbin_src/idx_fqbin +21 -0
- data/lib_fqbin_src/iterate_fqbin +0 -0
- data/lib_fqbin_src/iterate_fqbin.c +136 -0
- data/lib_fqbin_src/lib_fqbin.c +1748 -0
- data/lib_fqbin_src/lib_fqbin.h +194 -0
- data/lib_fqbin_src/mk_fqbin +0 -0
- data/lib_fqbin_src/mk_fqbin.c +138 -0
- data/lib_fqbin_src/other/bwxform.c +915 -0
- data/lib_fqbin_src/other/bwxform.h +74 -0
- data/lib_fqbin_src/other/find_in_index.c +130 -0
- data/lib_fqbin_src/other/hash_fbin_nogzchunks.c +164 -0
- data/lib_fqbin_src/other/idx_fqbin +0 -0
- data/lib_fqbin_src/other/idx_fqbin.c +67 -0
- data/lib_fqbin_src/other/make_hsh.sh +14 -0
- data/lib_fqbin_src/other/rd_extras_fbin.c +45 -0
- data/lib_fqbin_src/read_fq +0 -0
- data/lib_fqbin_src/read_fq.c +143 -0
- data/lib_fqbin_src/read_fqbin +0 -0
- data/lib_fqbin_src/read_fqbin.c +101 -0
- data/lib_fqbin_src/sort_index +9 -0
- data/lib_fqbin_src/test.rb +13 -0
- data/scbi_fqbin.gemspec +25 -0
- data/test/build.rake +15 -0
- data/test/fbinfile +0 -0
- data/test/fbinfile.index +0 -0
- data/test/no_test_fill_file.rb +66 -0
- data/test/old/app.rb +43 -0
- data/test/old/bin/iterate_fastabin.rb +54 -0
- data/test/old/bin/mk_fastabin.rb +22 -0
- data/test/old/bin/rd_fastabin.rb +36 -0
- data/test/old/bin/rd_fq.rb +20 -0
- data/test/old/bioruby.rb +27 -0
- data/test/old/c/Makefile +34 -0
- data/test/old/c/fbin_lib.zip +0 -0
- data/test/old/c/iterate_fbin.c +54 -0
- data/test/old/c/libreria_gz.c +707 -0
- data/test/old/c/libreria_gz.h +127 -0
- data/test/old/c/main.c +86 -0
- data/test/old/c/mk_fbin.c +24 -0
- data/test/old/c/rd_seq_fbin.c +44 -0
- data/test/old/c/test_ffi/a.out +0 -0
- data/test/old/c/test_ffi/app.c +26 -0
- data/test/old/c/test_ffi/app.rb +19 -0
- data/test/old/c/test_ffi/liblibreria_gz.dylib +0 -0
- data/test/old/c/test_ffi/libmylibrary.dylib +0 -0
- data/test/old/c/test_ffi/my_library.rb +23 -0
- data/test/old/c/test_ffi/mylibrary.c +22 -0
- data/test/old/c/test_ffi/mylibrary.h +6 -0
- data/test/old/c/usage_instructions.txt +62 -0
- data/test/old/ext/Makefile +187 -0
- data/test/old/ext/Makefile.dario +34 -0
- data/test/old/ext/extconf.rb +8 -0
- data/test/old/ext/mk_fbin.c +24 -0
- data/test/old/ext/sample/extras.txt +4 -0
- data/{.gemtest → test/old/ext/sample/extras2.txt} +0 -0
- data/test/old/ext/sample/f1.fasta +10 -0
- data/test/old/ext/sample/f1.fasta.qual +10 -0
- data/test/old/ext/sample/f1.fbin +0 -0
- data/test/old/ext/sample/f1.fbin.index +0 -0
- data/test/old/ext/sample/main.c +86 -0
- data/test/old/ext/usage_instructions.txt +62 -0
- data/test/old/t_scbi_fastabin.rb +140 -0
- data/test/read_tests/10-original_sizes.sh +16 -0
- data/test/read_tests/20-fq_time.sh +23 -0
- data/test/read_tests/30-fbin_read_time.sh +23 -0
- data/test/read_tests/40-bsc_read_time.sh +21 -0
- data/test/read_tests/50-fq_time_x4.sh +25 -0
- data/test/read_tests/60-fbin_read_time_x4.sh +24 -0
- data/test/read_tests/70-bsc_read_time_x4.sh +32 -0
- data/test/results_bio_scbi_fasta.txt +11 -0
- data/test/{test_scbi_fbin_file.rb → scbi_fbin_file_test.rb} +0 -0
- data/test/speed.txt +81 -0
- data/test/t_scbi_fasta.rb +12 -0
- data/test/write_tests/10-original_sizes.sh +16 -0
- data/test/write_tests/20-zip_time.sh +17 -0
- data/test/write_tests/30-mk_fbin_time.sh +23 -0
- data/test/write_tests/31-mk_fbin_time_f30.sh +21 -0
- data/test/write_tests/40-gzip_time.sh +16 -0
- data/test/write_tests/41-bsc_time.sh +16 -0
- data/test/write_tests/50-zip_sizes.sh +16 -0
- data/test/write_tests/60-fbin_sizes.sh +17 -0
- data/test/write_tests/61-fbin_sizes_f30.sh +16 -0
- data/test/write_tests/70-gzip_sizes.sh +17 -0
- data/test/write_tests/80-bsc_sizes.sh +17 -0
- data/website/index.html +87 -0
- data/website/index.txt +81 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +159 -0
- data/website/template.html.erb +50 -0
- metadata +208 -95
- data/History.txt +0 -19
- data/Manifest.txt +0 -12
- data/PostInstall.txt +0 -7
- data/script/console +0 -10
- data/script/destroy +0 -14
- data/script/generate +0 -14
data/lib_fqbin_src/fq
ADDED
Binary file
|
data/lib_fqbin_src/fq.c
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
|
2
|
+
#include "lib_fqbin.h"
|
3
|
+
#include <stdio.h>
|
4
|
+
#include <ctype.h>
|
5
|
+
|
6
|
+
#include <unistd.h>
|
7
|
+
|
8
|
+
|
9
|
+
// process a fastq file adding it to fbin file
|
10
|
+
int iterate_fastq(char *fname, int only_extras, int output_fasta, int output_extras)
|
11
|
+
{
|
12
|
+
|
13
|
+
// allocate strings
|
14
|
+
char *name;
|
15
|
+
if ((name = malloc(MAXSEQNAME)) == NULL) {
|
16
|
+
puts("Memory allocation error!");
|
17
|
+
return EXIT_FAILURE;
|
18
|
+
}
|
19
|
+
|
20
|
+
char *fasta;
|
21
|
+
if ((fasta = malloc(MAXSEQLENGTH)) == NULL) {
|
22
|
+
puts("Memory allocation error!");
|
23
|
+
return EXIT_FAILURE;
|
24
|
+
}
|
25
|
+
|
26
|
+
char *qual;
|
27
|
+
if ((qual = malloc(MAXSEQLENGTH)) == NULL) {
|
28
|
+
puts("Memory allocation error!");
|
29
|
+
return EXIT_FAILURE;
|
30
|
+
}
|
31
|
+
|
32
|
+
char *comments;
|
33
|
+
if ((comments = malloc(MAXSEQLENGTH)) == NULL) {
|
34
|
+
puts("Memory allocation error!");
|
35
|
+
return EXIT_FAILURE;
|
36
|
+
}
|
37
|
+
|
38
|
+
static time_t curr_time=0;
|
39
|
+
static time_t prev_time=0;
|
40
|
+
|
41
|
+
prev_time=time(NULL);
|
42
|
+
|
43
|
+
FILE *fastq_file=NULL;
|
44
|
+
|
45
|
+
|
46
|
+
int valid=0;
|
47
|
+
int res=0;
|
48
|
+
int r=0;
|
49
|
+
// Open fasta and qual files
|
50
|
+
if (strcmp(fname,"-")==0){
|
51
|
+
fastq_file=stdin;
|
52
|
+
}else{
|
53
|
+
|
54
|
+
open_file(fname,&fastq_file);
|
55
|
+
}
|
56
|
+
|
57
|
+
if (fastq_file==NULL){
|
58
|
+
printf("TRESb\n");
|
59
|
+
}
|
60
|
+
|
61
|
+
// for each sequence on fastq file
|
62
|
+
while (valid=get_next_seq_fastq(fastq_file,&name,&fasta,&qual,&comments)){
|
63
|
+
if(valid==1)
|
64
|
+
{
|
65
|
+
r++;
|
66
|
+
|
67
|
+
if (!only_extras){
|
68
|
+
if (output_fasta){
|
69
|
+
printf(">%s %s\n%s\n", name, comments, fasta);
|
70
|
+
}else{
|
71
|
+
printf("@%s %s\n%s\n", name,comments, fasta);
|
72
|
+
printf("+%s\n%s\n",name,qual);
|
73
|
+
}
|
74
|
+
}
|
75
|
+
|
76
|
+
// if ((extras!=NULL) && (output_extras)) printf ("EXTRAS:%s\n",extras);
|
77
|
+
|
78
|
+
}else{
|
79
|
+
fprintf(stderr,"Invalid sequence found %s. Aborting import.\n",name);
|
80
|
+
res=-1;
|
81
|
+
break;
|
82
|
+
}
|
83
|
+
|
84
|
+
// if ((r%10000)==0) {
|
85
|
+
// }
|
86
|
+
|
87
|
+
}
|
88
|
+
|
89
|
+
curr_time=time(NULL);
|
90
|
+
printf("\nEnd fastq processing. %d seqs in %.0f s. Rate: %8.2f seqs/s\n",r,difftime(curr_time,prev_time),r/difftime(curr_time,prev_time));
|
91
|
+
|
92
|
+
// free mem
|
93
|
+
free(name);
|
94
|
+
free(fasta);
|
95
|
+
free(qual);
|
96
|
+
free(comments);
|
97
|
+
|
98
|
+
// close files
|
99
|
+
fclose(fastq_file);
|
100
|
+
|
101
|
+
return res;
|
102
|
+
}
|
103
|
+
|
104
|
+
// Prints the command-line help for fq to stdout and terminates the process
// with exit(-1). The single positional argument is the FASTQ input that main
// hands to iterate_fastq ("-" selects standard input).
void usage(){
    printf("Usage: fq [-f][-e][-E] fastq_file\n\n");
    printf(" fastq_file may be - to read from standard input\n");
    printf(" -f Output sequence in fasta format\n");
    printf(" -e Output extras for sequence\n");
    printf(" -E Output only extras for sequence\n");

    exit(-1);

}
|
113
|
+
|
114
|
+
|
115
|
+
|
116
|
+
/*******************************************************/
|
117
|
+
/* main */
|
118
|
+
/*******************************************************/
|
119
|
+
int main(int argc, char *argv[])
|
120
|
+
{
|
121
|
+
|
122
|
+
int ch;
|
123
|
+
|
124
|
+
int output_fasta = 0;
|
125
|
+
int output_extras = 0;
|
126
|
+
int only_extras = 0;
|
127
|
+
|
128
|
+
while ((ch = getopt(argc, argv, "feEh")) != -1) {
|
129
|
+
switch (ch) {
|
130
|
+
case 'f':
|
131
|
+
output_fasta = 1;
|
132
|
+
break;
|
133
|
+
case 'e':
|
134
|
+
output_extras = 1;
|
135
|
+
break;
|
136
|
+
case 'E':
|
137
|
+
output_extras = 1;
|
138
|
+
only_extras = 1;
|
139
|
+
break;
|
140
|
+
|
141
|
+
case 'h':
|
142
|
+
usage();
|
143
|
+
break;
|
144
|
+
case '?':
|
145
|
+
default:
|
146
|
+
usage();
|
147
|
+
}
|
148
|
+
}
|
149
|
+
|
150
|
+
argc -= optind;
|
151
|
+
argv += optind;
|
152
|
+
|
153
|
+
|
154
|
+
// check params
|
155
|
+
if (argc<1)
|
156
|
+
{
|
157
|
+
usage();
|
158
|
+
return -1;
|
159
|
+
}
|
160
|
+
|
161
|
+
int res=iterate_fastq(argv[0],only_extras, output_fasta, output_extras);
|
162
|
+
|
163
|
+
return res;
|
164
|
+
}
|
165
|
+
|
Binary file
|
@@ -0,0 +1,212 @@
|
|
1
|
+
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
#include <time.h>
|
5
|
+
|
6
|
+
|
7
|
+
#include <sys/types.h>
|
8
|
+
#include <sys/stat.h>
|
9
|
+
#include <fcntl.h>
|
10
|
+
#include <errno.h>
|
11
|
+
|
12
|
+
#include <zlib.h>
|
13
|
+
#include <zlib.h>
|
14
|
+
#include <stdlib.h>
|
15
|
+
|
16
|
+
// Maximum file name (including .idx)
|
17
|
+
#define MAXFNAME 512
|
18
|
+
|
19
|
+
// Maximum length of the name of a sequence
|
20
|
+
#define MAXSEQNAME 1024
|
21
|
+
#define MAXSEQLENGTH 150000000
|
22
|
+
#define DEBUG 1
|
23
|
+
#define FALSE 0
|
24
|
+
#define TRUE 1
|
25
|
+
|
26
|
+
#define INVALID_FASTQ_FORMAT -5
|
27
|
+
#define INVALID_FASTA_FORMAT -6
|
28
|
+
|
29
|
+
#define SEQ_METADATA 10000
|
30
|
+
|
31
|
+
|
32
|
+
// Returns the offset of the end of `filename` (its current size in bytes),
// obtained by opening the file and seeking to its end.
// Returns -1 and reports on stderr when the file cannot be opened or seeked.
long long last_chunk_file(char *filename){

    // the file is only measured, never written, so read-only access suffices
    int file=open(filename,O_RDONLY);
    if (file==-1) {
        fprintf(stderr,"error %d opening file %s :%s\n",errno,filename,strerror(errno));
        return -1;
    }

    //goto end of file
    long long pos=lseek(file,0,SEEK_END);
    if (pos==-1) {
        int saved_errno=errno; // keep the lseek error; close() may overwrite errno
        close(file);
        fprintf(stderr,"error %d seeking file %s :%s\n",saved_errno,filename,strerror(saved_errno));
        return -1;
    }

    close(file);

    return pos;
}
|
45
|
+
|
46
|
+
// creates a hash from an index file with the desired chunk size. Chunk size can be
|
47
|
+
// adjusted to fit a good compromise between access speed and used space.
|
48
|
+
int hash_index_file(char *filename, int chunk_size, int skip_sort)
|
49
|
+
{
|
50
|
+
|
51
|
+
char hash_file_name[MAXFNAME];
|
52
|
+
char indexname[MAXFNAME];
|
53
|
+
char sorted_indexname[MAXFNAME];
|
54
|
+
|
55
|
+
char sname[MAXSEQNAME];// sequence name
|
56
|
+
long long beginH, gz_chunk=0;
|
57
|
+
char tmp[SEQ_METADATA];
|
58
|
+
int res=0;
|
59
|
+
int error;
|
60
|
+
|
61
|
+
// to save min, max sequences and current chunk
|
62
|
+
char min_name[MAXSEQNAME];
|
63
|
+
char max_name[MAXSEQNAME];
|
64
|
+
long long current_chunk=0;
|
65
|
+
long long count=0;
|
66
|
+
|
67
|
+
|
68
|
+
strcpy(min_name,"");
|
69
|
+
strcpy(max_name,"");
|
70
|
+
|
71
|
+
// calc index and hash name
|
72
|
+
snprintf(indexname,MAXFNAME,"%s.index",filename);
|
73
|
+
snprintf(hash_file_name,MAXFNAME,"%s.index.hash",filename);
|
74
|
+
|
75
|
+
// sort index file by external command
|
76
|
+
if(skip_sort==0)
|
77
|
+
{
|
78
|
+
char cmd[10000];
|
79
|
+
snprintf(cmd,10000,"sort_index %s",indexname);
|
80
|
+
system(cmd);
|
81
|
+
}
|
82
|
+
|
83
|
+
// use sorted index
|
84
|
+
snprintf(sorted_indexname,MAXFNAME,"%s.index.sorted",filename);
|
85
|
+
|
86
|
+
// open hash file
|
87
|
+
gzFile gzhash_file=gzopen(hash_file_name,"wb");
|
88
|
+
|
89
|
+
// open sorted index file
|
90
|
+
gzFile gzsorted_file_index=gzopen(sorted_indexname,"r");
|
91
|
+
|
92
|
+
// open output index file
|
93
|
+
// int file_index=open(indexname,flags,0644);
|
94
|
+
gzFile gzfile_index=gzopen(indexname,"w");
|
95
|
+
|
96
|
+
// write header
|
97
|
+
gzprintf(gzfile_index,"UMACOMPRESSEDFORMAT 1 0 0 999999999999 999999999999\n");
|
98
|
+
|
99
|
+
//reopen
|
100
|
+
gzclose(gzfile_index);
|
101
|
+
gzfile_index=gzopen(indexname,"ab");
|
102
|
+
|
103
|
+
|
104
|
+
if (gzsorted_file_index==NULL) {
|
105
|
+
fprintf(stderr,"error opening gzsorted_file_index :%s\n",gzerror(gzsorted_file_index,&error));
|
106
|
+
return -2;
|
107
|
+
}
|
108
|
+
|
109
|
+
if (gzfile_index==NULL) {
|
110
|
+
fprintf(stderr,"error opening gzfile_index :%s\n",gzerror(gzfile_index,&error));
|
111
|
+
return -2;
|
112
|
+
}
|
113
|
+
|
114
|
+
if (gzhash_file==NULL) {
|
115
|
+
fprintf(stderr,"error opening gzhash_file :%s\n",gzerror(gzhash_file,&error));
|
116
|
+
return -2;
|
117
|
+
}
|
118
|
+
|
119
|
+
// podria leerse saltando linea 1, y luego leyendo 10000 lineas sin sscanf
|
120
|
+
|
121
|
+
// repeat until EOF
|
122
|
+
while ( gzgets(gzsorted_file_index,tmp,sizeof(tmp))!=Z_NULL ) {
|
123
|
+
|
124
|
+
// parse string
|
125
|
+
sscanf(tmp,"%s %lld %lld",sname,&gz_chunk,&beginH);
|
126
|
+
|
127
|
+
if(strcmp(sname,"UMACOMPRESSEDFORMAT")!=0) // valid index line
|
128
|
+
{
|
129
|
+
|
130
|
+
// clear chunk_data if any
|
131
|
+
// if (gz_chunk!=current_chunk){
|
132
|
+
if ((count%chunk_size)==0){
|
133
|
+
if (strcmp(min_name,"")!=0){
|
134
|
+
// there is data to write
|
135
|
+
res=gzprintf(gzhash_file,"%s %s %lld\n",min_name,max_name,current_chunk);
|
136
|
+
}
|
137
|
+
|
138
|
+
strcpy(min_name,"");
|
139
|
+
strcpy(max_name,"");
|
140
|
+
// current_chunk=gz_chunk;
|
141
|
+
current_chunk = gztell(gzfile_index);
|
142
|
+
|
143
|
+
//reopen new gzchunk
|
144
|
+
gzclose(gzfile_index);
|
145
|
+
current_chunk = last_chunk_file(indexname);
|
146
|
+
gzfile_index=gzopen(indexname,"ab");
|
147
|
+
|
148
|
+
}
|
149
|
+
|
150
|
+
// write line to current gzchunk in index
|
151
|
+
gzprintf(gzfile_index,tmp);
|
152
|
+
|
153
|
+
// save min_name
|
154
|
+
if((strcmp(min_name,"")==0) || (strcmp(sname,min_name)<0))
|
155
|
+
{
|
156
|
+
// replace min_name
|
157
|
+
strcpy(min_name,sname);
|
158
|
+
}
|
159
|
+
|
160
|
+
//save max_name
|
161
|
+
if((strcmp(max_name,"")==0) || (strcmp(sname,max_name)>0))
|
162
|
+
{
|
163
|
+
strcpy(max_name,sname);
|
164
|
+
}
|
165
|
+
|
166
|
+
count++;
|
167
|
+
}
|
168
|
+
|
169
|
+
}
|
170
|
+
|
171
|
+
if (strcmp(min_name,"")!=0){
|
172
|
+
// there are data to write
|
173
|
+
res=gzprintf(gzhash_file,"%s %s %lld\n",min_name,max_name,current_chunk);
|
174
|
+
}
|
175
|
+
|
176
|
+
// close files
|
177
|
+
gzclose(gzhash_file);
|
178
|
+
gzclose(gzfile_index);
|
179
|
+
gzclose(gzsorted_file_index);
|
180
|
+
|
181
|
+
return 0;
|
182
|
+
}
|
183
|
+
|
184
|
+
|
185
|
+
/*******************************************************/
|
186
|
+
/* main */
|
187
|
+
/*******************************************************/
|
188
|
+
// Entry point of hash_fqbin.
// Usage: hash_fqbin fqbin_file [chunk_size [--skip_sort]]
// chunk_size defaults to 10000 and must be a positive integer. The presence of
// a fourth argument (whatever its text) disables the external sort step.
// Returns the result of hash_index_file, or -1 on invalid arguments.
int main(int argc, char *argv[])
{
    // check params
    if (argc<2)
    {
        printf("Usage %s fqbin_file [chunk_size [--skip_sort]]\n\n",argv[0]);
        return -1;
    }

    int chunk_size=10000;
    int skip_sort=0;

    // chunk_size is read whenever it is present, including when --skip_sort
    // follows it
    if (argc>=3){
        char *end=NULL;
        errno=0;
        long parsed=strtol(argv[2],&end,10);

        // reject empty/non-numeric text, trailing junk, overflow and values
        // that are not positive or do not fit in an int
        if (errno!=0 || end==argv[2] || *end!='\0' || parsed<=0 || (long)(int)parsed!=parsed){
            fprintf(stderr,"Invalid chunk_size '%s': a positive integer is required\n",argv[2]);
            return -1;
        }
        chunk_size=(int)parsed;
    }

    if (argc>=4){
        skip_sort=1;
    }

    int res=hash_index_file(argv[1],chunk_size, skip_sort);

    return res;
}
|
212
|
+
|
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env bash
#
# Regenerates the index of an fqbin file.
# The file is moved aside as "<file>.old", its contents are streamed through
# iterate_fqbin into "mk_fqbin -i -o <file>", and then the rebuilt data file is
# discarded and the original one is moved back into place.
#
# Usage: idx_fqbin fqbin_file.fqbin
# Exit status: 0 on success, non-zero on bad arguments or when a step fails.

# make the pipeline fail when either side of it fails
set -o pipefail

if [ ! -e "$1" ]; then
  echo "File $1 does not exists"
  echo "Usage: $0 fqbin_file.fqbin"
  exit 1
fi


if [ -e "$1.index" ]; then
  echo "Index file $1.index already exists"
  exit 1
fi

echo "Backing up file as $1.old"
mv -- "$1" "$1.old" || exit 1

echo "Regenerating index"
iterate_fqbin "$1.old" | mk_fqbin -i -o "$1"
status=$?

# restore the original data file whether or not the regeneration worked
rm -f -- "$1"
mv -- "$1.old" "$1" || exit 1

if [ "$status" -ne 0 ]; then
  echo "Index regeneration failed (status $status)" >&2
  exit "$status"
fi
|
Binary file
|
@@ -0,0 +1,136 @@
|
|
1
|
+
#include "lib_fqbin.h"
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <ctype.h>
|
4
|
+
#include <string.h>
|
5
|
+
#include <stdlib.h>
|
6
|
+
#include <unistd.h>
|
7
|
+
|
8
|
+
|
9
|
+
// Prints the command-line help for iterate_fqbin to stdout and terminates the
// process with exit(-1). The synopsis lists all three output selectors that
// main parses (-F, -q and -e).
void usage(){

    printf("Usage: iterate_fqbin [-F|-q|-e] fqbin_file\n\n");
    printf("By default outputs in fastq format\n\n");
    printf(" -F Output only sequence in fasta format\n");
    printf(" -q Output only qualities in phred format\n\n");
    printf(" -e Output only extras\n\n");

    exit(-1);

}
|
20
|
+
|
21
|
+
// Prints every record obtained from read_data_sequential() to stdout.
//
// The output format is chosen by the first non-zero flag, in this order:
//   only_fasta  - ">name extras" followed by the sequence in lines of 70 chars
//   only_qual   - ">name extras" followed by the qualities as numbers
//                 (character value minus 33), 30 per line
//   only_extras - ">name extras" followed by the extras on their own line
//   none        - a FASTQ record ("@name extras", sequence, "+", qualities)
//
// Fields that come back as NULL are printed as empty strings.
// Returns the first non-zero value produced by read_data_sequential().
int print_file(struct file_data *filed, int only_fasta, int only_qual, int only_extras){
    char *sname=NULL;
    char *fasta=NULL;
    char *qual=NULL;
    char *extras=NULL;

    int res=0;

    while ((res=read_data_sequential(filed, &sname, &fasta, &qual, &extras))==0)
    {
        // never hand a NULL pointer to %s or strlen
        const char *name_text = (sname!=NULL) ? sname : "";
        const char *fasta_text = (fasta!=NULL) ? fasta : "";
        const char *qual_text = (qual!=NULL) ? qual : "";
        const char *extras_text = (extras!=NULL) ? extras : "";

        if (only_fasta){

            printf(">%s %s\n", name_text, extras_text);

            // wrap the sequence at 70 characters per line
            size_t len=strlen(fasta_text);
            for(size_t i = 0; i < len; i+=70)
            {
                printf("%.70s\n", fasta_text+i);
            }
        }else if (only_qual){
            printf(">%s %s\n", name_text, extras_text);
            if (qual!=NULL){
                size_t len=strlen(qual);
                for(size_t i = 0; i < len; i++)
                {
                    printf("%02d ", qual[i]-33);
                    // break the line every 30 values and after the last one
                    if (((i+1)%30 == 0) || (i==len-1)) printf("\n");
                }
            }

        }else if (only_extras){
            printf(">%s %s\n", name_text, extras_text);
            if (extras!=NULL) printf ("%s\n",extras);
        }else{
            printf("@%s %s\n%s\n", name_text, extras_text, fasta_text);
            printf("+\n%s\n",qual_text);
        }

        // release the per-record buffers before the next read
        // NOTE(review): sname is not released here, as in the original code -
        // confirm who owns that buffer.
        free(fasta);fasta=NULL;
        free(qual);qual=NULL;
        free(extras);extras=NULL;
    }

    return res;
}
|
74
|
+
|
75
|
+
|
76
|
+
|
77
|
+
/*******************************************************/
|
78
|
+
/* main */
|
79
|
+
/*******************************************************/
|
80
|
+
int main(int argc, char *argv[])
|
81
|
+
{
|
82
|
+
|
83
|
+
//gzFile gzf_bin;
|
84
|
+
// struct file_data filed;
|
85
|
+
|
86
|
+
struct file_data *filed=NULL;
|
87
|
+
|
88
|
+
int ch;
|
89
|
+
|
90
|
+
int output_fasta = 0;
|
91
|
+
int output_qual = 0;
|
92
|
+
int output_extras = 0;
|
93
|
+
|
94
|
+
while ((ch = getopt(argc, argv, "Fqeh")) != -1) {
|
95
|
+
switch (ch) {
|
96
|
+
case 'F':
|
97
|
+
output_fasta = 1;
|
98
|
+
break;
|
99
|
+
case 'q':
|
100
|
+
output_qual=1;
|
101
|
+
break;
|
102
|
+
case 'e':
|
103
|
+
output_extras=1;
|
104
|
+
break;
|
105
|
+
case 'h':
|
106
|
+
usage();
|
107
|
+
break;
|
108
|
+
case '?':
|
109
|
+
default:
|
110
|
+
usage();
|
111
|
+
}
|
112
|
+
}
|
113
|
+
|
114
|
+
argc -= optind;
|
115
|
+
argv += optind;
|
116
|
+
// printf("argc: %d", argc);
|
117
|
+
// printf("argv: %s", argv[0]);
|
118
|
+
|
119
|
+
if (argc!=1)
|
120
|
+
{
|
121
|
+
usage();
|
122
|
+
}
|
123
|
+
|
124
|
+
// initialize reads
|
125
|
+
if (initialize_sequential_reads(&filed, argv[0])==-1){
|
126
|
+
printf("File %s does not exists",argv[0]);
|
127
|
+
exit(-1);
|
128
|
+
}
|
129
|
+
|
130
|
+
int res=print_file(filed,output_fasta,output_qual,output_extras);
|
131
|
+
|
132
|
+
close_sequential_reads(filed);
|
133
|
+
|
134
|
+
return res;
|
135
|
+
}
|
136
|
+
|