scbi_fqbin 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (116) hide show
  1. checksums.yaml +7 -0
  2. data/.DS_Store +0 -0
  3. data/.gitignore +14 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/{README.rdoc → README.md} +0 -0
  7. data/Rakefile +8 -28
  8. data/lib/scbi_fqbin.rb +3 -5
  9. data/lib/scbi_fqbin/fastabin.rb +411 -0
  10. data/lib/scbi_fqbin/fastq_file_c.rb +373 -0
  11. data/lib/scbi_fqbin/fbin_file.rb +1 -1
  12. data/lib/scbi_fqbin/t.rb +9 -0
  13. data/lib/scbi_fqbin/t2.rb +12 -0
  14. data/lib/scbi_fqbin/version.rb +3 -0
  15. data/lib_fqbin_src.zip +0 -0
  16. data/lib_fqbin_src/Makefile +66 -0
  17. data/lib_fqbin_src/fq +0 -0
  18. data/lib_fqbin_src/fq.c +165 -0
  19. data/lib_fqbin_src/hash_fqbin +0 -0
  20. data/lib_fqbin_src/hash_fqbin.c +212 -0
  21. data/lib_fqbin_src/idx_fqbin +21 -0
  22. data/lib_fqbin_src/iterate_fqbin +0 -0
  23. data/lib_fqbin_src/iterate_fqbin.c +136 -0
  24. data/lib_fqbin_src/lib_fqbin.c +1748 -0
  25. data/lib_fqbin_src/lib_fqbin.h +194 -0
  26. data/lib_fqbin_src/mk_fqbin +0 -0
  27. data/lib_fqbin_src/mk_fqbin.c +138 -0
  28. data/lib_fqbin_src/other/bwxform.c +915 -0
  29. data/lib_fqbin_src/other/bwxform.h +74 -0
  30. data/lib_fqbin_src/other/find_in_index.c +130 -0
  31. data/lib_fqbin_src/other/hash_fbin_nogzchunks.c +164 -0
  32. data/lib_fqbin_src/other/idx_fqbin +0 -0
  33. data/lib_fqbin_src/other/idx_fqbin.c +67 -0
  34. data/lib_fqbin_src/other/make_hsh.sh +14 -0
  35. data/lib_fqbin_src/other/rd_extras_fbin.c +45 -0
  36. data/lib_fqbin_src/read_fq +0 -0
  37. data/lib_fqbin_src/read_fq.c +143 -0
  38. data/lib_fqbin_src/read_fqbin +0 -0
  39. data/lib_fqbin_src/read_fqbin.c +101 -0
  40. data/lib_fqbin_src/sort_index +9 -0
  41. data/lib_fqbin_src/test.rb +13 -0
  42. data/scbi_fqbin.gemspec +25 -0
  43. data/test/build.rake +15 -0
  44. data/test/fbinfile +0 -0
  45. data/test/fbinfile.index +0 -0
  46. data/test/no_test_fill_file.rb +66 -0
  47. data/test/old/app.rb +43 -0
  48. data/test/old/bin/iterate_fastabin.rb +54 -0
  49. data/test/old/bin/mk_fastabin.rb +22 -0
  50. data/test/old/bin/rd_fastabin.rb +36 -0
  51. data/test/old/bin/rd_fq.rb +20 -0
  52. data/test/old/bioruby.rb +27 -0
  53. data/test/old/c/Makefile +34 -0
  54. data/test/old/c/fbin_lib.zip +0 -0
  55. data/test/old/c/iterate_fbin.c +54 -0
  56. data/test/old/c/libreria_gz.c +707 -0
  57. data/test/old/c/libreria_gz.h +127 -0
  58. data/test/old/c/main.c +86 -0
  59. data/test/old/c/mk_fbin.c +24 -0
  60. data/test/old/c/rd_seq_fbin.c +44 -0
  61. data/test/old/c/test_ffi/a.out +0 -0
  62. data/test/old/c/test_ffi/app.c +26 -0
  63. data/test/old/c/test_ffi/app.rb +19 -0
  64. data/test/old/c/test_ffi/liblibreria_gz.dylib +0 -0
  65. data/test/old/c/test_ffi/libmylibrary.dylib +0 -0
  66. data/test/old/c/test_ffi/my_library.rb +23 -0
  67. data/test/old/c/test_ffi/mylibrary.c +22 -0
  68. data/test/old/c/test_ffi/mylibrary.h +6 -0
  69. data/test/old/c/usage_instructions.txt +62 -0
  70. data/test/old/ext/Makefile +187 -0
  71. data/test/old/ext/Makefile.dario +34 -0
  72. data/test/old/ext/extconf.rb +8 -0
  73. data/test/old/ext/mk_fbin.c +24 -0
  74. data/test/old/ext/sample/extras.txt +4 -0
  75. data/{.gemtest → test/old/ext/sample/extras2.txt} +0 -0
  76. data/test/old/ext/sample/f1.fasta +10 -0
  77. data/test/old/ext/sample/f1.fasta.qual +10 -0
  78. data/test/old/ext/sample/f1.fbin +0 -0
  79. data/test/old/ext/sample/f1.fbin.index +0 -0
  80. data/test/old/ext/sample/main.c +86 -0
  81. data/test/old/ext/usage_instructions.txt +62 -0
  82. data/test/old/t_scbi_fastabin.rb +140 -0
  83. data/test/read_tests/10-original_sizes.sh +16 -0
  84. data/test/read_tests/20-fq_time.sh +23 -0
  85. data/test/read_tests/30-fbin_read_time.sh +23 -0
  86. data/test/read_tests/40-bsc_read_time.sh +21 -0
  87. data/test/read_tests/50-fq_time_x4.sh +25 -0
  88. data/test/read_tests/60-fbin_read_time_x4.sh +24 -0
  89. data/test/read_tests/70-bsc_read_time_x4.sh +32 -0
  90. data/test/results_bio_scbi_fasta.txt +11 -0
  91. data/test/{test_scbi_fbin_file.rb → scbi_fbin_file_test.rb} +0 -0
  92. data/test/speed.txt +81 -0
  93. data/test/t_scbi_fasta.rb +12 -0
  94. data/test/write_tests/10-original_sizes.sh +16 -0
  95. data/test/write_tests/20-zip_time.sh +17 -0
  96. data/test/write_tests/30-mk_fbin_time.sh +23 -0
  97. data/test/write_tests/31-mk_fbin_time_f30.sh +21 -0
  98. data/test/write_tests/40-gzip_time.sh +16 -0
  99. data/test/write_tests/41-bsc_time.sh +16 -0
  100. data/test/write_tests/50-zip_sizes.sh +16 -0
  101. data/test/write_tests/60-fbin_sizes.sh +17 -0
  102. data/test/write_tests/61-fbin_sizes_f30.sh +16 -0
  103. data/test/write_tests/70-gzip_sizes.sh +17 -0
  104. data/test/write_tests/80-bsc_sizes.sh +17 -0
  105. data/website/index.html +87 -0
  106. data/website/index.txt +81 -0
  107. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  108. data/website/stylesheets/screen.css +159 -0
  109. data/website/template.html.erb +50 -0
  110. metadata +208 -95
  111. data/History.txt +0 -19
  112. data/Manifest.txt +0 -12
  113. data/PostInstall.txt +0 -7
  114. data/script/console +0 -10
  115. data/script/destroy +0 -14
  116. data/script/generate +0 -14
@@ -0,0 +1,45 @@
1
+ #include "lib_fqbin.h"
2
+ #include <stdio.h>
3
+ #include <ctype.h>
4
+ #include <string.h>
5
+ #include <stdlib.h>
6
+
7
+
8
+ /*******************************************************/
9
+ /* main */
10
+ /*******************************************************/
11
+ int main(int argc, char *argv[])
12
+ {
13
+
14
+ char *fasta=NULL;
15
+ char *qual=NULL;
16
+ char *extras=NULL;
17
+ int size=5000;
18
+ int res=0;
19
+
20
+ if (argc!=3)
21
+ {
22
+ printf("Usage %s fbin_file seq_name\n\n",argv[0]);
23
+ return -1;
24
+ }
25
+
26
+
27
+
28
+ res=read_seq(argv[1],argv[2], &fasta, &qual, &extras);
29
+
30
+ if (res==0){
31
+
32
+ // printf("Extras for seq: %s\n", argv[2]);
33
+
34
+ if (extras!=NULL) printf("%s\n",extras);
35
+
36
+ }
37
+
38
+ //res=read_seq("prueba2gz.fbin","F143CJN01EN6AH", &fasta, &qual, &extras);
39
+ if ( fasta!=NULL ) {free(fasta);fasta=NULL;}
40
+ if ( qual!=NULL ) {free(qual);qual=NULL;}
41
+ if ( extras!=NULL ) {free(extras);extras=NULL;}
42
+
43
+ return res;
44
+ }
45
+
Binary file
@@ -0,0 +1,143 @@
1
+
2
+ #include "lib_fqbin.h"
3
+ #include <stdio.h>
4
+ #include <ctype.h>
5
+
6
+ #include <unistd.h>
7
+
8
+
9
+ // scan a fastq file for the sequence named seq_name and print it in fastq format
10
+ int find_in_fastq(char *fname, char *seq_name)
11
+ {
12
+
13
+ // allocate strings
14
+ char *name;
15
+ if ((name = malloc(MAXSEQNAME)) == NULL) {
16
+ puts("Memory allocation error!");
17
+ return EXIT_FAILURE;
18
+ }
19
+
20
+ char *fasta;
21
+ if ((fasta = malloc(MAXSEQLENGTH)) == NULL) {
22
+ puts("Memory allocation error!");
23
+ return EXIT_FAILURE;
24
+ }
25
+
26
+ char *qual;
27
+ if ((qual = malloc(MAXSEQLENGTH)) == NULL) {
28
+ puts("Memory allocation error!");
29
+ return EXIT_FAILURE;
30
+ }
31
+
32
+ char *comments;
33
+ if ((comments = malloc(MAXSEQLENGTH)) == NULL) {
34
+ puts("Memory allocation error!");
35
+ return EXIT_FAILURE;
36
+ }
37
+
38
+ static time_t curr_time=0;
39
+ static time_t prev_time=0;
40
+
41
+ prev_time=time(NULL);
42
+
43
+ FILE *fastq_file=NULL;
44
+
45
+
46
+ int valid=0;
47
+ int res=0;
48
+ int r=0;
49
+ // Open the fastq file ("-" means read from stdin)
50
+ if (strcmp(fname,"-")==0){
51
+ fastq_file=stdin;
52
+ }else{
53
+
54
+ open_file(fname,&fastq_file);
55
+ }
56
+
57
+ if (fastq_file==NULL){
58
+ printf("TRESb\n");
59
+ }
60
+
61
+ // for each sequence on fastq file
62
+ while (valid=get_next_seq_fastq(fastq_file,&name,&fasta,&qual,&comments)){
63
+ if(valid==1)
64
+ {
65
+ r++;
66
+ if (strcmp(name,seq_name)==0)
67
+ {
68
+ printf("@%s %s\n%s\n", name,comments, fasta);
69
+ printf("+%s\n%s\n",name,qual);
70
+ break;
71
+ }
72
+
73
+ }else{
74
+ fprintf(stderr,"Invalid sequence found %s. Aborting import.\n",name);
75
+ res=-1;
76
+ break;
77
+ }
78
+
79
+ // if ((r%10000)==0) {
80
+ // }
81
+
82
+ }
83
+
84
+ curr_time=time(NULL);
85
+ printf("\nEnd fastq processing. %d seqs in %.0f s. Rate: %8.2f seqs/s\n",r,difftime(curr_time,prev_time),r/difftime(curr_time,prev_time));
86
+
87
+ // free mem
88
+ free(name);
89
+ free(fasta);
90
+ free(qual);
91
+ free(comments);
92
+
93
+ // close files
94
+ fclose(fastq_file);
95
+
96
+ return res;
97
+ }
98
+
99
+ void usage(){
100
+ printf("Usage: read_fq fastq_file seq_name\n\n");
101
+
102
+ exit(-1);
103
+
104
+ }
105
+
106
+
107
+
108
+ /*******************************************************/
109
+ /* main */
110
+ /*******************************************************/
111
+ int main(int argc, char *argv[])
112
+ {
113
+
114
+ int ch;
115
+
116
+ while ((ch = getopt(argc, argv, "h")) != -1) {
117
+ switch (ch) {
118
+
119
+ case 'h':
120
+ usage();
121
+ break;
122
+ case '?':
123
+ default:
124
+ usage();
125
+ }
126
+ }
127
+
128
+ argc -= optind;
129
+ argv += optind;
130
+
131
+
132
+ // check params
133
+ if (argc<2)
134
+ {
135
+ usage();
136
+ return -1;
137
+ }
138
+
139
+ int res=find_in_fastq(argv[0],argv[1]);
140
+
141
+ return res;
142
+ }
143
+
Binary file
@@ -0,0 +1,101 @@
1
+ #include "lib_fqbin.h"
2
+ #include <stdio.h>
3
+ #include <ctype.h>
4
+ #include <string.h>
5
+ #include <stdlib.h>
6
+
7
+ #include <unistd.h>
8
+
9
+
10
+ void usage(){
11
+ printf("Usage: read_fqbin [-f][-e][-E] fqbin_file seq_name\n\n");
12
+ printf(" -f Output sequence in fasta format\n");
13
+ printf(" -e Output extras for sequence\n");
14
+ printf(" -E Output only extras for sequence\n");
15
+
16
+ exit(-1);
17
+
18
+ }
19
+
20
+
21
+ /*******************************************************/
22
+ /* main */
23
+ /*******************************************************/
24
+ int main(int argc, char *argv[])
25
+ {
26
+
27
+ char *fasta=NULL;
28
+ char *qual=NULL;
29
+ char *extras=NULL;
30
+ int size=5000;
31
+ int res=0;
32
+
33
+ int ch;
34
+
35
+ int output_fasta = 0;
36
+ int output_extras = 0;
37
+ int only_extras = 0;
38
+
39
+ while ((ch = getopt(argc, argv, "feEh")) != -1) {
40
+ switch (ch) {
41
+ case 'f':
42
+ output_fasta = 1;
43
+ break;
44
+ case 'e':
45
+ output_extras = 1;
46
+ break;
47
+ case 'E':
48
+ output_extras = 1;
49
+ only_extras = 1;
50
+ break;
51
+
52
+ case 'h':
53
+ usage();
54
+ break;
55
+ case '?':
56
+ default:
57
+ usage();
58
+ }
59
+ }
60
+
61
+ argc -= optind;
62
+ argv += optind;
63
+ // printf("argc: %d", argc);
64
+ // printf("argv: %s", argv[0]);
65
+
66
+
67
+ if (argc!=2)
68
+ {
69
+ usage();
70
+ return -1;
71
+ }
72
+
73
+ res=read_seq(argv[0],argv[1], &fasta, &qual, &extras);
74
+
75
+ if (res==-2)
76
+ {
77
+ printf("Index file does not exist for: %s.index\n", argv[0]);
78
+ return res;
79
+ }
80
+
81
+ if (res==0){
82
+
83
+ if (!only_extras){
84
+ if (output_fasta){
85
+ printf(">%s\n%s\n", argv[1], fasta);
86
+ }else{
87
+ printf("@%s\n%s\n", argv[1], fasta);
88
+ printf("+%s\n%s\n",argv[1],qual);
89
+ }
90
+ }
91
+ if ((extras!=NULL) && (output_extras)) printf ("EXTRAS:%s\n",extras);
92
+
93
+ }
94
+
95
+ //res=read_seq("prueba2gz.fqbin","F143CJN01EN6AH", &fasta, &qual, &extras);
96
+ if ( fasta!=NULL ) {free(fasta);fasta=NULL;}
97
+ if ( qual!=NULL ) {free(qual);qual=NULL;}
98
+ if ( extras!=NULL ) {free(extras);extras=NULL;}
99
+ return res;
100
+ }
101
+
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env bash
2
+
3
+ # The input argument is the name of the file without its extension
4
+
5
+ mv $1 $1.bkp
6
+ echo "UMACOMPRESSEDFORMAT 1 0 0 999999999999 999999999999" > $1
7
+ zgrep -v UMACOM $1.bkp | sort -k 1 >> $1
8
+ gzip $1
9
+ mv $1.gz $1.sorted
@@ -0,0 +1,13 @@
1
+ require 'scbi_fastq'
2
+
3
+ fqr=FastqFile.new(ARGV.shift)
4
+
5
+ r=0
6
+ ntcount=0
7
+ fqr.each do |f,q,n|
8
+ r+=1
9
+ ntcount+=f.length+q.length
10
+ # puts f,q,n
11
+ end
12
+ puts "Total seqs: #{r}\n";
13
+ puts "Total NT: #{ntcount}\n";
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'scbi_fqbin/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "scbi_fqbin"
8
+ spec.version = ScbiFqbin::VERSION
9
+ spec.authors = ["dariogf"]
10
+ spec.email = ["dariogf@scbi.uma.es"]
11
+ spec.summary = %q{Read/write compressed fastq or fasta files in fqbin format}
12
+ spec.description = %q{Read/write compressed fastq or fasta files in fqbin format}
13
+ spec.homepage = ""
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+
22
+ spec.add_runtime_dependency "ffi"
23
+ spec.add_development_dependency "bundler", "~> 1.7"
24
+ spec.add_development_dependency "rake", "~> 10.0"
25
+ end
data/test/build.rake ADDED
@@ -0,0 +1,15 @@
1
+ desc "build lib"
2
+ task :build do
3
+ chdir "libfbin_src" do
4
+ sh "make"
5
+ end
6
+ end
7
+
8
+ desc "Install lib"
9
+ task :install do
10
+ chdir "libfbin_src" do
11
+ sh "sudo make install"
12
+ end
13
+ end
14
+
15
+
data/test/fbinfile ADDED
Binary file
Binary file
@@ -0,0 +1,66 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+
3
+ class TestFillfile < Test::Unit::TestCase
4
+
5
+ def setup
6
+ end
7
+
8
+ TEST_FILE='/tmp/fbinfile';
9
+
10
+ SEQ_FASTA='ACTG'
11
+ SEQ_QUAL=[25]
12
+ SEQ_NAME='SEQ'
13
+ SEQ_EXTRAS='SOME EXTRAS IN SEQ'
14
+
15
+
16
+ def fill_file(n)
17
+ fb=Fastabin.new(TEST_FILE,'w')
18
+ n.times do |c|
19
+ i = c+1
20
+ fb.add_seq(SEQ_NAME+i.to_s,SEQ_FASTA*i,(SEQ_QUAL*i*SEQ_FASTA.length).join(' '),SEQ_EXTRAS)
21
+ end
22
+
23
+ fb.close
24
+ end
25
+
26
+ #
27
+ # def test_add100
28
+ #
29
+ # # make new file and fill with data
30
+ # fill_file(100)
31
+ #
32
+ ## fb=Fastabin.new(TEST_FILE,'r')
33
+ ## assert(fb.count == 100)
34
+ ## fb.close
35
+ #
36
+ # end
37
+
38
+ def test_read
39
+
40
+ # make new file and fill with data
41
+ fill_file(10)
42
+
43
+
44
+ fb=Fastabin.new(TEST_FILE,'r')
45
+
46
+ 10.times do |c|
47
+ i = c+1
48
+ n,s,q,e=fb.read_seq(SEQ_NAME+i.to_s)
49
+ #puts n,s.length,q.split(' ').length
50
+ assert(SEQ_NAME+i.to_s==n)
51
+ assert(SEQ_FASTA*i==s)
52
+ assert((SEQ_QUAL*i*SEQ_FASTA.length).join(' ')==q)
53
+ assert(SEQ_EXTRAS==e)
54
+ end
55
+
56
+ n,s,q,e=fb.read_seq(SEQ_NAME+'NO_EXIST')
57
+ assert(n.nil?)
58
+
59
+ fb.close
60
+ end
61
+
62
+
63
+
64
+
65
+
66
+ end
data/test/old/app.rb ADDED
@@ -0,0 +1,43 @@
1
+ require 'lib/scbi_fqbin/fbin_file'
2
+
3
+ file='c/sample/f1.fbin'
4
+
5
+ o = FbinFile.new(file,'r')
6
+
7
+
8
+ name,fasta,qual,extras=o.next_sequence
9
+ while !name.nil?
10
+ puts "Name:#{name}, fasta: #{fasta}"
11
+ name,fasta,qual,extras=o.next_sequence
12
+ end
13
+
14
+ puts "="*20
15
+
16
+ o.each do |name,fasta,qual,extras|
17
+ puts "Name:#{name}, fasta: #{fasta}"
18
+ end
19
+
20
+ puts "="*20
21
+
22
+ seq='FX9YN3P05C43XJ'
23
+ name,fasta,qual,extras=o.read_sequence(seq)
24
+ puts "Name:#{name}, fasta: #{fasta}"
25
+
26
+ o.close
27
+
28
+
29
+ # r=o.calculate_something(42,98.6)
30
+ # puts r
31
+
32
+ # c = MyLibrary.calculate_something(42, 98.6) # note FFI handles literals just fine
33
+ #
34
+ # if ( (errcode = MyLibrary.error_code()) != 0)
35
+ # puts "error calculating something: #{errcode}"
36
+ # exit 1
37
+ # end
38
+ #
39
+ # objptr = MyLibrary.create_object("my object") # note FFI handles string literals as well
40
+ # d = MyLibrary.calculate_something_else(c, objptr)
41
+ # MyLibrary.free_object(objptr)
42
+ #
43
+ # puts "calculated #{d}"