scbi_fqbin 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (116) hide show
  1. checksums.yaml +7 -0
  2. data/.DS_Store +0 -0
  3. data/.gitignore +14 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/{README.rdoc → README.md} +0 -0
  7. data/Rakefile +8 -28
  8. data/lib/scbi_fqbin.rb +3 -5
  9. data/lib/scbi_fqbin/fastabin.rb +411 -0
  10. data/lib/scbi_fqbin/fastq_file_c.rb +373 -0
  11. data/lib/scbi_fqbin/fbin_file.rb +1 -1
  12. data/lib/scbi_fqbin/t.rb +9 -0
  13. data/lib/scbi_fqbin/t2.rb +12 -0
  14. data/lib/scbi_fqbin/version.rb +3 -0
  15. data/lib_fqbin_src.zip +0 -0
  16. data/lib_fqbin_src/Makefile +66 -0
  17. data/lib_fqbin_src/fq +0 -0
  18. data/lib_fqbin_src/fq.c +165 -0
  19. data/lib_fqbin_src/hash_fqbin +0 -0
  20. data/lib_fqbin_src/hash_fqbin.c +212 -0
  21. data/lib_fqbin_src/idx_fqbin +21 -0
  22. data/lib_fqbin_src/iterate_fqbin +0 -0
  23. data/lib_fqbin_src/iterate_fqbin.c +136 -0
  24. data/lib_fqbin_src/lib_fqbin.c +1748 -0
  25. data/lib_fqbin_src/lib_fqbin.h +194 -0
  26. data/lib_fqbin_src/mk_fqbin +0 -0
  27. data/lib_fqbin_src/mk_fqbin.c +138 -0
  28. data/lib_fqbin_src/other/bwxform.c +915 -0
  29. data/lib_fqbin_src/other/bwxform.h +74 -0
  30. data/lib_fqbin_src/other/find_in_index.c +130 -0
  31. data/lib_fqbin_src/other/hash_fbin_nogzchunks.c +164 -0
  32. data/lib_fqbin_src/other/idx_fqbin +0 -0
  33. data/lib_fqbin_src/other/idx_fqbin.c +67 -0
  34. data/lib_fqbin_src/other/make_hsh.sh +14 -0
  35. data/lib_fqbin_src/other/rd_extras_fbin.c +45 -0
  36. data/lib_fqbin_src/read_fq +0 -0
  37. data/lib_fqbin_src/read_fq.c +143 -0
  38. data/lib_fqbin_src/read_fqbin +0 -0
  39. data/lib_fqbin_src/read_fqbin.c +101 -0
  40. data/lib_fqbin_src/sort_index +9 -0
  41. data/lib_fqbin_src/test.rb +13 -0
  42. data/scbi_fqbin.gemspec +25 -0
  43. data/test/build.rake +15 -0
  44. data/test/fbinfile +0 -0
  45. data/test/fbinfile.index +0 -0
  46. data/test/no_test_fill_file.rb +66 -0
  47. data/test/old/app.rb +43 -0
  48. data/test/old/bin/iterate_fastabin.rb +54 -0
  49. data/test/old/bin/mk_fastabin.rb +22 -0
  50. data/test/old/bin/rd_fastabin.rb +36 -0
  51. data/test/old/bin/rd_fq.rb +20 -0
  52. data/test/old/bioruby.rb +27 -0
  53. data/test/old/c/Makefile +34 -0
  54. data/test/old/c/fbin_lib.zip +0 -0
  55. data/test/old/c/iterate_fbin.c +54 -0
  56. data/test/old/c/libreria_gz.c +707 -0
  57. data/test/old/c/libreria_gz.h +127 -0
  58. data/test/old/c/main.c +86 -0
  59. data/test/old/c/mk_fbin.c +24 -0
  60. data/test/old/c/rd_seq_fbin.c +44 -0
  61. data/test/old/c/test_ffi/a.out +0 -0
  62. data/test/old/c/test_ffi/app.c +26 -0
  63. data/test/old/c/test_ffi/app.rb +19 -0
  64. data/test/old/c/test_ffi/liblibreria_gz.dylib +0 -0
  65. data/test/old/c/test_ffi/libmylibrary.dylib +0 -0
  66. data/test/old/c/test_ffi/my_library.rb +23 -0
  67. data/test/old/c/test_ffi/mylibrary.c +22 -0
  68. data/test/old/c/test_ffi/mylibrary.h +6 -0
  69. data/test/old/c/usage_instructions.txt +62 -0
  70. data/test/old/ext/Makefile +187 -0
  71. data/test/old/ext/Makefile.dario +34 -0
  72. data/test/old/ext/extconf.rb +8 -0
  73. data/test/old/ext/mk_fbin.c +24 -0
  74. data/test/old/ext/sample/extras.txt +4 -0
  75. data/{.gemtest → test/old/ext/sample/extras2.txt} +0 -0
  76. data/test/old/ext/sample/f1.fasta +10 -0
  77. data/test/old/ext/sample/f1.fasta.qual +10 -0
  78. data/test/old/ext/sample/f1.fbin +0 -0
  79. data/test/old/ext/sample/f1.fbin.index +0 -0
  80. data/test/old/ext/sample/main.c +86 -0
  81. data/test/old/ext/usage_instructions.txt +62 -0
  82. data/test/old/t_scbi_fastabin.rb +140 -0
  83. data/test/read_tests/10-original_sizes.sh +16 -0
  84. data/test/read_tests/20-fq_time.sh +23 -0
  85. data/test/read_tests/30-fbin_read_time.sh +23 -0
  86. data/test/read_tests/40-bsc_read_time.sh +21 -0
  87. data/test/read_tests/50-fq_time_x4.sh +25 -0
  88. data/test/read_tests/60-fbin_read_time_x4.sh +24 -0
  89. data/test/read_tests/70-bsc_read_time_x4.sh +32 -0
  90. data/test/results_bio_scbi_fasta.txt +11 -0
  91. data/test/{test_scbi_fbin_file.rb → scbi_fbin_file_test.rb} +0 -0
  92. data/test/speed.txt +81 -0
  93. data/test/t_scbi_fasta.rb +12 -0
  94. data/test/write_tests/10-original_sizes.sh +16 -0
  95. data/test/write_tests/20-zip_time.sh +17 -0
  96. data/test/write_tests/30-mk_fbin_time.sh +23 -0
  97. data/test/write_tests/31-mk_fbin_time_f30.sh +21 -0
  98. data/test/write_tests/40-gzip_time.sh +16 -0
  99. data/test/write_tests/41-bsc_time.sh +16 -0
  100. data/test/write_tests/50-zip_sizes.sh +16 -0
  101. data/test/write_tests/60-fbin_sizes.sh +17 -0
  102. data/test/write_tests/61-fbin_sizes_f30.sh +16 -0
  103. data/test/write_tests/70-gzip_sizes.sh +17 -0
  104. data/test/write_tests/80-bsc_sizes.sh +17 -0
  105. data/website/index.html +87 -0
  106. data/website/index.txt +81 -0
  107. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  108. data/website/stylesheets/screen.css +159 -0
  109. data/website/template.html.erb +50 -0
  110. metadata +208 -95
  111. data/History.txt +0 -19
  112. data/Manifest.txt +0 -12
  113. data/PostInstall.txt +0 -7
  114. data/script/console +0 -10
  115. data/script/destroy +0 -14
  116. data/script/generate +0 -14
@@ -0,0 +1,373 @@
1
+
2
+ # Add String#ord for Ruby 1.8, where it is not defined
3
+ if !String.instance_methods.include?(:ord)
4
+ class String
5
+
6
+ def ord
7
+ return self[0]
8
+ end
9
+
10
+ end
11
+ end
12
+
13
+ require 'ffi'
14
+
15
+ class FFIString<FFI::MemoryPointer
16
+
17
+ def initialize
18
+ super(:pointer,1)
19
+ # self.write_string('a')
20
+ # return FFI::MemoryPointer.from_string('a'*150000000)
21
+ end
22
+
23
+ def to_s
24
+ resPtr = self.read_pointer()
25
+
26
+ # return nil if the pointer is null; otherwise return the string it points to
27
+ return resPtr.null? ? nil : resPtr.read_string()
28
+ end
29
+
30
+ def inspect
31
+ self.to_s
32
+ end
33
+
34
+ end
35
+
36
+
37
+
38
+ class FastqFileC
39
+
40
+ attr_accessor :num_seqs
41
+
42
+
43
+ extend FFI::Library
44
+
45
+ ffi_lib(["libfbin"])
46
+
47
+ functions = [
48
+
49
+ [:get_next_seq_fastq, [:pointer,:pointer,:pointer,:pointer,:pointer],:int],
50
+ [:open_file,[:string, :pointer],:int],
51
+ [:close_file,[:pointer],:int],
52
+ [:free_string,[:pointer],:int]
53
+
54
+
55
+ ]
56
+
57
+ functions.each do |func|
58
+ begin
59
+ attach_function(*func)
60
+ private func[0]
61
+ rescue Object => e
62
+ puts "Could not attach #{func}, #{e.message}"
63
+ end
64
+ end
65
+
66
+
67
+ def open_fastq()
68
+ @fastq_file = FFI::MemoryPointer.new :pointer
69
+ # puts @fastq_file.address
70
+ open_file(@fasta_file_name,@fastq_file)
71
+ # inspect_file_data_struct(@gzf_bin)
72
+ # puts @fastq_file.address
73
+
74
+ @fastq_file = @fastq_file.get_pointer(0)
75
+ # puts "2"
76
+ # puts @fastq_file.address
77
+ # if @fastq_file.null?
78
+ # puts "ES NULLLLLLL"
79
+ # end
80
+
81
+ end
82
+
83
+ #------------------------------------
84
+ # Initialize instance
85
+ #------------------------------------
86
+ def initialize(fasta_file_name, mode='r', fastq_type = :sanger, qual_to_array=true, qual_to_phred=true)
87
+
88
+ @fasta_file_name=fasta_file_name
89
+
90
+ if mode.upcase.index('W')
91
+ @fastq_file = File.open(fasta_file_name,'w')
92
+ elsif mode.upcase.index('A')
93
+ if !File.exist?(fasta_file_name)
94
+ raise "File #{fasta_file_name} doesn't exists"
95
+ end
96
+
97
+ @fastq_file = File.open(fasta_file_name,'a')
98
+ else #read only
99
+ if !File.exist?(fasta_file_name)
100
+ raise "File #{fasta_file_name} doesn't exists"
101
+ end
102
+
103
+ if fasta_file_name.is_a?(IO)
104
+ @fastq_file = fasta_file_name
105
+ else
106
+ # @fastq_file = File.open(fasta_file_name,'r')
107
+ @namePtr = FFIString.new
108
+ @fastaPtr = FFIString.new
109
+ @qualPtr = FFIString.new
110
+ @extrasPtr = FFIString.new
111
+
112
+ open_fastq
113
+ end
114
+ end
115
+
116
+ @mode = mode
117
+ @num_seqs = 0
118
+ @fastq_type=fastq_type
119
+
120
+ # S - Sanger Phred+33, raw reads typically (0, 40)
121
+ # X - Solexa Solexa+64, raw reads typically (-5, 40)
122
+ # I - Illumina 1.3+ Phred+64, raw reads typically (0, 40)
123
+ # J - Illumina 1.5+ Phred+64, raw reads typically (3, 40)
124
+ # > >>> def solexa_quality_from_phred(phred_quality) :
125
+ # > ... return 10*log(10**(phred_quality/10.0) - 1, 10)
126
+ # > ...
127
+ # > >>> solexa_quality_from_phred(90)
128
+ # > 89.999999995657035
129
+ # > >>> solexa_quality_from_phred(50)
130
+ # > 49.99995657033466
131
+ # > >>> solexa_quality_from_phred(10)
132
+ # > 9.5424250943932485
133
+ # > >>> solexa_quality_from_phred(1)
134
+ # > -5.8682532438011537
135
+ # > >>> solexa_quality_from_phred(0.1)
136
+ # > -16.32774717238372
137
+ # >
138
+ # > >>> def phred_quality_from_solexa(solexa_quality) :
139
+ # > ... return 10*log(10**(solexa_quality/10.0) + 1, 10)
140
+ # > ...
141
+ # > >>> phred_quality_from_solexa(90)
142
+ # > 90.000000004342922
143
+ # > >>> phred_quality_from_solexa(10)
144
+ # > 10.41392685158225
145
+ # > >>> phred_quality_from_solexa(0)
146
+ # > 3.0102999566398116
147
+ # > >>> phred_quality_from_solexa(-20)
148
+ # > 0.043213737826425784
149
+
150
+
151
+ #sanger by default
152
+ @to_phred = lambda{|q| q - 33}
153
+ @from_phred = lambda{|q| (q+33).chr}
154
+
155
+ if @fastq_type == :ilumina
156
+ @to_phred = lambda{|q| q - 64}
157
+ # @from_phred = lambda{|q| (q+64).chr}
158
+
159
+ elsif @fastq_type == :solexa
160
+ #
161
+ # solexa to phred quals
162
+
163
+ @to_phred = lambda{|q| (10*Math.log(10**(q/10.0)+1,10)).round}
164
+ # @from_phred = lambda{|q| (10*Math.log(10**(q/10.0)-1,10)).round.chr}
165
+
166
+ #phred to solexa quals
167
+
168
+ end
169
+
170
+ @qual_to_array = qual_to_array
171
+
172
+ @qual_to_phred = qual_to_phred
173
+
174
+ end
175
+
176
+ def close
177
+ free_string(@namePtr)
178
+ free_string(@qualPtr)
179
+ free_string(@fastaPtr)
180
+ free_string(@extrasPtr)
181
+
182
+ close_file(@fastq_file)
183
+ end
184
+
185
+
186
+ #------------------------------------
187
+ # Iterate over all sequences
188
+ #------------------------------------
189
+ def each
190
+
191
+ rewind
192
+
193
+ n,f,q,c=next_seq
194
+
195
+ while (!n.nil?)
196
+ yield(n,f,q,c)
197
+ n,f,q,c=next_seq
198
+ end
199
+
200
+ rewind
201
+
202
+ end
203
+
204
+ # go back to the first position in the file (closes and reopens it)
205
+ def rewind
206
+
207
+ @num_seqs = 0;
208
+ close_file(@fastq_file)
209
+ open_fastq
210
+ # @fastq_file.pos=0
211
+
212
+ end
213
+
214
+ #------------------------------------
215
+ # Get next sequence
216
+ #------------------------------------
217
+ def next_seq
218
+ #init variables
219
+
220
+ # namePtr = FFIString.new
221
+ # fastaPtr = FFIString.new
222
+ # qualPtr = FFIString.new
223
+ # extrasPtr = FFIString.new
224
+
225
+
226
+ if get_next_seq_fastq(@fastq_file,@namePtr,@fastaPtr,@qualPtr,@extrasPtr)==1
227
+
228
+ seq_name=@namePtr.to_s
229
+ qual=@qualPtr.to_s
230
+
231
+ # if @qual_to_phred
232
+ # qual=qual.each_char.map{|e| (@to_phred.call(e.ord))}
233
+ # if !@qual_to_array
234
+ # qual=qual.join(' ')
235
+ # end
236
+ # end
237
+
238
+ fasta=@fastaPtr.to_s
239
+ extras = @extrasPtr.to_s
240
+
241
+ # free_string(namePtr)
242
+ # free_string(qualPtr)
243
+ # free_string(fastaPtr)
244
+ # free_string(extrasPtr)
245
+
246
+ return seq_name, fasta, qual, extras
247
+
248
+ else
249
+
250
+ # free_string(namePtr)
251
+ # free_string(qualPtr)
252
+ # free_string(fastaPtr)
253
+ # free_string(extrasPtr)
254
+
255
+ return nil
256
+ # raise "Invalid sequence"
257
+ end
258
+ # res = read_fastq
259
+ # return res
260
+ end
261
+
262
+ # write sequence to file in sanger format
263
+ def write_seq(seq_name,seq_fasta,seq_qual,comments='')
264
+ name = ""
265
+
266
+ @fastq_file.puts("@#{seq_name} #{comments}")
267
+ @fastq_file.puts(seq_fasta)
268
+ @fastq_file.puts("+#{seq_name} #{comments}")
269
+
270
+ if seq_qual.is_a?(Array)
271
+ @fastq_file.puts(seq_qual.map{|e| @from_phred.call(e)}.join)
272
+ else
273
+ @fastq_file.puts(seq_qual.split(/\s+/).map{|e| @from_phred.call(e.to_i)}.join)
274
+ end
275
+
276
+ end
277
+
278
+
279
+ # creates fastq output in sanger format
280
+ def self.to_fastq(seq_name,seq_fasta,seq_qual,comments='')
281
+
282
+ res=[]
283
+
284
+ name = ""
285
+
286
+ res << ("@#{seq_name} #{comments}")
287
+ res << (seq_fasta)
288
+ res << ("+#{seq_name} #{comments}")
289
+
290
+ if @qual_to_phred
291
+ if seq_qual.is_a?(Array)
292
+ res<<(seq_qual.map{|e| (e+33).chr}.join)
293
+ else
294
+ res<<(seq_qual.split(/\s+/).map{|e| (e.to_i+33).chr}.join)
295
+ end
296
+ else
297
+ res << seq_qual
298
+ end
299
+
300
+ return res
301
+ end
302
+
303
+ def with_qual?
304
+ true
305
+ end
306
+
307
+
308
+ private
309
+
310
+ #------------------------------------
311
+ # Read one sequence in fastq
312
+ #------------------------------------
313
+ # @GEM-108-D02
314
+ # AAAAGCTGG
315
+ # +
316
+ # :::::::::
317
+
318
+ def read_fastq
319
+
320
+ seq_name = nil
321
+ seq_fasta = nil
322
+ seq_qual = nil
323
+ comments = nil
324
+
325
+ reading = :fasta
326
+
327
+ if !@fastq_file.eof
328
+
329
+ begin
330
+ #read four lines
331
+ name_line = @fastq_file.readline.chomp
332
+ seq_fasta = @fastq_file.readline.chomp
333
+ name2_line = @fastq_file.readline.chomp
334
+ seq_qual = @fastq_file.readline.chomp
335
+
336
+
337
+ # parse name
338
+ if name_line =~ /^@\s*([^\s]+)\s*(.*)$/
339
+ # remove comments
340
+ seq_name = $1
341
+ comments=$2
342
+ else
343
+ raise "Invalid sequence name in #{name_line}"
344
+ end
345
+
346
+ # parse fasta
347
+ seq_fasta.strip! if !seq_fasta.empty?
348
+
349
+ # parse qual_name
350
+
351
+ if !seq_name.nil? && !seq_qual.empty?
352
+
353
+ @num_seqs += 1
354
+
355
+ if @qual_to_phred
356
+ seq_qual=seq_qual.each_char.map{|e| (@to_phred.call(e.ord))}
357
+
358
+ if !@qual_to_array
359
+ seq_qual=seq_qual.join(' ')
360
+ end
361
+ end
362
+
363
+ end
364
+ rescue EOFError
365
+ raise "Bad format in FastQ file"
366
+ end
367
+ end
368
+
369
+ return [seq_name,seq_fasta,seq_qual,comments]
370
+ end
371
+
372
+
373
+ end
@@ -27,7 +27,7 @@ class FbinFile
27
27
  APPEND_TO_FILE=2
28
28
  extend FFI::Library
29
29
 
30
- ffi_lib(["libfbin"])
30
+ ffi_lib(["lib_fqbin"])
31
31
 
32
32
  functions = [
33
33
 
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+ require 'scbi_fastq'
3
+
4
+ f=FastqFile.new('/Users/dariogf/seqs/chromosomes/originals/ilumina_SRR314795.fastq')
5
+
6
+ f.each do |n,f,q,c|
7
+ # puts n,f,q,c
8
+ end
9
+
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env ruby
2
+ load './fastq_file_c.rb'
3
+
4
+ f=FastqFileC.new('/Users/dariogf/seqs/chromosomes/originals/ilumina_SRR314795.fastq')
5
+
6
+ # GC.disable
7
+
8
+ f.each do |n,f,q,c|
9
+ # puts n,f,q,c
10
+ end
11
+
12
+ # GC.enable
@@ -0,0 +1,3 @@
1
+ module ScbiFqbin
2
+ VERSION = "0.2.3"
3
+ end
data/lib_fqbin_src.zip ADDED
Binary file
@@ -0,0 +1,66 @@
1
+ prefix=/usr/local
2
+ bin_prefix=$(prefix)/bin
3
+ lib_prefix=$(prefix)/lib
4
+
5
+
6
+ OS := $(shell uname)
7
+
8
+ CFLAGS=
9
+
10
+ ifeq ($(OS), Linux)
11
+ CFLAGS=-fPIC -O3
12
+ lib_name=$(LIB_NAME).so
13
+ endif
14
+
15
+ ifeq ($(OS), Darwin)
16
+ lib_name=$(LIB_NAME).dylib
17
+ endif
18
+
19
+
20
+ CC=gcc
21
+ DEPFILE=.depend
22
+ PROGS=mk_fqbin read_fqbin iterate_fqbin hash_fqbin fq read_fq
23
+ OBJS=lib_fqbin.o
24
+ LIBS=-lz
25
+ LIB_NAME=lib_fqbin
26
+
27
+ all: $(OBJS) $(PROGS) lib
28
+
29
+ $(PROGS):
30
+ $(CC) $(CFLAGS) $(OBJS) $(LIBS) $@.c -o $@
31
+
32
+ .c.o:
33
+ $(CC) $(CFLAGS) -c -o $@ $<
34
+
35
+ lib:
36
+ ifeq ($(OS), Linux)
37
+ lib_name=$(LIB_NAME).so
38
+ $(CC) -shared -Wl,-soname,$(LIB_NAME).so.1 -o $(LIB_NAME).so $(OBJS) $(LIBS)
39
+ endif
40
+
41
+ ifeq ($(OS), Darwin)
42
+ lib_name=$(LIB_NAME).dylib
43
+ $(CC) $(LIBS) -dynamiclib -o $(LIB_NAME).dylib -dylib $(OBJS)
44
+ endif
45
+
46
+ clean:
47
+ rm -f *.o $(PROGS) *.dylib *.so*
48
+
49
+ install:
50
+
51
+ install -m 0755 $(lib_name) $(lib_prefix)
52
+ for prog in $(PROGS); do \
53
+ install -m 0755 $$prog $(bin_prefix); \
54
+ done; \
55
+ install -m 0755 sort_index $(bin_prefix);
56
+ install -m 0755 idx_fqbin $(bin_prefix);
57
+
58
+ uninstall:
59
+ rm $(lib_prefix)/$(lib_name)
60
+ for prog in $(PROGS); do \
61
+ rm $(bin_prefix)/$$prog; \
62
+ done; \
63
+ rm $(bin_prefix)/sort_index
64
+ rm $(bin_prefix)/idx_fqbin
65
+
66
+