bio-ngs 0.3.2.alpha.01

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. data/.document +5 -0
  2. data/Gemfile +39 -0
  3. data/Gemfile.lock +81 -0
  4. data/LICENSE.txt +28 -0
  5. data/README.rdoc +240 -0
  6. data/Rakefile +60 -0
  7. data/VERSION +1 -0
  8. data/bin/biongs +35 -0
  9. data/bio-ngs.gemspec +215 -0
  10. data/ext/mkrf_conf.rb +87 -0
  11. data/lib/bio-ngs.rb +54 -0
  12. data/lib/bio/appl/ngs/bcl2qseq.rb +93 -0
  13. data/lib/bio/appl/ngs/blast.rb +36 -0
  14. data/lib/bio/appl/ngs/bowtie-inspect.rb +50 -0
  15. data/lib/bio/appl/ngs/cufflinks.rb +489 -0
  16. data/lib/bio/appl/ngs/fastx.rb +170 -0
  17. data/lib/bio/appl/ngs/samtools.rb +118 -0
  18. data/lib/bio/appl/ngs/sff_extract.rb +23 -0
  19. data/lib/bio/appl/ngs/tophat.rb +158 -0
  20. data/lib/bio/ngs/converter.rb +100 -0
  21. data/lib/bio/ngs/core_ext.rb +12 -0
  22. data/lib/bio/ngs/db.rb +66 -0
  23. data/lib/bio/ngs/db/migrate/homology/201105030707_create_blastout.rb +22 -0
  24. data/lib/bio/ngs/db/migrate/homology/201105030709_create_goannotation.rb +29 -0
  25. data/lib/bio/ngs/db/migrate/ontology/201105030708_create_go.rb +18 -0
  26. data/lib/bio/ngs/db/migrate/ontology/201105030710_create_gene_go.rb +17 -0
  27. data/lib/bio/ngs/db/migrate/ontology/201105030711_create_gene.rb +16 -0
  28. data/lib/bio/ngs/db/models.rb +1 -0
  29. data/lib/bio/ngs/db/models/homology.rb +8 -0
  30. data/lib/bio/ngs/db/models/ontology.rb +16 -0
  31. data/lib/bio/ngs/ext/bin/common/fastq_coverage_graph.sh +161 -0
  32. data/lib/bio/ngs/ext/bin/common/sff_extract +1505 -0
  33. data/lib/bio/ngs/ext/bin/linux/samtools +0 -0
  34. data/lib/bio/ngs/ext/bin/osx/samtools +0 -0
  35. data/lib/bio/ngs/ext/versions.yaml +73 -0
  36. data/lib/bio/ngs/graphics.rb +189 -0
  37. data/lib/bio/ngs/homology.rb +102 -0
  38. data/lib/bio/ngs/ontology.rb +103 -0
  39. data/lib/bio/ngs/quality.rb +64 -0
  40. data/lib/bio/ngs/record.rb +50 -0
  41. data/lib/bio/ngs/task.rb +46 -0
  42. data/lib/bio/ngs/utils.rb +176 -0
  43. data/lib/development_tasks.rb +34 -0
  44. data/lib/enumerable.rb +37 -0
  45. data/lib/tasks/bwa.thor +126 -0
  46. data/lib/tasks/convert.thor +454 -0
  47. data/lib/tasks/history.thor +51 -0
  48. data/lib/tasks/homology.thor +121 -0
  49. data/lib/tasks/ontology.thor +93 -0
  50. data/lib/tasks/project.thor +51 -0
  51. data/lib/tasks/quality.thor +142 -0
  52. data/lib/tasks/rna.thor +126 -0
  53. data/lib/tasks/sff_extract.thor +9 -0
  54. data/lib/templates/README.tt +43 -0
  55. data/lib/templates/db.tt +6 -0
  56. data/lib/wrapper.rb +225 -0
  57. data/spec/converter_qseq_spec.rb +56 -0
  58. data/spec/fixture/s_1_1_1108_qseq.txt +100 -0
  59. data/spec/quality_spec.rb +40 -0
  60. data/spec/sff_extract_spec.rb +98 -0
  61. data/spec/spec_helper.rb +55 -0
  62. data/spec/tophat_spec.rb +99 -0
  63. data/spec/utils_spec.rb +22 -0
  64. data/test/conf/test_db.yml +4 -0
  65. data/test/data/blastoutput.xml +69 -0
  66. data/test/data/gene-GO.json +1 -0
  67. data/test/data/goa_uniprot +27 -0
  68. data/test/data/goslim_goa.obo +1763 -0
  69. data/test/helper.rb +18 -0
  70. data/test/test_bio-ngs.rb +17 -0
  71. data/test/test_db.rb +21 -0
  72. data/test/test_homology.rb +102 -0
  73. data/test/test_ngs.rb +21 -0
  74. data/test/test_ontology.rb +74 -0
  75. data/test/test_utils.rb +29 -0
  76. metadata +460 -0
@@ -0,0 +1,126 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../bio/ngs/utils')
2
+ require File.expand_path(File.dirname(__FILE__) + '/../wrapper')
3
+ require File.expand_path(File.dirname(__FILE__) + '/../bio/appl/ngs/tophat')
4
+
5
# Thor tasks for RNA-seq analysis: read mapping with TopHat, quantification
# and comparison through the Cufflinks wrappers, and FASTA extraction from a
# Bowtie index.
#
# Each task defined with +thor_task+ receives the wrapper and the running
# Thor task as its first two block arguments, followed by the command line
# arguments named in the preceding +desc+.
class Rna < Thor

  # Help text of the :paired switch, shared by the tophat and mapquant tasks.
  # Kept on a single line so the help output carries no embedded newlines.
  PAIRED_DESC = 'Are reads paired? If you chose this option pass just the basename of the file without forward/reverse and .fastq'.freeze

  # According to the original author you end up with three accepted-hits
  # files: the regular one, the sorted one and the sorted-and-indexed one.
  #
  # The second +wrapper.params=+ call is made after the command line options
  # have been assigned, so its values win for the same keys (this assumes the
  # wrapper merges parameters key by key, as Bio::Command::Wrapper#params=
  # does -- TODO confirm for Bio::Ngs::Tophat).
  desc "tophat DIST INDEX OUTPUTDIR FASTQS", "run tophat as from command line, default 6 processors and then create a sorted bam indexed."
  method_option :paired, :type => :boolean, :default => false, :desc => PAIRED_DESC
  Bio::Ngs::Tophat.new.thor_task(self, :tophat) do |wrapper, task, dist, index, outputdir, fastqs|
    wrapper.params = task.options # merge passed options to the wrapper.
    wrapper.params = {"mate-inner-dist"=>dist, "output-dir"=>outputdir, "num-threads"=>6, "solexa1.3-quals"=>true}
    # Paired reads are expected as <basename>_forward.fastq / <basename>_reverse.fastq.
    fastq_files = task.options[:paired] ? ["#{fastqs}_forward.fastq","#{fastqs}_reverse.fastq"] : ["#{fastqs}"]
    wrapper.run :arguments=>[index, fastq_files ].flatten, :separator=>"="

    accepted_hits_bam_fn = File.join(outputdir, "accepted_hits.bam")
    task.invoke "convert:bam:sort", [accepted_hits_bam_fn] # call the sorting procedure.
  end

  desc "quant GTF OUTPUTDIR BAM ", "Genes and transcripts quantification"
  Bio::Ngs::Cufflinks::Quantification.new.thor_task(self, :quant) do |wrapper, task, gtf, outputdir, bam|
    wrapper.params = task.options
    wrapper.params = {"num-threads" => 6, "output-dir" => outputdir, "GTF" => gtf }
    wrapper.run :arguments=>[bam], :separator => "="
  end

  # GTFS_QUANTIFICATION is a comma separated list of gtf file names
  desc "compare GTF_REF OUTPUTDIR GTFS_QUANTIFICATION", "GTFS_QUANTIFICATIONS, use a comma separated list of gtf"
  Bio::Ngs::Cufflinks::Compare.new.thor_task(self, :compare) do |wrapper, task, gtf_ref, outputdir, gtfs_quantification|
    # unless Dir.exists?(outputdir)
    #   Dir.mkdir(outputdir)
    # end
    # Dir.chdir(outputdir)
    # #I assume GTFS_QUANTIFICATION is a comma separated list of single gtf files
    # gtf_tracking_filename = "#{outputdir}.gtf_tracking"
    # File.open(gtf_tracking_filename, 'w') do |file|
    #   file.puts gtfs_quantification.gsub(/,/,"\n")
    # end #file
    wrapper.params = task.options
    wrapper.params = {"outprefix" => outputdir, "gtf_reference"=>gtf_ref}
    wrapper.run :arguments=>[gtfs_quantification.split(',')]
    # Dir.chdir("../")
  end

  # Runs the tophat task and then the quant task on its output. The GTF file
  # is looked up as "<INDEX>.gtf" and the quantification is written to
  # OUTPUTDIR/quantification, reading OUTPUTDIR/accepted_hits_sort.bam.
  desc "mapquant DIST INDEX OUTPUTDIR FASTQS", "map and quantify"
  method_option :paired, :type => :boolean, :default => false, :desc => PAIRED_DESC
  def mapquant(dist, index, outputdir, fastqs)
    #tophat
    invoke :tophat, [dist, index, outputdir, fastqs], :paired=>options.paired
    #cufflinks quantification on gtf
    invoke :quant, ["#{index}.gtf", File.join(outputdir,"quantification"), File.join(outputdir,"accepted_hits_sort.bam")]
  end

  #TODO: write test to verify the behaviour
  desc "idx2fasta INDEX FASTA", "Create a fasta file from an indexed genome, using bowtie-inspect"
  Bio::Ngs::BowtieInspect.new.thor_task(self, :idx2fasta) do |wrapper, task, index, fasta|
    puts "Index file... #{index}"
    puts "Output file... #{fasta}"
    # Perhaps it would be better if the lib understood by itself that the
    # second argument is the output file when the program writes on stdout.
    wrapper.run :arguments=>[index], :output_file=>fasta
  end

  # desc "idx_fasta [INDEX] [FASTA]", "Create a fasta file from an indexed genome, using bowtie-inspect"
  # method_option :index, :type => :string, :require => true
  # method_option :fasta, :type => :string
  # def idx_to_fasta
  #   fasta = options.fasta || "#{options.index}.fasta"
  #   sh "bowtie-inspect #{options.index} > #{fasta}"
  # end
  #
  # desc "tophat_sr TEXT", "tophat alignment single reads"
  # method_option :threads, :type=>:numeric, :default=>1
  # def tophat_sr(text)
  #   puts self.inspect
  #   puts options.text
  #   # TODO tophat --num-threads 1 --solexa1.3-quals --output-dir liver_output Homo_ sapiens/UCSC/hg18/Sequence/BowtieIndex/genome liver.fastq
  # end
  #
  # desc "tophat_pe", "tophat alignment paired ends reads"
  # method_option :threads, :type=>:numeric, :default=>1
  # def tophat_pe
  #   # TODO
  # end
  #
  # desc "assembly_bwt", "assembly using bowtie"
  # def assembly_bwt
  # end
  #
  # desc "assembly_bwa", "assembly using bwa"
  # def assembly_bwa
  # end
  #
  # desc "cuffscmp", "make a comparison with cufflinkscompare"
  # def cuffcmp
  # end
  #
  # desc "cuffsquant", "do a complete quantification using cufflinks"
  # def cuffcmp
  # end
  #
  # desc "samindex", "index a genome with samtools"
  # def samindex
  # end
  #
  # desc "sammerge", "merge two set with samtools"
  # def sammerge
  # end
  #
  # desc "qseq_to_fastq_sr [PATH]", "convert a set of qseq files in fastq, single read"
  # method_option :path, :type => :string, :default => "."
  # def qseq_to_fastq_sr
  #   # TODO use code coming from Valeria
  #   # Bio::Ngs.qseq_to_fastq_si(path)
  # end
  #
  # desc "qseq_to_fastq_pe [PATH]", "convert a set of qseq files in fastq, paired ends"
  # method_option :path, :type => :string, :default => "."
  # def qseq_to_fastq_pe
  #   # TODO use code coming from Valeria
  #   # Bio::Ngs.qseq_to_fastq_pe(path)
  # end

end
@@ -0,0 +1,9 @@
1
# Thor task group exposing the sff_extract wrapper on the command line.
class Sff < Thor

  desc "extract [FILE]", "Run sff_extract on a SFF file"
  Bio::Ngs::SffExtract.new.thor_task(self, :extract) do |extractor, thor, sff_path|
    # Hand the command line options over to the wrapper, run it on the given
    # file and print whatever the run returns.
    extractor.params = thor.options
    result = extractor.run(:arguments => [sff_path])
    puts result
  end

end
@@ -0,0 +1,43 @@
1
+ Copyright (c) 2011 Raoul J.P. Bonnal and Francesco Strozzi.
2
+
3
+ Bio-NGS is an official BioRuby Plugin for Next Generation Sequencing data analysis and workflows.
4
+
5
+
6
+ == Description ==
7
+
8
+ This is an automatically generated directory tree for the '<%= name %>' project.
9
+
10
+ Here is a tree description:
11
+
12
+ <%= name %>
13
+ |
14
+ |-- data # the place to store all your raw data and output files
15
+ |
16
+ |-- log # the place to store log and temporary files
17
+ |
18
+ |-- scripts # custom scripts should go here
19
+ |
20
+ |-- tasks # your custom tasks and workflows for NGS data analyses should go here
21
+ |
22
+ |-- conf # the place for configuration files
23
+ |
24
+ |-- db # database and migration files
25
+
26
+
27
+ To view the full list of tasks and analyses available, simply type:
28
+
29
+ biongs -T
30
+
31
+
32
+
33
+ == References ==
34
+
35
+ If you use Bio-NGS for your research please cite the following references:
36
+
37
+ BioRuby: bioinformatics software for the Ruby programming language. Bioinformatics. 2010 Oct 15;26(20):2617-9.
38
+ Goto N, Prins P, Nakao M, Bonnal R, Aerts J, Katayama T.
39
+
40
+
41
+
42
+
43
+
@@ -0,0 +1,6 @@
1
+ # SQLite version 3.x
2
+ # gem install sqlite3
3
+ adapter: sqlite3
4
+ database: db/<%= type %>.sqlite3
5
+ pool: 5
6
+ timeout: 5000
data/lib/wrapper.rb ADDED
@@ -0,0 +1,225 @@
1
+ #
2
+ # wrapper.rb - Wrapper class for a generic command
3
+ #
4
+ # Copyright:: Copyright (C) 2011
5
+ # Raoul Bonnal <r@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+
10
+ #Notes
11
+ # in case you are developing a new wrapper and want to have a secure environment
12
+ # you must not define the program name and the task will not be created.
13
+
14
+
15
module Bio
  module Command
    # Mixin that turns a class into a wrapper around an external command line
    # program.
    #
    # The including class declares the binary and its accepted options through
    # the class level helpers (see ClassMethods); instances collect parameter
    # values, render them as command line switches and run the program.
    module Wrapper

      # Hook: gives the including class the class level configuration helpers.
      def self.included(base)
        base.extend(ClassMethods)
      end

      # Options declared on the class merged with the ones given to this
      # instance; instance options win on the same key.
      # A class which declared no option contributes an empty hash.
      def options
        (self.class.options || {}).merge(@options)
      end

      # Adds/overrides instance options. Keys are stored as strings.
      def options=(option={})
        option.each { |name, value| @options[name.to_s] = value }
      end

      # The binary run by this instance.
      def program
        @program
      end

      # The sub command declared on the class, if any.
      def sub_program
        self.class.sub_program
      end

      # binary::  program to run, defaults to the one declared on the class
      # options:: extra option definitions for this instance only; keys are
      #           stored as strings, like the ones added with add_option and
      #           options=, so that params= can match them
      def initialize(binary=nil, options={})
        @program = binary || self.class.program
        @options = {}
        self.options = (options || {})
        @params = {}
      end

      # Parameters are accepted ONLY if the key is present as
      # a key on the options hash. Sort of validation.
      # ONLY the valid options are taken into account.
      # It is like a third level of configuration.
      #TODO: check :aliases in options as well, note that now only the main option name is verified
      def params=(opts={})
        known = options # computed once, it does not change while iterating
        opts.each_pair do |parameter, value|
          key = parameter.to_s
          @params[key] = value if known.has_key?(key)
        end
      end

      # The option definitions which declare a :default value.
      def default_options
        options.select do |_name, opts|
          opts.has_key? :default
        end
      end

      # Return the options which have a default and the parameters
      # set by the user.
      # Precedence goes to the params set by the user.
      def params
        default_options.merge(@params)
      end

      # Forget every parameter set by the user.
      def reset_params
        @params.clear
      end

      # Return the options and parameters formatted as typed on the command
      # line, one array element per option. Elements are nil for parameters
      # whose value is false (run removes them).
      #
      # separator:: string placed between a long option and its value; not
      #             every application requires "=". nil falls back to "=".
      #
      # TODO: need to be compliant with Bio::Command ?
      # TODO: make a test because it should not return an empty string.
      # TODO: refactor, it is not pretty
      def normalize_params(separator="=")
        separator ||= "="
        known = options
        aliased = use_aliases?
        params.map do |name, value|
          option_name = name.to_s
          # Evaluated lazily: the definition is looked up only in the branches
          # which may render an alias.
          use_alias = lambda { aliased && known[option_name].has_key?(:aliases) }
          if value.kind_of? Hash
            # deprecated: the value is an option definition, i.e. the user did
            # not set the parameter and its :default is rendered.
            #TODO: refactor this code and verify that the boolean needs a specific options setting.
            default = value[:default]
            if value.has_key?(:type) && value[:type] == :boolean && default
              "--#{option_name}"
            elsif use_alias.call
              "#{known[option_name][:aliases]} #{default}"
            else
              "--#{option_name}#{separator}#{default}"
            end
          elsif value == true
            # plain switch
            use_alias.call ? known[option_name][:aliases] : "--#{option_name}"
          elsif value != false
            if use_alias.call
              # :collapse glues the value to the alias with no space in between
              "#{known[option_name][:aliases]}#{known[option_name][:collapse] ? "" : " "}#{value}"
            else
              "--#{option_name}#{separator}#{value}"
            end
          end
        end
      end

      # Where the program writes its results, :file unless declared otherwise.
      def output
        self.class.output || :file
      end

      # True when the class asked to render options through their aliases.
      def use_aliases?
        self.class.aliases
      end

      # Run the program with the current parameters.
      #
      # opts keys:
      # :arguments::   positional arguments appended after the options
      # :output_file:: mandatory when the program writes on stdout; stdout is
      #                saved there and stderr in "<output_file>.err"
      # :separator::   see normalize_params; "=" when omitted
      # :options::     currently ignored
      #
      # Keys omitted by the caller take the default value.
      #
      # Returns nil when no program is associated or when the output is
      # captured from stdout, otherwise the result of Bio::Command.query_command.
      # Raises RuntimeError for a stdout program with no :output_file.
      #
      #TODO: refactor mostly due to stdin/out
      def run(opts = {:options=>{}, :arguments=>[], :output_file=>nil, :separator=>"="})
        if program.nil?
          warn "WARNING: no program is associated with #{class_name.upcase} task."
          return nil
        end
        # A hash passed by the caller replaces the default argument as a whole:
        # merge it on the defaults so a missing :separator does not become nil.
        opts = {:options=>{}, :arguments=>[], :output_file=>nil, :separator=>"="}.merge(opts || {})
        command = [program, sub_program, normalize_params(opts[:separator]), opts[:arguments]].flatten.compact

        return Bio::Command.query_command(command) unless output == :stdout

        raise "Can't write to any output file. With a program which writes on stdout you must provide a file name" if opts[:output_file].nil?
        # Block form: both logs get closed even if the command raises.
        File.open(opts[:output_file], 'w') do |file_stdlog|
          File.open("#{opts[:output_file]}.err", 'w') do |file_errlog|
            Bio::Command.call_command_open3(command) do |pin, pout, perr|
              pout.sync = true
              perr.sync = true
              # Both streams are drained concurrently so the child can not
              # block on a full stderr pipe while stdout is being read.
              readers = [
                Thread.start { pout.each_line { |line| file_stdlog.puts line } },
                Thread.start { perr.each_line { |line| file_errlog.puts line } }
              ]
              begin
                pin.close
              ensure
                readers.each { |reader| reader.join }
              end
            end #command call open3
          end
        end
        nil
      end #run

      # Inject into the Thor::Sandbox::TaskName (klass) the options defined for
      # this wrapper and define the task +task_name+ which calls the block.
      # Example of call
      #  desc "task_name ARG_ONE ARG_SECOND", "run tophat as from command line"
      #  Bio::Ngs::Tophat.new.thor_task(self, :tophat) do |wrapper, task, ARG_ONE ARG_SECOND|
      #    puts ARG_ONE
      #    puts ARG_SECOND
      #    #you tasks here
      #  end
      # Returns nil, with a warning, when no program is associated.
      def thor_task(klass, task_name, &block)
        if program.nil?
          warn "WARNING: no program is associated with #{class_name.upcase} task, does not make sense to create a thor task."
          return nil
        end
        if klass
          wrapper = self
          klass.class_eval do
            wrapper.options.each_pair do |name, opt|
              method_option name, opt
            end #each_pair
            # Thor's behavior should be respected passing attributes
            define_method task_name do |*args|
              #it's mandatory that the first and second parameter are respectively wrapper and task
              expected = block.parameters.size - 2
              raise ArgumentError, "wrong number of arguments (#{args.size} for #{expected})" if args.size != expected
              block.call(wrapper, self, *args)
            end
          end #class_eval
        end #klass
      end #thor_task

      # Return the last part of the class name, downcased.
      def class_name
        self.class.name.split("::").last.downcase
      end

      # Class level configuration of a wrapper.
      module ClassMethods
        #TODO: do I need to set a default program name using class name or not ?
        # or do we need to specify somewhere a default path and look for a real binary ?

        # Output types accepted by set_output.
        OUTPUT = [:file, :stdout, :stdin].freeze

        # output = {:file=>true, :stdout=>}
        attr_accessor :output, :program, :options, :aliases, :sub_program

        #TODO I don't like this way, Is it possible to configure the variable as global and default ?
        # Declare where the program writes its results.
        # Raises RuntimeError for a type not listed in OUTPUT.
        def set_output(output_type=:file)
          if OUTPUT.include? output_type
            @output = output_type
          else
            raise "Output type #{output_type} is not suported. Valid types are #{OUTPUT}"
          end
        end

        # Ask the instances to render options through their :aliases.
        def use_aliases
          @aliases = true
        end

        # Declare an option accepted by the program; the name is stored as a
        # string, opt is the definition later handed to Thor's method_option.
        def add_option(name, opt={})
          @options = (@options || {}).merge(name.to_s=>opt)
        end

        alias set_program program=
        alias set_sub_program sub_program=

      end #ClassMethods

    end #Wrapper
  end #Command
end #Bio
@@ -0,0 +1,56 @@
1
+ #
2
+ # converter_qseq_spec.rb - RSpec Test
3
+ #
4
+ # Copyright:: Copyright (C) 2011
5
+ # Raoul Bonnal <r@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+
10
+
11
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
12
+
13
# Specs for the qseq to fastq conversion of Bio::Ngs::Converter::Qseq.
describe "Converter" do
  describe "Qseq" do

    # Absolute-from-spec path of a file in the fixture directory.
    def fixture_path(name)
      File.dirname(__FILE__) + "/fixture/" + name
    end

    # Converts the qseq fixture into a fastq file in the fixture directory and
    # yields the converter and the fastq file name while the input is still
    # open. Both files are opened in block form so the handles are always
    # closed, even when an expectation fails.
    # The generated fastq file is left in place (it is not deleted).
    def convert_fixture
      qseq = Bio::Ngs::Converter::Qseq.new(:pe)
      fastq_filename = fixture_path("s_1_1_1108_qseq.fastq")
      File.open(fixture_path("s_1_1_1108_qseq.txt"), 'r') do |qseq_file|
        qseq.buffer = qseq_file
        File.open(fastq_filename, 'w') do |fastq_file|
          qseq.to_fastq do |fastq|
            fastq_file.puts fastq if fastq
          end
        end
        yield qseq, fastq_filename
      end
    end

    it "convert two qseq lines into fastq" do
      qseq = Bio::Ngs::Converter::Qseq.new(:pe)
      qseq.buffer = "H125	98	1	1108	1586	1989	CGATGT	1	CAGA.C.................A.....GAATGGCATGGATCAAGAAAATCCCCCTTGTGAAGAAGAATCAGCAG	BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB	0\nH125	98	1	1108	1188	2036	CGATGT	1	CTTGTATGCAGCATCCCCTTCTTGCCTAGGGACTTGAAGGGCCAGGCTTCCTGTCATTGCCTCACTCAAATGTAGC	gggggggggggggegggggffggeggegggeagge^ggdbcgggcdgedegfggffff^ffffefdeeZefccceg	1"
      fastqs = []
      qseq.to_fastq do |fastq|
        fastqs << fastq if fastq
      end
      fastqs.first.should == "@H125:1:1108:1188:2036#0/1\nCTTGTATGCAGCATCCCCTTCTTGCCTAGGGACTTGAAGGGCCAGGCTTCCTGTCATTGCCTCACTCAAATGTAGC\n+\ngggggggggggggegggggffggeggegggeagge^ggdbcgggcdgedegfggffff^ffffefdeeZefccceg"
    end

    it "convert a qseq File into fastq for parierd ends" do
      convert_fixture do |_qseq, fastq_filename|
        head = File.open(fastq_filename, 'r') { |fastq_file| fastq_file.readlines[0..4].join("") }
        head.should == "@H125:1:1108:1188:2036#0/1\nCTTGTATGCAGCATCCCCTTCTTGCCTAGGGACTTGAAGGGCCAGGCTTCCTGTCATTGCCTCACTCAAATGTAGC\n+\ngggggggggggggegggggffggeggegggeagge^ggdbcgggcdgedegfggffff^ffffefdeeZefccceg\n"
      end
    end

    it "get statistics from converted reads" do
      convert_fixture do |qseq, _fastq_filename|
        qseq.stats.should == {:reads_total=>100, :reads_passed=>1, :reads_rejected=>99, :bases_passed_total=>76, :bases_rejected_total=>7524, :bases_passed_with_b_quality=>0, :bases_rejected_with_b_quality=>4004, :bases_passed_with_n=>0, :bases_rejected_with_n=>12}
      end
    end

  end
end