bio-ngs 0.4.7.alpha.03 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -5,18 +5,16 @@ source "http://rubygems.org"
5
5
 
6
6
  gem "bio", ">= 1.4.2"
7
7
  gem "bio-samtools", ">= 0.3.2"
8
- # gem "thor", path:"/Users/bonnalraoul/Documents/Develop/thor" #, ">= 0.14.6"
9
8
  gem "thor", "= 0.14.6"
10
9
  gem "rubyvis", ">= 0.5.0"
11
10
  gem "daemons", ">= 1.1.0"
12
11
  gem "ruby-ensembl-api", ">= 1.0.1"
13
12
  gem "activerecord",">= 3.0.5"
14
- gem "sqlite3", ">= 1.3.3"
15
- gem "bio-blastxmlparser"
16
13
  gem "progressbar",">= 0.9.0"
17
14
  gem "rake", "0.9.2.2"
18
15
  gem "json"
19
16
  gem "parallel"
17
+ gem "bio-blastxmlparser"
20
18
 
21
19
  # Add dependencies to develop your gem here.
22
20
  # Include everything needed to run rake, tests, features, etc.
@@ -26,8 +24,16 @@ group :development do
26
24
  gem "jeweler", "~> 1.8.3"
27
25
  gem "rcov", "~> 0.9.11"
28
26
  gem "bio", ">= 1.4.2"
29
- # gem "thor", path:"/Users/bonnalraoul/Documents/Develop/thor" #, ">= 0.14.6"
30
- gem "thor", "= 0.14.6"
27
+
28
+ platforms :jruby do
29
+ gem 'jdbc-sqlite3', :require => true
30
+ gem "activerecord-jdbcsqlite3-adapter"
31
+ end
32
+ platforms :ruby do
33
+ gem 'sqlite3', :require => 'sqlite3'
34
+ end
35
+
36
+ gem "thor", "= 0.14.6"
31
37
  gem "ffi", ">= 1.0.6"
32
38
  gem "rubyvis", ">= 0.5.0"
33
39
  gem "rspec", ">= 2.5.0"
@@ -35,10 +41,9 @@ group :development do
35
41
  gem "bio-samtools", ">= 0.3.2"
36
42
  gem "ruby-ensembl-api", ">= 1.0.1"
37
43
  gem "activerecord",">= 3.0.5"
38
- gem "sqlite3", ">= 1.3.3"
39
- gem "bio-blastxmlparser"
40
44
  gem "progressbar",">= 0.9.0"
41
45
  gem "json"
42
46
  gem "rake", "0.9.2.2"
43
47
  gem "parallel"
48
+ gem "bio-blastxmlparser"
44
49
  end
@@ -9,6 +9,10 @@ GEM
9
9
  activesupport (= 3.2.3)
10
10
  arel (~> 3.0.2)
11
11
  tzinfo (~> 0.3.29)
12
+ activerecord-jdbc-adapter (1.2.2)
13
+ activerecord-jdbcsqlite3-adapter (1.2.2)
14
+ activerecord-jdbc-adapter (~> 1.2.2)
15
+ jdbc-sqlite3 (~> 3.7.2)
12
16
  activesupport (3.2.3)
13
17
  i18n (~> 0.6)
14
18
  multi_json (~> 1.0)
@@ -26,28 +30,33 @@ GEM
26
30
  daemons (1.1.8)
27
31
  diff-lcs (1.1.3)
28
32
  ffi (1.0.11)
33
+ ffi (1.0.11-java)
29
34
  git (1.2.5)
30
35
  i18n (0.6.0)
36
+ jdbc-sqlite3 (3.7.2)
31
37
  jeweler (1.8.3)
32
38
  bundler (~> 1.0)
33
39
  git (>= 1.2.5)
34
40
  rake
35
41
  rdoc
36
42
  json (1.7.3)
43
+ json (1.7.3-java)
37
44
  log4r (1.1.10)
38
- multi_json (1.3.5)
45
+ multi_json (1.3.6)
39
46
  nokogiri (1.5.2)
47
+ nokogiri (1.5.2-java)
40
48
  parallel (0.5.16)
41
49
  progressbar (0.11.0)
42
50
  rake (0.9.2.2)
43
51
  rcov (0.9.11)
52
+ rcov (0.9.11-java)
44
53
  rdoc (3.12)
45
54
  json (~> 1.4)
46
55
  rspec (2.10.0)
47
56
  rspec-core (~> 2.10.0)
48
57
  rspec-expectations (~> 2.10.0)
49
58
  rspec-mocks (~> 2.10.0)
50
- rspec-core (2.10.0)
59
+ rspec-core (2.10.1)
51
60
  rspec-expectations (2.10.0)
52
61
  diff-lcs (~> 1.1.3)
53
62
  rspec-mocks (2.10.1)
@@ -65,16 +74,19 @@ GEM
65
74
  tzinfo (0.3.33)
66
75
 
67
76
  PLATFORMS
77
+ java
68
78
  ruby
69
79
 
70
80
  DEPENDENCIES
71
81
  activerecord (>= 3.0.5)
82
+ activerecord-jdbcsqlite3-adapter
72
83
  bio (>= 1.4.2)
73
84
  bio-blastxmlparser
74
85
  bio-samtools (>= 0.3.2)
75
86
  bundler (~> 1.1.0)
76
87
  daemons (>= 1.1.0)
77
88
  ffi (>= 1.0.6)
89
+ jdbc-sqlite3
78
90
  jeweler (~> 1.8.3)
79
91
  json
80
92
  parallel
@@ -85,5 +97,5 @@ DEPENDENCIES
85
97
  ruby-ensembl-api (>= 1.0.1)
86
98
  rubyvis (>= 0.5.0)
87
99
  shoulda
88
- sqlite3 (>= 1.3.3)
100
+ sqlite3
89
101
  thor (= 0.14.6)
@@ -0,0 +1,408 @@
1
+ # bio-ngs
2
+
3
+ Provides a framework for handling NGS data with Bioruby.
4
+
5
+ ## Features & Aims
6
+ * Supports many tools for NGS: SAMtools, BWA, Bowtie, TopHat, Cufflinks
7
+ * Avoids conflicts: required tools and libraries are installed in a sandbox
8
+ * Detects pre-installed software at runtime
9
+ * Reporting: text and graphs
10
+ * Simple API for developing your own scripts in Ruby
11
+
12
+
13
+ ## Requirements
14
+ * http://hannonlab.cshl.edu/fastx_toolkit/ (the gem tries to install this tool by itself)
15
+ * http://www.gnuplot.info/ tested on version 4.6
16
+ * libxslt1-dev
17
+
18
+ ## Install
19
+ ### Quick Start
20
+ gem install bio-ngs
21
+ * Gems dependencies resolved by RubyGems
22
+ * External software will be downloaded, compiled and installed in a sandbox
23
+ * No root grants required, no conflict with pre installed applications
24
+
25
+ ### Do not install third party software
26
+ gem install bio-ngs -- --no-third-party
27
+
28
+ * Using system wide software
29
+
30
+ Please follow the instructions for your own distribution/operating system
31
+
32
+
33
+ ## Tasks
34
+ We'll try to keep this list updated but just in case type `biongs -T` to get the most updated list.
35
+ _We are working on these and other tasks, if you find some bugs, please open an issue on Github._
36
+
37
+ ### bwa
38
+
39
+ biongs bwa:aln [PREFIX] [FASTA/Q] # Run BWA aln (short reads)
40
+ biongs bwa:bwasw [PREFIX] [FASTA/Q] # Run BWA bwasw (long reads)
41
+ biongs bwa:fastmap [PREFIX] [FASTA/Q] # Run BWA Fastmap (identify super-maximal exact matches)
42
+ biongs bwa:index [FASTA] # Create BWA index
43
+ biongs bwa:sampe [PREFIX] [SAI-1 FILE] [SAI-2 FILE] [FASTA/Q-1 FILE] [FASTA/Q-2 FILE] # Run BWA SAM Paired End conversion
44
+ biongs bwa:samse [PREFIX] [SAI FILE] [FASTA/Q FILE] # Run BWA SAM Single End conversion
45
+
46
+ ### convert
47
+ Most of these tasks create sub-processes to speed up conversions
48
+
49
+ biongs convert:bam:extract_genes BAM GENES --ensembl-release=N -o, --output=OUTPUT # Extract GENES from bam. It connects to Ensembl Humnan,...
50
+ biongs convert:bam:merge -i, --input-bams=one two three # Merge multiple bams in a single one, BAMS separated by...
51
+ biongs convert:bam:sort BAM [PREFIX] # Sort and create and index for the BAM file name
52
+ biongs convert:bcl:qseq:convert RUN OUTPUT [JOBS] # Convert a bcl dataset in qseq
53
+ biongs convert:illumina:de:gene DIFF GTF # extract the transcripts
54
+ biongs convert:illumina:de:isoform DIFF GTF # extract the transcripts
55
+ biongs convert:illumina:de:rename_qs DIFF_FILE NAMES # rename q1,...,qn with names provided by the user(comma...
56
+ biongs convert:illumina:fastq:trim_b FASTQ # perform a trim on all the sequences on B qualities wit...
57
+ biongs convert:illumina:humanize:build_compare_kb GTF # Build the JSON file with the annoation from the GTF fi...
58
+ biongs convert:illumina:humanize:isoform_exp GTF ISOFORM # tag the XLOC gathering information from GTF (ensembl)
59
+ biongs convert:qseq:fastq:by_file FIRST OUTPUT # Convert a qseq file into fastq
60
+ biongs convert:qseq:fastq:by_lane LANE OUTPUT # Convert all the file in the current and descendant dir...
61
+ biongs convert:qseq:fastq:by_lane_index LANE INDEX OUTPUT # Convert the qseq from a line and index in a fastq file
62
+ biongs convert:qseq:fastq:samples_by_lane SAMPLES LANE OUTPUT # Convert the qseqs for each sample in a specific lane.
63
+ SAMPLES is an array of index codes separated by commas lane
64
+ is an integer
65
+
66
+ ### filter
67
+
68
+ biongs filter:by_list TABLE LIST # Extract from TABLE the row with a key in LIST
69
+ biongs filter:cufflinks:tra_at_idx GTF IDX # Extract transcript(s) from Cufflinks' GTF at a specific location or givin the transcript name,...
70
+ biongs filter:cufflinks:transcripts [GTF] # Extract transcripts from Cufflinks' GTF
71
+
72
+ ### history
73
+
74
+ biongs history:clear # Wipe out the tasks history
75
+
76
+ ### homology
77
+
78
+ biongs homology:convert:blast2text [XML FILE] --file-out=FILE_OUT # Convert Blast output to tab-separated file
79
+ biongs homology:convert:go2json # Convert the GO annotations from the db into a JSON file
80
+ biongs homology:db:export [TABLE] --fileout=FILEOUT # Export the data from a table to a tab-separated file
81
+ biongs homology:db:init # Initialize Homology DB
82
+ biongs homology:download:all # Download the Uniprot and GO Annotation file
83
+ biongs homology:download:goannotation # Download the Uniprot GeneOntology Annotation file
84
+ biongs homology:download:uniprot # Download the Uniprot-SwissProt file from UniprotKB
85
+ biongs homology:load:blast [FILE] # Parse Blast XML output and load the results into Homology DB
86
+ biongs homology:load:goa # Import GO Annotation file
87
+ biongs homology:report:blast # Output a graphical report on the Blast homology search
88
+
89
+ ### install
90
+
91
+ biongs install:tools # Download and install NGS tools
92
+
93
+ ### ontology
94
+
95
+ biongs ontology:db:export [TABLE] --fileout=FILEOUT # Export the data from a table to a tab-separated file
96
+ biongs ontology:db:init # Initialize Ontology DB
97
+ biongs ontology:download:all # Download the GO files
98
+ biongs ontology:download:go # Download the GeneOntology file
99
+ biongs ontology:download:goslim # Download the Uniprot GeneOntology Slim file
100
+ biongs ontology:load:genego [FILE] # Import Gene-GO file (JSON)
101
+ biongs ontology:load:go [FILE] # Import GO definition file
102
+ biongs ontology:report:go # Output a graphical report on the GO for the sequences annotated in the db
103
+
104
+ ### pre
105
+
106
+ biongs pre:illumina_filter [DIR(s)] # Filter the data using Y/N flag in FastQ headers (Illumina). Search for fastq.gz files within director...
107
+ biongs pre:merge [file(s)] # Merge together fastQ files (accepts wildcards)
108
+ biongs pre:paired_merge [file(s)] # Merge together FastQ files while checking for correct pairing (accepts wildcards)
109
+ biongs pre:trim [fastq(s)] # Calulate quality profile and trim the all the reads using FastX (accepts wildcards)
110
+ biongs pre:uncompress [file(s)] # Uncompress multiple files in parallel (accepts wildcards)
111
+
112
+ ### project
113
+
114
+ biongs project:new [NAME] # Create a new NGS project directory
115
+
116
+ ### quality
117
+
118
+ biongs quality:boxplot FASTQ_QUALITY_STATS # plot reads quality as boxplot
119
+ biongs quality:fastq_stats FASTQ # Reports quality of FASTQ file
120
+ biongs quality:illumina_b_profile_raw FASTQ --read-length=N # perform a profile for reads coming fom Illumina 1.5+ and write the report in ...
121
+ biongs quality:illumina_b_profile_svg FASTQ --read-length=N # perform a profile for reads coming fom Illumina 1.5+
122
+ biongs quality:illumina_projects_stats # Reports quality of FASTQ files in an Illumina project directory
123
+ biongs quality:nucleotide_distribution FASTQ_QUALITY_STATS # plot reads quality as boxplot
124
+ biongs quality:quality_trim FASTQ # Trim all the sequences using quality information
125
+ biongs quality:reads FASTQ # perform quality check for NGS reads
126
+ biongs quality:reads_coverage FASTQ_QUALITY_STATS # plot reads coverage in bases
127
+ biongs quality:scatterplot EXPR1 EXPR2 OUTPUT # plot quantification values as scatterplot in png format
128
+
129
+ ### rna
130
+
131
+ biongs rna:compare GTF_REF OUTPUTDIR GTFS_QUANTIFICATION # GTFS_QUANTIFICATIONS, use a comma separated list of gtf
132
+ biongs rna:idx2fasta INDEX FASTA # Create a fasta file from an indexed genome, using bowtie-inspect
133
+ biongs rna:mapquant DIST INDEX OUTPUTDIR FASTQS # map and quantify
134
+ biongs rna:merge GTF_REF FASTA_REF ASSEMBLY_GTF_LIST # GTFS_QUANTIFICATIONS, use a comma separated list of gtf
135
+ biongs rna:quant GTF OUTPUTDIR BAM # Genes and transcripts quantification
136
+ biongs rna:quantdenovo GTF_guide OUTPUTDIR BAM # Genes and transcripts quantification discovering de novo transcripts
137
+ biongs rna:tophat DIST INDEX OUTPUTDIR FASTQS # run tophat as from command line, default 6 processors and then create a sorted b...
138
+
139
+ ### sff
140
+
141
+ biongs sff:extract [FILE] # Run sff_extract on a SFF file
142
+
143
+ ## TasksExamples
144
+
145
+ ### Conversion
146
+ biongs convert:bam:extract_genes your_original.bam BLID,GATA3,PTPRC --ensembl_release=61 --ensembl_specie=homo_sapiens
147
+
148
+ ### Filtering
149
+ When you have mapped your reads to a reference genome, you can decide to filter the output (GTF) to extract only those transcripts which meet your desired requirements. You can filter by length, by whether a transcript is multi- or mono-exon, by coverage, and by whether it is a brand new transcript, a new isoform of an already annotated gene, or just an annotated transcript.
150
+
151
+ Scenario: filtering transcripts
152
+ Having a transcripts.gtf dataset generated from CufflinksQuantification
153
+ I want only the new transcripts (also with an annotated gene)
154
+ Which are multi exons
155
+ With a length greater than 1340
156
+ With minimum coverage greater than 10
157
+ Then I want to save them in my_filtered_data.gtf
158
+ ***
159
+ biongs filter:cufflinks:transcripts your_original.gtf -m -l 1340 -c 10.0 -n -o my_filtered_data.gtf
160
+
161
+ Then in some cases I need to extract only some of them, or maybe parse them with external programs. Biongs has a specific task for this:
162
+
163
+ Having my_filtered_data.gtf
164
+ Generated by "filtering transcripts"
165
+ I want to extract transcript number 10
166
+ Then I want to save it in BED format
167
+ Using UCSC notation
168
+ ***
169
+ biongs filter:cufflinks:tra_at_idx my_filtered_data.gtf #of_the_transcript_to_retrieve -u
170
+
171
+ The first time tra_at_idx is used, it will take more time than usual because it creates an internal index: a simple Hash, marshalled and dumped, stored in a file named like the input with idx as a postfix.
172
+
173
+
174
+ # ForDevelopers
175
+
176
+ ## HowToContribute
177
+ 1. Clone Main Repository
178
+ `git clone https://github.com/helios/bioruby-ngs`
179
+ This command will create a local copy of the main repository
180
+
181
+ 2. Install Bioinformatics Tools into the repository directory
182
+ `rake devenv:bio_tools`
183
+
184
+ ## Wrapper
185
+ Bio-Ngs comes with a built-in wrapper to map binary software directly in BioRuby as objects. From this wrapper object it is also possible to create Thor tasks, with a lot of sugar.
186
+ ### Wrapping a binary
187
+
188
+ We want to wrap TopHat, the famous tool for NGS analyses.
189
+ 1. The first step is to include the Wrapping module
190
+ 2. set the name of the binary to call. Note: if you do not set the program name, it will not be possible to create a Thor task and/or run the program
191
+ 3. add the options that the binary accepts; it is usually preferable to declare all the options, which you can discover by typing `your_program_name -h`
192
+
193
+ module Bio
194
+ module Ngs
195
+ class Tophat
196
+ include Bio::Command::Wrapper
197
+
198
+ set_program Bio::Ngs::Utils.binary("tophat/tophat")
199
+ add_option "output-dir",:type => :string, :aliases => '-o'
200
+ add_option "min-anchor", :type => :numeric, :aliases => '-a'
201
+ add_option "splice-mismatches", :type => :numeric, :aliases => '-m'
202
+ #all other options that you want to expose with the wrapping
203
+ end #Tophat
204
+ end #Ngs
205
+ end #Bio
206
+
207
+ It is possible to specify in the class
208
+ use_aliases
209
+ if you want to give a priority to short notation or if your program has only the short notation but you want to extend the task with the long one as well.
210
+ We defined a new property for add_option called `:collapse => true`; it is used only with `use_aliases` and it collapses the passed parameter to the short notation. An example coming from the _fastx.rb_ wrapper, _note last row_:
211
+
212
+ module Bio
213
+ module Ngs
214
+ module Fastx
215
+ class Trim
216
+ include Bio::Command::Wrapper
217
+ set_program Bio::Ngs::Utils.binary("fastq_quality_trimmer")
218
+ use_aliases
219
+ add_option :min_size, :type=>:numeric, :default=>20, :aliases => "-l", :desc=>"Minimum length - sequences shorter than this (after trimming)
220
+ will be discarded. Default = 0 = no minimum length."
221
+ add_option :min_quality, :type=>:numeric, :default=>10, :aliases => "-t", :desc=>"Quality threshold - nucleotides with lower
222
+ quality will be trimmed (from the end of the sequence)."
223
+ add_option :output, :type=>:string, :aliases => "-o", :desc => "FASTQ output file.", :collapse=>true
224
+ add_option :input, :type=>:string, :aliases => "-i", :desc => "FASTQ input file.", :collapse=>true
225
+ add_option :gzip, :type => :boolean, :aliases => "-z", :desc => "Compress output with GZIP."
226
+ add_option :verbose, :type => :boolean, :aliases => "-v", :desc => "[-v] = Verbose - report number of sequences.
227
+ If [-o] is specified, report will be printed to STDOUT.
228
+ If [-o] is not specified (and output goes to STDOUT),
229
+ report will be printed to STDERR."
230
+ add_option :quality_type, :type=>:numeric, :default => 33, :aliases => "-Q", :desc=>"Quality of fastq file"
231
+ end
232
+ end
233
+ end
234
+ end
235
+
236
+ fastq_quality_trimmer accepts only short notation options and we need to pass an input file, but for some reason popen used internally doesn't work properly with the standard behavior so using `:collapse=>true` the application will be called:
237
+
238
+ fastq_quality_trimmer -l 20 -t 10 -Q 33 -iinput_file_name.fastq -ooutput_file_name.fastq_trim
239
+
240
+ Running the program by hand from the command line using a space as separator after `-i` and `-o` works as expected. `:collapse` is a workaround for this problem.
241
+
242
+
243
+
244
+ In case your program works like git, which has a main program and `sub_programs` for each feature, you can specify the sub program name with
245
+
246
+ set_sub_program "sub_name"
247
+
248
+ The wrapper will run the command composing:
249
+
250
+ set_program set_sub_program options arguments
251
+
252
+ A practical example of this behavior is samtools which has multiple sub programs view, merge, sort, ....
253
+ SamTools is a particular case: in biongs we use both bio-samtools (a binding with FFI) and the wrapper, because the merge function was too complicated for the binding, or at least we did not spend enough time on it, so we made the wrapping for this functionality.
254
+
255
+ This step is very similar to define a Thor task, add_option is grabbed/inspired from Thor.
256
+ Then you can use this binary also from a bioruby script just calling:
257
+
258
+ tophat = Bio::Ngs::Tophat.new
259
+ tophat.params = {"mate-inner-dist"=>dist, "output-dir"=>outputdir, "num-threads"=>1, "solexa1.3-quals"=>true}
260
+
261
+ __very important__: _you can pass parameters that have a name which has been previously declared in the Tophat's class. if you want to pass not declared parameters/options please use arguments._
262
+
263
+ tophat.run :arguments=>[index, "#{fastqs}" ]
264
+
265
+ ### Define the Task
266
+ With our new wrapper, let's define a Thor task on the fly
267
+
268
+ class MyTasks < Thor
269
+ desc "tophat DIST INDEX OUTPUTDIR FASTQS", "run tophat as from command line, default 6 processors"
270
+ Bio::Ngs::Tophat.new.thor_task(self, :tophat) do |wrapper, task, dist, index, outputdir, fastqs|
271
+ wrapper.params = {"mate-inner-dist"=>dist, "output-dir"=>outputdir, "num-threads"=>1, "solexa1.3-quals"=>true}
272
+ wrapper.run :arguments=>[index, "#{fastqs}" ], :separator=>"="
273
+ #you tasks here
274
+ end
275
+ end
276
+
277
+ Now if you list the tasks with `thor -T` you will see the new task.
278
+
279
+ You can create a new wrapper and configure it and run it from inside a Thor's tasks, like in `biongs quality:boxplot`
280
+
281
+ desc "boxplot FASTQ_QUALITY_STATS", "plot reads quality as boxplot"
282
+ method_option :title, :type=>:string, :aliases =>"-t", :desc => "Title (usually the solexa file name) - will be plotted on the graph."
283
+ method_option :output, :type=>:string, :aliases =>"-o", :desc => "Output file name. default is input file_name with .txt."
284
+ def boxplot(fastq_quality_stats)
285
+ output_file = options.output || "#{fastq_quality_stats}.png"
286
+ boxplot = Bio::Ngs::Fastx::ReadsBoxPlot.new
287
+ boxplot.params={input:fastq_quality_stats, output:output_file}
288
+ boxplot.run
289
+ end
290
+
291
+ ### Override the run command when the binary doesn't behave normally
292
+ module Bio
293
+ module Ngs
294
+ module Samtools
295
+ class View
296
+ include Bio::Command::Wrapper
297
+ set_program Bio::Ngs::Utils.binary("samtools")
298
+ add_option "output", :type => :string, :aliases => '-o'
299
+
300
+ alias :original_run :run
301
+ def run(opts = {:options=>{}, :arguments=>[], :output_file=>nil, :separator=>"="})
302
+ opts[:arguments].insert(0,"view")
303
+ opts[:arguments].insert(1,"-b")
304
+ opts[:arguments].insert(2,"-o")
305
+ original_run(opts)
306
+ end
307
+ end #View
308
+ end #Samtools
309
+ end #Ngs
310
+ end #Bio
311
+
312
+ #### Disable binary check at load time
313
+ When a wrapper is defined, BioNGS verifies that the program is installed on the local system; if it is not, it emits a warning message and the task is disabled by default. This check is made for each wrapped binary, so it could take a long time the first time you load BioNGS.
314
+ To skip this check the user can define an environment variable assigning one of these terms "true yes ok 1" to BIONGS_SKIP_CHECK_BINARIES
315
+
316
+ export BIONGS_SKIP_CHECK_BINARIES=true
317
+
318
+ you can also add this setting to the .bashrc or .profile in the user home directory.
319
+
320
+ ## Features
321
+ ### Iterators for output files
322
+
323
+ Example: CuffDiff. In this class it is possible to define an iterator for a specific set of output files: genes, isoforms, tss_groups, cds.
324
+ Activating the iterator is just a matter of calling a class method in the class definition
325
+
326
+ class Bio::Ngs::Cufflinks::Diff
327
+ #... all the previous definitions
328
+ #define iterators
329
+ add_iterator_for :genes
330
+ add_iterator_for :isoforms
331
+ add_iterator_for :cds
332
+ add_iterator_for :tss_groups
333
+ end
334
+
335
+ This is an example of CuffDiff, parsing `genes.fpkm_tracking` file:
336
+
337
+ Bio::Ngs::Cufflinks::Diff.foreach_gene_tracked("path_to_cuffdiff_output_directory") do |gene_fpkm_track|
338
+ expression_profile = (1..7).map do |sample_idx|
339
+ gene_fpkm_track["q#{sample_idx}_FPKM"].to_f
340
+ end
341
+
342
+ #do your stuff accessing this tabular file with gene_fpkm_track["name of the field"]
343
+ end
344
+
345
+ In this case the CSV library is used internally to parse the file in an easy way; this trades performance on huge files for flexibility.
346
+
347
+ ## Loading or Not tasks from outside
348
+ If in your external library or binary you define LoadBaseTasks in Bio::Ngs (as a constant), requiring `'bio-ngs'` will load only the libraries and not bio-ngs's tasks.
349
+
350
+ module Bio
351
+ module Ngs
352
+ LoadBaseTasks = true
353
+ end
354
+ end
355
+
356
+ This is useful if you want to develop a separate binary which uses bio-ngs libraries.
357
+ It is not yet possible to define a list of desired tasks to load.
358
+
359
+ ### Notes
360
+ * It's possible to add more sugar and we are working hard on it
361
+ * aliases are not well supported at this time. ToDo
362
+
363
+ # TODO
364
+ * Write Tutorial for Wrapper & Pipes
365
+ * Write Tutorial for handling Illumina/Fastq.gz with BioNGS Bio::Ngs::Illumina::FastqGz
366
+ * Report the version of every software installed/used from bio-ngs
367
+ * Develop fastq quality reports with Rubyvis ?
368
+ * Write documentation
369
+ * DONE: Wrapper: better support for aliases and Wrapper#params
370
+ * Convert: re factor code to use ::Daemons
371
+ * DONE:misk_tasks? Extract genes/regions of interest from a bam file and create a smaller bam
372
+ * BRANCH:misk_tasks Explore possibility to use DelayedJobs
373
+ * biongs ann:ensembl:gtf:features:categorize GTF GTF categorize also by chromosome not only by BioType
374
+ * configuration file input,output, experimental design
375
+ * DONE: include fastx toolkit, download and compile
376
+ * ANSWER: how to put in background tasks that can be run in parallel? Use Parallel (see code for quality:illumina_project_stats)
377
+ * is it possible to establish a relation between input data and output data ? like fastq task_selected output/s
378
+ * add description for developers on how to include new external tools with versions.yaml
379
+
380
+ # ChangeLog
381
+ * 2011-05-26: Bump to version 0.2.0 Complete support for installing fastx and possibly other downloadable tools, inside the gem
382
+ * 2011-05-25: Bump to version 0.1.0 Update Cufflinks toolkit 1.0.2. Added initial support to fastx tool kit (binaries not included)
383
+ * 2011-04-08: Tasks for filtering Ensembl annotation and create classifications. (misk_tasks branch)
384
+
385
+
386
+ # Contributing to bio-ngs
387
+
388
+ Please do not hesitate to contact us:
389
+
390
+ Raoul J.P. Bonnal, <http://github.com/helios>, r -at- bioruby -dot- org
391
+ Francesco Strozzi, <http://github.com/fstrozzi>
392
+
393
+ Post issues on <https://github.com/helios/bioruby-ngs/issues>
394
+
395
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
396
+ * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
397
+ * [Fork](https://github.com/helios/bioruby-ngs/fork_select) the project
398
+ * Start a feature/bugfix branch
399
+ * Commit and push until you are happy with your contribution
400
+ * [Pull request](https://github.com/helios/bioruby-ngs/pull/new/master) to BioNGS
401
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
402
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
403
+
404
+ ## Copyright
405
+
406
+ Copyright (c) 2011 Francesco Strozzi and Raoul J.P. Bonnal. See LICENSE.txt for
407
+ further details.
408
+