bio-ngs 0.4.6.alpha.01 → 0.4.6.alpha.02

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. data/Gemfile +4 -2
  2. data/Gemfile.lock +21 -21
  3. data/README.rdoc +51 -4
  4. data/VERSION +1 -1
  5. data/bin/biongs +1 -0
  6. data/bio-ngs.gemspec +36 -8
  7. data/features/cufflinks_gtf_parser.feature +22 -0
  8. data/features/cufflinks_gtf_parser_indexing.feature +20 -0
  9. data/features/step_definitions/cufflinks_gtf.rb +30 -0
  10. data/features/step_definitions/cufflinks_gtf_parser_indexing.rb +53 -0
  11. data/features/support/env.rb +2 -0
  12. data/lib/bio-ngs.rb +19 -5
  13. data/lib/bio/appl/ngs/cufflinks.rb +447 -281
  14. data/lib/bio/appl/ngs/cufflinks/gtf/gtf.rb +23 -0
  15. data/lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb +248 -0
  16. data/lib/bio/appl/ngs/cufflinks/gtf/transcript.rb +154 -0
  17. data/lib/bio/ngs/fs.rb +46 -0
  18. data/lib/bio/ngs/illumina/fastq.rb +176 -0
  19. data/lib/bio/ngs/illumina/illumina.rb +64 -0
  20. data/lib/bio/ngs/illumina/project.rb +81 -0
  21. data/lib/bio/ngs/illumina/sample.rb +85 -0
  22. data/lib/bio/ngs/task.rb +1 -1
  23. data/lib/bio/ngs/utils.rb +124 -112
  24. data/lib/meta.rb +162 -0
  25. data/lib/tasks/convert.thor +14 -14
  26. data/lib/tasks/filter.thor +158 -23
  27. data/lib/tasks/quality.thor +24 -4
  28. data/lib/tasks/rna.thor +26 -0
  29. data/lib/wrapper.rb +28 -0
  30. data/spec/bio/ngs/fs_spec.rb +70 -0
  31. data/spec/bio/ngs/illumina/fastq_spec.rb +52 -0
  32. data/spec/bio/ngs/illumina/illumina_spec.rb +21 -0
  33. data/spec/bio/ngs/illumina/project_spec.rb +0 -0
  34. data/spec/bio/ngs/illumina/sample_spec.rb +0 -0
  35. data/spec/bio/ngs/illumina/samples_spec.rb +0 -0
  36. data/spec/filter_spec.rb +25 -0
  37. data/spec/fixture/table_filter_list.txt +3 -0
  38. data/spec/fixture/table_filter_list_first_column.txt +2 -0
  39. data/spec/fixture/table_filter_source.tsv +44 -0
  40. data/spec/fixture/test-filtered-reference.fastq.gz +0 -0
  41. data/spec/fixture/test-merged-reference.fastq.gz +0 -0
  42. data/spec/fixture/test.fastq.gz +0 -0
  43. data/spec/meta_spec.rb +117 -0
  44. data/spec/spec_helper.rb +1 -1
  45. metadata +97 -69
data/Gemfile CHANGED
@@ -5,7 +5,8 @@ source "http://rubygems.org"
5
5
 
6
6
  gem "bio", ">= 1.4.2"
7
7
  gem "bio-samtools", ">= 0.3.2"
8
- gem "thor", ">= 0.14.6"
8
+ # gem "thor", path:"/Users/bonnalraoul/Documents/Develop/thor" #, ">= 0.14.6"
9
+ gem "thor", "= 0.14.6"
9
10
  gem "rubyvis", ">= 0.5.0"
10
11
  gem "daemons", ">= 1.1.0"
11
12
  gem "ruby-ensembl-api", ">= 1.0.1"
@@ -25,7 +26,8 @@ group :development do
25
26
  gem "jeweler", "~> 1.8.3"
26
27
  gem "rcov", "~> 0.9.11"
27
28
  gem "bio", ">= 1.4.2"
28
- gem "thor", ">= 0.14.6"
29
+ # gem "thor", path:"/Users/bonnalraoul/Documents/Develop/thor" #, ">= 0.14.6"
30
+ gem "thor", "= 0.14.6"
29
31
  gem "ffi", ">= 1.0.6"
30
32
  gem "rubyvis", ">= 0.5.0"
31
33
  gem "rspec", ">= 2.5.0"
@@ -1,15 +1,15 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- activemodel (3.2.2)
5
- activesupport (= 3.2.2)
4
+ activemodel (3.2.3)
5
+ activesupport (= 3.2.3)
6
6
  builder (~> 3.0.0)
7
- activerecord (3.2.2)
8
- activemodel (= 3.2.2)
9
- activesupport (= 3.2.2)
7
+ activerecord (3.2.3)
8
+ activemodel (= 3.2.3)
9
+ activesupport (= 3.2.3)
10
10
  arel (~> 3.0.2)
11
11
  tzinfo (~> 0.3.29)
12
- activesupport (3.2.2)
12
+ activesupport (3.2.3)
13
13
  i18n (~> 0.6)
14
14
  multi_json (~> 1.0)
15
15
  arel (3.0.2)
@@ -17,9 +17,9 @@ GEM
17
17
  bio-blastxmlparser (1.0.1)
18
18
  bio-logger (>= 1.0.0)
19
19
  nokogiri (>= 1.5.0)
20
- bio-logger (1.0.0)
20
+ bio-logger (1.0.1)
21
21
  log4r (>= 1.1.9)
22
- bio-samtools (0.5.2)
22
+ bio-samtools (0.5.3)
23
23
  bio (>= 1.4.2)
24
24
  ffi
25
25
  builder (3.0.0)
@@ -33,24 +33,24 @@ GEM
33
33
  git (>= 1.2.5)
34
34
  rake
35
35
  rdoc
36
- json (1.6.5)
36
+ json (1.7.3)
37
37
  log4r (1.1.10)
38
- multi_json (1.1.0)
38
+ multi_json (1.3.5)
39
39
  nokogiri (1.5.2)
40
40
  parallel (0.5.16)
41
- progressbar (0.10.0)
41
+ progressbar (0.11.0)
42
42
  rake (0.9.2.2)
43
43
  rcov (0.9.11)
44
44
  rdoc (3.12)
45
45
  json (~> 1.4)
46
- rspec (2.9.0)
47
- rspec-core (~> 2.9.0)
48
- rspec-expectations (~> 2.9.0)
49
- rspec-mocks (~> 2.9.0)
50
- rspec-core (2.9.0)
51
- rspec-expectations (2.9.0)
46
+ rspec (2.10.0)
47
+ rspec-core (~> 2.10.0)
48
+ rspec-expectations (~> 2.10.0)
49
+ rspec-mocks (~> 2.10.0)
50
+ rspec-core (2.10.0)
51
+ rspec-expectations (2.10.0)
52
52
  diff-lcs (~> 1.1.3)
53
- rspec-mocks (2.9.0)
53
+ rspec-mocks (2.10.1)
54
54
  ruby-ensembl-api (1.0.1)
55
55
  activerecord
56
56
  bio (>= 1)
@@ -60,9 +60,9 @@ GEM
60
60
  shoulda-matchers (~> 1.0.0)
61
61
  shoulda-context (1.0.0)
62
62
  shoulda-matchers (1.0.0)
63
- sqlite3 (1.3.5)
63
+ sqlite3 (1.3.6)
64
64
  thor (0.14.6)
65
- tzinfo (0.3.32)
65
+ tzinfo (0.3.33)
66
66
 
67
67
  PLATFORMS
68
68
  ruby
@@ -86,4 +86,4 @@ DEPENDENCIES
86
86
  rubyvis (>= 0.5.0)
87
87
  shoulda
88
88
  sqlite3 (>= 1.3.3)
89
- thor (>= 0.14.6)
89
+ thor (= 0.14.6)
@@ -35,7 +35,7 @@ Most of this tasks create sub-processes to speed up conversions
35
35
  * biongs convert:qseq:fastq:samples_by_lane SAMPLES LANE OUTPUT # Convert the qseqs for each sample in a specific lane. SAMPLES is an array of index codes separated by commas lane is an integer
36
36
 
37
37
  === filter
38
- * biongs filter:by_list QUANTIFICATION LIST # Extract from QUANTIFICATION rows with a key in LIST
38
+ * biongs filter:by_list TABLE LIST # Extract from TABLE rows with a key in LIST
39
39
 
40
40
  === quality
41
41
 
@@ -43,6 +43,7 @@ Most of this tasks create sub-processes to speed up conversions
43
43
  * biongs quality:fastq_stats FASTQ # Reports quality of FASTQ file
44
44
  * biongs quality:illumina_b_profile_raw FASTQ --read-length=N # perform a profile for reads coming fom Illumina 1.5+ and write the report in a txt file
45
45
  * biongs quality:illumina_b_profile_svg FASTQ --read-length=N # perform a profile for reads coming fom Illumina 1.5+
46
+ * biongs quality:illumina_project_stats # Reports quality of FASTQ files in an Illumina project directory
46
47
  * biongs quality:reads FASTQ # perform quality check for NGS reads
47
48
  * biongs quality:reads_coverage FASTQ_QUALITY_STATS # plot reads coverage in bases
48
49
  * biongs quality:trim FASTQ # trim all the sequences
@@ -65,8 +66,34 @@ Most of this tasks create sub-processes to speed up conversions
65
66
 
66
67
  == TasksExamples
67
68
 
69
+ === Conversion
68
70
  biongs convert:bam:extract_genes your_original.bam BLID,GATA3,PTPRC --ensembl_release=61 --ensembl_specie=homo_sapiens
69
71
 
72
+ === Filtering
73
+ Once your reads are mapped to a reference genome, you can filter the output (GTF) to extract only those transcripts that meet your requirements. You can filter by length, by whether the transcript is multi- or mono-exon, by coverage, and by whether it is a brand new transcript, a new isoform of an already annotated gene, or just an annotated transcript.
74
+
75
+ Scenario: filtering transcripts
76
+ Having a transcripts.gtf dataset generated from CufflinksQuantification
77
+ I want only the new transcripts (also with an annotated gene)
78
+ Which are multi exons
79
+ With a length greater than 1340
80
+ With minimum coverage greater than 10
81
+ Then I want to save them in my_filtered_data.gtf
82
+
83
+ biongs filter:cufflinks:transcripts your_original.gtf -m -l 1340 -c 10.0 -n -o my_filtered_data.gtf
84
+
85
+ Then in some cases I need to extract only some of them, or maybe parse them with external programs. Biongs has a specific task for this:
86
+
87
+ Having my_filtered_data.gtf
88
+ Generated by "filtering transcripts"
89
+ I want to extract transcript number 10
90
+ Then I want to save it in BED format
91
+ Using UCSC notation
92
+
93
+ biongs filter:cufflinks:tra_at_idx my_filtered_data.gtf #of_the_transcript_to_retrieve -u
94
+
95
+ The first time tra_at_idx is used, it will take more time than usual because it creates an internal index: a simple Hash, marshalled and dumped, stored in a file named like the input with idx as a postfix.
96
+
70
97
 
71
98
  = ForDevelopers
72
99
 
@@ -197,6 +224,12 @@ You can create a new wrapper and configure it and run it from inside a Thor's ta
197
224
  end #Ngs
198
225
  end #Bio
199
226
 
227
+ ==== Disable binary check at load time
228
+ When a wrapper is defined, BioNGS verifies that the program is installed on the local system; if it is not, it prints a warning message and the task is disabled by default. This check is made for each wrapped binary, so it can take a long time the first time you load BioNGS.
229
+ To skip this check the user can define an environment variable assigning one of these terms "true yes ok 1" to BIONGS_SKIP_CHECK_BINARIES
230
+ export BIONGS_SKIP_CHECK_BINARIES=true
231
+ you can also add this setting to the .bashrc or .profile in the user home directory.
232
+
200
233
  == Features
201
234
  === Iterators for output files
202
235
 
@@ -223,17 +256,31 @@ This is an example of CuffDiff, parsing genes.fpkm_tracking file:
223
256
 
224
257
  In this case internally CSV library has been used to parse in an easy way the file, there is a lack of performances with huge files, gaining in flexibility.
225
258
 
259
+ == Loading or Not tasks from outside
260
+ If, in your external library or binary, you define LoadBaseTasks in Bio::Ngs (as a constant) when requiring 'bio-ngs', bio-ngs's tasks will not be loaded, only the libraries.
261
+ module Bio
262
+ module Ngs
263
+ LoadBaseTasks = true
264
+ end
265
+ end
266
+
267
+ This is useful if you want to develop a separate binary which uses the bio-ngs libraries.
268
+ It is not yet possible to define a list of desired tasks to load.
269
+
226
270
  === Notes
227
271
  * It's possible to add more sugar and we are working hard on it
228
272
  * aliases are not well supported at this time. ToDo
229
273
 
230
274
  = REQUIREMENTS
231
275
  * http://hannonlab.cshl.edu/fastx_toolkit/ (the gem tries to install this tool by itself)
232
- * http://www.gnuplot.info/
276
+ * http://www.gnuplot.info/ tested on version 4.6
277
+ * libxslt1-dev
233
278
 
234
279
  Pleas follow the instruction for your own distribution/operating system
235
280
 
236
281
  = TODO
282
+ * Write Tutorial for Wrapper & Pipes
283
+ * Write Tutorial for handling Illumina/Fastq.gz with BioNGS Bio::Ngs::Illumina::FastqGz
237
284
  * Report the version of every software installed/used from bio-ngs
238
285
  * Develop fastq quality reports with RibuVis ?
239
286
  * Write documentation
@@ -243,8 +290,8 @@ Pleas follow the instruction for your own distribution/operating system
243
290
  * BRANCH:misk_tasks Explore possibility to user DelayedJobs
244
291
  * biongs ann:ensembl:gtf:features:categorize GTF GTF categorize also by chromosome not only by BioType
245
292
  * configuration file input,output, experimental design
246
- * include fastx toolkit, download and compile
247
- * how to put in background tasks that can be run in parallel
293
+ * DONE: include fastx toolkit, download and compile
294
+ * ANSWER: how to put in background tasks that can be run in parallel? Use Parallel (see code for quality:illumina_project_stats)
248
295
  * is it possible to establish a relation between input data and output data ? like fastq task_selected output/s
249
296
  * add description for developers on howto include news external tool with versions.yaml
250
297
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.6.alpha.01
1
+ 0.4.6.alpha.02
data/bin/biongs CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  module Bio
4
4
  module Ngs
5
+ LoadBaseTasks = true
5
6
 
6
7
  path = File.expand_path(File.dirname(__FILE__))
7
8
  $:<<File.join(File.dirname(path),"lib")
@@ -9,7 +9,7 @@ Gem::Specification.new do |s|
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new("> 1.3.1") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Francesco Strozzi", "Raoul J.P. Bonnal"]
12
- s.date = "2012-03-21"
12
+ s.date = "2012-05-18"
13
13
  s.description = "bio-ngs provides a framework for handling NGS data with BioRuby"
14
14
  s.email = "francesco.strozzi@gmail.com"
15
15
  s.executables = ["biongs"]
@@ -29,6 +29,11 @@ Gem::Specification.new do |s|
29
29
  "bin/biongs",
30
30
  "bio-ngs.gemspec",
31
31
  "ext/mkrf_conf.rb",
32
+ "features/cufflinks_gtf_parser.feature",
33
+ "features/cufflinks_gtf_parser_indexing.feature",
34
+ "features/step_definitions/cufflinks_gtf.rb",
35
+ "features/step_definitions/cufflinks_gtf_parser_indexing.rb",
36
+ "features/support/env.rb",
32
37
  "lib/bio-ngs.rb",
33
38
  "lib/bio/appl/ngs/bcftools.rb",
34
39
  "lib/bio/appl/ngs/bcl2qseq.rb",
@@ -36,6 +41,9 @@ Gem::Specification.new do |s|
36
41
  "lib/bio/appl/ngs/bowtie-inspect.rb",
37
42
  "lib/bio/appl/ngs/bwa.rb",
38
43
  "lib/bio/appl/ngs/cufflinks.rb",
44
+ "lib/bio/appl/ngs/cufflinks/gtf/gtf.rb",
45
+ "lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb",
46
+ "lib/bio/appl/ngs/cufflinks/gtf/transcript.rb",
39
47
  "lib/bio/appl/ngs/cufflinks/iterators.rb",
40
48
  "lib/bio/appl/ngs/fastx.rb",
41
49
  "lib/bio/appl/ngs/samtools.rb",
@@ -57,8 +65,13 @@ Gem::Specification.new do |s|
57
65
  "lib/bio/ngs/ext/bin/linux/samtools",
58
66
  "lib/bio/ngs/ext/bin/osx/samtools",
59
67
  "lib/bio/ngs/ext/versions.yaml",
68
+ "lib/bio/ngs/fs.rb",
60
69
  "lib/bio/ngs/graphics.rb",
61
70
  "lib/bio/ngs/homology.rb",
71
+ "lib/bio/ngs/illumina/fastq.rb",
72
+ "lib/bio/ngs/illumina/illumina.rb",
73
+ "lib/bio/ngs/illumina/project.rb",
74
+ "lib/bio/ngs/illumina/sample.rb",
62
75
  "lib/bio/ngs/ontology.rb",
63
76
  "lib/bio/ngs/quality.rb",
64
77
  "lib/bio/ngs/record.rb",
@@ -66,6 +79,7 @@ Gem::Specification.new do |s|
66
79
  "lib/bio/ngs/utils.rb",
67
80
  "lib/development_tasks.rb",
68
81
  "lib/enumerable.rb",
82
+ "lib/meta.rb",
69
83
  "lib/tasks/bwa.thor",
70
84
  "lib/tasks/convert.thor",
71
85
  "lib/tasks/filter.thor",
@@ -80,8 +94,22 @@ Gem::Specification.new do |s|
80
94
  "lib/templates/README.tt",
81
95
  "lib/templates/db.tt",
82
96
  "lib/wrapper.rb",
97
+ "spec/bio/ngs/fs_spec.rb",
98
+ "spec/bio/ngs/illumina/fastq_spec.rb",
99
+ "spec/bio/ngs/illumina/illumina_spec.rb",
100
+ "spec/bio/ngs/illumina/project_spec.rb",
101
+ "spec/bio/ngs/illumina/sample_spec.rb",
102
+ "spec/bio/ngs/illumina/samples_spec.rb",
83
103
  "spec/converter_qseq_spec.rb",
104
+ "spec/filter_spec.rb",
84
105
  "spec/fixture/s_1_1_1108_qseq.txt",
106
+ "spec/fixture/table_filter_list.txt",
107
+ "spec/fixture/table_filter_list_first_column.txt",
108
+ "spec/fixture/table_filter_source.tsv",
109
+ "spec/fixture/test-filtered-reference.fastq.gz",
110
+ "spec/fixture/test-merged-reference.fastq.gz",
111
+ "spec/fixture/test.fastq.gz",
112
+ "spec/meta_spec.rb",
85
113
  "spec/quality_spec.rb",
86
114
  "spec/sff_extract_spec.rb",
87
115
  "spec/spec_helper.rb",
@@ -103,7 +131,7 @@ Gem::Specification.new do |s|
103
131
  s.homepage = "http://github.com/helios/bioruby-ngs"
104
132
  s.licenses = ["MIT"]
105
133
  s.require_paths = ["lib"]
106
- s.rubygems_version = "1.8.10"
134
+ s.rubygems_version = "1.8.15"
107
135
  s.summary = "bio-ngs provides a framework for handling NGS data with BioRuby"
108
136
 
109
137
  if s.respond_to? :specification_version then
@@ -112,7 +140,7 @@ Gem::Specification.new do |s|
112
140
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
113
141
  s.add_runtime_dependency(%q<bio>, [">= 1.4.2"])
114
142
  s.add_runtime_dependency(%q<bio-samtools>, [">= 0.3.2"])
115
- s.add_runtime_dependency(%q<thor>, [">= 0.14.6"])
143
+ s.add_runtime_dependency(%q<thor>, ["= 0.14.6"])
116
144
  s.add_runtime_dependency(%q<rubyvis>, [">= 0.5.0"])
117
145
  s.add_runtime_dependency(%q<daemons>, [">= 1.1.0"])
118
146
  s.add_runtime_dependency(%q<ruby-ensembl-api>, [">= 1.0.1"])
@@ -128,7 +156,7 @@ Gem::Specification.new do |s|
128
156
  s.add_development_dependency(%q<jeweler>, ["~> 1.8.3"])
129
157
  s.add_development_dependency(%q<rcov>, ["~> 0.9.11"])
130
158
  s.add_development_dependency(%q<bio>, [">= 1.4.2"])
131
- s.add_development_dependency(%q<thor>, [">= 0.14.6"])
159
+ s.add_development_dependency(%q<thor>, ["= 0.14.6"])
132
160
  s.add_development_dependency(%q<ffi>, [">= 1.0.6"])
133
161
  s.add_development_dependency(%q<rubyvis>, [">= 0.5.0"])
134
162
  s.add_development_dependency(%q<rspec>, [">= 2.5.0"])
@@ -145,7 +173,7 @@ Gem::Specification.new do |s|
145
173
  else
146
174
  s.add_dependency(%q<bio>, [">= 1.4.2"])
147
175
  s.add_dependency(%q<bio-samtools>, [">= 0.3.2"])
148
- s.add_dependency(%q<thor>, [">= 0.14.6"])
176
+ s.add_dependency(%q<thor>, ["= 0.14.6"])
149
177
  s.add_dependency(%q<rubyvis>, [">= 0.5.0"])
150
178
  s.add_dependency(%q<daemons>, [">= 1.1.0"])
151
179
  s.add_dependency(%q<ruby-ensembl-api>, [">= 1.0.1"])
@@ -161,7 +189,7 @@ Gem::Specification.new do |s|
161
189
  s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
162
190
  s.add_dependency(%q<rcov>, ["~> 0.9.11"])
163
191
  s.add_dependency(%q<bio>, [">= 1.4.2"])
164
- s.add_dependency(%q<thor>, [">= 0.14.6"])
192
+ s.add_dependency(%q<thor>, ["= 0.14.6"])
165
193
  s.add_dependency(%q<ffi>, [">= 1.0.6"])
166
194
  s.add_dependency(%q<rubyvis>, [">= 0.5.0"])
167
195
  s.add_dependency(%q<rspec>, [">= 2.5.0"])
@@ -179,7 +207,7 @@ Gem::Specification.new do |s|
179
207
  else
180
208
  s.add_dependency(%q<bio>, [">= 1.4.2"])
181
209
  s.add_dependency(%q<bio-samtools>, [">= 0.3.2"])
182
- s.add_dependency(%q<thor>, [">= 0.14.6"])
210
+ s.add_dependency(%q<thor>, ["= 0.14.6"])
183
211
  s.add_dependency(%q<rubyvis>, [">= 0.5.0"])
184
212
  s.add_dependency(%q<daemons>, [">= 1.1.0"])
185
213
  s.add_dependency(%q<ruby-ensembl-api>, [">= 1.0.1"])
@@ -195,7 +223,7 @@ Gem::Specification.new do |s|
195
223
  s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
196
224
  s.add_dependency(%q<rcov>, ["~> 0.9.11"])
197
225
  s.add_dependency(%q<bio>, [">= 1.4.2"])
198
- s.add_dependency(%q<thor>, [">= 0.14.6"])
226
+ s.add_dependency(%q<thor>, ["= 0.14.6"])
199
227
  s.add_dependency(%q<ffi>, [">= 1.0.6"])
200
228
  s.add_dependency(%q<rubyvis>, [">= 0.5.0"])
201
229
  s.add_dependency(%q<rspec>, [">= 2.5.0"])
@@ -0,0 +1,22 @@
1
+ Feature: Manipulate Cufflinks GTF files
2
+ In order to extract data and information plus new insights from RNASeq data
3
+ As a bioinformatician
4
+ I want to parse transcript.gtf file created by Cufflinks after Quantification analysis
5
+
6
+ Scenario: iterating over Cufflinks GTF transcripts
7
+ Given the file "transcripts.gtf" from quantification analysis
8
+ Then I want to print "each_transcript" on stdout
9
+
10
+ Scenario: count new isoforms
11
+ Given the file "transcripts.gtf" from quantification analysis
12
+ Then I want to "count" the "brand_new_isoforms"
13
+
14
+ Scenario: counts new isoforms for many quantifications
15
+ Given a list of parameters "-b -m -l 200 -c 3.0 -x -d"
16
+ Then I want to "count" the "brand_new_isoforms" in each subdirectory
17
+
18
+
19
+ Scenario: Save each transcript in single files
20
+ Given the file "transcripts.gtf" from quantification analysis
21
+ And a list of parameters "-b -m -l 200 -c 3.0"
22
+ Then I want to save "each_trasncript" in single files formatted in "bed" format
@@ -0,0 +1,20 @@
1
+ Feature: Indexing Cufflinks GTF features
2
+ In order to extract features quickly
3
+ As a bioinformatician
4
+ I want to use an index for random access
5
+
6
+ Scenario: build a GTF index
7
+ Given the file "transcripts.gtf" from quantification analysis
8
+ Then I want to build its index
9
+ And save it as "transcripts.gtf.idx"
10
+
11
+ Scenario: extract n-th feature from a GTF
12
+ Given the file "transcripts.gtf" from quantification analysis
13
+ Then I want to extract feature number "2"
14
+ And I want to extract feature number "5"
15
+ And I want to extract feature number "15"
16
+
17
+ Scenario: extract multiple features from a GTF
18
+ Given the file "transcripts.gtf" from quantification analysis
19
+ And a range from "2" to "11"
20
+ Then I want to obtain a bed file for each position in the range
@@ -0,0 +1,30 @@
1
+
2
+ Given /^the file "(.*?)" from quantification analysis$/ do |gtf_fn|
3
+ # pending # express the regexp above with the code you wish you had
4
+ @gtf_fn_ap = File.absolute_path File.join("spec/fixture/",gtf_fn)
5
+ File.exists?(File.join("spec/fixture/",gtf_fn)).should be true
6
+ @gtf = Bio::Ngs::Cufflinks::Gtf.new(@gtf_fn_ap)
7
+ @gtf.should_not be nil
8
+ end
9
+
10
+ Then /^I want to print "(.*?)" on stdout$/ do |each_method|
11
+ str=@gtf.send each_method do |transcript|
12
+ break(transcript.to_s)
13
+ end
14
+ #pending # express the regexp above with the code you wish you had
15
+ puts str
16
+ str.should =~ //
17
+ end
18
+
19
+ Then /^I want to "(.*?)" the "(.*?)"$/ do |operation, subset|
20
+ @gtf.send(subset).count.should == 17
21
+ end
22
+
23
+
24
+ Given /^a list of parameters "(.*?)"$/ do |arg1|
25
+ pending # express the regexp above with the code you wish you had
26
+ end
27
+
28
+ Then /^I want to "(.*?)" the "(.*?)" in each subdirectory$/ do |arg1, arg2|
29
+ pending # express the regexp above with the code you wish you had
30
+ end
@@ -0,0 +1,53 @@
1
+ Given /^a range from "(.*?)" to "(.*?)"$/ do |start, stop|
2
+ #pending # express the regexp above with the code you wish you had
3
+ @range = (start.to_i..stop.to_i)
4
+ end
5
+
6
+ Then /^I want to build its index$/ do
7
+ @gtf.build_idx.should == {:transcripts=>[801, 604, 425, 425, 857, 610, 610, 607, 607, 616, 622, 809, 1003, 821, 429, 412, 1023, 610, 607, 1003, 616, 607, 404, 404, 2973, 3219, 3420, 3219, 813, 1213, 1213, 1231, 613, 408, 408, 814, 1014, 626, 814, 408, 436, 610, 662, 1016, 816, 613, 2227, 616, 616, 616, 420, 643, 7588, 813, 814, 1014, 814, 614, 614, 3018, 2013, 814, 613, 865, 433, 433, 433, 643, 619, 3454, 3454, 2043, 1237, 628, 1225, 1023, 1243, 1023, 1225, 410, 410, 1662, 1238, 1004, 1023, 2642, 2237, 2439, 2033, 2439, 2236, 834, 2280, 2642, 821, 1832, 2236, 2439, 2236, 2439, 2033, 1629, 1225, 1427, 1225, 1225, 821, 1427, 412, 412, 611, 412, 412, 412, 414, 1678, 1629, 1630, 822, 620, 1225, 1267, 821, 426, 821, 626, 1831, 821, 629, 412, 418, 1831, 1629, 1670, 1427, 1694, 1427, 1831, 1630, 1831, 620, 1629, 1629, 1225, 1023, 1427, 1427, 1427, 822, 1225, 1225, 822, 620, 818, 817, 617, 4063, 3860, 3657, 3300, 1023], :exons=>[]}
8
+ end
9
+
10
+ Then /^save it as "(.*?)"$/ do |gtf_index_filename|
11
+ @gtf_fn_idx = File.absolute_path File.join("spec/fixture/","#{gtf_index_filename}")
12
+ @gtf.dump_idx
13
+ File.exists?(@gtf_fn_idx).should == true
14
+ end
15
+
16
+ Then /^I want to extract feature number "(.*?)"$/ do |index_to_get|
17
+ tow = <<DATA
18
+ 1 Cufflinks transcript 35245 36073 1 - . gene_id "CUFF.1"; transcript_id "ENST00000461467"; FPKM "0.0000000000"; frac "0.000000"; conf_lo "0.000000"; conf_hi "0.000000"; cov "0.000000"; full_read_support "no";
19
+ 1 Cufflinks exon 35245 35481 1 - . gene_id "CUFF.1"; transcript_id "ENST00000461467"; exon_number "1"; FPKM "0.0000000000"; frac "0.000000"; conf_lo "0.000000"; conf_hi "0.000000"; cov "0.000000";
20
+ 1 Cufflinks exon 35721 36073 1 - . gene_id "CUFF.1"; transcript_id "ENST00000461467"; exon_number "2"; FPKM "0.0000000000"; frac "0.000000"; conf_lo "0.000000"; conf_hi "0.000000"; cov "0.000000";
21
+ DATA
22
+
23
+ five = <<DATA
24
+ 1 Cufflinks transcript 521369 523833 1000 - . gene_id "ENSG00000231709"; transcript_id "ENST00000417636"; FPKM "0.0050659782"; frac "1.000000"; conf_lo "0.000000"; conf_hi "0.116517"; cov "0.009533"; full_read_support "no";
25
+ 1 Cufflinks exon 521369 521738 1000 - . gene_id "ENSG00000231709"; transcript_id "ENST00000417636"; exon_number "1"; FPKM "0.0050659782"; frac "1.000000"; conf_lo "0.000000"; conf_hi "0.116517"; cov "0.009533";
26
+ 1 Cufflinks exon 522201 522335 1000 - . gene_id "ENSG00000231709"; transcript_id "ENST00000417636"; exon_number "2"; FPKM "0.0050659782"; frac "1.000000"; conf_lo "0.000000"; conf_hi "0.116517"; cov "0.009533";
27
+ 1 Cufflinks exon 523497 523833 1000 - . gene_id "ENSG00000231709"; transcript_id "ENST00000417636"; exon_number "3"; FPKM "0.0050659782"; frac "1.000000"; conf_lo "0.000000"; conf_hi "0.116517"; cov "0.009533";
28
+ DATA
29
+
30
+ fifteen = <<DATA
31
+ 1 Cufflinks transcript 808847 808957 1 - . gene_id "ENSG00000221146"; transcript_id "ENST00000408219"; FPKM "0.0000000000"; frac "0.000000"; conf_lo "0.000000"; conf_hi "0.000000"; cov "0.000000"; full_read_support "no";
32
+ 1 Cufflinks exon 808847 808957 1 - . gene_id "ENSG00000221146"; transcript_id "ENST00000408219"; exon_number "1"; FPKM "0.0000000000"; frac "0.000000"; conf_lo "0.000000"; conf_hi "0.000000"; cov "0.000000";
33
+ DATA
34
+
35
+ pre_defined_transcripts={"2"=>tow,"5"=>five, "15"=> fifteen}
36
+ @gtf[index_to_get.to_i].to_s.tr('"','\"').should == pre_defined_transcripts[index_to_get]
37
+ end
38
+
39
+ Then /^I want to obtain a bed file for each position in the range$/ do
40
+ str = @range.map do |index|
41
+ capture(:stdout) do
42
+ Thor::Runner.start (["filter:cufflinks:tra_at_idx",@gtf_fn_ap,index])
43
+ end
44
+ end
45
+ str.each do |file|
46
+ FileUtils.rm(file.chop)
47
+ end
48
+ str.should == ["CUFF.1-ENST00000461467.bed\n", "ENSG00000240361-ENST00000492842.bed\n",
49
+ "ENSG00000186092-ENST00000335137.bed\n", "ENSG00000231709-ENST00000417636.bed\n",
50
+ "CUFF.2-ENST00000423796.bed\n", "CUFF.2-ENST00000450696.bed\n",
51
+ "CUFF.2-TCONS_00000124.bed\n", "CUFF.2-TCONS_00000125.bed\n",
52
+ "CUFF.3-TCONS_00000796.bed\n", "XLOC_000669-TCONS_00001368.bed\n"]
53
+ end