bio-ngs 0.4.6.alpha.01 → 0.4.6.alpha.02
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -2
- data/Gemfile.lock +21 -21
- data/README.rdoc +51 -4
- data/VERSION +1 -1
- data/bin/biongs +1 -0
- data/bio-ngs.gemspec +36 -8
- data/features/cufflinks_gtf_parser.feature +22 -0
- data/features/cufflinks_gtf_parser_indexing.feature +20 -0
- data/features/step_definitions/cufflinks_gtf.rb +30 -0
- data/features/step_definitions/cufflinks_gtf_parser_indexing.rb +53 -0
- data/features/support/env.rb +2 -0
- data/lib/bio-ngs.rb +19 -5
- data/lib/bio/appl/ngs/cufflinks.rb +447 -281
- data/lib/bio/appl/ngs/cufflinks/gtf/gtf.rb +23 -0
- data/lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb +248 -0
- data/lib/bio/appl/ngs/cufflinks/gtf/transcript.rb +154 -0
- data/lib/bio/ngs/fs.rb +46 -0
- data/lib/bio/ngs/illumina/fastq.rb +176 -0
- data/lib/bio/ngs/illumina/illumina.rb +64 -0
- data/lib/bio/ngs/illumina/project.rb +81 -0
- data/lib/bio/ngs/illumina/sample.rb +85 -0
- data/lib/bio/ngs/task.rb +1 -1
- data/lib/bio/ngs/utils.rb +124 -112
- data/lib/meta.rb +162 -0
- data/lib/tasks/convert.thor +14 -14
- data/lib/tasks/filter.thor +158 -23
- data/lib/tasks/quality.thor +24 -4
- data/lib/tasks/rna.thor +26 -0
- data/lib/wrapper.rb +28 -0
- data/spec/bio/ngs/fs_spec.rb +70 -0
- data/spec/bio/ngs/illumina/fastq_spec.rb +52 -0
- data/spec/bio/ngs/illumina/illumina_spec.rb +21 -0
- data/spec/bio/ngs/illumina/project_spec.rb +0 -0
- data/spec/bio/ngs/illumina/sample_spec.rb +0 -0
- data/spec/bio/ngs/illumina/samples_spec.rb +0 -0
- data/spec/filter_spec.rb +25 -0
- data/spec/fixture/table_filter_list.txt +3 -0
- data/spec/fixture/table_filter_list_first_column.txt +2 -0
- data/spec/fixture/table_filter_source.tsv +44 -0
- data/spec/fixture/test-filtered-reference.fastq.gz +0 -0
- data/spec/fixture/test-merged-reference.fastq.gz +0 -0
- data/spec/fixture/test.fastq.gz +0 -0
- data/spec/meta_spec.rb +117 -0
- data/spec/spec_helper.rb +1 -1
- metadata +97 -69
data/Gemfile
CHANGED
@@ -5,7 +5,8 @@ source "http://rubygems.org"
|
|
5
5
|
|
6
6
|
gem "bio", ">= 1.4.2"
|
7
7
|
gem "bio-samtools", ">= 0.3.2"
|
8
|
-
gem "thor", ">= 0.14.6"
|
8
|
+
# gem "thor", path:"/Users/bonnalraoul/Documents/Develop/thor" #, ">= 0.14.6"
|
9
|
+
gem "thor", "= 0.14.6"
|
9
10
|
gem "rubyvis", ">= 0.5.0"
|
10
11
|
gem "daemons", ">= 1.1.0"
|
11
12
|
gem "ruby-ensembl-api", ">= 1.0.1"
|
@@ -25,7 +26,8 @@ group :development do
|
|
25
26
|
gem "jeweler", "~> 1.8.3"
|
26
27
|
gem "rcov", "~> 0.9.11"
|
27
28
|
gem "bio", ">= 1.4.2"
|
28
|
-
gem "thor", ">= 0.14.6"
|
29
|
+
# gem "thor", path:"/Users/bonnalraoul/Documents/Develop/thor" #, ">= 0.14.6"
|
30
|
+
gem "thor", "= 0.14.6"
|
29
31
|
gem "ffi", ">= 1.0.6"
|
30
32
|
gem "rubyvis", ">= 0.5.0"
|
31
33
|
gem "rspec", ">= 2.5.0"
|
data/Gemfile.lock
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
-
activemodel (3.2.
|
5
|
-
activesupport (= 3.2.
|
4
|
+
activemodel (3.2.3)
|
5
|
+
activesupport (= 3.2.3)
|
6
6
|
builder (~> 3.0.0)
|
7
|
-
activerecord (3.2.
|
8
|
-
activemodel (= 3.2.
|
9
|
-
activesupport (= 3.2.
|
7
|
+
activerecord (3.2.3)
|
8
|
+
activemodel (= 3.2.3)
|
9
|
+
activesupport (= 3.2.3)
|
10
10
|
arel (~> 3.0.2)
|
11
11
|
tzinfo (~> 0.3.29)
|
12
|
-
activesupport (3.2.
|
12
|
+
activesupport (3.2.3)
|
13
13
|
i18n (~> 0.6)
|
14
14
|
multi_json (~> 1.0)
|
15
15
|
arel (3.0.2)
|
@@ -17,9 +17,9 @@ GEM
|
|
17
17
|
bio-blastxmlparser (1.0.1)
|
18
18
|
bio-logger (>= 1.0.0)
|
19
19
|
nokogiri (>= 1.5.0)
|
20
|
-
bio-logger (1.0.
|
20
|
+
bio-logger (1.0.1)
|
21
21
|
log4r (>= 1.1.9)
|
22
|
-
bio-samtools (0.5.
|
22
|
+
bio-samtools (0.5.3)
|
23
23
|
bio (>= 1.4.2)
|
24
24
|
ffi
|
25
25
|
builder (3.0.0)
|
@@ -33,24 +33,24 @@ GEM
|
|
33
33
|
git (>= 1.2.5)
|
34
34
|
rake
|
35
35
|
rdoc
|
36
|
-
json (1.
|
36
|
+
json (1.7.3)
|
37
37
|
log4r (1.1.10)
|
38
|
-
multi_json (1.
|
38
|
+
multi_json (1.3.5)
|
39
39
|
nokogiri (1.5.2)
|
40
40
|
parallel (0.5.16)
|
41
|
-
progressbar (0.
|
41
|
+
progressbar (0.11.0)
|
42
42
|
rake (0.9.2.2)
|
43
43
|
rcov (0.9.11)
|
44
44
|
rdoc (3.12)
|
45
45
|
json (~> 1.4)
|
46
|
-
rspec (2.
|
47
|
-
rspec-core (~> 2.
|
48
|
-
rspec-expectations (~> 2.
|
49
|
-
rspec-mocks (~> 2.
|
50
|
-
rspec-core (2.
|
51
|
-
rspec-expectations (2.
|
46
|
+
rspec (2.10.0)
|
47
|
+
rspec-core (~> 2.10.0)
|
48
|
+
rspec-expectations (~> 2.10.0)
|
49
|
+
rspec-mocks (~> 2.10.0)
|
50
|
+
rspec-core (2.10.0)
|
51
|
+
rspec-expectations (2.10.0)
|
52
52
|
diff-lcs (~> 1.1.3)
|
53
|
-
rspec-mocks (2.
|
53
|
+
rspec-mocks (2.10.1)
|
54
54
|
ruby-ensembl-api (1.0.1)
|
55
55
|
activerecord
|
56
56
|
bio (>= 1)
|
@@ -60,9 +60,9 @@ GEM
|
|
60
60
|
shoulda-matchers (~> 1.0.0)
|
61
61
|
shoulda-context (1.0.0)
|
62
62
|
shoulda-matchers (1.0.0)
|
63
|
-
sqlite3 (1.3.
|
63
|
+
sqlite3 (1.3.6)
|
64
64
|
thor (0.14.6)
|
65
|
-
tzinfo (0.3.
|
65
|
+
tzinfo (0.3.33)
|
66
66
|
|
67
67
|
PLATFORMS
|
68
68
|
ruby
|
@@ -86,4 +86,4 @@ DEPENDENCIES
|
|
86
86
|
rubyvis (>= 0.5.0)
|
87
87
|
shoulda
|
88
88
|
sqlite3 (>= 1.3.3)
|
89
|
-
thor (
|
89
|
+
thor (= 0.14.6)
|
data/README.rdoc
CHANGED
@@ -35,7 +35,7 @@ Most of this tasks create sub-processes to speed up conversions
|
|
35
35
|
* biongs convert:qseq:fastq:samples_by_lane SAMPLES LANE OUTPUT # Convert the qseqs for each sample in a specific lane. SAMPLES is an array of index codes separated by commas lane is an integer
|
36
36
|
|
37
37
|
=== filter
|
38
|
-
* biongs filter:by_list
|
38
|
+
* biongs filter:by_list TABLE LIST # Extract from TABLE rows with a key in LIST
|
39
39
|
|
40
40
|
=== quality
|
41
41
|
|
@@ -43,6 +43,7 @@ Most of this tasks create sub-processes to speed up conversions
|
|
43
43
|
* biongs quality:fastq_stats FASTQ # Reports quality of FASTQ file
|
44
44
|
* biongs quality:illumina_b_profile_raw FASTQ --read-length=N # perform a profile for reads coming fom Illumina 1.5+ and write the report in a txt file
|
45
45
|
* biongs quality:illumina_b_profile_svg FASTQ --read-length=N # perform a profile for reads coming fom Illumina 1.5+
|
46
|
+
* biongs quality:illumina_project_stats # Reports quality of FASTQ files in an Illumina project directory
|
46
47
|
* biongs quality:reads FASTQ # perform quality check for NGS reads
|
47
48
|
* biongs quality:reads_coverage FASTQ_QUALITY_STATS # plot reads coverage in bases
|
48
49
|
* biongs quality:trim FASTQ # trim all the sequences
|
@@ -65,8 +66,34 @@ Most of this tasks create sub-processes to speed up conversions
|
|
65
66
|
|
66
67
|
== TasksExamples
|
67
68
|
|
69
|
+
=== Conversion
|
68
70
|
biongs convert:bam:extract_genes your_original.bam BLID,GATA3,PTPRC --ensembl_release=61 --ensembl_specie=homo_sapiens
|
69
71
|
|
72
|
+
=== Filtering
|
73
|
+
When you have mapped your reads to a reference genome, you can filter the output (GTF) to extract only those transcripts that meet your requirements. You can filter by length, by whether the transcript is multi- or mono-exon, by coverage, and by whether it is a brand-new transcript, a new isoform of an already annotated gene, or just an annotated transcript.
|
74
|
+
|
75
|
+
Scenario: filtering transcripts
|
76
|
+
Having a transcripts.gtf dataset generated from CufflinksQuantification
|
77
|
+
I want only the new transcripts (also with an annotated gene)
|
78
|
+
Which are multi exons
|
79
|
+
With a length greater than 1340
|
80
|
+
With minimum coverage greater than 10
|
81
|
+
Then I want to save them in my_filtered_data.gtf
|
82
|
+
|
83
|
+
biongs filter:cufflinks:transcripts your_original.gtf -m -l 1340 -c 10.0 -n -o my_filtered_data.gtf
|
84
|
+
|
85
|
+
In some cases you need to extract only some of them, or to parse them with external programs. Biongs has a specific task for this:
|
86
|
+
|
87
|
+
Having my_filtered_data.gtf
|
88
|
+
Generated by "filtering transcripts"
|
89
|
+
I want to extract transcript number 10
|
90
|
+
Then I want to save it in BED format
|
91
|
+
Using UCSC notation
|
92
|
+
|
93
|
+
biongs filter:cufflinks:tra_at_idx my_filtered_data.gtf #of_the_transcript_to_retrieve -u
|
94
|
+
|
95
|
+
The first time tra_at_idx is used, it takes longer than usual because it creates an internal index: a simple Hash, marshalled and dumped to a file whose name is similar to the input's, with idx as a suffix.
|
96
|
+
|
70
97
|
|
71
98
|
= ForDevelopers
|
72
99
|
|
@@ -197,6 +224,12 @@ You can create a new wrapper and configure it and run it from inside a Thor's ta
|
|
197
224
|
end #Ngs
|
198
225
|
end #Bio
|
199
226
|
|
227
|
+
==== Disable binary check at load time
|
228
|
+
When a wrapper is defined, BioNGS verifies that the program is installed on the local system; if it is not, it emits a warning message and the task is disabled by default. This check is made for each wrapped binary, so loading BioNGS can take a long time the first time.
|
229
|
+
To skip this check, define the environment variable BIONGS_SKIP_CHECK_BINARIES and assign it one of these values: "true yes ok 1"
|
230
|
+
export BIONGS_SKIP_CHECK_BINARIES=true
|
231
|
+
you can also add this setting to the .bashrc or .profile in the user home directory.
|
232
|
+
|
200
233
|
== Features
|
201
234
|
=== Iterators for output files
|
202
235
|
|
@@ -223,17 +256,31 @@ This is an example of CuffDiff, parsing genes.fpkm_tracking file:
|
|
223
256
|
|
224
257
|
In this case internally CSV library has been used to parse in an easy way the file, there is a lack of performances with huge files, gaining in flexibility.
|
225
258
|
|
259
|
+
== Loading or Not tasks from outside
|
260
|
+
If your external library or binary defines LoadBaseTasks in Bio::Ngs (as a constant) and then requires 'bio-ngs', bio-ngs's tasks will not be loaded; only the libraries will.
|
261
|
+
module Bio
|
262
|
+
module Ngs
|
263
|
+
LoadBaseTasks = true
|
264
|
+
end
|
265
|
+
end
|
266
|
+
|
267
|
+
This is useful if you want to develop a separate binary which uses the bio-ngs libraries.
|
268
|
+
It is not yet possible to define a list of desired tasks to load.
|
269
|
+
|
226
270
|
=== Notes
|
227
271
|
* It's possible to add more sugar and we are working hard on it
|
228
272
|
* aliases are not well supported at this time. ToDo
|
229
273
|
|
230
274
|
= REQUIREMENTS
|
231
275
|
* http://hannonlab.cshl.edu/fastx_toolkit/ (the gem tries to install this tool by itself)
|
232
|
-
* http://www.gnuplot.info/
|
276
|
+
* http://www.gnuplot.info/ tested on version 4.6
|
277
|
+
* libxslt1-dev
|
233
278
|
|
234
279
|
Pleas follow the instruction for your own distribution/operating system
|
235
280
|
|
236
281
|
= TODO
|
282
|
+
* Write Tutorial for Wrapper & Pipes
|
283
|
+
* Write Tutorial for handling Illumina/Fastq.gz with BioNGS Bio::Ngs::Illumina::FastqGz
|
237
284
|
* Report the version of every software installed/used from bio-ngs
|
238
285
|
* Develop fastq quality reports with RibuVis ?
|
239
286
|
* Write documentation
|
@@ -243,8 +290,8 @@ Pleas follow the instruction for your own distribution/operating system
|
|
243
290
|
* BRANCH:misk_tasks Explore possibility to user DelayedJobs
|
244
291
|
* biongs ann:ensembl:gtf:features:categorize GTF GTF categorize also by chromosome not only by BioType
|
245
292
|
* configuration file input,output, experimental design
|
246
|
-
* include fastx toolkit, download and compile
|
247
|
-
* how to put in background tasks that can be run in parallel
|
293
|
+
* DONE: include fastx toolkit, download and compile
|
294
|
+
* ANSWER: how to put in background tasks that can be run in parallel? Use Parallel (see code for quality:illumina_project_stats)
|
248
295
|
* is it possible to establish a relation between input data and output data ? like fastq task_selected output/s
|
249
296
|
* add description for developers on howto include news external tool with versions.yaml
|
250
297
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.6.alpha.
|
1
|
+
0.4.6.alpha.02
|
data/bin/biongs
CHANGED
data/bio-ngs.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |s|
|
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new("> 1.3.1") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Francesco Strozzi", "Raoul J.P. Bonnal"]
|
12
|
-
s.date = "2012-
|
12
|
+
s.date = "2012-05-18"
|
13
13
|
s.description = "bio-ngs provides a framework for handling NGS data with BioRuby"
|
14
14
|
s.email = "francesco.strozzi@gmail.com"
|
15
15
|
s.executables = ["biongs"]
|
@@ -29,6 +29,11 @@ Gem::Specification.new do |s|
|
|
29
29
|
"bin/biongs",
|
30
30
|
"bio-ngs.gemspec",
|
31
31
|
"ext/mkrf_conf.rb",
|
32
|
+
"features/cufflinks_gtf_parser.feature",
|
33
|
+
"features/cufflinks_gtf_parser_indexing.feature",
|
34
|
+
"features/step_definitions/cufflinks_gtf.rb",
|
35
|
+
"features/step_definitions/cufflinks_gtf_parser_indexing.rb",
|
36
|
+
"features/support/env.rb",
|
32
37
|
"lib/bio-ngs.rb",
|
33
38
|
"lib/bio/appl/ngs/bcftools.rb",
|
34
39
|
"lib/bio/appl/ngs/bcl2qseq.rb",
|
@@ -36,6 +41,9 @@ Gem::Specification.new do |s|
|
|
36
41
|
"lib/bio/appl/ngs/bowtie-inspect.rb",
|
37
42
|
"lib/bio/appl/ngs/bwa.rb",
|
38
43
|
"lib/bio/appl/ngs/cufflinks.rb",
|
44
|
+
"lib/bio/appl/ngs/cufflinks/gtf/gtf.rb",
|
45
|
+
"lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb",
|
46
|
+
"lib/bio/appl/ngs/cufflinks/gtf/transcript.rb",
|
39
47
|
"lib/bio/appl/ngs/cufflinks/iterators.rb",
|
40
48
|
"lib/bio/appl/ngs/fastx.rb",
|
41
49
|
"lib/bio/appl/ngs/samtools.rb",
|
@@ -57,8 +65,13 @@ Gem::Specification.new do |s|
|
|
57
65
|
"lib/bio/ngs/ext/bin/linux/samtools",
|
58
66
|
"lib/bio/ngs/ext/bin/osx/samtools",
|
59
67
|
"lib/bio/ngs/ext/versions.yaml",
|
68
|
+
"lib/bio/ngs/fs.rb",
|
60
69
|
"lib/bio/ngs/graphics.rb",
|
61
70
|
"lib/bio/ngs/homology.rb",
|
71
|
+
"lib/bio/ngs/illumina/fastq.rb",
|
72
|
+
"lib/bio/ngs/illumina/illumina.rb",
|
73
|
+
"lib/bio/ngs/illumina/project.rb",
|
74
|
+
"lib/bio/ngs/illumina/sample.rb",
|
62
75
|
"lib/bio/ngs/ontology.rb",
|
63
76
|
"lib/bio/ngs/quality.rb",
|
64
77
|
"lib/bio/ngs/record.rb",
|
@@ -66,6 +79,7 @@ Gem::Specification.new do |s|
|
|
66
79
|
"lib/bio/ngs/utils.rb",
|
67
80
|
"lib/development_tasks.rb",
|
68
81
|
"lib/enumerable.rb",
|
82
|
+
"lib/meta.rb",
|
69
83
|
"lib/tasks/bwa.thor",
|
70
84
|
"lib/tasks/convert.thor",
|
71
85
|
"lib/tasks/filter.thor",
|
@@ -80,8 +94,22 @@ Gem::Specification.new do |s|
|
|
80
94
|
"lib/templates/README.tt",
|
81
95
|
"lib/templates/db.tt",
|
82
96
|
"lib/wrapper.rb",
|
97
|
+
"spec/bio/ngs/fs_spec.rb",
|
98
|
+
"spec/bio/ngs/illumina/fastq_spec.rb",
|
99
|
+
"spec/bio/ngs/illumina/illumina_spec.rb",
|
100
|
+
"spec/bio/ngs/illumina/project_spec.rb",
|
101
|
+
"spec/bio/ngs/illumina/sample_spec.rb",
|
102
|
+
"spec/bio/ngs/illumina/samples_spec.rb",
|
83
103
|
"spec/converter_qseq_spec.rb",
|
104
|
+
"spec/filter_spec.rb",
|
84
105
|
"spec/fixture/s_1_1_1108_qseq.txt",
|
106
|
+
"spec/fixture/table_filter_list.txt",
|
107
|
+
"spec/fixture/table_filter_list_first_column.txt",
|
108
|
+
"spec/fixture/table_filter_source.tsv",
|
109
|
+
"spec/fixture/test-filtered-reference.fastq.gz",
|
110
|
+
"spec/fixture/test-merged-reference.fastq.gz",
|
111
|
+
"spec/fixture/test.fastq.gz",
|
112
|
+
"spec/meta_spec.rb",
|
85
113
|
"spec/quality_spec.rb",
|
86
114
|
"spec/sff_extract_spec.rb",
|
87
115
|
"spec/spec_helper.rb",
|
@@ -103,7 +131,7 @@ Gem::Specification.new do |s|
|
|
103
131
|
s.homepage = "http://github.com/helios/bioruby-ngs"
|
104
132
|
s.licenses = ["MIT"]
|
105
133
|
s.require_paths = ["lib"]
|
106
|
-
s.rubygems_version = "1.8.
|
134
|
+
s.rubygems_version = "1.8.15"
|
107
135
|
s.summary = "bio-ngs provides a framework for handling NGS data with BioRuby"
|
108
136
|
|
109
137
|
if s.respond_to? :specification_version then
|
@@ -112,7 +140,7 @@ Gem::Specification.new do |s|
|
|
112
140
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
113
141
|
s.add_runtime_dependency(%q<bio>, [">= 1.4.2"])
|
114
142
|
s.add_runtime_dependency(%q<bio-samtools>, [">= 0.3.2"])
|
115
|
-
s.add_runtime_dependency(%q<thor>, ["
|
143
|
+
s.add_runtime_dependency(%q<thor>, ["= 0.14.6"])
|
116
144
|
s.add_runtime_dependency(%q<rubyvis>, [">= 0.5.0"])
|
117
145
|
s.add_runtime_dependency(%q<daemons>, [">= 1.1.0"])
|
118
146
|
s.add_runtime_dependency(%q<ruby-ensembl-api>, [">= 1.0.1"])
|
@@ -128,7 +156,7 @@ Gem::Specification.new do |s|
|
|
128
156
|
s.add_development_dependency(%q<jeweler>, ["~> 1.8.3"])
|
129
157
|
s.add_development_dependency(%q<rcov>, ["~> 0.9.11"])
|
130
158
|
s.add_development_dependency(%q<bio>, [">= 1.4.2"])
|
131
|
-
s.add_development_dependency(%q<thor>, ["
|
159
|
+
s.add_development_dependency(%q<thor>, ["= 0.14.6"])
|
132
160
|
s.add_development_dependency(%q<ffi>, [">= 1.0.6"])
|
133
161
|
s.add_development_dependency(%q<rubyvis>, [">= 0.5.0"])
|
134
162
|
s.add_development_dependency(%q<rspec>, [">= 2.5.0"])
|
@@ -145,7 +173,7 @@ Gem::Specification.new do |s|
|
|
145
173
|
else
|
146
174
|
s.add_dependency(%q<bio>, [">= 1.4.2"])
|
147
175
|
s.add_dependency(%q<bio-samtools>, [">= 0.3.2"])
|
148
|
-
s.add_dependency(%q<thor>, ["
|
176
|
+
s.add_dependency(%q<thor>, ["= 0.14.6"])
|
149
177
|
s.add_dependency(%q<rubyvis>, [">= 0.5.0"])
|
150
178
|
s.add_dependency(%q<daemons>, [">= 1.1.0"])
|
151
179
|
s.add_dependency(%q<ruby-ensembl-api>, [">= 1.0.1"])
|
@@ -161,7 +189,7 @@ Gem::Specification.new do |s|
|
|
161
189
|
s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
|
162
190
|
s.add_dependency(%q<rcov>, ["~> 0.9.11"])
|
163
191
|
s.add_dependency(%q<bio>, [">= 1.4.2"])
|
164
|
-
s.add_dependency(%q<thor>, ["
|
192
|
+
s.add_dependency(%q<thor>, ["= 0.14.6"])
|
165
193
|
s.add_dependency(%q<ffi>, [">= 1.0.6"])
|
166
194
|
s.add_dependency(%q<rubyvis>, [">= 0.5.0"])
|
167
195
|
s.add_dependency(%q<rspec>, [">= 2.5.0"])
|
@@ -179,7 +207,7 @@ Gem::Specification.new do |s|
|
|
179
207
|
else
|
180
208
|
s.add_dependency(%q<bio>, [">= 1.4.2"])
|
181
209
|
s.add_dependency(%q<bio-samtools>, [">= 0.3.2"])
|
182
|
-
s.add_dependency(%q<thor>, ["
|
210
|
+
s.add_dependency(%q<thor>, ["= 0.14.6"])
|
183
211
|
s.add_dependency(%q<rubyvis>, [">= 0.5.0"])
|
184
212
|
s.add_dependency(%q<daemons>, [">= 1.1.0"])
|
185
213
|
s.add_dependency(%q<ruby-ensembl-api>, [">= 1.0.1"])
|
@@ -195,7 +223,7 @@ Gem::Specification.new do |s|
|
|
195
223
|
s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
|
196
224
|
s.add_dependency(%q<rcov>, ["~> 0.9.11"])
|
197
225
|
s.add_dependency(%q<bio>, [">= 1.4.2"])
|
198
|
-
s.add_dependency(%q<thor>, ["
|
226
|
+
s.add_dependency(%q<thor>, ["= 0.14.6"])
|
199
227
|
s.add_dependency(%q<ffi>, [">= 1.0.6"])
|
200
228
|
s.add_dependency(%q<rubyvis>, [">= 0.5.0"])
|
201
229
|
s.add_dependency(%q<rspec>, [">= 2.5.0"])
|
@@ -0,0 +1,22 @@
|
|
1
|
+
Feature: Manipulate Cufflinks GTF files
|
2
|
+
In order to extract data and information plus new insights from RNASeq data
|
3
|
+
As a bioinformatician
|
4
|
+
I want to parse transcript.gtf file created by Cufflinks after Quantification analysis
|
5
|
+
|
6
|
+
Scenario: iterating over Cufflinks GTF transcripts
|
7
|
+
Given the file "transcripts.gtf" from quantification analysis
|
8
|
+
Then I want to print "each_transcript" on stdout
|
9
|
+
|
10
|
+
Scenario: count new isoforms
|
11
|
+
Given the file "transcripts.gtf" from quantification analysis
|
12
|
+
Then I want to "count" the "brand_new_isoforms"
|
13
|
+
|
14
|
+
Scenario: counts new isoforms for many quantifications
|
15
|
+
Given a list of parameters "-b -m -l 200 -c 3.0 -x -d"
|
16
|
+
Then I want to "count" the "brand_new_isoforms" in each subdirectory
|
17
|
+
|
18
|
+
|
19
|
+
Scenario: Save each transcript in single files
|
20
|
+
Given the file "transcripts.gtf" from quantification analysis
|
21
|
+
And a list of parameters "-b -m -l 200 -c 3.0"
|
22
|
+
Then I want to save "each_trasncript" in single files formatted in "bed" format
|
@@ -0,0 +1,20 @@
|
|
1
|
+
Feature: Indexing Cufflinks GTF features
|
2
|
+
In order to extract features quickly
|
3
|
+
As a bioinformatician
|
4
|
+
I want to use an index for random access
|
5
|
+
|
6
|
+
Scenario: build a GTF index
|
7
|
+
Given the file "transcripts.gtf" from quantification analysis
|
8
|
+
Then I want to build its index
|
9
|
+
And save it as "transcripts.gtf.idx"
|
10
|
+
|
11
|
+
Scenario: extract n-th feature from a GTF
|
12
|
+
Given the file "transcripts.gtf" from quantification analysis
|
13
|
+
Then I want to extract feature number "2"
|
14
|
+
And I want to extract feature number "5"
|
15
|
+
And I want to extract feature number "15"
|
16
|
+
|
17
|
+
Scenario: extract multiple features from a GTF
|
18
|
+
Given the file "transcripts.gtf" from quantification analysis
|
19
|
+
And a range from "2" to "11"
|
20
|
+
Then I want to obtain a bed file for each position in the range
|
@@ -0,0 +1,30 @@
|
|
1
|
+
|
2
|
+
Given /^the file "(.*?)" from quantification analysis$/ do |gtf_fn|
|
3
|
+
# pending # express the regexp above with the code you wish you had
|
4
|
+
@gtf_fn_ap = File.absolute_path File.join("spec/fixture/",gtf_fn)
|
5
|
+
File.exists?(File.join("spec/fixture/",gtf_fn)).should be true
|
6
|
+
@gtf = Bio::Ngs::Cufflinks::Gtf.new(@gtf_fn_ap)
|
7
|
+
@gtf.should_not be nil
|
8
|
+
end
|
9
|
+
|
10
|
+
Then /^I want to print "(.*?)" on stdout$/ do |each_method|
|
11
|
+
str=@gtf.send each_method do |transcript|
|
12
|
+
break(transcript.to_s)
|
13
|
+
end
|
14
|
+
#pending # express the regexp above with the code you wish you had
|
15
|
+
puts str
|
16
|
+
str.should =~ //
|
17
|
+
end
|
18
|
+
|
19
|
+
Then /^I want to "(.*?)" the "(.*?)"$/ do |operation, subset|
|
20
|
+
@gtf.send(subset).count.should == 17
|
21
|
+
end
|
22
|
+
|
23
|
+
|
24
|
+
Given /^a list of parameters "(.*?)"$/ do |arg1|
|
25
|
+
pending # express the regexp above with the code you wish you had
|
26
|
+
end
|
27
|
+
|
28
|
+
Then /^I want to "(.*?)" the "(.*?)" in each subdirectory$/ do |arg1, arg2|
|
29
|
+
pending # express the regexp above with the code you wish you had
|
30
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
Given /^a range from "(.*?)" to "(.*?)"$/ do |start, stop|
|
2
|
+
#pending # express the regexp above with the code you wish you had
|
3
|
+
@range = (start.to_i..stop.to_i)
|
4
|
+
end
|
5
|
+
|
6
|
+
Then /^I want to build its index$/ do
|
7
|
+
@gtf.build_idx.should == {:transcripts=>[801, 604, 425, 425, 857, 610, 610, 607, 607, 616, 622, 809, 1003, 821, 429, 412, 1023, 610, 607, 1003, 616, 607, 404, 404, 2973, 3219, 3420, 3219, 813, 1213, 1213, 1231, 613, 408, 408, 814, 1014, 626, 814, 408, 436, 610, 662, 1016, 816, 613, 2227, 616, 616, 616, 420, 643, 7588, 813, 814, 1014, 814, 614, 614, 3018, 2013, 814, 613, 865, 433, 433, 433, 643, 619, 3454, 3454, 2043, 1237, 628, 1225, 1023, 1243, 1023, 1225, 410, 410, 1662, 1238, 1004, 1023, 2642, 2237, 2439, 2033, 2439, 2236, 834, 2280, 2642, 821, 1832, 2236, 2439, 2236, 2439, 2033, 1629, 1225, 1427, 1225, 1225, 821, 1427, 412, 412, 611, 412, 412, 412, 414, 1678, 1629, 1630, 822, 620, 1225, 1267, 821, 426, 821, 626, 1831, 821, 629, 412, 418, 1831, 1629, 1670, 1427, 1694, 1427, 1831, 1630, 1831, 620, 1629, 1629, 1225, 1023, 1427, 1427, 1427, 822, 1225, 1225, 822, 620, 818, 817, 617, 4063, 3860, 3657, 3300, 1023], :exons=>[]}
|
8
|
+
end
|
9
|
+
|
10
|
+
Then /^save it as "(.*?)"$/ do |gtf_index_filename|
|
11
|
+
@gtf_fn_idx = File.absolute_path File.join("spec/fixture/","#{gtf_index_filename}")
|
12
|
+
@gtf.dump_idx
|
13
|
+
File.exists?(@gtf_fn_idx).should == true
|
14
|
+
end
|
15
|
+
|
16
|
+
Then /^I want to extract feature number "(.*?)"$/ do |index_to_get|
|
17
|
+
tow = <<DATA
|
18
|
+
1 Cufflinks transcript 35245 36073 1 - . gene_id "CUFF.1"; transcript_id "ENST00000461467"; FPKM "0.0000000000"; frac "0.000000"; conf_lo "0.000000"; conf_hi "0.000000"; cov "0.000000"; full_read_support "no";
|
19
|
+
1 Cufflinks exon 35245 35481 1 - . gene_id "CUFF.1"; transcript_id "ENST00000461467"; exon_number "1"; FPKM "0.0000000000"; frac "0.000000"; conf_lo "0.000000"; conf_hi "0.000000"; cov "0.000000";
|
20
|
+
1 Cufflinks exon 35721 36073 1 - . gene_id "CUFF.1"; transcript_id "ENST00000461467"; exon_number "2"; FPKM "0.0000000000"; frac "0.000000"; conf_lo "0.000000"; conf_hi "0.000000"; cov "0.000000";
|
21
|
+
DATA
|
22
|
+
|
23
|
+
five = <<DATA
|
24
|
+
1 Cufflinks transcript 521369 523833 1000 - . gene_id "ENSG00000231709"; transcript_id "ENST00000417636"; FPKM "0.0050659782"; frac "1.000000"; conf_lo "0.000000"; conf_hi "0.116517"; cov "0.009533"; full_read_support "no";
|
25
|
+
1 Cufflinks exon 521369 521738 1000 - . gene_id "ENSG00000231709"; transcript_id "ENST00000417636"; exon_number "1"; FPKM "0.0050659782"; frac "1.000000"; conf_lo "0.000000"; conf_hi "0.116517"; cov "0.009533";
|
26
|
+
1 Cufflinks exon 522201 522335 1000 - . gene_id "ENSG00000231709"; transcript_id "ENST00000417636"; exon_number "2"; FPKM "0.0050659782"; frac "1.000000"; conf_lo "0.000000"; conf_hi "0.116517"; cov "0.009533";
|
27
|
+
1 Cufflinks exon 523497 523833 1000 - . gene_id "ENSG00000231709"; transcript_id "ENST00000417636"; exon_number "3"; FPKM "0.0050659782"; frac "1.000000"; conf_lo "0.000000"; conf_hi "0.116517"; cov "0.009533";
|
28
|
+
DATA
|
29
|
+
|
30
|
+
fifteen = <<DATA
|
31
|
+
1 Cufflinks transcript 808847 808957 1 - . gene_id "ENSG00000221146"; transcript_id "ENST00000408219"; FPKM "0.0000000000"; frac "0.000000"; conf_lo "0.000000"; conf_hi "0.000000"; cov "0.000000"; full_read_support "no";
|
32
|
+
1 Cufflinks exon 808847 808957 1 - . gene_id "ENSG00000221146"; transcript_id "ENST00000408219"; exon_number "1"; FPKM "0.0000000000"; frac "0.000000"; conf_lo "0.000000"; conf_hi "0.000000"; cov "0.000000";
|
33
|
+
DATA
|
34
|
+
|
35
|
+
pre_defined_transcripts={"2"=>tow,"5"=>five, "15"=> fifteen}
|
36
|
+
@gtf[index_to_get.to_i].to_s.tr('"','\"').should == pre_defined_transcripts[index_to_get]
|
37
|
+
end
|
38
|
+
|
39
|
+
Then /^I want to obtain a bed file for each position in the range$/ do
|
40
|
+
str = @range.map do |index|
|
41
|
+
capture(:stdout) do
|
42
|
+
Thor::Runner.start (["filter:cufflinks:tra_at_idx",@gtf_fn_ap,index])
|
43
|
+
end
|
44
|
+
end
|
45
|
+
str.each do |file|
|
46
|
+
FileUtils.rm(file.chop)
|
47
|
+
end
|
48
|
+
str.should == ["CUFF.1-ENST00000461467.bed\n", "ENSG00000240361-ENST00000492842.bed\n",
|
49
|
+
"ENSG00000186092-ENST00000335137.bed\n", "ENSG00000231709-ENST00000417636.bed\n",
|
50
|
+
"CUFF.2-ENST00000423796.bed\n", "CUFF.2-ENST00000450696.bed\n",
|
51
|
+
"CUFF.2-TCONS_00000124.bed\n", "CUFF.2-TCONS_00000125.bed\n",
|
52
|
+
"CUFF.3-TCONS_00000796.bed\n", "XLOC_000669-TCONS_00001368.bed\n"]
|
53
|
+
end
|