full_lengther_next 0.0.8 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
- data/History.txt +2 -2
- data/Manifest.txt +33 -18
- data/Rakefile +4 -2
- data/bin/download_fln_dbs.rb +310 -158
- data/bin/full_lengther_next +160 -103
- data/bin/make_test_dataset.rb +236 -0
- data/bin/make_user_db.rb +101 -117
- data/bin/plot_fln.rb +270 -0
- data/bin/plot_taxonomy.rb +70 -0
- data/lib/expresscanvas.zip +0 -0
- data/lib/full_lengther_next.rb +3 -3
- data/lib/full_lengther_next/classes/artifacts.rb +66 -0
- data/lib/full_lengther_next/classes/blast_functions.rb +326 -0
- data/lib/full_lengther_next/classes/cdhit.rb +154 -0
- data/lib/full_lengther_next/classes/chimeric_seqs.rb +315 -57
- data/lib/full_lengther_next/classes/common_functions.rb +105 -63
- data/lib/full_lengther_next/classes/exonerate_result.rb +258 -0
- data/lib/full_lengther_next/classes/fl_analysis.rb +226 -617
- data/lib/full_lengther_next/classes/fl_string_utils.rb +4 -2
- data/lib/full_lengther_next/classes/fln_stats.rb +598 -557
- data/lib/full_lengther_next/classes/handle_db.rb +30 -0
- data/lib/full_lengther_next/classes/my_worker.rb +308 -138
- data/lib/full_lengther_next/classes/my_worker_EST.rb +54 -0
- data/lib/full_lengther_next/classes/my_worker_manager_EST.rb +69 -0
- data/lib/full_lengther_next/classes/my_worker_manager_fln.rb +389 -0
- data/lib/full_lengther_next/classes/nc_rna.rb +5 -7
- data/lib/full_lengther_next/classes/reptrans.rb +210 -0
- data/lib/full_lengther_next/classes/sequence.rb +439 -80
- data/lib/full_lengther_next/classes/test_code.rb +15 -16
- data/lib/full_lengther_next/classes/types.rb +12 -0
- data/lib/full_lengther_next/classes/une_los_hit.rb +148 -230
- data/lib/full_lengther_next/classes/warnings.rb +40 -0
- metadata +207 -93
- data/lib/full_lengther_next/classes/lcs.rb +0 -33
- data/lib/full_lengther_next/classes/my_worker_manager.rb +0 -240
@@ -0,0 +1,40 @@
|
|
1
|
+
$warnings_hash = {}
|
2
|
+
|
3
|
+
#Chimeras
|
4
|
+
$warnings_hash['solved'] = 'SOLVED'
|
5
|
+
|
6
|
+
#Ncrna
|
7
|
+
$warnings_hash['<200nt'] = 'Sequence length < 200 nt'
|
8
|
+
|
9
|
+
#Artifacts
|
10
|
+
$warnings_hash['ERROR#1'] = 'ERROR#1, contains sense and antisense hits!!!, putative chimeric sequence, '
|
11
|
+
$warnings_hash['ERROR#2'] = 'ERROR#2, query has several hsps that overlaps on same protein zone'
|
12
|
+
$warnings_hash['ERROR#3'] = 'ERROR#3, Sequence has a lot of errors or reference protein is too different'
|
13
|
+
|
14
|
+
#FL_analisys
|
15
|
+
$warnings_hash['PositionResult'] = 'Selected protein is hit number (*replace*) in blast result, '
|
16
|
+
$warnings_hash['SingleResult'] = 'Selected protein is the only available option, '
|
17
|
+
$warnings_hash['noM1'] = 'W1: There is no M at the beginning, possible sequencing error, '
|
18
|
+
$warnings_hash['UnexpSTOP5p'] = ' Unexpected STOP codon in 5 prime region, '
|
19
|
+
$warnings_hash['NoStopMfar'] = 'No stop codon before M and M found is too far from subject M, '
|
20
|
+
$warnings_hash['noM2'] = 'W2: There is no M at the beginning, '
|
21
|
+
$warnings_hash['UnexpSTOP3p'] = ' Unexpected STOP codon at 3\' end. '
|
22
|
+
$warnings_hash['UnexpSTOP3pDist'] = ' Unexpected STOP codon at 3\' end. Distance to subject end: (*replace*) aas, '
|
23
|
+
$warnings_hash['DistSubj'] = 'Distance to subject end: (*replace*) aas, not enough to search stop codon'
|
24
|
+
$warnings_hash['qStopTooFar'] = 'query STOP codon too far from subject stop. Distance to subject end: (*replace*) aas, putative chimeric sequence,'
|
25
|
+
$warnings_hash['qStopFar'] = 'query STOP codon is far from subject stop. Distance to subject end: (*replace*) aas,'
|
26
|
+
$warnings_hash['noStopDist'] = ' STOP codon was not found. Distance to subject end: (*replace*) aas, '
|
27
|
+
$warnings_hash['UnexpStopBegSeq'] = 'Unexpected stop codon in the beginning of your sequence, '
|
28
|
+
$warnings_hash['SeqLonger'] = ' your sequence is longer than subject: (*replace*) - (*replace*)'
|
29
|
+
$warnings_hash['SeqShorter'] = 'your sequence is shorter than subject: (*replace*) - (*replace*)'
|
30
|
+
$warnings_hash['VeryShorter'] = '. Was predicted as Complete, but is very much shorter than subject'
|
31
|
+
$warnings_hash['NoStop'] = ' STOP codon was not found, '
|
32
|
+
$warnings_hash['ProtFusion'] = ' Possible protein fusion, '
|
33
|
+
$warnings_hash['QueryTooLong'] = ' Query too long, '
|
34
|
+
$warnings_hash['ExFrameS'] = ' Frameshift at (*replace*) (ex), '
|
35
|
+
|
36
|
+
#UneLosHit
|
37
|
+
$warnings_hash['OverlapHit'] = 'Overlapping hits, possible frame ERROR between (*replace*) and (*replace*), '
|
38
|
+
$warnings_hash['AndOverlapHit'] = ' and overlapping frame ERROR between (*replace*) and (*replace*), '
|
39
|
+
$warnings_hash['SeparatedHit'] = 'Separated hits, possible frame ERROR between (*replace*) and (*replace*), '
|
40
|
+
$warnings_hash['AndSeparatedHit'] = ' and possible frame ERROR between (*replace*) and (*replace*), '
|
metadata
CHANGED
@@ -1,152 +1,266 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: full_lengther_next
|
3
|
-
version: !ruby/object:Gem::Version
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.5.6
|
4
5
|
prerelease:
|
5
|
-
version: 0.0.8
|
6
6
|
platform: ruby
|
7
|
-
authors:
|
8
|
-
- Noe Fernandez & Dario Guerrero
|
7
|
+
authors:
|
8
|
+
- ! 'Pedro Seoane & Noe Fernandez & Dario Guerrero '
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2015-12-07 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
16
15
|
name: xml-simple
|
17
|
-
|
18
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
19
17
|
none: false
|
20
|
-
requirements:
|
21
|
-
- -
|
22
|
-
- !ruby/object:Gem::Version
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
23
21
|
version: 1.0.12
|
24
22
|
type: :runtime
|
25
|
-
version_requirements: *id001
|
26
|
-
- !ruby/object:Gem::Dependency
|
27
|
-
name: scbi_blast
|
28
23
|
prerelease: false
|
29
|
-
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.0.12
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: scbi_blast
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
30
33
|
none: false
|
31
|
-
requirements:
|
32
|
-
- -
|
33
|
-
- !ruby/object:Gem::Version
|
34
|
-
version: 0.0.
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 0.0.32
|
35
38
|
type: :runtime
|
36
|
-
version_requirements: *id002
|
37
|
-
- !ruby/object:Gem::Dependency
|
38
|
-
name: scbi_mapreduce
|
39
39
|
prerelease: false
|
40
|
-
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 0.0.32
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: scbi_mapreduce
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
41
49
|
none: false
|
42
|
-
requirements:
|
43
|
-
- -
|
44
|
-
- !ruby/object:Gem::Version
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
45
53
|
version: 0.0.29
|
46
54
|
type: :runtime
|
47
|
-
|
48
|
-
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.0.29
|
62
|
+
- !ruby/object:Gem::Dependency
|
49
63
|
name: scbi_fasta
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 0.1.7
|
70
|
+
type: :runtime
|
50
71
|
prerelease: false
|
51
|
-
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
73
|
none: false
|
53
|
-
requirements:
|
54
|
-
- -
|
55
|
-
- !ruby/object:Gem::Version
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
56
77
|
version: 0.1.7
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: bio-cd-hit-report
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: 0.1.0
|
57
86
|
type: :runtime
|
58
|
-
version_requirements: *id004
|
59
|
-
- !ruby/object:Gem::Dependency
|
60
|
-
name: scbi_plot
|
61
87
|
prerelease: false
|
62
|
-
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
63
89
|
none: false
|
64
|
-
requirements:
|
65
|
-
- -
|
66
|
-
- !ruby/object:Gem::Version
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 0.1.0
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: bio
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: 1.4.3
|
102
|
+
type: :runtime
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 1.4.3
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: scbi_plot
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
67
117
|
version: 0.0.6
|
68
118
|
type: :runtime
|
69
|
-
version_requirements: *id005
|
70
|
-
- !ruby/object:Gem::Dependency
|
71
|
-
name: hoe
|
72
119
|
prerelease: false
|
73
|
-
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
74
121
|
none: false
|
75
|
-
requirements:
|
76
|
-
- -
|
77
|
-
- !ruby/object:Gem::Version
|
78
|
-
version:
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: 0.0.6
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: rdoc
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
130
|
+
requirements:
|
131
|
+
- - ~>
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '3.10'
|
79
134
|
type: :development
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
135
|
+
prerelease: false
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ~>
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '3.10'
|
142
|
+
- !ruby/object:Gem::Dependency
|
143
|
+
name: newgem
|
144
|
+
requirement: !ruby/object:Gem::Requirement
|
145
|
+
none: false
|
146
|
+
requirements:
|
147
|
+
- - ! '>='
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: 1.5.3
|
150
|
+
type: :development
|
151
|
+
prerelease: false
|
152
|
+
version_requirements: !ruby/object:Gem::Requirement
|
153
|
+
none: false
|
154
|
+
requirements:
|
155
|
+
- - ! '>='
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: 1.5.3
|
158
|
+
- !ruby/object:Gem::Dependency
|
159
|
+
name: hoe
|
160
|
+
requirement: !ruby/object:Gem::Requirement
|
161
|
+
none: false
|
162
|
+
requirements:
|
163
|
+
- - ~>
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '3.3'
|
166
|
+
type: :development
|
167
|
+
prerelease: false
|
168
|
+
version_requirements: !ruby/object:Gem::Requirement
|
169
|
+
none: false
|
170
|
+
requirements:
|
171
|
+
- - ~>
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '3.3'
|
174
|
+
description: FULL-LENGTHERNEXT is a tool adapted to NGS technologies, able to work
|
175
|
+
in parallel and in a distributed way to minimise computing time. It is able to classify
|
176
|
+
unigenes to full-length, 5’-end, 3’-end and internal, suggesting which unknown genes
|
177
|
+
are coding or not. It will be also shown that FULL-LENGTHERNEXT fixes frame shifts,
|
178
|
+
one of the main mistake found in wrong entries of full-length sequences databases,
|
179
|
+
and it is a fast tool to compare different transcriptome assemblies.
|
180
|
+
email:
|
181
|
+
- seoanezonjic@hotmail.com & noeisneo@gmail.com & dariogf@gmail.com
|
182
|
+
executables:
|
183
|
+
- plot_taxonomy.rb
|
184
|
+
- plot_fln.rb
|
85
185
|
- download_fln_dbs.rb
|
86
|
-
- make_user_db.rb
|
87
186
|
- full_lengther_next
|
187
|
+
- make_user_db.rb
|
188
|
+
- make_test_dataset.rb
|
88
189
|
extensions: []
|
89
|
-
|
90
|
-
|
190
|
+
extra_rdoc_files:
|
191
|
+
- PostInstall.txt
|
192
|
+
- README.rdoc
|
91
193
|
- History.txt
|
92
194
|
- Manifest.txt
|
93
|
-
|
94
|
-
|
195
|
+
files:
|
196
|
+
- Rakefile
|
197
|
+
- script/generate
|
198
|
+
- script/destroy
|
199
|
+
- script/console
|
200
|
+
- test/test_full_lengther_next.rb
|
201
|
+
- test/test_helper.rb
|
202
|
+
- bin/plot_taxonomy.rb
|
203
|
+
- bin/plot_fln.rb
|
95
204
|
- bin/download_fln_dbs.rb
|
96
|
-
- bin/make_user_db.rb
|
97
205
|
- bin/full_lengther_next
|
206
|
+
- bin/make_user_db.rb
|
207
|
+
- bin/make_test_dataset.rb
|
208
|
+
- PostInstall.txt
|
209
|
+
- README.rdoc
|
98
210
|
- History.txt
|
99
|
-
-
|
211
|
+
- Manifest.txt
|
212
|
+
- lib/full_lengther_next/classes/blast_functions.rb
|
213
|
+
- lib/full_lengther_next/classes/my_worker_manager_fln.rb
|
214
|
+
- lib/full_lengther_next/classes/types.rb
|
100
215
|
- lib/full_lengther_next/classes/chimeric_seqs.rb
|
216
|
+
- lib/full_lengther_next/classes/artifacts.rb
|
217
|
+
- lib/full_lengther_next/classes/cdhit.rb
|
101
218
|
- lib/full_lengther_next/classes/fl_analysis.rb
|
102
219
|
- lib/full_lengther_next/classes/fl_string_utils.rb
|
103
|
-
- lib/full_lengther_next/classes/fln_stats.rb
|
104
|
-
- lib/full_lengther_next/classes/lcs.rb
|
105
220
|
- lib/full_lengther_next/classes/my_worker.rb
|
106
|
-
- lib/full_lengther_next/classes/my_worker_manager.rb
|
107
|
-
- lib/full_lengther_next/classes/nc_rna.rb
|
108
|
-
- lib/full_lengther_next/classes/orf.rb
|
109
221
|
- lib/full_lengther_next/classes/sequence.rb
|
222
|
+
- lib/full_lengther_next/classes/my_worker_EST.rb
|
110
223
|
- lib/full_lengther_next/classes/test_code.rb
|
224
|
+
- lib/full_lengther_next/classes/orf.rb
|
111
225
|
- lib/full_lengther_next/classes/une_los_hit.rb
|
226
|
+
- lib/full_lengther_next/classes/warnings.rb
|
227
|
+
- lib/full_lengther_next/classes/fln_stats.rb
|
228
|
+
- lib/full_lengther_next/classes/my_worker_manager_EST.rb
|
229
|
+
- lib/full_lengther_next/classes/nc_rna.rb
|
230
|
+
- lib/full_lengther_next/classes/reptrans.rb
|
231
|
+
- lib/full_lengther_next/classes/common_functions.rb
|
232
|
+
- lib/full_lengther_next/classes/exonerate_result.rb
|
233
|
+
- lib/full_lengther_next/classes/handle_db.rb
|
112
234
|
- lib/full_lengther_next.rb
|
113
|
-
-
|
114
|
-
-
|
115
|
-
- Rakefile
|
116
|
-
- README.rdoc
|
117
|
-
- script/console
|
118
|
-
- script/destroy
|
119
|
-
- script/generate
|
120
|
-
- test/test_full_lengther_next.rb
|
121
|
-
- test/test_helper.rb
|
235
|
+
- lib/expresscanvas.zip
|
236
|
+
- .gemtest
|
122
237
|
homepage: http://www.scbi.uma.es/downloads
|
123
238
|
licenses: []
|
124
|
-
|
125
239
|
post_install_message: PostInstall.txt
|
126
|
-
rdoc_options:
|
240
|
+
rdoc_options:
|
127
241
|
- --main
|
128
242
|
- README.rdoc
|
129
|
-
require_paths:
|
243
|
+
require_paths:
|
130
244
|
- lib
|
131
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
245
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
132
246
|
none: false
|
133
|
-
requirements:
|
134
|
-
- -
|
135
|
-
- !ruby/object:Gem::Version
|
136
|
-
version:
|
137
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
247
|
+
requirements:
|
248
|
+
- - ! '>='
|
249
|
+
- !ruby/object:Gem::Version
|
250
|
+
version: '0'
|
251
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
138
252
|
none: false
|
139
|
-
requirements:
|
140
|
-
- -
|
141
|
-
- !ruby/object:Gem::Version
|
142
|
-
version:
|
253
|
+
requirements:
|
254
|
+
- - ! '>='
|
255
|
+
- !ruby/object:Gem::Version
|
256
|
+
version: '0'
|
143
257
|
requirements: []
|
144
|
-
|
145
258
|
rubyforge_project: full_lengther_next
|
146
|
-
rubygems_version: 1.8.
|
259
|
+
rubygems_version: 1.8.23
|
147
260
|
signing_key:
|
148
261
|
specification_version: 3
|
149
|
-
summary: FULL-LENGTHERNEXT is a tool adapted to NGS technologies, able to work in
|
150
|
-
|
262
|
+
summary: FULL-LENGTHERNEXT is a tool adapted to NGS technologies, able to work in
|
263
|
+
parallel and in a distributed way to minimise computing time
|
264
|
+
test_files:
|
151
265
|
- test/test_full_lengther_next.rb
|
152
266
|
- test/test_helper.rb
|
@@ -1,33 +0,0 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
class String
|
4
|
-
|
5
|
-
def lcs(s2)
|
6
|
-
s1=self
|
7
|
-
res=""
|
8
|
-
num=Array.new(s1.size){Array.new(s2.size)}
|
9
|
-
len,ans=0
|
10
|
-
lastsub=0
|
11
|
-
s1.scan(/./).each_with_index do |l1,i |
|
12
|
-
s2.scan(/./).each_with_index do |l2,j |
|
13
|
-
unless l1==l2
|
14
|
-
num[i][j]=0
|
15
|
-
else
|
16
|
-
(i==0 || j==0)? num[i][j]=1 : num[i][j]=1 + num[i-1][j-1]
|
17
|
-
if num[i][j] > len
|
18
|
-
len = ans = num[i][j]
|
19
|
-
thissub = i
|
20
|
-
thissub -= num[i-1][j-1] unless num[i-1][j-1].nil?
|
21
|
-
if lastsub==thissub
|
22
|
-
res+=s1[i,1]
|
23
|
-
else
|
24
|
-
lastsub=thissub
|
25
|
-
res=s1[lastsub, (i+1)-lastsub]
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
res
|
32
|
-
end
|
33
|
-
end
|
@@ -1,240 +0,0 @@
|
|
1
|
-
require 'json'
|
2
|
-
require 'scbi_fasta'
|
3
|
-
require 'sequence'
|
4
|
-
|
5
|
-
require 'fln_stats'
|
6
|
-
include FlnStats
|
7
|
-
|
8
|
-
class MyWorkerManager < ScbiMapreduce::WorkManager
|
9
|
-
|
10
|
-
# open files and prepare global data
|
11
|
-
def self.init_work_manager(options,chunk_size=100)
|
12
|
-
|
13
|
-
input_file=options[:fasta]
|
14
|
-
|
15
|
-
if !File.exists?('fln_results')
|
16
|
-
Dir.mkdir('fln_results')
|
17
|
-
end
|
18
|
-
|
19
|
-
file_head = "Query_id\tfasta_length\tSubject_id\tdb_name\tStatus\tt_code\te_value\tp_ident\tprotein_length\ts_length\tWarning_msgs\tframe\tORF_start\tORF_end\ts_start\ts_end\tDescription\tProtein_sequence"
|
20
|
-
|
21
|
-
@@fasta_file = FastaQualFile.new(input_file,'')
|
22
|
-
@@chunk_size=chunk_size
|
23
|
-
@@options = options
|
24
|
-
|
25
|
-
@@annotation_file = File.open("fln_results/dbannotated.txt", 'w')
|
26
|
-
@@annotation_file.puts file_head
|
27
|
-
|
28
|
-
@@alignment_file = File.open("fln_results/alignments.txt", 'w')
|
29
|
-
@@prot_file = File.open("fln_results/proteins.fasta", 'w')
|
30
|
-
@@nts_file = File.open("fln_results/nt_seq.txt", 'w')
|
31
|
-
@@tcode_file=File.open("fln_results/new_coding.txt", 'w')
|
32
|
-
@@tcode_file.puts file_head
|
33
|
-
|
34
|
-
@@nc_rna_file = File.open("fln_results/nc_rnas.txt", 'w')
|
35
|
-
@@nc_rna_file.puts file_head
|
36
|
-
|
37
|
-
if (!options[:chimera].nil?)
|
38
|
-
@@chimera_file = File.open("fln_results/chimeric_sequences.txt", 'w')
|
39
|
-
@@chimera_file.puts file_head
|
40
|
-
else
|
41
|
-
if File.exists?("fln_results/chimeric_sequences.txt")
|
42
|
-
File.delete("fln_results/chimeric_sequences.txt")
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
# @@error_fasta_file = File.open("fln_results/error_seqs.fasta", 'w')
|
47
|
-
# @@error_file = File.open("fln_results/errors_info.txt", 'w')
|
48
|
-
|
49
|
-
end
|
50
|
-
|
51
|
-
# close files
|
52
|
-
def self.end_work_manager
|
53
|
-
# @@fasta_file.close
|
54
|
-
|
55
|
-
@@annotation_file.close
|
56
|
-
@@alignment_file.close
|
57
|
-
@@prot_file.close
|
58
|
-
@@nts_file.close
|
59
|
-
@@tcode_file.close
|
60
|
-
@@nc_rna_file.close
|
61
|
-
|
62
|
-
if (!@@options[:chimera].nil?)
|
63
|
-
@@chimera_file.close
|
64
|
-
end
|
65
|
-
|
66
|
-
# @@error_fasta_file.close
|
67
|
-
# @@error_file.close
|
68
|
-
|
69
|
-
summary_stats
|
70
|
-
end
|
71
|
-
|
72
|
-
def error_received(worker_error, obj)
|
73
|
-
puts "Error while processing object #{obj.inspect}\n" + worker_error.original_exception.message + ":\n" +worker_error.original_exception.backtrace.join("\n")
|
74
|
-
end
|
75
|
-
|
76
|
-
def too_many_errors_received
|
77
|
-
$LOG.error "Too many errors: #{@@error_count} errors on #{@@count} executed sequences, exiting before finishing"
|
78
|
-
end
|
79
|
-
|
80
|
-
# send initial config
|
81
|
-
def worker_initial_config
|
82
|
-
return @@options
|
83
|
-
end
|
84
|
-
|
85
|
-
# this method is called every time a worker needs a new work
|
86
|
-
# Return the work data or nil if no more data is available
|
87
|
-
def next_work
|
88
|
-
|
89
|
-
# seqs=[]
|
90
|
-
# if (@@count % 2==0)
|
91
|
-
# $LOG.info("Processed #{@@count}")
|
92
|
-
# end
|
93
|
-
|
94
|
-
# prepare work
|
95
|
-
# @@chunk_size.times do
|
96
|
-
n,f,q = @@fasta_file.next_seq
|
97
|
-
|
98
|
-
if !n.nil?
|
99
|
-
return Sequence.new(n,f,q)
|
100
|
-
else
|
101
|
-
return nil
|
102
|
-
end
|
103
|
-
|
104
|
-
# end
|
105
|
-
|
106
|
-
# return work
|
107
|
-
# if !seqs.empty?
|
108
|
-
# return seqs
|
109
|
-
# else
|
110
|
-
# return nil
|
111
|
-
# end
|
112
|
-
|
113
|
-
end
|
114
|
-
|
115
|
-
# this method is ejecuted each time an obj is finished
|
116
|
-
def work_received(obj)
|
117
|
-
|
118
|
-
obj.each do |seq|
|
119
|
-
# puts seq.seq_name
|
120
|
-
|
121
|
-
write_seq(seq)
|
122
|
-
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
|
127
|
-
def write_seq(seq)
|
128
|
-
begin
|
129
|
-
# -------------------------------------------------------- Chimeric Seqs
|
130
|
-
if (!@@options[:chimera].nil?)
|
131
|
-
if (q=seq.get_annotations(:chimera).first)
|
132
|
-
@@chimera_file.puts q[:message]
|
133
|
-
# -------------------------------------------------- Complete Seqs
|
134
|
-
elsif (e=seq.get_annotations(:complete).first)
|
135
|
-
|
136
|
-
@@annotation_file.puts e[:message]
|
137
|
-
|
138
|
-
if (a=seq.get_annotations(:alignment).first)
|
139
|
-
@@alignment_file.puts a[:message]
|
140
|
-
end
|
141
|
-
|
142
|
-
if (p=seq.get_annotations(:protein).first)
|
143
|
-
@@prot_file.puts p[:message]
|
144
|
-
end
|
145
|
-
|
146
|
-
if (n=seq.get_annotations(:nucleotide).first)
|
147
|
-
@@nts_file.puts n[:message]
|
148
|
-
end
|
149
|
-
# --------------------------------------------------- Non Complete Seqs
|
150
|
-
elsif (e=seq.get_annotations(:tmp_annotation).first)
|
151
|
-
|
152
|
-
@@annotation_file.puts e[:message][0]
|
153
|
-
|
154
|
-
if (a=seq.get_annotations(:alignment).first)
|
155
|
-
if !a[:message].empty?
|
156
|
-
@@alignment_file.puts a[:message]
|
157
|
-
end
|
158
|
-
end
|
159
|
-
|
160
|
-
if (p=seq.get_annotations(:protein).first)
|
161
|
-
if !p[:message].empty?
|
162
|
-
@@prot_file.puts p[:message]
|
163
|
-
end
|
164
|
-
end
|
165
|
-
|
166
|
-
if (n=seq.get_annotations(:nucleotide).first)
|
167
|
-
@@nts_file.puts n[:message]
|
168
|
-
end
|
169
|
-
# ------------------------------------------------- nc RNA
|
170
|
-
elsif (nc=seq.get_annotations(:ncrna).first)
|
171
|
-
@@nc_rna_file.puts nc[:message]
|
172
|
-
# ------------------------------------------------- Test Code
|
173
|
-
elsif (t=seq.get_annotations(:tcode).first)
|
174
|
-
@@tcode_file.puts t[:message]
|
175
|
-
end
|
176
|
-
# ---------------------------------------------------------------------------------
|
177
|
-
# -------------------------------------------------------- without Chimeric Seqs Mode
|
178
|
-
else
|
179
|
-
# ------------------------------------------------- Complete Seqs
|
180
|
-
if (e=seq.get_annotations(:complete).first)
|
181
|
-
|
182
|
-
@@annotation_file.puts e[:message]
|
183
|
-
|
184
|
-
if (a=seq.get_annotations(:alignment).first)
|
185
|
-
@@alignment_file.puts a[:message]
|
186
|
-
end
|
187
|
-
|
188
|
-
if (p=seq.get_annotations(:protein).first)
|
189
|
-
@@prot_file.puts p[:message]
|
190
|
-
end
|
191
|
-
|
192
|
-
if (n=seq.get_annotations(:nucleotide).first)
|
193
|
-
@@nts_file.puts n[:message]
|
194
|
-
end
|
195
|
-
# ------------------------------------------------- Non Complete Seqs
|
196
|
-
elsif (e=seq.get_annotations(:tmp_annotation).first)
|
197
|
-
|
198
|
-
@@annotation_file.puts e[:message][0]
|
199
|
-
|
200
|
-
if (a=seq.get_annotations(:alignment).first)
|
201
|
-
if !a[:message].empty?
|
202
|
-
@@alignment_file.puts a[:message]
|
203
|
-
end
|
204
|
-
end
|
205
|
-
|
206
|
-
if (p=seq.get_annotations(:protein).first)
|
207
|
-
if !p[:message].empty?
|
208
|
-
@@prot_file.puts p[:message]
|
209
|
-
end
|
210
|
-
end
|
211
|
-
|
212
|
-
if (n=seq.get_annotations(:nucleotide).first)
|
213
|
-
@@nts_file.puts n[:message]
|
214
|
-
end
|
215
|
-
# ------------------------------------------------- nc RNA
|
216
|
-
elsif (nc=seq.get_annotations(:ncrna).first)
|
217
|
-
@@nc_rna_file.puts nc[:message]
|
218
|
-
# ------------------------------------------------- Test Code
|
219
|
-
elsif (t=seq.get_annotations(:tcode).first)
|
220
|
-
@@tcode_file.puts t[:message]
|
221
|
-
end
|
222
|
-
end
|
223
|
-
# ------------------------------------------------- errors
|
224
|
-
# if e=seq.get_annotations(:error).first
|
225
|
-
# if !e[:message].empty?
|
226
|
-
# @@error_fasta_file.puts ">#{seq.seq_name}\n#{seq.seq_fasta}"
|
227
|
-
# @@error_file.puts e[:message]
|
228
|
-
# end
|
229
|
-
# end
|
230
|
-
|
231
|
-
rescue
|
232
|
-
puts "Error printing #{seq.seq_name}"
|
233
|
-
end
|
234
|
-
|
235
|
-
end
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
end
|
240
|
-
|