full_lengther_next 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,287 @@
1
+
2
+ require 'common_functions'
3
+ include CommonFunctions
4
+
5
+ class UneLosHit
6
+
7
+ attr_reader :output_seq, :final_hit, :msgs, :number_x, :wrong_seq, :is_ok, :q_index_start, :full_prot
8
+
9
# Merges the hits selected by hits_misma_id into a single hit (@final_hit),
# padding the query sequence (@output_seq) with 'x' where consecutive hits
# lie in different reading frames.
#
# q                - query object; passed on to hits_misma_id, overlapped_hits,
#                    separated_hits and contenidos_en_prot.
# query_fasta      - query sequence; replaced by the copy hits_misma_id returns.
# pident_threshold - hits whose +ident+ is below this value are not merged.
#
# Sets @wrong_seq, @msgs, @number_x, @output_seq, @final_hit, @full_prot,
# @is_ok and @q_index_start.
def initialize(q, query_fasta, pident_threshold)
  mismas_ids_array, query_fasta, @wrong_seq = hits_misma_id(q, query_fasta)

  @mismas_ids_array = mismas_ids_array
  @msgs = ''
  @number_x = 0
  @output_seq = query_fasta

  several_hits = mismas_ids_array.count > 1

  # Order the hits by their start position on the query.
  mismas_ids_array.sort! { |h1, h2| h1.q_beg <=> h2.q_beg } if several_hits
  @final_hit = mismas_ids_array[0].dup

  if several_hits
    mismas_ids_array.each do |hit|
      next unless hit.ident >= pident_threshold
      next if same_hit(hit)

      if @final_hit.q_frame == hit.q_frame
        same_frame_hits(hit)
        next
      end

      # Correction for the frame in which the nucleotide sequence has to
      # continue; any frame difference other than +-1 / +-2 gives 0.
      correccion_x = case @final_hit.q_frame - hit.q_frame
                     when 1, -2 then 1
                     when 2, -1 then 2
                     else 0
                     end

      # A hit is only merged when it extends the merged hit by more than 15
      # positions on the query.
      extends_query = hit.q_end > @final_hit.q_end + 15

      if extends_query && @final_hit.q_end >= hit.q_beg && @final_hit.q_end < hit.q_end
        # The hits overlap on the query.
        overlapped_hits(hit, correccion_x, q)
      elsif extends_query && @final_hit.q_end < hit.q_beg
        # The hits are separated on the query.
        separated_hits(hit, correccion_x, q)
      else
        # NOTE(review): this replaces any message accumulated so far.
        @msgs = 'warning!, putative chimeric sequence! or repetitive structure'
      end
    end
  end

  # Translate the (possibly patched) query starting at the merged hit's frame.
  @full_prot = @output_seq[@final_hit.q_frame - 1, @output_seq.length + 1].translate

  @is_ok, @q_index_start = contenidos_en_prot(@final_hit, @full_prot, q)
end
85
+
86
+
87
+
88
# Tells whether +hit+ is the same alignment as the merged hit.
#
# Returns true when score, q_beg, q_end, s_beg and s_end of +hit+ all equal
# those of @final_hit, false otherwise.
def same_hit(hit)
  %w[score q_beg q_end s_beg s_end].all? do |field|
    hit.send(field) == @final_hit.send(field)
  end
end
102
+
103
+
104
+
105
# Extends @final_hit with +hit+ when both lie in the same reading frame.
#
# The merged hit keeps its own q_beg and takes the widest span on the subject
# (min s_beg, max s_end). The query end is only ever moved forward: a hit that
# ends before the current @final_hit.q_end must not shrink the merged region,
# otherwise the query span would disagree with the subject span.
def same_frame_hits(hit)
  @final_hit.q_end = [@final_hit.q_end, hit.q_end].max

  @final_hit.s_beg = [@final_hit.s_beg, hit.s_beg].min
  @final_hit.s_end = [@final_hit.s_end, hit.s_end].max
end
114
+
115
+
116
+
117
# Merges +hit+ into @final_hit when both overlap on the query but are in
# different reading frames.
#
# hit          - the hit to append to @final_hit.
# correccion_x - frame correction added to the overlap before computing the
#                number of 'x' to insert.
# q            - query object, only passed on to contenidos_en_prot.
#
# The overlapping stretch is replaced by 'x': @number_x nucleotides in the
# query sequence and one 'x' per overlapping subject position in the aligned
# q_seq / s_seq strings. Updates @msgs, @number_x, @number_x_aa, @output_seq
# and the coordinates and sequences of @final_hit.
def overlapped_hits(hit, correccion_x, q)
  junction = "frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
  if @msgs.empty?
    @msgs = "Overlapping hits, possible #{junction}"
  else
    @msgs += " and overlapping #{junction}"
  end

  # Overlap on the query (nucleotides) and on the subject (amino acids).
  overlapped_nt = @final_hit.q_end - hit.q_beg + 1
  overlapped_aas = @final_hit.s_end - hit.s_beg + 1

  # Round the nucleotide overlap up to a multiple of three.
  overlapped_nt += (3 - overlapped_nt % 3) % 3

  # Number of 'x' to insert.
  @number_x = ((correccion_x + overlapped_nt) / 3 + 1) * 3
  @number_x_aa = overlapped_aas

  num_x = @number_x > 0 ? 'x' * @number_x : ''
  @msgs = 'ERROR#2 unexpected negative index in x_number, ' if @number_x < 0

  num_x_aa = 'x' * overlapped_aas.abs
  # A negative subject overlap means the hits are apart on the subject.
  # NOTE(review): this replaces the message built above.
  @msgs = 'Warning!, your query overlaps and the subject is separated, ' if overlapped_aas < 0

  # Joins two aligned strings, trimming the overlap from both sides. The kept
  # prefix length is clamped at zero: a range such as 0..-1 would otherwise
  # wrap around and keep residues that should have been trimmed.
  splice = lambda do |left, right|
    if overlapped_aas >= 0
      kept = [left.length - overlapped_aas, 0].max
      "#{left[0, kept]}#{num_x_aa}#{right[overlapped_aas..-1]}"
    else
      "#{left}#{num_x_aa}#{right}"
    end
  end

  @final_hit.q_seq = splice.call(@final_hit.q_seq, hit.q_seq)
  @final_hit.s_seq = splice.call(@final_hit.s_seq, hit.s_seq)

  # Same clamping for the nucleotide prefix kept before the inserted 'x'.
  kept_nt = [@final_hit.q_end - overlapped_nt + 1, 0].max
  output_seq_tmp = "#{@output_seq[0, kept_nt]}#{num_x}#{@output_seq[(hit.q_beg + overlapped_nt)..-1]}"

  full_prot_tmp = output_seq_tmp[@final_hit.q_frame - 1, output_seq_tmp.length + 1].translate

  # The result is not used here; the call is kept exactly where it was, before
  # the coordinates of @final_hit are updated.
  contenidos_en_prot(@final_hit, full_prot_tmp, q)

  @output_seq = output_seq_tmp

  @final_hit.q_end = hit.q_end
  @final_hit.s_beg = [@final_hit.s_beg, hit.s_beg].min
  @final_hit.s_end = [@final_hit.s_end, hit.s_end].max
end
187
+
188
+
189
+
190
# Merges +hit+ into @final_hit when the two are separated on the query.
#
# hit          - the hit to append to @final_hit.
# correccion_x - accepted for symmetry with overlapped_hits; not used here.
# q            - query object, only passed on to contenidos_en_prot.
#
# The gap between both hits is replaced in @output_seq by @number_x 'x'
# (the gap length rounded up to a multiple of three) and by @number_x / 3 'x'
# in the aligned q_seq / s_seq strings. Updates @msgs, @number_x, @output_seq
# and @final_hit (score, q_end, s_end, q_seq, s_seq).
#
# Returns whatever contenidos_en_prot returns.
def separated_hits(hit, correccion_x, q)
  junction = "possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
  if @msgs.empty?
    @msgs = "Separated hits, #{junction}"
  else
    @msgs += " and #{junction}"
  end

  # Gap between the hits, rounded up to a multiple of three.
  @number_x = hit.q_beg - @final_hit.q_end - 1
  @number_x += (3 - @number_x % 3) % 3

  # Both fillers default to empty so that the negative-gap branch below
  # reports the problem instead of failing on a nil concatenation.
  num_x = ''
  num_x_aa = ''
  if @number_x > 0
    num_x = 'x' * @number_x
    num_x_aa = 'x' * (@number_x / 3)
  elsif @number_x < 0
    @msgs = 'ERROR#2 unexpected negative index in x_number'
  end

  @output_seq = @output_seq[0..@final_hit.q_end - 1] + num_x + @output_seq[hit.q_beg - 1..@output_seq.length - 1]

  @final_hit.score += 1
  @final_hit.q_end = hit.q_end
  @final_hit.s_end = hit.s_end

  @final_hit.q_seq = "#{@final_hit.q_seq}#{num_x_aa}#{hit.q_seq}"
  @final_hit.s_seq = "#{@final_hit.s_seq}#{num_x_aa}#{hit.s_seq}"

  full_prot_tmp = @output_seq[@final_hit.q_frame - 1, @output_seq.length + 1].translate

  contenidos_en_prot(@final_hit, full_prot_tmp, q)
end
244
+
245
+
246
+
247
+ # creamos un array en el que esten solo los hits con la misma id.
248
# Collects the hits of +q+ whose accession equals that of the first hit.
#
# Hits whose frame sign is opposite to the first hit's original frame are
# left out and flagged through the returned +wrong_seq+. Kept hits with a
# negative frame are converted with reverse_seq (which also yields the query
# sequence to use from then on) and marked as reversed.
#
# q               - query object responding to +hits+.
# query_fasta_ori - original query sequence; it is duplicated, not modified here.
#
# Returns [array of kept hits, query sequence, wrong_seq flag].
def hits_misma_id(q, query_fasta_ori)
  # The first hit is the reference: the one with the best score or e-value.
  top_hit = q.hits[0]
  frame_ori = top_hit.q_frame
  query_fasta = query_fasta_ori.dup

  wrong_seq = false
  misma_id = []

  q.hits.each do |h|
    next unless h.acc == top_hit.acc

    # Check that both frames have the same sense.
    opposite_sense = (frame_ori < 0 && h.q_frame > 0) || (frame_ori > 0 && h.q_frame < 0)
    if opposite_sense
      wrong_seq = true
      next
    end

    # If the sequence is reversed, turn it around.
    if h.q_frame.to_i < 0
      query_fasta, h.q_frame, h.q_beg, h.q_end = reverse_seq(query_fasta_ori, h.q_frame, h.q_beg, h.q_end)
      h.reversed = true
    end

    misma_id << h
  end

  [misma_id, query_fasta, wrong_seq]
end
285
+
286
+
287
+ end
data/script/console ADDED
@@ -0,0 +1,10 @@
1
#!/usr/bin/env ruby
# File: script/console
#
# Replaces the current process with an irb session that has tab completion
# and lib/full_lengther_next.rb preloaded.

# Windows builds of Ruby ship irb as a batch file. The group is written as a
# proper non-capturing group, (?:...); the former "(:?" matched an optional
# literal colon instead.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.dirname(__FILE__) + '/../lib/full_lengther_next.rb'}"
puts "Loading full_lengther_next gem"
exec "#{irb} #{libs} --simple-prompt"
data/script/destroy ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs RubiGen's destroy script with the command-line arguments.

# Root directory of the project: the parent of this script's directory.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load rubigen directly; if that fails, retry after loading RubyGems.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# A leading help flag is dropped before the arguments reach RubiGen.
ARGV.shift if %w[--help -h].include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Destroy.new.run(ARGV)
data/script/generate ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs RubiGen's generate script with the command-line arguments.

# Root directory of the project: the parent of this script's directory.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load rubigen directly; if that fails, retry after loading RubyGems.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# A leading help flag is dropped before the arguments reach RubiGen.
ARGV.shift if %w[--help -h].include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Generate.new.run(ARGV)
@@ -0,0 +1,11 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+
3
# Placeholder test case for the gem: it only checks that the test harness runs.
class TestFullLengtherNext < Test::Unit::TestCase
  # No fixtures are prepared.
  def setup; end

  # Always passes.
  def test_truth
    assert(true)
  end
end
@@ -0,0 +1,3 @@
1
+ require 'stringio'
2
+ require 'test/unit'
3
+ require File.dirname(__FILE__) + '/../lib/full_lengther_next'
metadata ADDED
@@ -0,0 +1,150 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: full_lengther_next
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.1
6
+ platform: ruby
7
+ authors:
8
+ - Noe Fernandez & Dario Guerrero
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2012-02-06 00:00:00 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: xml-simple
17
+ prerelease: false
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.0.12
24
+ type: :runtime
25
+ version_requirements: *id001
26
+ - !ruby/object:Gem::Dependency
27
+ name: scbi_blast
28
+ prerelease: false
29
+ requirement: &id002 !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: 0.0.32
35
+ type: :runtime
36
+ version_requirements: *id002
37
+ - !ruby/object:Gem::Dependency
38
+ name: scbi_mapreduce
39
+ prerelease: false
40
+ requirement: &id003 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: 0.0.29
46
+ type: :runtime
47
+ version_requirements: *id003
48
+ - !ruby/object:Gem::Dependency
49
+ name: scbi_fasta
50
+ prerelease: false
51
+ requirement: &id004 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: 0.1.7
57
+ type: :runtime
58
+ version_requirements: *id004
59
+ - !ruby/object:Gem::Dependency
60
+ name: scbi_plot
61
+ prerelease: false
62
+ requirement: &id005 !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: 0.0.6
68
+ type: :runtime
69
+ version_requirements: *id005
70
+ - !ruby/object:Gem::Dependency
71
+ name: hoe
72
+ prerelease: false
73
+ requirement: &id006 !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: 2.8.0
79
+ type: :development
80
+ version_requirements: *id006
81
+ description: "FULL-LENGTHERNEXT is a tool adapted to NGS technologies, able to work in parallel and in a distributed way to minimise computing time. It is able to classify unigenes to full-length, 5\xE2\x80\x99-end, 3\xE2\x80\x99-end and internal, suggesting which unknown genes are coding or not. It will be also shown that FULL-LENGTHERNEXT fixes frame shifts, one of the main mistake found in wrong entries of full-length sequences databases, and it is a fast tool to compare different transcriptome assemblies."
82
+ email:
83
+ - noeisneo@gmail.com & dariogf@gmail.com
84
+ executables:
85
+ - download_fln_dbs.rb
86
+ - make_user_db.rb
87
+ - full_lengther_next
88
+ extensions: []
89
+
90
+ extra_rdoc_files:
91
+ - History.txt
92
+ - Manifest.txt
93
+ - PostInstall.txt
94
+ files:
95
+ - bin/download_fln_dbs.rb
96
+ - bin/make_user_db.rb
97
+ - bin/full_lengther_next
98
+ - History.txt
99
+ - lib/full_lengther_next/classes/common_functions.rb
100
+ - lib/full_lengther_next/classes/fl2_stats.rb
101
+ - lib/full_lengther_next/classes/fl_analysis.rb
102
+ - lib/full_lengther_next/classes/fl_string_utils.rb
103
+ - lib/full_lengther_next/classes/lcs.rb
104
+ - lib/full_lengther_next/classes/my_worker.rb
105
+ - lib/full_lengther_next/classes/my_worker_manager.rb
106
+ - lib/full_lengther_next/classes/orf.rb
107
+ - lib/full_lengther_next/classes/sequence.rb
108
+ - lib/full_lengther_next/classes/test_code.rb
109
+ - lib/full_lengther_next/classes/une_los_hit.rb
110
+ - lib/full_lengther_next.rb
111
+ - Manifest.txt
112
+ - PostInstall.txt
113
+ - Rakefile
114
+ - README.rdoc
115
+ - script/console
116
+ - script/destroy
117
+ - script/generate
118
+ - test/test_full_lengther_next.rb
119
+ - test/test_helper.rb
120
+ homepage: http://www.scbi.uma.es/downloads
121
+ licenses: []
122
+
123
+ post_install_message: PostInstall.txt
124
+ rdoc_options:
125
+ - --main
126
+ - README.rdoc
127
+ require_paths:
128
+ - lib
129
+ required_ruby_version: !ruby/object:Gem::Requirement
130
+ none: false
131
+ requirements:
132
+ - - ">="
133
+ - !ruby/object:Gem::Version
134
+ version: "0"
135
+ required_rubygems_version: !ruby/object:Gem::Requirement
136
+ none: false
137
+ requirements:
138
+ - - ">="
139
+ - !ruby/object:Gem::Version
140
+ version: "0"
141
+ requirements: []
142
+
143
+ rubyforge_project: full_lengther_next
144
+ rubygems_version: 1.7.2
145
+ signing_key:
146
+ specification_version: 3
147
+ summary: FULL-LENGTHERNEXT is a tool adapted to NGS technologies, able to work in parallel and in a distributed way to minimise computing time
148
+ test_files:
149
+ - test/test_full_lengther_next.rb
150
+ - test/test_helper.rb