full_lengther_next 0.9.9 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 4a6a23e7a0bc8c6ba6ad5e3f1579accb80a16bf1
4
- data.tar.gz: 3980b046483ecbfe54147cffb20cf8f6a61bea69
2
+ SHA256:
3
+ metadata.gz: e6d28d54912b46305ba0047c8458469afffe6660189cb1950c1d30290982e2c4
4
+ data.tar.gz: fca1a71701c8b1c763102623b6fc60d7699b697ebd636ce53f14886d07fb35f4
5
5
  SHA512:
6
- metadata.gz: c5ecb5ebcf8077a0000a5ce404dcc6a1468654fdeac3f2a48aa3dd8f2ef3eed6db51f6123abf3452ddc33ef56c3b896ad15125fc76621105d1db6e5a486954c6
7
- data.tar.gz: 795f3cea0505142218ad29411e1fdbc8e0ce2b4e72c42a18b54763f77f9e0d4c7e711836fe1f0561557224cb089bd511078e7a6a81fe87fb4e5d68fce3864910
6
+ metadata.gz: c1398e4d8448f10550769e4f6cb7baa2492337f377fab2f33b3de7067213f1d45ceae1a717de69c59c113c80d585d85bd7d4f1b0d6ed236fb0a5c8d7ac244f3e
7
+ data.tar.gz: 85c5238d3569e17509135e97383e748183fadc00c2c9eca9f984492b12ba4b4bd22e908eaae755162e4070c5eda8df1fcdffdbdb794c216537a0304b7bbe45bd
@@ -10,6 +10,7 @@ require 'scbi_zcat'
10
10
  require 'optparse'
11
11
  require 'cdhit'
12
12
  require 'handle_db'
13
+ require 'bio_patch'
13
14
 
14
15
  ##############################################################################################
15
16
  ## METHODS
@@ -69,29 +70,30 @@ def filtering_seqs(fasta_file, max_length, black_list)
69
70
  end
70
71
 
71
72
  def compare_list(string, list)
72
- res = FALSE
73
+ res = false
73
74
  list.each do |word|
74
75
  if string.include?(word)
75
- res = TRUE
76
+ res = true
76
77
  break
77
78
  end
78
79
  end
79
80
  return res
80
81
  end
81
82
 
82
- def conecta_uniprot(my_array, formatted_db_path)
83
+ def conecta_uniprot(my_array, formatted_db_path, no_trembl, passive_ftp)
83
84
 
84
85
  Dir.mkdir(formatted_db_path) if !File.exists?(formatted_db_path)
85
86
  varsplic_out=File.join(formatted_db_path,'uniprot_sprot_varsplic.fasta.gz')
86
87
 
87
88
  $ftp = Net::FTP.new()
89
+ $ftp.passive = true if passive_ftp
88
90
  $ftp.connect('ftp.ebi.ac.uk')
89
91
  $ftp.login
90
92
 
91
93
  puts "connected to UniProt"
92
94
  my_array.each do |db_group|
93
95
  puts "Downloading #{db_group}"
94
- download_uniprot(db_group, formatted_db_path)
96
+ download_uniprot(db_group, formatted_db_path, no_trembl)
95
97
  end
96
98
 
97
99
  #archivo de variantes de splicing. POR QUE?
@@ -102,13 +104,13 @@ def conecta_uniprot(my_array, formatted_db_path)
102
104
  puts "isoform files downloaded"
103
105
  end
104
106
 
105
- def download_uniprot(uniprot_group, formatted_db_path)
107
+ def download_uniprot(uniprot_group, formatted_db_path, no_trembl)
106
108
 
107
109
  sp_out=File.join(formatted_db_path,"uniprot_sprot_#{uniprot_group}.dat.gz")
108
110
  tr_out=File.join(formatted_db_path,"uniprot_trembl_#{uniprot_group}.dat.gz")
109
111
  $ftp.chdir("/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions")
110
112
  $ftp.getbinaryfile("uniprot_sprot_#{uniprot_group}.dat.gz", sp_out)
111
- $ftp.getbinaryfile("uniprot_trembl_#{uniprot_group}.dat.gz", tr_out)
113
+ $ftp.getbinaryfile("uniprot_trembl_#{uniprot_group}.dat.gz", tr_out) if !no_trembl
112
114
 
113
115
  puts "#{uniprot_group} files downloaded"
114
116
 
@@ -138,13 +140,13 @@ def filter_and_makeDB(formatted_db_path, dbtype, db_group, isoform_hash, prefix,
138
140
  end
139
141
 
140
142
  def complete?(uniprot_record)
141
- complete = TRUE
143
+ complete = true
142
144
  if uniprot_record.description.include?('Flags: Fragment') || #Discard non full length records
143
145
  uniprot_record.seq[0] != 'M' ||
144
146
  uniprot_record.seq.include?('XX') ||
145
147
  uniprot_record.ft.keys.include?('NON_TER') ||# The residue at an extremity of the sequence is not the terminal residue. If applied to position 1, this signifies that the first position is not the N-terminus of the complete molecule. If applied to the last position, it means that this position is not the C-terminus of the complete molecule. There is no description field for this key
146
148
  uniprot_record.ft.keys.include?('NON_CONS') # Non-consecutive residues. Indicates that two residues in a sequence are not consecutive and that there are a number of unreported or missing residues between them
147
- complete = FALSE
149
+ complete = false
148
150
  end
149
151
  return complete
150
152
  end
@@ -304,29 +306,29 @@ optparse = OptionParser.new do |opts|
304
306
  end
305
307
  end
306
308
 
307
- options[:no_download] = FALSE
309
+ options[:no_download] = false
308
310
  opts.on( '-d', '--no_download', 'Only parse downloaded files without download them again') do
309
- options[:no_download] = TRUE
311
+ options[:no_download] = true
310
312
  end
311
313
 
312
- options[:no_ncrna] = FALSE
314
+ options[:no_ncrna] = false
313
315
  opts.on( '-n', '--no_ncrna', 'No use ncrna sequences') do
314
- options[:no_ncrna] = TRUE
316
+ options[:no_ncrna] = true
315
317
  end
316
318
 
317
- options[:only_index] = FALSE
319
+ options[:only_index] = false
318
320
  opts.on( '-i', '--only_index', 'Build annotation index only without do blast DB') do
319
- options[:only_index] = TRUE
321
+ options[:only_index] = true
320
322
  end
321
323
 
322
- options[:no_trembl] = FALSE
324
+ options[:no_trembl] = false
323
325
  opts.on( '-t', '--no_trembl', 'No use trembl sequences') do
324
- options[:no_trembl] = TRUE
326
+ options[:no_trembl] = true
325
327
  end
326
328
 
327
- options[:all] = FALSE
329
+ options[:all] = false
328
330
  opts.on( '-a', '--all_sequences', 'Generate databases with all sequences') do
329
- options[:all] = TRUE
331
+ options[:all] = true
330
332
  end
331
333
 
332
334
  options[:cdhit] = 0
@@ -334,11 +336,15 @@ optparse = OptionParser.new do |opts|
334
336
  options[:cdhit] = cdhit.to_f
335
337
  end
336
338
 
337
- options[:no_uniprot] = FALSE
339
+ options[:no_uniprot] = false
338
340
  opts.on( '-p', '--no_uniprot', 'No use uniprot sequences') do
339
- options[:no_uniprot] = TRUE
341
+ options[:no_uniprot] = true
340
342
  end
341
343
 
344
+ options[:passive_ftp] = false
345
+ opts.on( '-P', '--passive_ftp', 'Use pasive ftp') do
346
+ options[:passive_ftp] = true
347
+ end
342
348
 
343
349
  # Set a banner, displayed at the top of the help screen.
344
350
  opts.banner = "Usage: #{File.basename(__FILE__)} [options] \n\n"
@@ -359,23 +365,22 @@ optparse.parse!
359
365
  ## MAIN
360
366
  ##############################################################################################
361
367
 
362
-
363
- if ENV['BLASTDB'] && File.exists?(ENV['BLASTDB'])
364
- formatted_db_path = ENV['BLASTDB']
368
+ if !ENV['BLASTDB'].nil?
369
+ formatted_db_path = File.expand_path(ENV['BLASTDB'])
365
370
  else # otherwise use ROOTPATH + DB
366
371
  formatted_db_path = File.expand_path(File.join(ROOT_PATH, "blast_dbs"))
367
- Dir.mkdir(formatted_db_path)
368
372
  end
373
+ Dir.mkdir(formatted_db_path) if !File.exists?(formatted_db_path)
369
374
 
370
375
 
371
376
  ENV['BLASTDB'] = formatted_db_path
372
377
  puts "Databases will be downloaded at: #{ENV['BLASTDB']}"
373
378
  puts "\nTo set the path for storing databases, execute next line in your terminal or add it to your .bash_profile:\n\n\texport BLASTDB=/my_path/\n\n"
374
-
379
+ puts "Patched? #{Bio::UniProtKB.patched?}"
375
380
  download_ncrna(formatted_db_path, options[:no_download]) if !options[:no_ncrna]
376
381
 
377
382
  if !options[:no_download]
378
- conecta_uniprot(options[:uniprot_div], formatted_db_path)
383
+ conecta_uniprot(options[:uniprot_div], formatted_db_path, options[:no_trembl], options[:passive_ftp])
379
384
  end
380
385
 
381
386
  if !options[:no_uniprot]
@@ -86,9 +86,9 @@ optparse = OptionParser.new do |opts|
86
86
  end
87
87
  end
88
88
 
89
- options[:exonerate] = TRUE
89
+ options[:exonerate] = true
90
90
  opts.on( '-e', '--exonerate', 'Disables exonerate analysis' ) do |exonerate|
91
- options[:exonerate] = FALSE
91
+ options[:exonerate] = false
92
92
  end
93
93
 
94
94
  options[:fasta] = nil
@@ -106,9 +106,9 @@ optparse = OptionParser.new do |opts|
106
106
  options[:ident] = ident.to_f
107
107
  end
108
108
 
109
- options[:high_clustering] = FALSE
109
+ options[:high_clustering] = false
110
110
  opts.on( '-k', '--high_clustering', 'Only for representative transcriptome. Add a clustering step using pfam ids. Default false' ) do
111
- options[:high_clustering] = TRUE
111
+ options[:high_clustering] = true
112
112
  end
113
113
 
114
114
  options[:subject_coverage] = 0.25
@@ -165,7 +165,7 @@ optparse = OptionParser.new do |opts|
165
165
  options[:user_db] = nil
166
166
  opts.on( '-u', '--user_db UserDB', 'User blast+ database' ) do |db|
167
167
  options[:user_db] = db
168
- if !File.exists?(File.expand_path(db+'.psq'))
168
+ if Dir.glob(File.expand_path(db+'*.psq')).empty?
169
169
  puts "user database: #{options[:user_db]} was not found"
170
170
  exit
171
171
  end
@@ -196,9 +196,9 @@ optparse = OptionParser.new do |opts|
196
196
  options[:training_ident] = ident.to_f
197
197
  end
198
198
 
199
- options[:hdd] = FALSE
199
+ options[:hdd] = false
200
200
  opts.on( '-z', '--hdd', 'Write/use blast report on HDD' ) do |hdd|
201
- options[:hdd] = TRUE
201
+ options[:hdd] = true
202
202
  end
203
203
 
204
204
 
@@ -207,9 +207,9 @@ optparse = OptionParser.new do |opts|
207
207
  options[:files2map] = files2map.split(';').map{|map_files| map_files.split(',')}
208
208
  end
209
209
 
210
- options[:remove_unmapped] = TRUE
210
+ options[:remove_unmapped] = true
211
211
  opts.on('-R', '--remove_unmapped', 'When fastq files are provided, all sequences without at least a read pair are removed. When this option is enabled this filtering is disabled' ) do
212
- options[:remove_unmapped] = FALSE
212
+ options[:remove_unmapped] = false
213
213
  end
214
214
 
215
215
  # Set a banner, displayed at the top of the help screen.
@@ -268,8 +268,8 @@ if !File.exists?(ncrna_path) && options[:acess_db].include?('c')
268
268
  end
269
269
 
270
270
  if options[:acess_db].include?('s') || options[:acess_db].include?('t')
271
- sp_path=File.join(ENV['BLASTDB'],"sp_#{options[:tax_group]}","sp_#{options[:tax_group]}.psq")
272
- if !File.exists?(sp_path)
271
+ sp_path=File.join(ENV['BLASTDB'],"sp_#{options[:tax_group]}","sp_#{options[:tax_group]}*.psq")
272
+ if Dir.glob(sp_path).empty?
273
273
  puts "DB File #{sp_path} doesn't exists, or"
274
274
  puts "incorrect taxon group name: #{options[:tax_group]} choose:"
275
275
  puts optparse.help
@@ -120,29 +120,29 @@ optparse = OptionParser.new do |opts|
120
120
  options[:duplicate] = duplicate.to_i
121
121
  end
122
122
 
123
- options[:split]= FALSE
123
+ options[:split]= false
124
124
  opts.on( '-s', '--split', 'Split sequences in each case') do
125
125
  options[:duplicate] = 3
126
126
  end
127
127
 
128
- options[:chim]= TRUE
128
+ options[:chim]= true
129
129
  opts.on( '-c', '--chim', 'Make sequence set of chimeras') do
130
- options[:chim] = FALSE
130
+ options[:chim] = false
131
131
  end
132
132
 
133
- options[:indel]= TRUE
133
+ options[:indel]= true
134
134
  opts.on( '-i', '--indel', 'Make sequence set of indels') do
135
- options[:indel] = FALSE
135
+ options[:indel] = false
136
136
  end
137
137
 
138
- options[:pair]= TRUE
138
+ options[:pair]= true
139
139
  opts.on( '-p', '--pair', 'Make sequence set of paired') do
140
- options[:pair] = FALSE
140
+ options[:pair] = false
141
141
  end
142
142
 
143
- options[:trim]= TRUE
143
+ options[:trim]= true
144
144
  opts.on( '-t', '--trim', 'Make sequence set of trimmed') do
145
- options[:trim] = FALSE
145
+ options[:trim] = false
146
146
  end
147
147
 
148
148
  # Set a banner, displayed at the top of the help screen.
data/bin/make_user_db.rb CHANGED
@@ -63,9 +63,9 @@ optparse = OptionParser.new do |opts|
63
63
  options[:name] = name
64
64
  end
65
65
 
66
- options[:local] = FALSE
66
+ options[:local] = false
67
67
  opts.on( '-l', '--local', 'Only parse downloaded files without download them again') do
68
- options[:local] = TRUE
68
+ options[:local] = true
69
69
  end
70
70
 
71
71
  options[:user_fasta] = nil
@@ -32,7 +32,8 @@ Gem::Specification.new do |spec|
32
32
  spec.add_runtime_dependency 'scbi_blast'
33
33
  spec.add_runtime_dependency 'scbi_mapreduce'
34
34
  spec.add_runtime_dependency 'scbi_zcat'
35
- spec.add_runtime_dependency 'bio-cd-hit-report'
35
+ spec.add_runtime_dependency 'bio'
36
+ #spec.add_runtime_dependency 'bio-cd-hit-report' # Removed due to conflicts with bio-ruby2. This gem depends on bio ruby 1.4.3. cdhit options disabled
36
37
  spec.add_runtime_dependency 'report_html'
37
38
 
38
39
 
@@ -7,11 +7,11 @@ include ChimericSeqs
7
7
  ## MAIN FUNCTION
8
8
  #####################################################################
9
9
  def artifact?(seq, query, db_name, db_path, options, new_seqs)
10
- artifact = FALSE
10
+ artifact = false
11
11
  # UNMAPPED CONTIG DETECTION
12
12
  if query.nil? && seq.unmapped? #If seq is misassembled stop chimera analisys
13
13
  seq.hit = nil
14
- artifact = TRUE
14
+ artifact = true
15
15
  seq.type = UNMAPPED
16
16
  end
17
17
 
@@ -19,7 +19,7 @@ def artifact?(seq, query, db_name, db_path, options, new_seqs)
19
19
  # MISASSEMBLED DETECTION
20
20
  if !artifact && misassembled_detection(query) #If seq is misassembled stop chimera analisys
21
21
  seq.hit = query.hits.first
22
- artifact = TRUE
22
+ artifact = true
23
23
  seq.type = MISASSEMBLED
24
24
  seq.warnings('ERROR#1')
25
25
  end
@@ -35,7 +35,7 @@ def artifact?(seq, query, db_name, db_path, options, new_seqs)
35
35
  else
36
36
  seq.hit = query.hits.first
37
37
  end
38
- artifact = TRUE
38
+ artifact = true
39
39
  seq.type = OTHER
40
40
  seq.warnings('ERROR#2')
41
41
  end
@@ -55,7 +55,7 @@ def artifact?(seq, query, db_name, db_path, options, new_seqs)
55
55
  new_seqs.concat(chimera)
56
56
  seq.db_name = db_name
57
57
  seq.type = CHIMERA
58
- artifact = TRUE
58
+ artifact = true
59
59
  end
60
60
  end
61
61
  end
@@ -64,8 +64,8 @@ def artifact?(seq, query, db_name, db_path, options, new_seqs)
64
64
  puts seq.prot_annot_calification
65
65
  end
66
66
  seq.db_name = db_name
67
- seq.save_fasta = FALSE
68
- seq.ignore = TRUE
67
+ seq.save_fasta = false
68
+ seq.ignore = true
69
69
  end
70
70
  return artifact
71
71
  end
@@ -0,0 +1,93 @@
1
+ module Bio
2
+ class UniProtKB
3
+ def self.patched?
4
+ return true
5
+ end
6
+
7
+ def ft(feature_key = nil)
8
+ return ft[feature_key] if feature_key
9
+ return @data['FT'] if @data['FT']
10
+
11
+ table = []
12
+ begin
13
+ get('FT').split("\n").each do |line|
14
+ if line =~ /^FT \w/
15
+ feature = line.chomp.ljust(74)
16
+ table << [feature[ 5..12].strip, # Feature Name
17
+ feature[14..19].strip, # From
18
+ feature[21..26].strip, # To
19
+ feature[34..74].strip ] # Description
20
+ else
21
+ table.last << line.chomp.sub!(/^FT +/, '')
22
+ end
23
+ end
24
+
25
+ # Joining Description lines
26
+ table = table.map { |feature|
27
+ ftid = feature.pop if feature.last =~ /FTId=/
28
+ if feature.size > 4
29
+ feature = [feature[0],
30
+ feature[1],
31
+ feature[2],
32
+ feature[3, feature.size - 3].join(" ")]
33
+ end
34
+ feature << if ftid then ftid else '' end
35
+ }
36
+
37
+ ###### PATCH TO RECOVER PARSER
38
+ to_delete = []
39
+ table.each_with_index do |feature, i|
40
+ name, from, to, descrition = feature
41
+ if from.empty?
42
+ coors = to.split("..")
43
+ if coors.length == 2
44
+ feature[1] = coors[0]
45
+ feature[2] = coors[1]
46
+ elsif /[^\d]/ =~ to
47
+ to_delete << i
48
+ else
49
+ feature[1] = to
50
+ feature[2] = to
51
+ end
52
+ end
53
+ end
54
+ to_delete.reverse_each{|i| table.delete_at(i)}
55
+ #####
56
+
57
+ hash = {}
58
+ table.each do |feature|
59
+ hash[feature[0]] = [] unless hash[feature[0]]
60
+ hash[feature[0]] << {
61
+ # Removing '<', '>' or '?' in FROM/TO endopoint.
62
+ 'From' => feature[1].sub(/\D/, '').to_i,
63
+ 'To' => feature[2].sub(/\D/, '').to_i,
64
+ 'Description' => feature[3],
65
+ 'FTId' => feature[4].to_s.sub(/\/FTId=/, '').sub(/\.$/, ''),
66
+ 'diff' => [],
67
+ 'original' => feature
68
+ }
69
+
70
+ case feature[0]
71
+ when 'VARSPLIC', 'VARIANT', 'VAR_SEQ', 'CONFLICT'
72
+ case hash[feature[0]].last['Description']
73
+ when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
74
+ original_res = $1
75
+ changed_res = $2
76
+ original_res = original_res.gsub(/ /,'').strip
77
+ chenged_res = changed_res.gsub(/ /,'').strip
78
+ when /Missing/i
79
+ original_res = seq.subseq(hash[feature[0]].last['From'],
80
+ hash[feature[0]].last['To'])
81
+ changed_res = ''
82
+ end
83
+ hash[feature[0]].last['diff'] = [original_res, chenged_res]
84
+ end
85
+ end
86
+ rescue
87
+ raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{self.get('FT')}'\n"
88
+ end
89
+
90
+ @data['FT'] = hash
91
+ end
92
+ end
93
+ end
@@ -105,31 +105,31 @@ def set_thresold_evalue(hits)
105
105
  end
106
106
 
107
107
  def same_subject_hsp(hit, second_hit)
108
- same = FALSE
108
+ same = false
109
109
  if hit.acc == second_hit.acc
110
110
  if hit.s_beg <= second_hit.s_beg && hit.s_end >= hit.s_end && (second_hit.s_beg - hit.s_end).abs > 1
111
- same = TRUE
111
+ same = true
112
112
  end
113
113
  end
114
114
  return same
115
115
  end
116
116
 
117
117
  def same_query_hsp(hit, second_hit)
118
- same = FALSE
118
+ same = false
119
119
  if hit.acc == second_hit.acc
120
120
  if hit.q_beg <= second_hit.q_beg && hit.q_end >= hit.q_end && (second_hit.q_beg - hit.q_end).abs > 1
121
- same = TRUE
121
+ same = true
122
122
  end
123
123
  end
124
124
  return same
125
125
  end
126
126
 
127
127
  def same_sense?(hit, second_hit)
128
- same= FALSE
128
+ same= false
129
129
  hit_sense = hit.q_frame <=> 0
130
130
  second_hit_sense = second_hit.q_frame <=> 0
131
131
  if hit_sense == second_hit_sense
132
- same = TRUE
132
+ same = true
133
133
  end
134
134
  return same
135
135
  end
@@ -158,7 +158,7 @@ def clean_by_query_length_match(blast_result, min_len_nt)
158
158
  end
159
159
 
160
160
 
161
- def clean_overlapping_hsps(blast_result, keep_if_diff_sense = FALSE)
161
+ def clean_overlapping_hsps(blast_result, keep_if_diff_sense = false)
162
162
  blast_result.querys.each do |query|
163
163
  if query.hits.length > 1
164
164
  query.hits.each_with_index do |hit, j|
@@ -190,7 +190,7 @@ end
190
190
  #####################################################################
191
191
 
192
192
  def misassembled_detection(query)
193
- miss=FALSE
193
+ miss=false
194
194
  hits = cluster_hsps(query.hits)
195
195
  misassembled_hits = []
196
196
  hits.each do |hit|
@@ -202,7 +202,7 @@ def misassembled_detection(query)
202
202
  end
203
203
  end
204
204
  if misassembled_hits.length*1.0/ hits.length > 0.5
205
- miss = TRUE
205
+ miss = true
206
206
  else #Remove missassembled hits to avoid broken analysis
207
207
  query.hits.reverse_each do |hsp|
208
208
  if misassembled_hits.include?(hsp.acc)
@@ -214,16 +214,16 @@ def misassembled_detection(query)
214
214
  end
215
215
 
216
216
  def multiple_hsps(query, num)
217
- multiple = FALSE
217
+ multiple = false
218
218
  hsps = query.hits.select{|h| h.acc == query.hits.first.acc}
219
219
  if hsps.length >= num
220
- multiple = TRUE
220
+ multiple = true
221
221
  end
222
222
  return multiple
223
223
  end
224
224
 
225
225
  def overlapping_hsps_on_subject(query)
226
- overlapping = FALSE
226
+ overlapping = false
227
227
  current_hit = query.hits.first.acc
228
228
  complete_hit = []
229
229
  cleaned_hits = []
@@ -252,16 +252,16 @@ def clean_subject_overlapping_hsps(complete_hit, cleaned_hits)
252
252
  end
253
253
 
254
254
  def subject_overlapping_hsps(hit)
255
- overlapping = FALSE
255
+ overlapping = false
256
256
  hsp_table = hsps_relationship_subject(hit)
257
257
  if !hsp_table.empty?
258
258
  hit = clean_hsp_by_identity(hit, 55)
259
259
  if hit.empty?
260
- overlapping = TRUE
260
+ overlapping = true
261
261
  else
262
262
  hsp_table = hsps_relationship_subject(hit)
263
263
  if !hsp_table.empty?
264
- overlapping = TRUE
264
+ overlapping = true
265
265
  end
266
266
  end
267
267
  end
@@ -286,10 +286,10 @@ def hsps_relationship_subject(hit)
286
286
  end
287
287
 
288
288
  def same_subject_hsp(hit, second_hit)
289
- same = FALSE
289
+ same = false
290
290
  if hit.acc == second_hit.acc
291
291
  if hit.s_beg <= second_hit.s_beg && hit.s_end >= hit.s_end && (second_hit.s_beg - hit.s_end).abs > 1
292
- same = TRUE
292
+ same = true
293
293
  end
294
294
  end
295
295
  return same
@@ -59,8 +59,8 @@ class Cdhit
59
59
  if master_seq.db != 'sp'
60
60
  sp_seq=get_sp(cluster)
61
61
  if !sp_seq.nil?
62
- cluster.map{|seq| seq.master=FALSE}
63
- sp_seq.master=TRUE
62
+ cluster.map{|seq| seq.master=false}
63
+ sp_seq.master= true
64
64
  end
65
65
  end
66
66
  }
@@ -109,7 +109,7 @@ class Cdhit
109
109
 
110
110
 
111
111
  def cd_hit_clusters(clust_file)
112
- require 'bio-cd-hit-report'
112
+ #require 'bio-cd-hit-report'
113
113
  report = Bio::CdHitReport.new(clust_file)
114
114
  report.each_cluster do |cluster|
115
115
  clust=[]
@@ -128,9 +128,9 @@ class Cdhit
128
128
  member.gsub!('>','')
129
129
  fields = member.split(',')
130
130
  data = fields[1].split(' ',2)
131
- master = FALSE
131
+ master = false
132
132
  if data[1] == '*'
133
- master = TRUE
133
+ master = true
134
134
  end
135
135
  return data[0],master
136
136
  end
@@ -110,8 +110,8 @@ module ChimericSeqs
110
110
  seq_bak.clean_warnings
111
111
  seq_bak.seq_name += "_split_#{hit_position}"
112
112
  seq_bak.clean_orfs
113
- seq_bak.save_fasta = TRUE
114
- seq_bak.ignore = FALSE
113
+ seq_bak.save_fasta = true
114
+ seq_bak.ignore = false
115
115
 
116
116
  # Cut sequence and move hit/hsps limits
117
117
  #----------------------------------------
@@ -244,10 +244,10 @@ module ChimericSeqs
244
244
  end
245
245
 
246
246
  def hit_is_in?(h_beg, h_end, hit)
247
- is=FALSE
247
+ is=false
248
248
  # CONTIENE #OVERLAP
249
249
  if h_beg <= hit[BEG] && h_end > hit[BEG] || hit[BEG] <= h_beg && hit[STOP] > h_beg
250
- is=TRUE
250
+ is=true
251
251
  end
252
252
  return is
253
253
  end
@@ -324,7 +324,7 @@ module ChimericSeqs
324
324
  cmd='clustalo -i - -o /dev/null --percent-id --full --distmat-out=/dev/stdout --force'
325
325
  clustal_matrix = nil
326
326
  IO.popen(cmd,'w+') {|clustal|
327
- clustal.sync = TRUE
327
+ clustal.sync = true
328
328
  clustal.write(seq_fasta)
329
329
  clustal.close_write
330
330
  clustal_matrix = clustal.readlines
@@ -101,7 +101,7 @@ module CommonFunctions
101
101
  hit.q_frame = -hit.q_frame
102
102
  hit.q_end = query_fasta.length - 1 - hit.q_end
103
103
  hit.q_beg = query_fasta.length - 1 - hit.q_beg
104
- hit.reversed = TRUE
104
+ hit.reversed = true
105
105
  query_fasta = query_fasta.complementary_dna # ESTO REALMENTE HACE LA REVERSO COMPLEMENTARIA.
106
106
  if hit.class.to_s == 'ExoBlastHit'
107
107
  hit.q_frameshift.map!{|position, num_nts|
@@ -39,7 +39,7 @@ end
39
39
  class ExonerateResult
40
40
 
41
41
  # Parser initialization
42
- def initialize(input, seqs= nil, query_seqs = nil, all = TRUE)
42
+ def initialize(input, seqs= nil, query_seqs = nil, all = true)
43
43
  @querys = []
44
44
  @seqs = seqs #unigenes
45
45
  @prot_seqs = query_seqs#prot
@@ -106,8 +106,8 @@ class ExonerateResult
106
106
 
107
107
  #this method only works fine with --model protein2dna parameter of exonerate
108
108
  def hiting(features, tags, query) #Convierte las coordenadas relativas del exonerate a absolutas tipo blast, definiendo solo los hits
109
- do_align = FALSE
110
- do_align = TRUE if !@prot_seqs.nil? && !@seqs.nil?
109
+ do_align = false
110
+ do_align = true if !@prot_seqs.nil? && !@seqs.nil?
111
111
  start_target = features['target_start_align']#Unigen
112
112
  start_query = features['query_start_align'] #proteina
113
113
  ends_target = features['target_end_align']
@@ -143,7 +143,7 @@ class ExonerateResult
143
143
  target_alignment << target_seq[counter_target, tag[TARGET]].translate
144
144
  end
145
145
  if tag[OPERATION] == 'F'
146
- if tag[TARGET] > 0 && tag[TARGET] < 3 #TRUE FRAMESHIFT
146
+ if tag[TARGET] > 0 && tag[TARGET] < 3 #true FRAMESHIFT
147
147
  gap_shift += 1
148
148
  if tags[n_operation+1][OPERATION] != 'G' #there are frameshift that not insert a gap, we do it
149
149
  query_alignment << '-' if do_align
@@ -203,7 +203,7 @@ class ExonerateResult
203
203
  def define_hit_parameters(hit, features, tags)
204
204
  hit.gaps = 0
205
205
  tags.map{|aln| hit.gaps += 1 if aln[0] == 'G'}
206
- hit.reversed = FALSE
206
+ hit.reversed = false
207
207
  hit.align_len =(features['query_end_align'] - features['query_start_align']).abs+1
208
208
  hit.mismatches=0
209
209
  hit.e_val=0
@@ -171,9 +171,9 @@ module FlAnalysis
171
171
  end
172
172
 
173
173
  if atg_status == 'putative' || end_status == 'putative'
174
- status = FALSE # Putative
174
+ status = false # Putative
175
175
  else
176
- status = TRUE # Sure
176
+ status = true # Sure
177
177
  end
178
178
 
179
179
  return type, status
@@ -187,7 +187,7 @@ module FlAnalysis
187
187
  $global_warnings << ['SeqShorter', final_prot.length, final_hit.s_len]
188
188
  if final_prot.length + 100 < final_hit.s_len || final_prot.length*2 < final_hit.s_len
189
189
  if type == COMPLETE
190
- status = FALSE
190
+ status = false
191
191
  $global_warnings << 'VeryShorter'
192
192
  end
193
193
  end
@@ -209,7 +209,7 @@ module FlAnalysis
209
209
  $global_warnings = [] # Clean all warnings for current sequence
210
210
  seq.seq_nt = mark_nt_seqs(final_hit, query_fasta)
211
211
  if type == COMPLETE
212
- seq.ignore = TRUE
212
+ seq.ignore = true
213
213
  end
214
214
  end
215
215
  if $verbose > 2
@@ -265,8 +265,8 @@ module FlAnalysis
265
265
 
266
266
  ## VERBOSE METHODS
267
267
  def show_nts
268
- show = FALSE
269
- show = TRUE if $verbose && $verbose > 3
268
+ show = false
269
+ show = true if $verbose && $verbose > 3
270
270
  return show
271
271
  end
272
272
 
@@ -74,7 +74,7 @@ module FlnStats
74
74
  if !$1.nil?
75
75
  organism = $1
76
76
  else
77
- name =~ /(\w+ \w+) \(([\w ]+)\)/
77
+ name =~ /(\w+ \w+) \(([\w \/]+)\)/
78
78
  if !$1.nil?
79
79
  organism = $1
80
80
  end
@@ -610,4 +610,4 @@ module FlnStats
610
610
  html = '<div style="font-size:25px; margin: 10"><b>'+title+'</b></div>'
611
611
  return html
612
612
  end
613
- end
613
+ end
@@ -35,7 +35,7 @@ end
35
35
  def do_makeblastdb(seqs, output, dbtype)
36
36
  cmd="makeblastdb -in - -out #{output} -title #{File.basename(output)} -dbtype #{dbtype} -parse_seqids"
37
37
  IO.popen(cmd,'w+') {|makedb|
38
- makedb.sync = TRUE
38
+ makedb.sync = true
39
39
  makedb.write(seqs)
40
40
  makedb.close_write
41
41
  puts makedb.readlines
@@ -193,7 +193,7 @@ class MyWorker < ScbiMapreduce::Worker
193
193
 
194
194
 
195
195
  # ejecuta blast utilizando los parametros fichero de entrada, base de datos, tipo de blast y evalue
196
- def run_blast(input, database, blast_type, evalue, additional_blast_options, do_exonerate, filter = TRUE)
196
+ def run_blast(input, database, blast_type, evalue, additional_blast_options, do_exonerate, filter = true)
197
197
  if !input.empty? && !input.nil?
198
198
  $WORKER_LOG.info "DB: #{File.basename(database)} #{input.length}"
199
199
  blast = BatchBlast.new("-db #{database}", blast_type, "-evalue #{evalue} #{additional_blast_options}")
@@ -202,7 +202,7 @@ class MyWorker < ScbiMapreduce::Worker
202
202
  if @options[:hdd] #Write/parse blast on Disk
203
203
  file_name = file_path+'.blast' #Each blast is identified with database_name and first sequence's name on chunk
204
204
  if !File.exists?(file_name)
205
- blast_result = blast.do_blast_seqs(input, :table, TRUE, file_name)
205
+ blast_result = blast.do_blast_seqs(input, :table, true, file_name)
206
206
  else
207
207
  blast = nil
208
208
  blast_result=BlastTableResult.new(file_name)
@@ -223,8 +223,8 @@ class MyWorker < ScbiMapreduce::Worker
223
223
  end
224
224
 
225
225
  def rescue_sequence(e, seq, status)
226
- seq.save_fasta = FALSE
227
- seq.ignore = TRUE
226
+ seq.save_fasta = false
227
+ seq.ignore = true
228
228
  seq.type = FAILED
229
229
  puts '-- '+seq.seq_name+' FAILED ANALYSIS -- '+status,
230
230
  e.message,
@@ -232,7 +232,7 @@ class MyWorker < ScbiMapreduce::Worker
232
232
  end
233
233
 
234
234
  def check_ncRNA(check_seqs, ncrna_path, blast_type, evalue)
235
- my_blast = run_blast(check_seqs, ncrna_path, blast_type, evalue, '', FALSE, nil)
235
+ my_blast = run_blast(check_seqs, ncrna_path, blast_type, evalue, '', false, nil)
236
236
  if !my_blast.nil?
237
237
  check_seqs.each_with_index do |seq,i|
238
238
  find_nc_rna(seq, my_blast.querys[i])
@@ -280,7 +280,7 @@ class MyWorker < ScbiMapreduce::Worker
280
280
 
281
281
  if seq.type == FAILED
282
282
  seq.type = UNKNOWN
283
- seq.ignore = FALSE
283
+ seq.ignore = false
284
284
  else
285
285
  best_option.warnings(warning) if !warning.nil?
286
286
  end
@@ -28,7 +28,7 @@ class MyWorkerEst < MyWorker
28
28
  #####################################################################################
29
29
 
30
30
  def blastEST(array_seqs)
31
- blast = run_blast(array_seqs, @blast_path, 'blastn', 1e-6, nil, FALSE)
31
+ blast = run_blast(array_seqs, @blast_path, 'blastn', 1e-6, nil, false)
32
32
  if blast.nil?
33
33
  $LOG.info 'BLAST FAILED'
34
34
  Process.exit(-1)
@@ -444,7 +444,7 @@ class MyWorkerManagerFln < ScbiMapreduce::WorkManager
444
444
  @@stats_hash['coding'] += 1
445
445
  coding = select_orf(coding)
446
446
  if coding[1] == 'complete'
447
- seq.status = TRUE
447
+ seq.status = true
448
448
  @@stats_hash['coding_sure'] += 1
449
449
  else
450
450
  @@stats_hash['coding_putative'] += 1
@@ -16,7 +16,7 @@ class Sequence
16
16
  @seq_aa = nil # Protein sequence generated over unigen
17
17
  @db =nil
18
18
  @type = UNKNOWN # See types.rb
19
- @status = FALSE # TRUE => Sure, FALSE => Putative
19
+ @status = false # true => Sure, false => Putative
20
20
  @id = nil #Prot or EST id, can be several => array
21
21
  @warnings = []
22
22
  @annotations=[]
@@ -27,9 +27,9 @@ class Sequence
27
27
  @fpkm = []
28
28
  @coverage_analysis = []
29
29
 
30
- @area_without_annotation=FALSE
31
- @save_fasta=TRUE
32
- @ignore=FALSE
30
+ @area_without_annotation=false
31
+ @save_fasta=true
32
+ @ignore = false
33
33
  @hit=nil
34
34
  @t_code=0
35
35
  end
@@ -86,7 +86,7 @@ class Sequence
86
86
 
87
87
  def reset_classification
88
88
  @type = UNKNOWN
89
- @status = FALSE
89
+ @status = false
90
90
  end
91
91
 
92
92
  def clean_warnings
@@ -150,7 +150,7 @@ class Sequence
150
150
  def test_code(test_code)
151
151
  @t_code = test_code
152
152
  if @t_code >= 0.95
153
- @status = TRUE
153
+ @status = true
154
154
  end
155
155
  end
156
156
 
@@ -470,7 +470,7 @@ class Sequence
470
470
  upstream_annotation_space = hit.q_beg
471
471
  downstream_annotation_space = @fasta_length - hit.q_end
472
472
  if upstream_annotation_space >= 150 || downstream_annotation_space >= 150
473
- @area_without_annotation = TRUE
473
+ @area_without_annotation = true
474
474
  end
475
475
  return @area_without_annotation
476
476
  end
@@ -490,8 +490,8 @@ class Sequence
490
490
  end
491
491
 
492
492
  def unmapped?
493
- res = FALSE
494
- res = TRUE if !@coverage_analysis.empty? && @coverage_analysis[3] == 0 #3 => percentage of sequence covered by reads
493
+ res = false
494
+ res = true if !@coverage_analysis.empty? && @coverage_analysis[3] == 0 #3 => percentage of sequence covered by reads
495
495
  return res
496
496
  end
497
497
  end
@@ -165,7 +165,7 @@ class UneLosHit
165
165
  #if frame_ori < 0 && h.q_frame > 0 || frame_ori > 0 && h.q_frame < 0
166
166
  if h.q_frame < 0 # si la secuencia esta al reves le damos la vuelta
167
167
  query_fasta = reverse_seq(query_fasta_ori, h)
168
- h.reversed = TRUE
168
+ h.reversed = true
169
169
  end
170
170
  misma_id << h
171
171
  #end
@@ -176,17 +176,17 @@ class UneLosHit
176
176
  end
177
177
 
178
178
  def overlapping_hits?(hit)
179
- overlap = FALSE
179
+ overlap = false
180
180
  if @final_hit.q_end >= hit.q_beg && @final_hit.q_end < hit.q_end && @final_hit.q_end < hit.q_end
181
- overlap = TRUE
181
+ overlap = true
182
182
  end
183
183
  return overlap
184
184
  end
185
185
 
186
186
  def separated_hits?(hit)
187
- separated=FALSE
187
+ separated=false
188
188
  if @final_hit.q_end < hit.q_beg && hit.q_end > @final_hit.q_end
189
- separated = TRUE
189
+ separated = true
190
190
  end
191
191
  return separated
192
192
  end
@@ -1,3 +1,3 @@
1
1
  module FullLengtherNext
2
- VERSION = '0.9.9'
2
+ VERSION = '1.0.2'
3
3
  end
metadata CHANGED
@@ -1,16 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: full_lengther_next
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.9
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pedro Seoane
8
8
  - Noe Fernandez
9
9
  - Dario Guerrero
10
- autorequire:
10
+ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2018-03-12 00:00:00.000000000 Z
13
+ date: 2022-09-05 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: xml-simple
@@ -83,7 +83,7 @@ dependencies:
83
83
  - !ruby/object:Gem::Version
84
84
  version: '0'
85
85
  - !ruby/object:Gem::Dependency
86
- name: bio-cd-hit-report
86
+ name: bio
87
87
  requirement: !ruby/object:Gem::Requirement
88
88
  requirements:
89
89
  - - ">="
@@ -189,6 +189,7 @@ files:
189
189
  - full_lengther_next.gemspec
190
190
  - lib/full_lengther_next.rb
191
191
  - lib/full_lengther_next/artifacts.rb
192
+ - lib/full_lengther_next/bio_patch.rb
192
193
  - lib/full_lengther_next/blast_functions.rb
193
194
  - lib/full_lengther_next/cdhit.rb
194
195
  - lib/full_lengther_next/chimeric_seqs.rb
@@ -220,7 +221,7 @@ homepage: https://github.com/seoanezonjic
220
221
  licenses:
221
222
  - MIT
222
223
  metadata: {}
223
- post_install_message:
224
+ post_install_message:
224
225
  rdoc_options: []
225
226
  require_paths:
226
227
  - lib
@@ -235,9 +236,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
235
236
  - !ruby/object:Gem::Version
236
237
  version: '0'
237
238
  requirements: []
238
- rubyforge_project:
239
- rubygems_version: 2.4.8
240
- signing_key:
239
+ rubygems_version: 3.3.7
240
+ signing_key:
241
241
  specification_version: 4
242
242
  summary: Tool to annotate transcriptomes and it is able to stablish the integrity
243
243
  of each transcript. Also, FLN can detect novel genes on a target organism.