full_lengther_next 1.0.1 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: b6c6ce15a48dc8c8cc6e90b45e20bb1089e9da41
4
- data.tar.gz: 89cb0aa3462686a99567e974b20ae9725bea5c81
2
+ SHA256:
3
+ metadata.gz: 312551b120e51195d03e2e785c3449379d722c04b7b78ed2bc38945768195231
4
+ data.tar.gz: 20e19848d4248e437567856e668c068e2dd7a66fc76590acf80aa9b09a8c316d
5
5
  SHA512:
6
- metadata.gz: e70ec49d6f970affab8e61a1e7caede3b218ab0c88765f867c2ca400ae5df614ecee0910f7c0faa668369c7b685c5dc3fcf99aaa682c3ef65e8f014d94f6b42e
7
- data.tar.gz: cd1a6bc5892128100d1e015b1e9d2e2ae48fa0f5c466782a33553bc35e36c2c3ca44b1eed60ce842c11f29501523944d83856bef5b2a5590150f1b1c732c2952
6
+ metadata.gz: 36bc55ace652cd5170aa0e56199d2a0e34a90d1dbaca02089a20b89d19ec5c59068d5697ba6922acc6f81c8b5f565471ab6840cb100b7559daa7ca12449f57e5
7
+ data.tar.gz: 80e8079acabc809143c2ea7481d05d0779cdd71708cc39c251d34ca9dc878d4be82114731b177204d5be3edb2461c72ac19a220e91fd00c43159434b14b59bf5
@@ -10,6 +10,7 @@ require 'scbi_zcat'
10
10
  require 'optparse'
11
11
  require 'cdhit'
12
12
  require 'handle_db'
13
+ require 'bio_patch'
13
14
 
14
15
  ##############################################################################################
15
16
  ## METHODS
@@ -69,10 +70,10 @@ def filtering_seqs(fasta_file, max_length, black_list)
69
70
  end
70
71
 
71
72
  def compare_list(string, list)
72
- res = FALSE
73
+ res = false
73
74
  list.each do |word|
74
75
  if string.include?(word)
75
- res = TRUE
76
+ res = true
76
77
  break
77
78
  end
78
79
  end
@@ -139,13 +140,13 @@ def filter_and_makeDB(formatted_db_path, dbtype, db_group, isoform_hash, prefix,
139
140
  end
140
141
 
141
142
  def complete?(uniprot_record)
142
- complete = TRUE
143
+ complete = true
143
144
  if uniprot_record.description.include?('Flags: Fragment') || #Discard non full length records
144
145
  uniprot_record.seq[0] != 'M' ||
145
146
  uniprot_record.seq.include?('XX') ||
146
147
  uniprot_record.ft.keys.include?('NON_TER') ||# The residue at an extremity of the sequence is not the terminal residue. If applied to position 1, this signifies that the first position is not the N-terminus of the complete molecule. If applied to the last position, it means that this position is not the C-terminus of the complete molecule. There is no description field for this key
147
148
  uniprot_record.ft.keys.include?('NON_CONS') # Non-consecutive residues. Indicates that two residues in a sequence are not consecutive and that there are a number of unreported or missing residues between them
148
- complete = FALSE
149
+ complete = false
149
150
  end
150
151
  return complete
151
152
  end
@@ -305,29 +306,29 @@ optparse = OptionParser.new do |opts|
305
306
  end
306
307
  end
307
308
 
308
- options[:no_download] = FALSE
309
+ options[:no_download] = false
309
310
  opts.on( '-d', '--no_download', 'Only parse downloaded files without download them again') do
310
- options[:no_download] = TRUE
311
+ options[:no_download] = true
311
312
  end
312
313
 
313
- options[:no_ncrna] = FALSE
314
+ options[:no_ncrna] = false
314
315
  opts.on( '-n', '--no_ncrna', 'No use ncrna sequences') do
315
- options[:no_ncrna] = TRUE
316
+ options[:no_ncrna] = true
316
317
  end
317
318
 
318
- options[:only_index] = FALSE
319
+ options[:only_index] = false
319
320
  opts.on( '-i', '--only_index', 'Build annotation index only without do blast DB') do
320
- options[:only_index] = TRUE
321
+ options[:only_index] = true
321
322
  end
322
323
 
323
- options[:no_trembl] = FALSE
324
+ options[:no_trembl] = false
324
325
  opts.on( '-t', '--no_trembl', 'No use trembl sequences') do
325
- options[:no_trembl] = TRUE
326
+ options[:no_trembl] = true
326
327
  end
327
328
 
328
- options[:all] = FALSE
329
+ options[:all] = false
329
330
  opts.on( '-a', '--all_sequences', 'Generate databases with all sequences') do
330
- options[:all] = TRUE
331
+ options[:all] = true
331
332
  end
332
333
 
333
334
  options[:cdhit] = 0
@@ -335,14 +336,14 @@ optparse = OptionParser.new do |opts|
335
336
  options[:cdhit] = cdhit.to_f
336
337
  end
337
338
 
338
- options[:no_uniprot] = FALSE
339
+ options[:no_uniprot] = false
339
340
  opts.on( '-p', '--no_uniprot', 'No use uniprot sequences') do
340
- options[:no_uniprot] = TRUE
341
+ options[:no_uniprot] = true
341
342
  end
342
343
 
343
- options[:passive_ftp] = FALSE
344
+ options[:passive_ftp] = false
344
345
  opts.on( '-P', '--passive_ftp', 'Use pasive ftp') do
345
- options[:passive_ftp] = TRUE
346
+ options[:passive_ftp] = true
346
347
  end
347
348
 
348
349
  # Set a banner, displayed at the top of the help screen.
@@ -364,19 +365,18 @@ optparse.parse!
364
365
  ## MAIN
365
366
  ##############################################################################################
366
367
 
367
-
368
- if ENV['BLASTDB'] && File.exists?(ENV['BLASTDB'])
369
- formatted_db_path = ENV['BLASTDB']
368
+ if !ENV['BLASTDB'].nil?
369
+ formatted_db_path = File.expand_path(ENV['BLASTDB'])
370
370
  else # otherwise use ROOTPATH + DB
371
371
  formatted_db_path = File.expand_path(File.join(ROOT_PATH, "blast_dbs"))
372
- Dir.mkdir(formatted_db_path)
373
372
  end
373
+ Dir.mkdir(formatted_db_path) if !File.exists?(formatted_db_path)
374
374
 
375
375
 
376
376
  ENV['BLASTDB'] = formatted_db_path
377
377
  puts "Databases will be downloaded at: #{ENV['BLASTDB']}"
378
378
  puts "\nTo set the path for storing databases, execute next line in your terminal or add it to your .bash_profile:\n\n\texport BLASTDB=/my_path/\n\n"
379
-
379
+ puts "Patched? #{Bio::UniProtKB.patched?}"
380
380
  download_ncrna(formatted_db_path, options[:no_download]) if !options[:no_ncrna]
381
381
 
382
382
  if !options[:no_download]
@@ -86,9 +86,9 @@ optparse = OptionParser.new do |opts|
86
86
  end
87
87
  end
88
88
 
89
- options[:exonerate] = TRUE
89
+ options[:exonerate] = true
90
90
  opts.on( '-e', '--exonerate', 'Disables exonerate analysis' ) do |exonerate|
91
- options[:exonerate] = FALSE
91
+ options[:exonerate] = false
92
92
  end
93
93
 
94
94
  options[:fasta] = nil
@@ -106,9 +106,9 @@ optparse = OptionParser.new do |opts|
106
106
  options[:ident] = ident.to_f
107
107
  end
108
108
 
109
- options[:high_clustering] = FALSE
109
+ options[:high_clustering] = false
110
110
  opts.on( '-k', '--high_clustering', 'Only for representative transcriptome. Add a clustering step using pfam ids. Default false' ) do
111
- options[:high_clustering] = TRUE
111
+ options[:high_clustering] = true
112
112
  end
113
113
 
114
114
  options[:subject_coverage] = 0.25
@@ -165,7 +165,7 @@ optparse = OptionParser.new do |opts|
165
165
  options[:user_db] = nil
166
166
  opts.on( '-u', '--user_db UserDB', 'User blast+ database' ) do |db|
167
167
  options[:user_db] = db
168
- if !File.exists?(File.expand_path(db+'.psq'))
168
+ if Dir.glob(File.expand_path(db+'*.psq')).empty?
169
169
  puts "user database: #{options[:user_db]} was not found"
170
170
  exit
171
171
  end
@@ -196,9 +196,9 @@ optparse = OptionParser.new do |opts|
196
196
  options[:training_ident] = ident.to_f
197
197
  end
198
198
 
199
- options[:hdd] = FALSE
199
+ options[:hdd] = false
200
200
  opts.on( '-z', '--hdd', 'Write/use blast report on HDD' ) do |hdd|
201
- options[:hdd] = TRUE
201
+ options[:hdd] = true
202
202
  end
203
203
 
204
204
 
@@ -207,9 +207,9 @@ optparse = OptionParser.new do |opts|
207
207
  options[:files2map] = files2map.split(';').map{|map_files| map_files.split(',')}
208
208
  end
209
209
 
210
- options[:remove_unmapped] = TRUE
210
+ options[:remove_unmapped] = true
211
211
  opts.on('-R', '--remove_unmapped', 'When fastq files are provided, all sequences without at least a read pair are removed. When this option is enabled this filtering is disabled' ) do
212
- options[:remove_unmapped] = FALSE
212
+ options[:remove_unmapped] = false
213
213
  end
214
214
 
215
215
  # Set a banner, displayed at the top of the help screen.
@@ -268,8 +268,8 @@ if !File.exists?(ncrna_path) && options[:acess_db].include?('c')
268
268
  end
269
269
 
270
270
  if options[:acess_db].include?('s') || options[:acess_db].include?('t')
271
- sp_path=File.join(ENV['BLASTDB'],"sp_#{options[:tax_group]}","sp_#{options[:tax_group]}.psq")
272
- if !File.exists?(sp_path)
271
+ sp_path=File.join(ENV['BLASTDB'],"sp_#{options[:tax_group]}","sp_#{options[:tax_group]}*.psq")
272
+ if Dir.glob(sp_path).empty?
273
273
  puts "DB File #{sp_path} doesn't exists, or"
274
274
  puts "incorrect taxon group name: #{options[:tax_group]} choose:"
275
275
  puts optparse.help
@@ -120,29 +120,29 @@ optparse = OptionParser.new do |opts|
120
120
  options[:duplicate] = duplicate.to_i
121
121
  end
122
122
 
123
- options[:split]= FALSE
123
+ options[:split]= false
124
124
  opts.on( '-s', '--split', 'Split sequences in each case') do
125
125
  options[:duplicate] = 3
126
126
  end
127
127
 
128
- options[:chim]= TRUE
128
+ options[:chim]= true
129
129
  opts.on( '-c', '--chim', 'Make sequence set of chimeras') do
130
- options[:chim] = FALSE
130
+ options[:chim] = false
131
131
  end
132
132
 
133
- options[:indel]= TRUE
133
+ options[:indel]= true
134
134
  opts.on( '-i', '--indel', 'Make sequence set of indels') do
135
- options[:indel] = FALSE
135
+ options[:indel] = false
136
136
  end
137
137
 
138
- options[:pair]= TRUE
138
+ options[:pair]= true
139
139
  opts.on( '-p', '--pair', 'Make sequence set of paired') do
140
- options[:pair] = FALSE
140
+ options[:pair] = false
141
141
  end
142
142
 
143
- options[:trim]= TRUE
143
+ options[:trim]= true
144
144
  opts.on( '-t', '--trim', 'Make sequence set of trimmed') do
145
- options[:trim] = FALSE
145
+ options[:trim] = false
146
146
  end
147
147
 
148
148
  # Set a banner, displayed at the top of the help screen.
data/bin/make_user_db.rb CHANGED
@@ -63,9 +63,9 @@ optparse = OptionParser.new do |opts|
63
63
  options[:name] = name
64
64
  end
65
65
 
66
- options[:local] = FALSE
66
+ options[:local] = false
67
67
  opts.on( '-l', '--local', 'Only parse downloaded files without download them again') do
68
- options[:local] = TRUE
68
+ options[:local] = true
69
69
  end
70
70
 
71
71
  options[:user_fasta] = nil
@@ -32,7 +32,8 @@ Gem::Specification.new do |spec|
32
32
  spec.add_runtime_dependency 'scbi_blast'
33
33
  spec.add_runtime_dependency 'scbi_mapreduce'
34
34
  spec.add_runtime_dependency 'scbi_zcat'
35
- spec.add_runtime_dependency 'bio-cd-hit-report'
35
+ spec.add_runtime_dependency 'bio'
36
+ #spec.add_runtime_dependency 'bio-cd-hit-report' # Removed due to conflicts with bio-ruby2. This gem depends on bio ruby 1.4.3. cdhit options disabled
36
37
  spec.add_runtime_dependency 'report_html'
37
38
 
38
39
 
@@ -7,11 +7,11 @@ include ChimericSeqs
7
7
  ## MAIN FUNCTION
8
8
  #####################################################################
9
9
  def artifact?(seq, query, db_name, db_path, options, new_seqs)
10
- artifact = FALSE
10
+ artifact = false
11
11
  # UNMAPPED CONTIG DETECTION
12
12
  if query.nil? && seq.unmapped? #If seq is misassembled stop chimera analisys
13
13
  seq.hit = nil
14
- artifact = TRUE
14
+ artifact = true
15
15
  seq.type = UNMAPPED
16
16
  end
17
17
 
@@ -19,7 +19,7 @@ def artifact?(seq, query, db_name, db_path, options, new_seqs)
19
19
  # MISASSEMBLED DETECTION
20
20
  if !artifact && misassembled_detection(query) #If seq is misassembled stop chimera analisys
21
21
  seq.hit = query.hits.first
22
- artifact = TRUE
22
+ artifact = true
23
23
  seq.type = MISASSEMBLED
24
24
  seq.warnings('ERROR#1')
25
25
  end
@@ -35,7 +35,7 @@ def artifact?(seq, query, db_name, db_path, options, new_seqs)
35
35
  else
36
36
  seq.hit = query.hits.first
37
37
  end
38
- artifact = TRUE
38
+ artifact = true
39
39
  seq.type = OTHER
40
40
  seq.warnings('ERROR#2')
41
41
  end
@@ -55,7 +55,7 @@ def artifact?(seq, query, db_name, db_path, options, new_seqs)
55
55
  new_seqs.concat(chimera)
56
56
  seq.db_name = db_name
57
57
  seq.type = CHIMERA
58
- artifact = TRUE
58
+ artifact = true
59
59
  end
60
60
  end
61
61
  end
@@ -64,8 +64,8 @@ def artifact?(seq, query, db_name, db_path, options, new_seqs)
64
64
  puts seq.prot_annot_calification
65
65
  end
66
66
  seq.db_name = db_name
67
- seq.save_fasta = FALSE
68
- seq.ignore = TRUE
67
+ seq.save_fasta = false
68
+ seq.ignore = true
69
69
  end
70
70
  return artifact
71
71
  end
@@ -0,0 +1,93 @@
1
+ module Bio
2
+ class UniProtKB
3
+ def self.patched?
4
+ return true
5
+ end
6
+
7
+ def ft(feature_key = nil)
8
+ return ft[feature_key] if feature_key
9
+ return @data['FT'] if @data['FT']
10
+
11
+ table = []
12
+ begin
13
+ get('FT').split("\n").each do |line|
14
+ if line =~ /^FT \w/
15
+ feature = line.chomp.ljust(74)
16
+ table << [feature[ 5..12].strip, # Feature Name
17
+ feature[14..19].strip, # From
18
+ feature[21..26].strip, # To
19
+ feature[34..74].strip ] # Description
20
+ else
21
+ table.last << line.chomp.sub!(/^FT +/, '')
22
+ end
23
+ end
24
+
25
+ # Joining Description lines
26
+ table = table.map { |feature|
27
+ ftid = feature.pop if feature.last =~ /FTId=/
28
+ if feature.size > 4
29
+ feature = [feature[0],
30
+ feature[1],
31
+ feature[2],
32
+ feature[3, feature.size - 3].join(" ")]
33
+ end
34
+ feature << if ftid then ftid else '' end
35
+ }
36
+
37
+ ###### PATCH TO RECOVER PARSER
38
+ to_delete = []
39
+ table.each_with_index do |feature, i|
40
+ name, from, to, descrition = feature
41
+ if from.empty?
42
+ coors = to.split("..")
43
+ if coors.length == 2
44
+ feature[1] = coors[0]
45
+ feature[2] = coors[1]
46
+ elsif /[^\d]/ =~ to
47
+ to_delete << i
48
+ else
49
+ feature[1] = to
50
+ feature[2] = to
51
+ end
52
+ end
53
+ end
54
+ to_delete.reverse_each{|i| table.delete_at(i)}
55
+ #####
56
+
57
+ hash = {}
58
+ table.each do |feature|
59
+ hash[feature[0]] = [] unless hash[feature[0]]
60
+ hash[feature[0]] << {
61
+ # Removing '<', '>' or '?' in FROM/TO endopoint.
62
+ 'From' => feature[1].sub(/\D/, '').to_i,
63
+ 'To' => feature[2].sub(/\D/, '').to_i,
64
+ 'Description' => feature[3],
65
+ 'FTId' => feature[4].to_s.sub(/\/FTId=/, '').sub(/\.$/, ''),
66
+ 'diff' => [],
67
+ 'original' => feature
68
+ }
69
+
70
+ case feature[0]
71
+ when 'VARSPLIC', 'VARIANT', 'VAR_SEQ', 'CONFLICT'
72
+ case hash[feature[0]].last['Description']
73
+ when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
74
+ original_res = $1
75
+ changed_res = $2
76
+ original_res = original_res.gsub(/ /,'').strip
77
+ chenged_res = changed_res.gsub(/ /,'').strip
78
+ when /Missing/i
79
+ original_res = seq.subseq(hash[feature[0]].last['From'],
80
+ hash[feature[0]].last['To'])
81
+ changed_res = ''
82
+ end
83
+ hash[feature[0]].last['diff'] = [original_res, chenged_res]
84
+ end
85
+ end
86
+ rescue
87
+ raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{self.get('FT')}'\n"
88
+ end
89
+
90
+ @data['FT'] = hash
91
+ end
92
+ end
93
+ end
@@ -105,31 +105,31 @@ def set_thresold_evalue(hits)
105
105
  end
106
106
 
107
107
  def same_subject_hsp(hit, second_hit)
108
- same = FALSE
108
+ same = false
109
109
  if hit.acc == second_hit.acc
110
110
  if hit.s_beg <= second_hit.s_beg && hit.s_end >= hit.s_end && (second_hit.s_beg - hit.s_end).abs > 1
111
- same = TRUE
111
+ same = true
112
112
  end
113
113
  end
114
114
  return same
115
115
  end
116
116
 
117
117
  def same_query_hsp(hit, second_hit)
118
- same = FALSE
118
+ same = false
119
119
  if hit.acc == second_hit.acc
120
120
  if hit.q_beg <= second_hit.q_beg && hit.q_end >= hit.q_end && (second_hit.q_beg - hit.q_end).abs > 1
121
- same = TRUE
121
+ same = true
122
122
  end
123
123
  end
124
124
  return same
125
125
  end
126
126
 
127
127
  def same_sense?(hit, second_hit)
128
- same= FALSE
128
+ same= false
129
129
  hit_sense = hit.q_frame <=> 0
130
130
  second_hit_sense = second_hit.q_frame <=> 0
131
131
  if hit_sense == second_hit_sense
132
- same = TRUE
132
+ same = true
133
133
  end
134
134
  return same
135
135
  end
@@ -158,7 +158,7 @@ def clean_by_query_length_match(blast_result, min_len_nt)
158
158
  end
159
159
 
160
160
 
161
- def clean_overlapping_hsps(blast_result, keep_if_diff_sense = FALSE)
161
+ def clean_overlapping_hsps(blast_result, keep_if_diff_sense = false)
162
162
  blast_result.querys.each do |query|
163
163
  if query.hits.length > 1
164
164
  query.hits.each_with_index do |hit, j|
@@ -190,7 +190,7 @@ end
190
190
  #####################################################################
191
191
 
192
192
  def misassembled_detection(query)
193
- miss=FALSE
193
+ miss=false
194
194
  hits = cluster_hsps(query.hits)
195
195
  misassembled_hits = []
196
196
  hits.each do |hit|
@@ -202,7 +202,7 @@ def misassembled_detection(query)
202
202
  end
203
203
  end
204
204
  if misassembled_hits.length*1.0/ hits.length > 0.5
205
- miss = TRUE
205
+ miss = true
206
206
  else #Remove missassembled hits to avoid broken analysis
207
207
  query.hits.reverse_each do |hsp|
208
208
  if misassembled_hits.include?(hsp.acc)
@@ -214,16 +214,16 @@ def misassembled_detection(query)
214
214
  end
215
215
 
216
216
  def multiple_hsps(query, num)
217
- multiple = FALSE
217
+ multiple = false
218
218
  hsps = query.hits.select{|h| h.acc == query.hits.first.acc}
219
219
  if hsps.length >= num
220
- multiple = TRUE
220
+ multiple = true
221
221
  end
222
222
  return multiple
223
223
  end
224
224
 
225
225
  def overlapping_hsps_on_subject(query)
226
- overlapping = FALSE
226
+ overlapping = false
227
227
  current_hit = query.hits.first.acc
228
228
  complete_hit = []
229
229
  cleaned_hits = []
@@ -252,16 +252,16 @@ def clean_subject_overlapping_hsps(complete_hit, cleaned_hits)
252
252
  end
253
253
 
254
254
  def subject_overlapping_hsps(hit)
255
- overlapping = FALSE
255
+ overlapping = false
256
256
  hsp_table = hsps_relationship_subject(hit)
257
257
  if !hsp_table.empty?
258
258
  hit = clean_hsp_by_identity(hit, 55)
259
259
  if hit.empty?
260
- overlapping = TRUE
260
+ overlapping = true
261
261
  else
262
262
  hsp_table = hsps_relationship_subject(hit)
263
263
  if !hsp_table.empty?
264
- overlapping = TRUE
264
+ overlapping = true
265
265
  end
266
266
  end
267
267
  end
@@ -286,10 +286,10 @@ def hsps_relationship_subject(hit)
286
286
  end
287
287
 
288
288
  def same_subject_hsp(hit, second_hit)
289
- same = FALSE
289
+ same = false
290
290
  if hit.acc == second_hit.acc
291
291
  if hit.s_beg <= second_hit.s_beg && hit.s_end >= hit.s_end && (second_hit.s_beg - hit.s_end).abs > 1
292
- same = TRUE
292
+ same = true
293
293
  end
294
294
  end
295
295
  return same
@@ -59,8 +59,8 @@ class Cdhit
59
59
  if master_seq.db != 'sp'
60
60
  sp_seq=get_sp(cluster)
61
61
  if !sp_seq.nil?
62
- cluster.map{|seq| seq.master=FALSE}
63
- sp_seq.master=TRUE
62
+ cluster.map{|seq| seq.master=false}
63
+ sp_seq.master= true
64
64
  end
65
65
  end
66
66
  }
@@ -109,7 +109,7 @@ class Cdhit
109
109
 
110
110
 
111
111
  def cd_hit_clusters(clust_file)
112
- require 'bio-cd-hit-report'
112
+ #require 'bio-cd-hit-report'
113
113
  report = Bio::CdHitReport.new(clust_file)
114
114
  report.each_cluster do |cluster|
115
115
  clust=[]
@@ -128,9 +128,9 @@ class Cdhit
128
128
  member.gsub!('>','')
129
129
  fields = member.split(',')
130
130
  data = fields[1].split(' ',2)
131
- master = FALSE
131
+ master = false
132
132
  if data[1] == '*'
133
- master = TRUE
133
+ master = true
134
134
  end
135
135
  return data[0],master
136
136
  end
@@ -110,8 +110,8 @@ module ChimericSeqs
110
110
  seq_bak.clean_warnings
111
111
  seq_bak.seq_name += "_split_#{hit_position}"
112
112
  seq_bak.clean_orfs
113
- seq_bak.save_fasta = TRUE
114
- seq_bak.ignore = FALSE
113
+ seq_bak.save_fasta = true
114
+ seq_bak.ignore = false
115
115
 
116
116
  # Cut sequence and move hit/hsps limits
117
117
  #----------------------------------------
@@ -244,10 +244,10 @@ module ChimericSeqs
244
244
  end
245
245
 
246
246
  def hit_is_in?(h_beg, h_end, hit)
247
- is=FALSE
247
+ is=false
248
248
  # CONTIENE #OVERLAP
249
249
  if h_beg <= hit[BEG] && h_end > hit[BEG] || hit[BEG] <= h_beg && hit[STOP] > h_beg
250
- is=TRUE
250
+ is=true
251
251
  end
252
252
  return is
253
253
  end
@@ -324,7 +324,7 @@ module ChimericSeqs
324
324
  cmd='clustalo -i - -o /dev/null --percent-id --full --distmat-out=/dev/stdout --force'
325
325
  clustal_matrix = nil
326
326
  IO.popen(cmd,'w+') {|clustal|
327
- clustal.sync = TRUE
327
+ clustal.sync = true
328
328
  clustal.write(seq_fasta)
329
329
  clustal.close_write
330
330
  clustal_matrix = clustal.readlines
@@ -101,7 +101,7 @@ module CommonFunctions
101
101
  hit.q_frame = -hit.q_frame
102
102
  hit.q_end = query_fasta.length - 1 - hit.q_end
103
103
  hit.q_beg = query_fasta.length - 1 - hit.q_beg
104
- hit.reversed = TRUE
104
+ hit.reversed = true
105
105
  query_fasta = query_fasta.complementary_dna # ESTO REALMENTE HACE LA REVERSO COMPLEMENTARIA.
106
106
  if hit.class.to_s == 'ExoBlastHit'
107
107
  hit.q_frameshift.map!{|position, num_nts|
@@ -39,7 +39,7 @@ end
39
39
  class ExonerateResult
40
40
 
41
41
  # Parser initialization
42
- def initialize(input, seqs= nil, query_seqs = nil, all = TRUE)
42
+ def initialize(input, seqs= nil, query_seqs = nil, all = true)
43
43
  @querys = []
44
44
  @seqs = seqs #unigenes
45
45
  @prot_seqs = query_seqs#prot
@@ -106,8 +106,8 @@ class ExonerateResult
106
106
 
107
107
  #this method only works fine with --model protein2dna parameter of exonerate
108
108
  def hiting(features, tags, query) #Convierte las coordenadas relativas del exonerate a absolutas tipo blast, definiendo solo los hits
109
- do_align = FALSE
110
- do_align = TRUE if !@prot_seqs.nil? && !@seqs.nil?
109
+ do_align = false
110
+ do_align = true if !@prot_seqs.nil? && !@seqs.nil?
111
111
  start_target = features['target_start_align']#Unigen
112
112
  start_query = features['query_start_align'] #proteina
113
113
  ends_target = features['target_end_align']
@@ -143,7 +143,7 @@ class ExonerateResult
143
143
  target_alignment << target_seq[counter_target, tag[TARGET]].translate
144
144
  end
145
145
  if tag[OPERATION] == 'F'
146
- if tag[TARGET] > 0 && tag[TARGET] < 3 #TRUE FRAMESHIFT
146
+ if tag[TARGET] > 0 && tag[TARGET] < 3 #true FRAMESHIFT
147
147
  gap_shift += 1
148
148
  if tags[n_operation+1][OPERATION] != 'G' #there are frameshift that not insert a gap, we do it
149
149
  query_alignment << '-' if do_align
@@ -203,7 +203,7 @@ class ExonerateResult
203
203
  def define_hit_parameters(hit, features, tags)
204
204
  hit.gaps = 0
205
205
  tags.map{|aln| hit.gaps += 1 if aln[0] == 'G'}
206
- hit.reversed = FALSE
206
+ hit.reversed = false
207
207
  hit.align_len =(features['query_end_align'] - features['query_start_align']).abs+1
208
208
  hit.mismatches=0
209
209
  hit.e_val=0
@@ -171,9 +171,9 @@ module FlAnalysis
171
171
  end
172
172
 
173
173
  if atg_status == 'putative' || end_status == 'putative'
174
- status = FALSE # Putative
174
+ status = false # Putative
175
175
  else
176
- status = TRUE # Sure
176
+ status = true # Sure
177
177
  end
178
178
 
179
179
  return type, status
@@ -187,7 +187,7 @@ module FlAnalysis
187
187
  $global_warnings << ['SeqShorter', final_prot.length, final_hit.s_len]
188
188
  if final_prot.length + 100 < final_hit.s_len || final_prot.length*2 < final_hit.s_len
189
189
  if type == COMPLETE
190
- status = FALSE
190
+ status = false
191
191
  $global_warnings << 'VeryShorter'
192
192
  end
193
193
  end
@@ -209,7 +209,7 @@ module FlAnalysis
209
209
  $global_warnings = [] # Clean all warnings for current sequence
210
210
  seq.seq_nt = mark_nt_seqs(final_hit, query_fasta)
211
211
  if type == COMPLETE
212
- seq.ignore = TRUE
212
+ seq.ignore = true
213
213
  end
214
214
  end
215
215
  if $verbose > 2
@@ -265,8 +265,8 @@ module FlAnalysis
265
265
 
266
266
  ## VERBOSE METHODS
267
267
  def show_nts
268
- show = FALSE
269
- show = TRUE if $verbose && $verbose > 3
268
+ show = false
269
+ show = true if $verbose && $verbose > 3
270
270
  return show
271
271
  end
272
272
 
@@ -74,7 +74,7 @@ module FlnStats
74
74
  if !$1.nil?
75
75
  organism = $1
76
76
  else
77
- name =~ /(\w+ \w+) \(([\w ]+)\)/
77
+ name =~ /(\w+ \w+) \(([\w \/]+)\)/
78
78
  if !$1.nil?
79
79
  organism = $1
80
80
  end
@@ -610,4 +610,4 @@ module FlnStats
610
610
  html = '<div style="font-size:25px; margin: 10"><b>'+title+'</b></div>'
611
611
  return html
612
612
  end
613
- end
613
+ end
@@ -35,7 +35,7 @@ end
35
35
  def do_makeblastdb(seqs, output, dbtype)
36
36
  cmd="makeblastdb -in - -out #{output} -title #{File.basename(output)} -dbtype #{dbtype} -parse_seqids"
37
37
  IO.popen(cmd,'w+') {|makedb|
38
- makedb.sync = TRUE
38
+ makedb.sync = true
39
39
  makedb.write(seqs)
40
40
  makedb.close_write
41
41
  puts makedb.readlines
@@ -193,7 +193,7 @@ class MyWorker < ScbiMapreduce::Worker
193
193
 
194
194
 
195
195
  # ejecuta blast utilizando los parametros fichero de entrada, base de datos, tipo de blast y evalue
196
- def run_blast(input, database, blast_type, evalue, additional_blast_options, do_exonerate, filter = TRUE)
196
+ def run_blast(input, database, blast_type, evalue, additional_blast_options, do_exonerate, filter = true)
197
197
  if !input.empty? && !input.nil?
198
198
  $WORKER_LOG.info "DB: #{File.basename(database)} #{input.length}"
199
199
  blast = BatchBlast.new("-db #{database}", blast_type, "-evalue #{evalue} #{additional_blast_options}")
@@ -202,7 +202,7 @@ class MyWorker < ScbiMapreduce::Worker
202
202
  if @options[:hdd] #Write/parse blast on Disk
203
203
  file_name = file_path+'.blast' #Each blast is identified with database_name and first sequence's name on chunk
204
204
  if !File.exists?(file_name)
205
- blast_result = blast.do_blast_seqs(input, :table, TRUE, file_name)
205
+ blast_result = blast.do_blast_seqs(input, :table, true, file_name)
206
206
  else
207
207
  blast = nil
208
208
  blast_result=BlastTableResult.new(file_name)
@@ -223,8 +223,8 @@ class MyWorker < ScbiMapreduce::Worker
223
223
  end
224
224
 
225
225
  def rescue_sequence(e, seq, status)
226
- seq.save_fasta = FALSE
227
- seq.ignore = TRUE
226
+ seq.save_fasta = false
227
+ seq.ignore = true
228
228
  seq.type = FAILED
229
229
  puts '-- '+seq.seq_name+' FAILED ANALYSIS -- '+status,
230
230
  e.message,
@@ -232,7 +232,7 @@ class MyWorker < ScbiMapreduce::Worker
232
232
  end
233
233
 
234
234
  def check_ncRNA(check_seqs, ncrna_path, blast_type, evalue)
235
- my_blast = run_blast(check_seqs, ncrna_path, blast_type, evalue, '', FALSE, nil)
235
+ my_blast = run_blast(check_seqs, ncrna_path, blast_type, evalue, '', false, nil)
236
236
  if !my_blast.nil?
237
237
  check_seqs.each_with_index do |seq,i|
238
238
  find_nc_rna(seq, my_blast.querys[i])
@@ -280,7 +280,7 @@ class MyWorker < ScbiMapreduce::Worker
280
280
 
281
281
  if seq.type == FAILED
282
282
  seq.type = UNKNOWN
283
- seq.ignore = FALSE
283
+ seq.ignore = false
284
284
  else
285
285
  best_option.warnings(warning) if !warning.nil?
286
286
  end
@@ -28,7 +28,7 @@ class MyWorkerEst < MyWorker
28
28
  #####################################################################################
29
29
 
30
30
  def blastEST(array_seqs)
31
- blast = run_blast(array_seqs, @blast_path, 'blastn', 1e-6, nil, FALSE)
31
+ blast = run_blast(array_seqs, @blast_path, 'blastn', 1e-6, nil, false)
32
32
  if blast.nil?
33
33
  $LOG.info 'BLAST FAILED'
34
34
  Process.exit(-1)
@@ -444,7 +444,7 @@ class MyWorkerManagerFln < ScbiMapreduce::WorkManager
444
444
  @@stats_hash['coding'] += 1
445
445
  coding = select_orf(coding)
446
446
  if coding[1] == 'complete'
447
- seq.status = TRUE
447
+ seq.status = true
448
448
  @@stats_hash['coding_sure'] += 1
449
449
  else
450
450
  @@stats_hash['coding_putative'] += 1
@@ -16,7 +16,7 @@ class Sequence
16
16
  @seq_aa = nil # Protein sequence generated over unigen
17
17
  @db =nil
18
18
  @type = UNKNOWN # See types.rb
19
- @status = FALSE # TRUE => Sure, FALSE => Putative
19
+ @status = false # true => Sure, false => Putative
20
20
  @id = nil #Prot or EST id, can be several => array
21
21
  @warnings = []
22
22
  @annotations=[]
@@ -27,9 +27,9 @@ class Sequence
27
27
  @fpkm = []
28
28
  @coverage_analysis = []
29
29
 
30
- @area_without_annotation=FALSE
31
- @save_fasta=TRUE
32
- @ignore=FALSE
30
+ @area_without_annotation=false
31
+ @save_fasta=true
32
+ @ignore = false
33
33
  @hit=nil
34
34
  @t_code=0
35
35
  end
@@ -86,7 +86,7 @@ class Sequence
86
86
 
87
87
  def reset_classification
88
88
  @type = UNKNOWN
89
- @status = FALSE
89
+ @status = false
90
90
  end
91
91
 
92
92
  def clean_warnings
@@ -150,7 +150,7 @@ class Sequence
150
150
  def test_code(test_code)
151
151
  @t_code = test_code
152
152
  if @t_code >= 0.95
153
- @status = TRUE
153
+ @status = true
154
154
  end
155
155
  end
156
156
 
@@ -470,7 +470,7 @@ class Sequence
470
470
  upstream_annotation_space = hit.q_beg
471
471
  downstream_annotation_space = @fasta_length - hit.q_end
472
472
  if upstream_annotation_space >= 150 || downstream_annotation_space >= 150
473
- @area_without_annotation = TRUE
473
+ @area_without_annotation = true
474
474
  end
475
475
  return @area_without_annotation
476
476
  end
@@ -490,8 +490,8 @@ class Sequence
490
490
  end
491
491
 
492
492
  def unmapped?
493
- res = FALSE
494
- res = TRUE if !@coverage_analysis.empty? && @coverage_analysis[3] == 0 #3 => percentage of sequence covered by reads
493
+ res = false
494
+ res = true if !@coverage_analysis.empty? && @coverage_analysis[3] == 0 #3 => percentage of sequence covered by reads
495
495
  return res
496
496
  end
497
497
  end
@@ -165,7 +165,7 @@ class UneLosHit
165
165
  #if frame_ori < 0 && h.q_frame > 0 || frame_ori > 0 && h.q_frame < 0
166
166
  if h.q_frame < 0 # si la secuencia esta al reves le damos la vuelta
167
167
  query_fasta = reverse_seq(query_fasta_ori, h)
168
- h.reversed = TRUE
168
+ h.reversed = true
169
169
  end
170
170
  misma_id << h
171
171
  #end
@@ -176,17 +176,17 @@ class UneLosHit
176
176
  end
177
177
 
178
178
  def overlapping_hits?(hit)
179
- overlap = FALSE
179
+ overlap = false
180
180
  if @final_hit.q_end >= hit.q_beg && @final_hit.q_end < hit.q_end && @final_hit.q_end < hit.q_end
181
- overlap = TRUE
181
+ overlap = true
182
182
  end
183
183
  return overlap
184
184
  end
185
185
 
186
186
  def separated_hits?(hit)
187
- separated=FALSE
187
+ separated=false
188
188
  if @final_hit.q_end < hit.q_beg && hit.q_end > @final_hit.q_end
189
- separated = TRUE
189
+ separated = true
190
190
  end
191
191
  return separated
192
192
  end
@@ -1,3 +1,3 @@
1
1
  module FullLengtherNext
2
- VERSION = '1.0.1'
2
+ VERSION = '1.0.4'
3
3
  end
metadata CHANGED
@@ -1,16 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: full_lengther_next
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pedro Seoane
8
8
  - Noe Fernandez
9
9
  - Dario Guerrero
10
- autorequire:
10
+ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2018-03-12 00:00:00.000000000 Z
13
+ date: 2022-09-07 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: xml-simple
@@ -83,7 +83,7 @@ dependencies:
83
83
  - !ruby/object:Gem::Version
84
84
  version: '0'
85
85
  - !ruby/object:Gem::Dependency
86
- name: bio-cd-hit-report
86
+ name: bio
87
87
  requirement: !ruby/object:Gem::Requirement
88
88
  requirements:
89
89
  - - ">="
@@ -163,12 +163,10 @@ email:
163
163
  - noeisneo@gmail.com
164
164
  - dariogf@gmail.com
165
165
  executables:
166
- - console
167
166
  - download_fln_dbs.rb
168
167
  - full_lengther_next
169
168
  - make_test_dataset.rb
170
169
  - make_user_db.rb
171
- - setup
172
170
  extensions: []
173
171
  extra_rdoc_files: []
174
172
  files:
@@ -180,15 +178,14 @@ files:
180
178
  - LICENSE.txt
181
179
  - README.md
182
180
  - Rakefile
183
- - bin/console
184
181
  - bin/download_fln_dbs.rb
185
182
  - bin/full_lengther_next
186
183
  - bin/make_test_dataset.rb
187
184
  - bin/make_user_db.rb
188
- - bin/setup
189
185
  - full_lengther_next.gemspec
190
186
  - lib/full_lengther_next.rb
191
187
  - lib/full_lengther_next/artifacts.rb
188
+ - lib/full_lengther_next/bio_patch.rb
192
189
  - lib/full_lengther_next/blast_functions.rb
193
190
  - lib/full_lengther_next/cdhit.rb
194
191
  - lib/full_lengther_next/chimeric_seqs.rb
@@ -220,7 +217,7 @@ homepage: https://github.com/seoanezonjic
220
217
  licenses:
221
218
  - MIT
222
219
  metadata: {}
223
- post_install_message:
220
+ post_install_message:
224
221
  rdoc_options: []
225
222
  require_paths:
226
223
  - lib
@@ -235,9 +232,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
235
232
  - !ruby/object:Gem::Version
236
233
  version: '0'
237
234
  requirements: []
238
- rubyforge_project:
239
- rubygems_version: 2.4.8
240
- signing_key:
235
+ rubygems_version: 3.3.7
236
+ signing_key:
241
237
  specification_version: 4
242
238
  summary: Tool to annotate transcriptomes and it is able to stablish the integrity
243
239
  of each transcript. Also, FLN can detect novel genes on a target organism.
data/bin/console DELETED
@@ -1,14 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require "bundler/setup"
4
- require "full_lengther_next"
5
-
6
- # You can add fixtures and/or initialization code here to make experimenting
7
- # with your gem easier. You can also use a different console, if you like.
8
-
9
- # (If you use this, don't forget to add pry to your Gemfile!)
10
- # require "pry"
11
- # Pry.start
12
-
13
- require "irb"
14
- IRB.start
data/bin/setup DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env bash
2
- set -euo pipefail
3
- IFS=$'\n\t'
4
- set -vx
5
-
6
- bundle install
7
-
8
- # Do any other automated setup that you need to do here