full_lengther_next 0.6.2 → 0.9.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +5 -0
  5. data/CODE_OF_CONDUCT.md +49 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE.txt +21 -0
  8. data/{README.rdoc → README.md} +0 -0
  9. data/Rakefile +6 -37
  10. data/bin/console +14 -0
  11. data/bin/download_fln_dbs.rb +2 -7
  12. data/bin/full_lengther_next +85 -6
  13. data/bin/make_user_db.rb +13 -5
  14. data/bin/setup +8 -0
  15. data/full_lengther_next.gemspec +42 -0
  16. data/lib/full_lengther_next.rb +2 -10
  17. data/lib/full_lengther_next/artifacts.rb +74 -0
  18. data/lib/full_lengther_next/{classes/blast_functions.rb → blast_functions.rb} +0 -0
  19. data/lib/full_lengther_next/{classes/cdhit.rb → cdhit.rb} +0 -0
  20. data/lib/full_lengther_next/{classes/chimeric_seqs.rb → chimeric_seqs.rb} +0 -0
  21. data/lib/full_lengther_next/{classes/common_functions.rb → common_functions.rb} +0 -0
  22. data/lib/full_lengther_next/{classes/exonerate_result.rb → exonerate_result.rb} +0 -0
  23. data/lib/full_lengther_next/{classes/fl_analysis.rb → fl_analysis.rb} +0 -0
  24. data/lib/full_lengther_next/{classes/fl_string_utils.rb → fl_string_utils.rb} +0 -0
  25. data/lib/full_lengther_next/fln_stats.rb +613 -0
  26. data/lib/full_lengther_next/go_methods.rb +42 -0
  27. data/lib/full_lengther_next/{classes/handle_db.rb → handle_db.rb} +0 -0
  28. data/lib/full_lengther_next/mapping.rb +296 -0
  29. data/lib/full_lengther_next/{classes/my_worker.rb → my_worker.rb} +71 -9
  30. data/lib/full_lengther_next/{classes/my_worker_EST.rb → my_worker_EST.rb} +0 -0
  31. data/lib/full_lengther_next/{classes/my_worker_manager_EST.rb → my_worker_manager_EST.rb} +0 -0
  32. data/lib/full_lengther_next/{classes/my_worker_manager_fln.rb → my_worker_manager_fln.rb} +181 -16
  33. data/lib/full_lengther_next/{classes/nc_rna.rb → nc_rna.rb} +0 -0
  34. data/lib/full_lengther_next/{classes/orf.rb → orf.rb} +0 -0
  35. data/lib/full_lengther_next/{classes/reptrans.rb → reptrans.rb} +9 -5
  36. data/lib/full_lengther_next/{classes/sequence.rb → sequence.rb} +26 -1
  37. data/lib/full_lengther_next/{classes/test_code.rb → test_code.rb} +1 -1
  38. data/lib/full_lengther_next/{classes/types.rb → types.rb} +3 -2
  39. data/lib/full_lengther_next/{classes/une_los_hit.rb → une_los_hit.rb} +0 -0
  40. data/lib/full_lengther_next/version.rb +3 -0
  41. data/lib/full_lengther_next/{classes/warnings.rb → warnings.rb} +0 -0
  42. data/report_templates/general_summary.erb +140 -0
  43. data/report_templates/mapping_summary.erb +98 -0
  44. data/report_templates/reptrans_summary.erb +32 -0
  45. metadata +112 -134
  46. data/.gemtest +0 -0
  47. data/History.txt +0 -32
  48. data/Manifest.txt +0 -44
  49. data/PostInstall.txt +0 -6
  50. data/bin/plot_fln.rb +0 -270
  51. data/bin/plot_taxonomy.rb +0 -70
  52. data/lib/expresscanvas.zip +0 -0
  53. data/lib/full_lengther_next/classes/artifacts.rb +0 -66
  54. data/lib/full_lengther_next/classes/fln_stats.rb +0 -641
  55. data/script/console +0 -10
  56. data/script/destroy +0 -14
  57. data/script/generate +0 -14
  58. data/test/test_full_lengther_next.rb +0 -11
  59. data/test/test_helper.rb +0 -3
data/.gemtest DELETED
File without changes
@@ -1,32 +0,0 @@
1
- === 0.1.0 2013-09-12
2
-
3
- Major rewrite of script
4
-
5
- === 0.0.7 2012-07-25
6
-
7
- Chimera detection
8
-
9
- === 0.0.6 2012-04-16
10
-
11
- Fixed some cosmetic issues and parameters names
12
-
13
- === 0.0.5 2012-03-09
14
-
15
- Fix NCRNA annotation
16
-
17
- === 0.0.4 2012-03-07
18
-
19
- Fixed stats for 0 seqs
20
-
21
- === 0.0.3 2012-03-01
22
-
23
- Added ncrna
24
-
25
- === 0.0.2 2012-02-07
26
-
27
- Added FULL_LENGTH_NEXT_INIT environment variable for clustered installations
28
-
29
- === 0.0.1 2012-01-30
30
-
31
- * 1 major enhancement:
32
- * Initial release
@@ -1,44 +0,0 @@
1
- Rakefile
2
- script
3
- script/generate
4
- script/destroy
5
- script/console
6
- test
7
- test/test_full_lengther_next.rb
8
- test/test_helper.rb
9
- bin/plot_taxonomy.rb
10
- bin/plot_fln.rb
11
- bin/download_fln_dbs.rb
12
- bin/full_lengther_next
13
- bin/make_user_db.rb
14
- bin/make_test_dataset.rb
15
- PostInstall.txt
16
- README.rdoc
17
- History.txt
18
- Manifest.txt
19
- lib/full_lengther_next
20
- lib/full_lengther_next/classes
21
- lib/full_lengther_next/classes/blast_functions.rb
22
- lib/full_lengther_next/classes/my_worker_manager_fln.rb
23
- lib/full_lengther_next/classes/types.rb
24
- lib/full_lengther_next/classes/chimeric_seqs.rb
25
- lib/full_lengther_next/classes/artifacts.rb
26
- lib/full_lengther_next/classes/cdhit.rb
27
- lib/full_lengther_next/classes/fl_analysis.rb
28
- lib/full_lengther_next/classes/fl_string_utils.rb
29
- lib/full_lengther_next/classes/my_worker.rb
30
- lib/full_lengther_next/classes/sequence.rb
31
- lib/full_lengther_next/classes/my_worker_EST.rb
32
- lib/full_lengther_next/classes/test_code.rb
33
- lib/full_lengther_next/classes/orf.rb
34
- lib/full_lengther_next/classes/une_los_hit.rb
35
- lib/full_lengther_next/classes/warnings.rb
36
- lib/full_lengther_next/classes/fln_stats.rb
37
- lib/full_lengther_next/classes/my_worker_manager_EST.rb
38
- lib/full_lengther_next/classes/nc_rna.rb
39
- lib/full_lengther_next/classes/reptrans.rb
40
- lib/full_lengther_next/classes/common_functions.rb
41
- lib/full_lengther_next/classes/exonerate_result.rb
42
- lib/full_lengther_next/classes/handle_db.rb
43
- lib/full_lengther_next.rb
44
- lib/expresscanvas.zip
@@ -1,6 +0,0 @@
1
- For more information on full_lengther_next, see * http://www.scbi.uma.es/downloads
2
-
3
- Now it is time to install databases using the following command:
4
-
5
- $> algo
6
-
@@ -1,270 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'optparse'
3
-
4
- #############################################
5
- ### FUNCTIONS
6
- #############################################
7
- def create_fln_hash(path)
8
- fln_hash = {}
9
- file = File.open(path, 'r').each do |line|
10
- fields = line.chomp.split
11
- fln_hash[fields[1]] = fields[0].to_i
12
- end
13
- fln_hash['<=200seqs'] = fln_hash['good_seqs'] - fln_hash['sequences_>200']
14
- fln_hash['>200seqs'] = fln_hash['sequences_>200'] - fln_hash['sequences_>500']
15
- fln_hash['<=200unk'] = fln_hash['unknown'] - fln_hash['unknown_>200']
16
- fln_hash['>200unk'] = fln_hash['unknown_>200'] - fln_hash['unknown_>500']
17
- fln_hash['<=200cod'] = fln_hash['coding'] - fln_hash['coding_>200']
18
- fln_hash['>200cod'] = fln_hash['coding_>200'] - fln_hash['coding_>500']
19
- fln_hash['no_match_db'] = fln_hash['coding'] + fln_hash['unknown']
20
-
21
- return fln_hash
22
- end
23
-
24
- def graph_table(fln_hash, output, graph_type, header_titles, categories_names, keywords, stacked_cols, titles = nil)
25
- table = []
26
- cmd = basic_plot_command(graph_type)
27
- cmd << "set output '#{output}.png'\n"
28
-
29
- if fln_hash.class.to_s == 'Array'
30
- table << header(fln_hash.length, header_titles)
31
- table.concat(categories(categories_names))
32
- cmd << 'plot '
33
- count = 0
34
- fln_hash.each_with_index do |hash,i|
35
- table = fill_table(hash, table, keywords, stacked_cols, graph_type)
36
- if i == 0
37
- first = TRUE
38
- else
39
- first = FALSE
40
- end
41
- cmd << histogram(stacked_cols, output,titles[i], count, graph_type, first)
42
- if i < fln_hash.length-1
43
- cmd << "\\\n"
44
- end
45
- if !graph_type.include?('clustered')
46
- count += stacked_cols
47
- else
48
- count += 1
49
- end
50
- end
51
- else
52
- table << header(1, header_titles)
53
- table.concat(categories(categories_names))
54
- table = fill_table(fln_hash, table, keywords, stacked_cols, graph_type)
55
- cmd << 'plot '+ histogram(stacked_cols, output, '', 0, graph_type, TRUE)
56
- end
57
- if table.length ==2 # Dummy row for a rowstacked graph with only one category
58
- table << table[1].dup
59
- table[2].each_with_index do |cell, i|
60
- if i== 0
61
- table[2][i] = '&'
62
- else
63
- table[2][i] = 0
64
- end
65
- end
66
- end
67
- cmd.chop!
68
- write_table(table, output)
69
- write_cmd(cmd)
70
- system('gnuplot cmd.dem')
71
- end
72
-
73
- def histogram(columns, file, name, add, graph_type, first)
74
- cmd = ""
75
- if first
76
- cmd << "newhistogram \"#{name}\", '#{file}' using 2:xtic(1) t col,"
77
- else
78
- cmd << "newhistogram \"#{name}\", '' using #{2 + add}:xtic(1) t col,"
79
- end
80
- if !graph_type.include?('clustered')
81
- (columns-1).times do |col|
82
- cmd << " '' u #{3+col+add} t col,"
83
- end
84
- end
85
- return cmd
86
- end
87
-
88
- def write_cmd(cmd)
89
- if File.exists?('cmd.dem')
90
- File.delete('cmd.dem')
91
- end
92
- file = File.open('cmd.dem', 'a')
93
- file.puts cmd
94
- file.close
95
- end
96
-
97
- def header(iterations, header_titles)
98
- header = ['Clasification']
99
- iterations.times do
100
- header_titles.each do |title|
101
- header << title
102
- end
103
- end
104
- return header
105
- end
106
-
107
- def categories(cat)
108
- array_cat = cat.split(' ').map{|name| [name]}
109
- return array_cat
110
- end
111
-
112
- def fill_table(fln_hash, table, keywords, stacked_cols,graph_type)
113
- series = 0
114
- keywords.each_with_index do |key, i|
115
- if graph_type.include?('clustered')
116
- row = i +1 -stacked_cols*series
117
- if (i+1) % stacked_cols == 0
118
- series +=1
119
- end
120
- else
121
- row = i/stacked_cols + 1
122
- end
123
- value = fln_hash[key]
124
- if value.nil?
125
- value = 0
126
- end
127
- if table.length == 2
128
- table[1] << value
129
- else
130
- table[row] << value
131
- end
132
- end
133
- return table
134
- end
135
-
136
- def write_table(table, file_name)
137
- file_table = File.open(file_name, 'w')
138
- table.each do |line|
139
- file_table.puts line.join(' ')
140
- end
141
- file_table.close
142
- end
143
-
144
- def basic_plot_command(graph_type)
145
- cmd = ''
146
- if graph_type.include?('clustered')
147
- cmd << "unset key\n"
148
- else
149
- cmd << "set key under nobox\n"
150
- end
151
- cmd << "set style data histogram\n"
152
- cmd << "set style histogram #{graph_type} title offset 2,0.25\n"
153
- cmd << "set style fill solid noborder\n"
154
- cmd << "set boxwidth 0.95\n"
155
- cmd << "unset xtics\n"
156
- cmd << "set xtics nomirror rotate by -45 scale 0\n"
157
- cmd << "set xlabel \" \" offset 0,-2\n"
158
- cmd << "set ylabel \"Num sequences\"\n"
159
- cmd << "set ytics\n"
160
- cmd << "set grid y\n"
161
- cmd << "set auto y\n"
162
- cmd << "set terminal png nocrop enhanced font arial 15 size 1000,600\n"
163
- return cmd
164
- end
165
-
166
-
167
-
168
- def parse_file(file)
169
- titles = []
170
- paths =[]
171
- File.open(file,'r').each do |line|
172
- fields = line.chomp.split("\t")
173
- if !fields[0].nil?
174
- titles << fields[0]
175
- end
176
- if !fields[1].nil?
177
- paths << fields[1]
178
- end
179
- end
180
-
181
- return titles, paths
182
- end
183
-
184
-
185
- ##########################################################################################
186
- ## OPTIONS
187
- ##########################################################################################
188
-
189
- options = {}
190
-
191
- optparse = OptionParser.new do |opts|
192
- options[:file]='samples'
193
- opts.on( '-f', '--file FILE', 'Path to FLN execution') do |file|
194
- options[:file]=file
195
- end
196
-
197
- options[:path] = File.join('fln_results','summary_stats.txt')
198
- opts.on( '-p', '--path PATH', 'Path to FLN different FLN results' ) do |path|
199
- options[:path] = File.join(path,'fln_results','summary_stats.txt')
200
- end
201
-
202
- # Set a banner, displayed at the top of the help screen.
203
- opts.banner = "Usage: plot_fln.rb [-p PATH || -f FILE] \n\n"
204
-
205
- # This displays the help screen
206
- opts.on( '-h', '--help', 'Display this screen' ) do
207
- puts opts
208
- exit
209
- end
210
-
211
- end # End opts
212
-
213
- # parse options and remove from ARGV
214
- optparse.parse!
215
-
216
- ##########################################################################################
217
- ## MAIN
218
- ##########################################################################################
219
-
220
- if File.exists?(options[:path])
221
- fln_hash = create_fln_hash(options[:path])
222
- end
223
-
224
- if File.exists?(options[:file])
225
- titles, paths = parse_file(options[:file])
226
- fln_hash = []
227
- paths.each do |path|
228
- fln_hash << create_fln_hash(File.join(path,'fln_results','summary_stats.txt'))
229
- end
230
- end
231
-
232
- graph_table(
233
- fln_hash,
234
- 'status_report_table',
235
- 'rowstacked',
236
- %w{Sure Putative},
237
- 'Complete N-terminal C-terminal Internal NcRNA Coding Unknown',
238
- %w{complete_sure complete_putative n_terminal_sure n_terminal_putative c_terminal_sure c_terminal_putative internal internal_putative ncrna ncrna_putative coding_sure coding_putative unknown unknown_putative},
239
- 2,
240
- titles)
241
-
242
- graph_table(
243
- fln_hash,
244
- 'assembly_table',
245
- 'rowstacked',
246
- %w{<=200nt >200nt >500nt},
247
- 'Unigenes Coding Unknown',
248
- %w{<=200seqs >200seqs sequences_>500 <=200cod >200cod coding_>500 <=200unk >200unk unknown_>500},
249
- 3,
250
- titles)
251
-
252
- graph_table(
253
- fln_hash,
254
- 'database_usage',
255
- 'clustered',
256
- %w{seqs},
257
- 'UserDB SwissProt TrEMBL ncRNA None Diff-orthologues Complete Diff-complete',
258
- %w{userdb swissprot trembl ncrna no_match_db different_orthologues complete different_completes},
259
- 8,
260
- titles)
261
-
262
- graph_table(
263
- fln_hash,
264
- 'artifacts',
265
- 'clustered',
266
- %w{seqs},
267
- 'Misassembled Chimeras Other',
268
- %w{misassembled chimeras other_artifacts},
269
- 3,
270
- titles)
@@ -1,70 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'scbi_plot'
4
- #############################################
5
- ### FUNCTIONS
6
- #############################################
7
- def take_taxonomy(file)
8
- taxonomy = {}
9
- File.open(file).each_with_index do |line, i|
10
- line.chomp!
11
- field = line.split("\t").last
12
- organism = field.split(";",2).last
13
- organism.gsub!(/\(\D+\)/,'')
14
- if organism.split(' ').length == 1
15
- next
16
- end
17
- organism.gsub!('.','')
18
- organism.gsub!(/^ /,'')
19
- organism.gsub!(' ','')
20
- organism ='"'+organism+'"'
21
- if taxonomy[organism].nil?
22
- taxonomy[organism] = 1
23
- else
24
- taxonomy[organism] += 1
25
- end
26
- end
27
- return taxonomy
28
- end
29
-
30
- def plot(taxonomy)
31
- p=ScbiPlot::Histogram.new('fln_taxonomy_plot.png','Group organism representation')
32
- p.add_x(taxonomy.keys)
33
- p.add_y(taxonomy.values)
34
- p.do_graph
35
- end
36
-
37
- #############################################
38
-
39
- require 'optparse'
40
- options = {}
41
-
42
- optparse = OptionParser.new do |opts|
43
- options[:path] = File.join('fln_results','pt_seqs')
44
- opts.on( '-p', '--path PATH', 'Path to FLN execution' ) do |path|
45
- options[:path] = File.join(path,'fln_results','pt_seqs')
46
- end
47
-
48
- # Set a banner, displayed at the top of the help screen.
49
- opts.banner = "Usage: plot_taxonomy.rb -p PATH \n\n"
50
-
51
- # This displays the help screen
52
- opts.on( '-h', '--help', 'Display this screen' ) do
53
- puts opts
54
- exit
55
- end
56
-
57
- end # End opts
58
-
59
- # parse options and remove from ARGV
60
- optparse.parse!
61
-
62
- taxonomy = nil
63
- if File.exists?(options[:path])
64
- taxonomy = take_taxonomy(options[:path])
65
- else
66
- puts 'Path isn\'t valid'
67
- Process.exit
68
- end
69
-
70
- plot(taxonomy)
Binary file
@@ -1,66 +0,0 @@
1
- require 'blast_functions'
2
- require 'types'
3
- require 'chimeric_seqs'
4
- include ChimericSeqs
5
-
6
- #####################################################################
7
- ## MAIN FUNCTION
8
- #####################################################################
9
- def artifact?(seq, query, db_name, db_path, options, new_seqs)
10
- artifact = FALSE
11
- # MISASSEMBLED DETECTION
12
- if misassembled_detection(query) # If seq is misassembled, stop chimera analysis
13
- seq.hit = query.hits.first
14
- artifact = TRUE
15
- seq.type = MISASSEMBLED
16
- seq.warnings('ERROR#1')
17
- end
18
-
19
- # OVERLAPPING HSPS ON SUBJECT DETECTION
20
- =begin
21
- if !artifact
22
- hit_reference = query.hits.first.dup
23
- query, overlapping = overlapping_hsps_on_subject(query)
24
- if overlapping
25
- if query.hits.first.nil?
26
- seq.hit = hit_reference
27
- else
28
- seq.hit = query.hits.first
29
- end
30
- artifact = TRUE
31
- seq.type = OTHER
32
- seq.warnings('ERROR#2')
33
- end
34
- end
35
- =end
36
-
37
- # MULTIPLE HSP DETECTION
38
- if !artifact && multiple_hsps(query, 3)
39
- seq.hit = query.hits.first
40
- seq.warnings('ERROR#3')
41
- end
42
-
43
- # CHIMERA DETECTION
44
- if !artifact && !options[:chimera].include?('d')
45
- chimera = search_chimeras(seq, query, options, db_name, db_path)
46
- if !chimera.nil?
47
- new_seqs.concat(chimera)
48
- seq.db_name = db_name
49
- seq.type = CHIMERA
50
- artifact = TRUE
51
- end
52
- end
53
-
54
- if artifact
55
- if $verbose > 1
56
- puts seq.prot_annot_calification
57
- end
58
- seq.db_name = db_name
59
- seq.save_fasta = FALSE
60
- seq.ignore = TRUE
61
- end
62
- return artifact
63
- end
64
-
65
-
66
-