full_lengther_next 0.6.2 → 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +5 -0
  5. data/CODE_OF_CONDUCT.md +49 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE.txt +21 -0
  8. data/{README.rdoc → README.md} +0 -0
  9. data/Rakefile +6 -37
  10. data/bin/console +14 -0
  11. data/bin/download_fln_dbs.rb +2 -7
  12. data/bin/full_lengther_next +85 -6
  13. data/bin/make_user_db.rb +13 -5
  14. data/bin/setup +8 -0
  15. data/full_lengther_next.gemspec +42 -0
  16. data/lib/full_lengther_next.rb +2 -10
  17. data/lib/full_lengther_next/artifacts.rb +74 -0
  18. data/lib/full_lengther_next/{classes/blast_functions.rb → blast_functions.rb} +0 -0
  19. data/lib/full_lengther_next/{classes/cdhit.rb → cdhit.rb} +0 -0
  20. data/lib/full_lengther_next/{classes/chimeric_seqs.rb → chimeric_seqs.rb} +0 -0
  21. data/lib/full_lengther_next/{classes/common_functions.rb → common_functions.rb} +0 -0
  22. data/lib/full_lengther_next/{classes/exonerate_result.rb → exonerate_result.rb} +0 -0
  23. data/lib/full_lengther_next/{classes/fl_analysis.rb → fl_analysis.rb} +0 -0
  24. data/lib/full_lengther_next/{classes/fl_string_utils.rb → fl_string_utils.rb} +0 -0
  25. data/lib/full_lengther_next/fln_stats.rb +613 -0
  26. data/lib/full_lengther_next/go_methods.rb +42 -0
  27. data/lib/full_lengther_next/{classes/handle_db.rb → handle_db.rb} +0 -0
  28. data/lib/full_lengther_next/mapping.rb +296 -0
  29. data/lib/full_lengther_next/{classes/my_worker.rb → my_worker.rb} +71 -9
  30. data/lib/full_lengther_next/{classes/my_worker_EST.rb → my_worker_EST.rb} +0 -0
  31. data/lib/full_lengther_next/{classes/my_worker_manager_EST.rb → my_worker_manager_EST.rb} +0 -0
  32. data/lib/full_lengther_next/{classes/my_worker_manager_fln.rb → my_worker_manager_fln.rb} +181 -16
  33. data/lib/full_lengther_next/{classes/nc_rna.rb → nc_rna.rb} +0 -0
  34. data/lib/full_lengther_next/{classes/orf.rb → orf.rb} +0 -0
  35. data/lib/full_lengther_next/{classes/reptrans.rb → reptrans.rb} +9 -5
  36. data/lib/full_lengther_next/{classes/sequence.rb → sequence.rb} +26 -1
  37. data/lib/full_lengther_next/{classes/test_code.rb → test_code.rb} +1 -1
  38. data/lib/full_lengther_next/{classes/types.rb → types.rb} +3 -2
  39. data/lib/full_lengther_next/{classes/une_los_hit.rb → une_los_hit.rb} +0 -0
  40. data/lib/full_lengther_next/version.rb +3 -0
  41. data/lib/full_lengther_next/{classes/warnings.rb → warnings.rb} +0 -0
  42. data/report_templates/general_summary.erb +140 -0
  43. data/report_templates/mapping_summary.erb +98 -0
  44. data/report_templates/reptrans_summary.erb +32 -0
  45. metadata +112 -134
  46. data/.gemtest +0 -0
  47. data/History.txt +0 -32
  48. data/Manifest.txt +0 -44
  49. data/PostInstall.txt +0 -6
  50. data/bin/plot_fln.rb +0 -270
  51. data/bin/plot_taxonomy.rb +0 -70
  52. data/lib/expresscanvas.zip +0 -0
  53. data/lib/full_lengther_next/classes/artifacts.rb +0 -66
  54. data/lib/full_lengther_next/classes/fln_stats.rb +0 -641
  55. data/script/console +0 -10
  56. data/script/destroy +0 -14
  57. data/script/generate +0 -14
  58. data/test/test_full_lengther_next.rb +0 -11
  59. data/test/test_helper.rb +0 -3
data/.gemtest DELETED
File without changes
@@ -1,32 +0,0 @@
1
- === 0.1.0 2013-09-12
2
-
3
- Major rewrite of script
4
-
5
- === 0.0.7 2012-07-25
6
-
7
- Chimera detection
8
-
9
- === 0.0.6 2012-04-16
10
-
11
- Fixed some cosmetic issues and parameters names
12
-
13
- === 0.0.5 2012-03-09
14
-
15
- Fix NCRNA annotation
16
-
17
- === 0.0.4 2012-03-07
18
-
19
- Fixed stats for 0 seqs
20
-
21
- === 0.0.3 2012-03-01
22
-
23
- Added ncrna
24
-
25
- === 0.0.2 2012-02-07
26
-
27
- Added FULL_LENGTH_NEXT_INIT environment variable for clustered installations
28
-
29
- === 0.0.1 2012-01-30
30
-
31
- * 1 major enhancement:
32
- * Initial release
@@ -1,44 +0,0 @@
1
- Rakefile
2
- script
3
- script/generate
4
- script/destroy
5
- script/console
6
- test
7
- test/test_full_lengther_next.rb
8
- test/test_helper.rb
9
- bin/plot_taxonomy.rb
10
- bin/plot_fln.rb
11
- bin/download_fln_dbs.rb
12
- bin/full_lengther_next
13
- bin/make_user_db.rb
14
- bin/make_test_dataset.rb
15
- PostInstall.txt
16
- README.rdoc
17
- History.txt
18
- Manifest.txt
19
- lib/full_lengther_next
20
- lib/full_lengther_next/classes
21
- lib/full_lengther_next/classes/blast_functions.rb
22
- lib/full_lengther_next/classes/my_worker_manager_fln.rb
23
- lib/full_lengther_next/classes/types.rb
24
- lib/full_lengther_next/classes/chimeric_seqs.rb
25
- lib/full_lengther_next/classes/artifacts.rb
26
- lib/full_lengther_next/classes/cdhit.rb
27
- lib/full_lengther_next/classes/fl_analysis.rb
28
- lib/full_lengther_next/classes/fl_string_utils.rb
29
- lib/full_lengther_next/classes/my_worker.rb
30
- lib/full_lengther_next/classes/sequence.rb
31
- lib/full_lengther_next/classes/my_worker_EST.rb
32
- lib/full_lengther_next/classes/test_code.rb
33
- lib/full_lengther_next/classes/orf.rb
34
- lib/full_lengther_next/classes/une_los_hit.rb
35
- lib/full_lengther_next/classes/warnings.rb
36
- lib/full_lengther_next/classes/fln_stats.rb
37
- lib/full_lengther_next/classes/my_worker_manager_EST.rb
38
- lib/full_lengther_next/classes/nc_rna.rb
39
- lib/full_lengther_next/classes/reptrans.rb
40
- lib/full_lengther_next/classes/common_functions.rb
41
- lib/full_lengther_next/classes/exonerate_result.rb
42
- lib/full_lengther_next/classes/handle_db.rb
43
- lib/full_lengther_next.rb
44
- lib/expresscanvas.zip
@@ -1,6 +0,0 @@
1
- For more information on full_lengther_next, see * http://www.scbi.uma.es/downloads
2
-
3
- Now it is time to install databases using the following command:
4
-
5
- $> algo
6
-
@@ -1,270 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'optparse'
3
-
4
- #############################################
5
- ### FUNCTIONS
6
- #############################################
7
- def create_fln_hash(path)
8
- fln_hash = {}
9
- file = File.open(path, 'r').each do |line|
10
- fields = line.chomp.split
11
- fln_hash[fields[1]] = fields[0].to_i
12
- end
13
- fln_hash['<=200seqs'] = fln_hash['good_seqs'] - fln_hash['sequences_>200']
14
- fln_hash['>200seqs'] = fln_hash['sequences_>200'] - fln_hash['sequences_>500']
15
- fln_hash['<=200unk'] = fln_hash['unknown'] - fln_hash['unknown_>200']
16
- fln_hash['>200unk'] = fln_hash['unknown_>200'] - fln_hash['unknown_>500']
17
- fln_hash['<=200cod'] = fln_hash['coding'] - fln_hash['coding_>200']
18
- fln_hash['>200cod'] = fln_hash['coding_>200'] - fln_hash['coding_>500']
19
- fln_hash['no_match_db'] = fln_hash['coding'] + fln_hash['unknown']
20
-
21
- return fln_hash
22
- end
23
-
24
- def graph_table(fln_hash, output, graph_type, header_titles, categories_names, keywords, stacked_cols, titles = nil)
25
- table = []
26
- cmd = basic_plot_command(graph_type)
27
- cmd << "set output '#{output}.png'\n"
28
-
29
- if fln_hash.class.to_s == 'Array'
30
- table << header(fln_hash.length, header_titles)
31
- table.concat(categories(categories_names))
32
- cmd << 'plot '
33
- count = 0
34
- fln_hash.each_with_index do |hash,i|
35
- table = fill_table(hash, table, keywords, stacked_cols, graph_type)
36
- if i == 0
37
- first = TRUE
38
- else
39
- first = FALSE
40
- end
41
- cmd << histogram(stacked_cols, output,titles[i], count, graph_type, first)
42
- if i < fln_hash.length-1
43
- cmd << "\\\n"
44
- end
45
- if !graph_type.include?('clustered')
46
- count += stacked_cols
47
- else
48
- count += 1
49
- end
50
- end
51
- else
52
- table << header(1, header_titles)
53
- table.concat(categories(categories_names))
54
- table = fill_table(fln_hash, table, keywords, stacked_cols, graph_type)
55
- cmd << 'plot '+ histogram(stacked_cols, output, '', 0, graph_type, TRUE)
56
- end
57
- if table.length ==2 #Dummie row for rowstacked graph with a only category
58
- table << table[1].dup
59
- table[2].each_with_index do |cell, i|
60
- if i== 0
61
- table[2][i] = '&'
62
- else
63
- table[2][i] = 0
64
- end
65
- end
66
- end
67
- cmd.chop!
68
- write_table(table, output)
69
- write_cmd(cmd)
70
- system('gnuplot cmd.dem')
71
- end
72
-
73
- def histogram(columns, file, name, add, graph_type, first)
74
- cmd = ""
75
- if first
76
- cmd << "newhistogram \"#{name}\", '#{file}' using 2:xtic(1) t col,"
77
- else
78
- cmd << "newhistogram \"#{name}\", '' using #{2 + add}:xtic(1) t col,"
79
- end
80
- if !graph_type.include?('clustered')
81
- (columns-1).times do |col|
82
- cmd << " '' u #{3+col+add} t col,"
83
- end
84
- end
85
- return cmd
86
- end
87
-
88
- def write_cmd(cmd)
89
- if File.exists?('cmd.dem')
90
- File.delete('cmd.dem')
91
- end
92
- file = File.open('cmd.dem', 'a')
93
- file.puts cmd
94
- file.close
95
- end
96
-
97
- def header(iterations, header_titles)
98
- header = ['Clasification']
99
- iterations.times do
100
- header_titles.each do |title|
101
- header << title
102
- end
103
- end
104
- return header
105
- end
106
-
107
- def categories(cat)
108
- array_cat = cat.split(' ').map{|name| [name]}
109
- return array_cat
110
- end
111
-
112
- def fill_table(fln_hash, table, keywords, stacked_cols,graph_type)
113
- series = 0
114
- keywords.each_with_index do |key, i|
115
- if graph_type.include?('clustered')
116
- row = i +1 -stacked_cols*series
117
- if (i+1) % stacked_cols == 0
118
- series +=1
119
- end
120
- else
121
- row = i/stacked_cols + 1
122
- end
123
- value = fln_hash[key]
124
- if value.nil?
125
- value = 0
126
- end
127
- if table.length == 2
128
- table[1] << value
129
- else
130
- table[row] << value
131
- end
132
- end
133
- return table
134
- end
135
-
136
- def write_table(table, file_name)
137
- file_table = File.open(file_name, 'w')
138
- table.each do |line|
139
- file_table.puts line.join(' ')
140
- end
141
- file_table.close
142
- end
143
-
144
- def basic_plot_command(graph_type)
145
- cmd = ''
146
- if graph_type.include?('clustered')
147
- cmd << "unset key\n"
148
- else
149
- cmd << "set key under nobox\n"
150
- end
151
- cmd << "set style data histogram\n"
152
- cmd << "set style histogram #{graph_type} title offset 2,0.25\n"
153
- cmd << "set style fill solid noborder\n"
154
- cmd << "set boxwidth 0.95\n"
155
- cmd << "unset xtics\n"
156
- cmd << "set xtics nomirror rotate by -45 scale 0\n"
157
- cmd << "set xlabel \" \" offset 0,-2\n"
158
- cmd << "set ylabel \"Num sequences\"\n"
159
- cmd << "set ytics\n"
160
- cmd << "set grid y\n"
161
- cmd << "set auto y\n"
162
- cmd << "set terminal png nocrop enhanced font arial 15 size 1000,600\n"
163
- return cmd
164
- end
165
-
166
-
167
-
168
- def parse_file(file)
169
- titles = []
170
- paths =[]
171
- File.open(file,'r').each do |line|
172
- fields = line.chomp.split("\t")
173
- if !fields[0].nil?
174
- titles << fields[0]
175
- end
176
- if !fields[1].nil?
177
- paths << fields[1]
178
- end
179
- end
180
-
181
- return titles, paths
182
- end
183
-
184
-
185
- ##########################################################################################
186
- ## OPTIONS
187
- ##########################################################################################
188
-
189
- options = {}
190
-
191
- optparse = OptionParser.new do |opts|
192
- options[:file]='samples'
193
- opts.on( '-f', '--file FILE', 'Path to FLN execution') do |file|
194
- options[:file]=file
195
- end
196
-
197
- options[:path] = File.join('fln_results','summary_stats.txt')
198
- opts.on( '-p', '--path PATH', 'Path to FLN different FLN results' ) do |path|
199
- options[:path] = File.join(path,'fln_results','summary_stats.txt')
200
- end
201
-
202
- # Set a banner, displayed at the top of the help screen.
203
- opts.banner = "Usage: plot_fln.rb [-p PATH || -f FILE] \n\n"
204
-
205
- # This displays the help screen
206
- opts.on( '-h', '--help', 'Display this screen' ) do
207
- puts opts
208
- exit
209
- end
210
-
211
- end # End opts
212
-
213
- # parse options and remove from ARGV
214
- optparse.parse!
215
-
216
- ##########################################################################################
217
- ## MAIN
218
- ##########################################################################################
219
-
220
- if File.exists?(options[:path])
221
- fln_hash = create_fln_hash(options[:path])
222
- end
223
-
224
- if File.exists?(options[:file])
225
- titles, paths = parse_file(options[:file])
226
- fln_hash = []
227
- paths.each do |path|
228
- fln_hash << create_fln_hash(File.join(path,'fln_results','summary_stats.txt'))
229
- end
230
- end
231
-
232
- graph_table(
233
- fln_hash,
234
- 'status_report_table',
235
- 'rowstacked',
236
- %w{Sure Putative},
237
- 'Complete N-terminal C-terminal Internal NcRNA Coding Unknown',
238
- %w{complete_sure complete_putative n_terminal_sure n_terminal_putative c_terminal_sure c_terminal_putative internal internal_putative ncrna ncrna_putative coding_sure coding_putative unknown unknown_putative},
239
- 2,
240
- titles)
241
-
242
- graph_table(
243
- fln_hash,
244
- 'assembly_table',
245
- 'rowstacked',
246
- %w{<=200nt >200nt >500nt},
247
- 'Unigenes Coding Unknown',
248
- %w{<=200seqs >200seqs sequences_>500 <=200cod >200cod coding_>500 <=200unk >200unk unknown_>500},
249
- 3,
250
- titles)
251
-
252
- graph_table(
253
- fln_hash,
254
- 'database_usage',
255
- 'clustered',
256
- %w{seqs},
257
- 'UserDB SwissProt TrEMBL ncRNA None Diff-orthologues Complete Diff-complete',
258
- %w{userdb swissprot trembl ncrna no_match_db different_orthologues complete different_completes},
259
- 8,
260
- titles)
261
-
262
- graph_table(
263
- fln_hash,
264
- 'artifacts',
265
- 'clustered',
266
- %w{seqs},
267
- 'Misassembled Chimeras Other',
268
- %w{misassembled chimeras other_artifacts},
269
- 3,
270
- titles)
@@ -1,70 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'scbi_plot'
4
- #############################################
5
- ### FUNCTIONS
6
- #############################################
7
- def take_taxonomy(file)
8
- taxonomy = {}
9
- File.open(file).each_with_index do |line, i|
10
- line.chomp!
11
- field = line.split("\t").last
12
- organism = field.split(";",2).last
13
- organism.gsub!(/\(\D+\)/,'')
14
- if organism.split(' ').length == 1
15
- next
16
- end
17
- organism.gsub!('.','')
18
- organism.gsub!(/^ /,'')
19
- organism.gsub!(' ','')
20
- organism ='"'+organism+'"'
21
- if taxonomy[organism].nil?
22
- taxonomy[organism] = 1
23
- else
24
- taxonomy[organism] += 1
25
- end
26
- end
27
- return taxonomy
28
- end
29
-
30
- def plot(taxonomy)
31
- p=ScbiPlot::Histogram.new('fln_taxonomy_plot.png','Group organism representation')
32
- p.add_x(taxonomy.keys)
33
- p.add_y(taxonomy.values)
34
- p.do_graph
35
- end
36
-
37
- #############################################
38
-
39
- require 'optparse'
40
- options = {}
41
-
42
- optparse = OptionParser.new do |opts|
43
- options[:path] = File.join('fln_results','pt_seqs')
44
- opts.on( '-p', '--path PATH', 'Path to FLN execution' ) do |path|
45
- options[:path] = File.join(path,'fln_results','pt_seqs')
46
- end
47
-
48
- # Set a banner, displayed at the top of the help screen.
49
- opts.banner = "Usage: plot_taxonomy.rb -p PATH \n\n"
50
-
51
- # This displays the help screen
52
- opts.on( '-h', '--help', 'Display this screen' ) do
53
- puts opts
54
- exit
55
- end
56
-
57
- end # End opts
58
-
59
- # parse options and remove from ARGV
60
- optparse.parse!
61
-
62
- taxonomy = nil
63
- if File.exists?(options[:path])
64
- taxonomy = take_taxonomy(options[:path])
65
- else
66
- puts 'Path isn\'t valid'
67
- Process.exit
68
- end
69
-
70
- plot(taxonomy)
Binary file
@@ -1,66 +0,0 @@
1
- require 'blast_functions'
2
- require 'types'
3
- require 'chimeric_seqs'
4
- include ChimericSeqs
5
-
6
- #####################################################################
7
- ## MAIN FUNCTION
8
- #####################################################################
9
- def artifact?(seq, query, db_name, db_path, options, new_seqs)
10
- artifact = FALSE
11
- # MISASSEMBLED DETECTION
12
- if misassembled_detection(query) #If seq is misassembled stop chimera analisys
13
- seq.hit = query.hits.first
14
- artifact = TRUE
15
- seq.type = MISASSEMBLED
16
- seq.warnings('ERROR#1')
17
- end
18
-
19
- # OVERLAPPING HSPS ON SUBJECT DETECTION
20
- =begin
21
- if !artifact
22
- hit_reference = query.hits.first.dup
23
- query, overlapping = overlapping_hsps_on_subject(query)
24
- if overlapping
25
- if query.hits.first.nil?
26
- seq.hit = hit_reference
27
- else
28
- seq.hit = query.hits.first
29
- end
30
- artifact = TRUE
31
- seq.type = OTHER
32
- seq.warnings('ERROR#2')
33
- end
34
- end
35
- =end
36
-
37
- # MULTIPLE HSP DETECTION
38
- if !artifact && multiple_hsps(query, 3)
39
- seq.hit = query.hits.first
40
- seq.warnings('ERROR#3')
41
- end
42
-
43
- # CHIMERA DETECTION
44
- if !artifact && !options[:chimera].include?('d')
45
- chimera = search_chimeras(seq, query, options, db_name, db_path)
46
- if !chimera.nil?
47
- new_seqs.concat(chimera)
48
- seq.db_name = db_name
49
- seq.type = CHIMERA
50
- artifact = TRUE
51
- end
52
- end
53
-
54
- if artifact
55
- if $verbose > 1
56
- puts seq.prot_annot_calification
57
- end
58
- seq.db_name = db_name
59
- seq.save_fasta = FALSE
60
- seq.ignore = TRUE
61
- end
62
- return artifact
63
- end
64
-
65
-
66
-