full_lengther_next 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,7 @@
1
+ === 0.0.6 2012-04-16
2
+
3
+ Fixed some cosmetic issues and parameters names
4
+
1
5
  === 0.0.5 2012-03-09
2
6
 
3
7
  Fix NCRNA annotation
@@ -26,7 +26,7 @@ optparse = OptionParser.new do |opts|
26
26
  end
27
27
 
28
28
  options[:user_db] = nil
29
- opts.on( '-d', '--blast_db DB_NAME', 'User blast plus database' ) do |db|
29
+ opts.on( '-u', '--user_db UserDB', 'User blast+ database' ) do |db|
30
30
  options[:user_db] = db
31
31
  end
32
32
 
@@ -46,7 +46,7 @@ optparse = OptionParser.new do |opts|
46
46
  end
47
47
 
48
48
  options[:distance] = 15
49
- opts.on( '-a', '--aas_distance DISTANCE', "distance threshold in aminoacids used for some calculations, the less distance the more strict. Default=15\n\n" ) do |distance|
49
+ opts.on( '-m', '--max_distance maxDIST', "maximal distance between query and subject gene boundaries to be qualified as putative, the less distance the more strict. Default=15\n\n" ) do |distance|
50
50
  options[:distance] = distance.to_i
51
51
  end
52
52
 
@@ -172,7 +172,7 @@ require 'my_worker_manager'
172
172
  $LOG = Logger.new(STDOUT)
173
173
  $LOG.datetime_format = "%Y-%m-%d %H:%M:%S"
174
174
 
175
- custom_worker_file = File.join(ROOT_PATH,'classes','my_worker.rb')
175
+ custom_worker_file = File.join(File.dirname(ROOT_PATH),'lib','full_lengther_next','classes','my_worker.rb')
176
176
 
177
177
  $LOG.info 'Starting server'
178
178
  # initialize work manager (open files, etc)
@@ -1,13 +1,13 @@
1
1
  $:.unshift(File.dirname(__FILE__)) unless
2
2
  $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
3
 
4
- ROOT_PATH=File.join(File.dirname(__FILE__),'full_lengther_next')
4
+ root_path=File.join(File.dirname(__FILE__),'full_lengther_next')
5
5
 
6
- $: << File.expand_path(File.join(ROOT_PATH, 'classes'))
6
+ $: << File.expand_path(File.join(root_path, 'classes'))
7
7
 
8
8
 
9
9
  module FullLengtherNext
10
- VERSION = '0.0.5'
10
+ VERSION = '0.0.6'
11
11
 
12
12
  FULLLENGHTER_VERSION = VERSION
13
13
  end
@@ -247,7 +247,7 @@ module FlAnalysis
247
247
  if (seq.sec_desc.empty?)
248
248
  if (!q.hits[0].definition.nil?)
249
249
  warnings = "Coding sequence with some errors, #{warnings}"
250
- seq.sec_desc = "#{q.query_def}\t#{seq.seq_fasta.length}\t#{q.hits[0].acc}\t#{db_name}\tCoding Seq\t\t#{q.hits[0].e_val}\t#{q.hits[0].ident}\t\t#{q.hits[0].full_subject_length}\t#{warnings}\t\t\t\t\t\t#{q.hits[0].definition}\t"
250
+ seq.sec_desc = "#{q.query_def}\t#{seq.fasta_length}\t#{q.hits[0].acc}\t#{db_name}\tMisassembled\t\t#{q.hits[0].e_val}\t#{q.hits[0].ident}\t\t#{q.hits[0].full_subject_length}\t#{warnings}\t\t\t\t\t\t#{q.hits[0].definition}\t"
251
251
  seq.annotate(:tmp_annotation,[seq.sec_desc, '','',''],true)
252
252
  else
253
253
  seq.annotate(:apply_tcode,'')
@@ -264,7 +264,7 @@ module FlAnalysis
264
264
  if (seq.sec_desc.empty?)
265
265
  if (!q.hits[0].definition.nil?)
266
266
  warnings = "Coding sequence with some errors, #{warnings}"
267
- seq.sec_desc = "#{q.query_def}\t#{seq.seq_fasta.length}\t#{q.hits[0].acc}\t#{db_name}\tCoding Seq\t\t#{q.hits[0].e_val}\t#{q.hits[0].ident}\t\t#{q.hits[0].full_subject_length}\t#{warnings}\t\t\t\t\t\t#{q.hits[0].definition}\t"
267
+ seq.sec_desc = "#{q.query_def}\t#{seq.fasta_length}\t#{q.hits[0].acc}\t#{db_name}\tMisassembled\t\t#{q.hits[0].e_val}\t#{q.hits[0].ident}\t\t#{q.hits[0].full_subject_length}\t#{warnings}\t\t\t\t\t\t#{q.hits[0].definition}\t"
268
268
  end
269
269
  end
270
270
  end
@@ -529,7 +529,7 @@ module FlAnalysis
529
529
  tmp_prot = ">#{q.query_def}\n#{final_prot}"
530
530
  tmp_align = "#{q.query_def}\t#{final_hit.q_seq}\n#{final_hit.acc}#{spnum}\t#{final_hit.s_seq}\n\n"
531
531
  tmp_annot = "#{q.query_def}\t#{query_fasta.length}\t#{final_hit.acc}\t#{db_name}\t#{final_status}\t\t#{final_hit.e_val}\t#{final_hit.ident}\t#{final_prot.length}\t#{final_hit.full_subject_length}\t#{warnings}\t#{final_hit.q_frame}\t#{final_hit.q_beg.to_i + 1}\t#{final_hit.q_end.to_i + 1}\t#{final_hit.s_beg.to_i + 1}\t#{final_hit.s_end.to_i + 1}\t#{final_hit.definition}\t#{final_prot}"
532
- seq.sec_desc = "#{q.query_def}\t#{query_fasta.length}\t#{final_hit.acc}\t#{db_name}\tCoding Seq\t\t#{final_hit.e_val}\t#{final_hit.ident}\t\t#{final_hit.full_subject_length}\t#{warnings}\t\t\t\t\t\t#{final_hit.definition}\t"
532
+ seq.sec_desc = "#{q.query_def}\t#{query_fasta.length}\t#{final_hit.acc}\t#{db_name}\tMisassembled\t\t#{final_hit.e_val}\t#{final_hit.ident}\t\t#{final_hit.full_subject_length}\t#{warnings}\t\t\t\t\t\t#{final_hit.definition}\t"
533
533
  seq.annotate(:tmp_annotation,[tmp_annot, tmp_prot,tmp_align,[q, final_hit, final_prot, query_fasta, final_status]])
534
534
 
535
535
  # puts "\n\n\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.---#{q.query_def}\t#{final_status}\n#{tmp_prot}"
@@ -4,384 +4,485 @@ module FlnStats
4
4
  def summary_stats
5
5
  stats_file = File.open('fln_results/summary_stats.html', 'w')
6
6
 
7
- (html_head, html_1, html_2, html_3, html_4) = html_code
7
+ # recogemos los trozos de html fijos
8
+ (html_head, html_st, html_uni, html_db, html_as, html_end) = html_code
8
9
 
9
10
  total_seqs = 0
11
+ status_suma = 0
12
+ #recogemos los datos que necesitamos de los ficheros de resultados
13
+ (status_array, db_usage, seqs_number1, error_1_num, seq_uniq, complete_uniq, db_uni_500, db_uni_200, db_longest_one) = annotation_stats
14
+ (tcode_array, seqs_number2, unk_200, tc_uni_500, tc_uni_200, tc_longest_one) = testcode_stats
15
+ (ncrna_total, nc_uni_500, nc_uni_200, nc_longest_one)=ncrna_stats
10
16
 
11
- (status_array, seqs_number1, error_1_num, seq_uniq, complete_uniq, seq_length_stats, complete_seq_length_stats) = annotation_stats
12
- (tcode_array, seqs_number2, tcode_length_stats, coding_length_stats, unknown_length_stats) = testcode_stats
13
- ncrna_array=ncrna_stats
14
-
15
- total_seqs = seqs_number1 + seqs_number2 + ncrna_array[4].to_i
16
-
17
+ total_seqs = seqs_number1 + seqs_number2 + ncrna_total.to_i
18
+ uni_500 = (db_uni_500 + tc_uni_500 + nc_uni_500)
19
+ uni_200 = (db_uni_200 + tc_uni_200 + nc_uni_200)
20
+ longest_one = [db_longest_one, tc_longest_one, nc_longest_one].max
17
21
  stats_file.puts html_head
18
- stats_file.puts "\t\t\t\t"+'<font color="#FF0000">'+total_seqs.to_s+"</font> sequences in your input fasta\n\t\t\t</h2>\n\t\t</center>"
19
22
 
20
23
  if (total_seqs.to_i > 0)
21
- stats_file.puts html_1
22
- stats_file.puts ' <tr>
23
- <td align="center">YES</td>
24
- <td align="right">'+seqs_number1.to_s+'</td>
25
- <td align="right">'+'%.2f' % (100*seqs_number1.to_f/total_seqs.to_f).to_s+' %</td>
26
- <td align="right">'+seq_uniq.to_s+'</td>
27
- <td align="right">'+seq_length_stats[0].to_s+'</td>
28
- <td align="right">'+seq_length_stats[1].to_s+'</td>
29
- <td align="right">'+seq_length_stats[2].to_s+'</td>
30
- <td align="right">'+seq_length_stats[3].to_s+'</td>
31
- </tr>'
32
- stats_file.puts ' <tr>
33
- <td align="center">NO</td>
34
- <td align="right">'+seqs_number2.to_s+'</td>
35
- <td align="right">'+'%.2f' % (100*seqs_number2.to_f/total_seqs.to_f).to_s+' %</td>
36
- <td align="right">-</td>
37
- <td align="right">'+tcode_length_stats[0].to_s+'</td>
38
- <td align="right">'+tcode_length_stats[1].to_s+'</td>
39
- <td align="right">'+tcode_length_stats[2].to_s+'</td>
40
- <td align="right">'+tcode_length_stats[3].to_s+'</td>
41
- </tr>'
42
- stats_file.puts ' <tr>
43
- <td align="center">ncRNA</td>
44
- <td align="right">'+ncrna_array[4].to_s+'</td>
45
- <td align="right">'+'%.2f' % (100*ncrna_array[4].to_f/total_seqs.to_f).to_s+' %</td>
46
- <td align="right">-</td>
47
- <td align="right">'+ncrna_array[0].to_s+'</td>
48
- <td align="right">'+ncrna_array[1].to_s+'</td>
49
- <td align="right">'+ncrna_array[2].to_s+'</td>
50
- <td align="right">'+ncrna_array[3].to_s+'</td>
51
- </tr>
52
- </table>'
53
-
54
- stats_file.puts ' <p><font color="#FF0000">'+error_1_num.to_s+'</font> Sequences with sense and antisense hits error</p>'
55
- stats_file.puts ' <p><font color="#FF0000">'+complete_uniq.to_s+'</font> Complete sequences with different ortologue ID</p>'
56
- stats_file.puts html_2
24
+ # imprimimos la tabla Status Report --------------------------------------------------------------------------------------------
25
+ stats_file.puts html_st
57
26
  status_array.each do |status|
27
+ if (status[1] == 'Internal') || (status[1] == 'Misassembled')
58
28
  stats_file.puts ' <tr>
59
- <td align="right">'+status[4].to_s+'</td>
60
- <td align="right">'+status[0].to_s+'</td>
61
- <td align="right">'+'%.2f' % (100*status[0].to_f/total_seqs.to_f).to_s+' %</td>
62
- <td align="right">'+status[1].to_s+'</td>
63
- <td align="right">'+status[2].to_s+'</td>
64
- <td align="right">'+status[3].to_s+'</td>
65
- </tr>'
29
+ <td colspan="2" align="left">'+status[1].to_s+'</td>
30
+ <td align="right">'+status[0].to_s+'</td>
31
+ <td align="right">'+'%.2f' % (100*status[0].to_f/total_seqs.to_f).to_s+' %</td>
32
+ </tr>'
33
+ elsif (status[1] =~ /^Putative/)
34
+ stats_file.puts ' <tr>
35
+ <td align="left">Putative</td>
36
+ <td align="right">'+status[0].to_s+'</td>
37
+ <td align="right">'+'%.2f' % (100*status[0].to_f/total_seqs.to_f).to_s+' %</td>
38
+ </tr>'
39
+ else
40
+ stats_file.puts ' <tr>
41
+ <td rowspan="2" align="left">'+status[1].to_s+'</td>
42
+ <td align="left">Sure</td>
43
+ <td align="right">'+status[0].to_s+'</td>
44
+ <td align="right">'+'%.2f' % (100*status[0].to_f/total_seqs.to_f).to_s+' %</td>
45
+ </tr>'
46
+ end
47
+ status_suma += status[0]
66
48
  end
67
- stats_file.puts html_3
68
-
49
+ # añadimos los coding, P.coding
69
50
  tcode_array.each do |status|
51
+ if (status[1] == 'Coding')
70
52
  stats_file.puts ' <tr>
71
- <td align="right">'+status[5].to_s+'</td>
72
- <td align="right">'+status[4].to_s+'</td>
73
- <td align="right">'+'%.2f' % (100*status[4].to_f/total_seqs.to_f).to_s+' %</td>
74
- <td align="right">'+status[0].to_s+'</td>
75
- <td align="right">'+status[1].to_s+'</td>
76
- <td align="right">'+status[2].to_s+'</td>
77
- <td align="right">'+status[3].to_s+'</td>
78
- </tr>'
53
+ <td rowspan="2" align="left">'+status[1].to_s+'</td>
54
+ <td align="left">Sure</td>
55
+ <td align="right">'+status[0].to_s+'</td>
56
+ <td align="right">'+'%.2f' % (100*status[0].to_f/total_seqs.to_f).to_s+' %</td>
57
+ </tr>'
58
+ elsif (status[1] == 'Putative Coding')
59
+ stats_file.puts ' <tr>
60
+ <td align="left">Putative</td>
61
+ <td align="right">'+status[0].to_s+'</td>
62
+ <td align="right">'+'%.2f' % (100*status[0].to_f/total_seqs.to_f).to_s+' %</td>
63
+ </tr>'
64
+ end
65
+ status_suma += status[0]
79
66
  end
80
-
81
- # print Non coding RNA
67
+ # se ponen los ncRNA
82
68
  stats_file.puts ' <tr>
83
- <td align="right">Putative ncRNA</td>
84
- <td align="right">'+ncrna_array[4].to_s+'</td>
85
- <td align="right">'+'%.2f' % (100*ncrna_array[4].to_f/total_seqs.to_f).to_s+' %</td>
86
- <td align="right">'+ncrna_array[0].to_s+'</td>
87
- <td align="right">'+ncrna_array[1].to_s+'</td>
88
- <td align="right">'+ncrna_array[2].to_s+'</td>
89
- <td align="right">'+ncrna_array[3].to_s+'</td>
69
+ <td colspan="2" align="left">Putative ncRNA</td>
70
+ <td align="right">'+ncrna_total.to_s+'</td>
71
+ <td align="right">'+'%.2f' % (100*ncrna_total.to_f/total_seqs.to_f).to_s+' %</td>
72
+ </tr>'
73
+ status_suma += ncrna_total
74
+ # se ponen los unknown
75
+ tcode_array.each do |status|
76
+ if (status[1] =~ /Unknown/i)
77
+ stats_file.puts ' <tr>
78
+ <td colspan="2" align="left">'+status[1].to_s+'</td>
79
+ <td align="right">'+status[0].to_s+'</td>
80
+ <td align="right">'+'%.2f' % (100*status[0].to_f/total_seqs.to_f).to_s+' %</td>
81
+ </tr>'
82
+ end
83
+ end
84
+ #se añade el total
85
+ stats_file.puts ' <tr>
86
+ <td colspan="2" align="left">Total</td>
87
+ <td align="right">'+status_suma.to_s+'</td>
88
+ <td align="right">'+'%.2f' % (100*status_suma.to_f/total_seqs.to_f).to_s+' %</td>
90
89
  </tr>
91
- </table>
92
- </center>'
93
-
94
- end
95
- stats_file.puts html_4
96
-
97
- stats_file.close
98
- end
99
-
90
+ </table>'
91
+
92
+
93
+ # imprimimos la tabla Unigene Report --------------------------------------------------------------------------------------------
94
+ new_genes = tcode_array[0][0] + tcode_array[1][0]
95
+ total_uni = (seqs_number1 + new_genes + ncrna_total + tcode_array[2][0])
96
+ stats_file.puts html_uni
97
+ stats_file.puts ' <tr>
98
+ <td align="left">With orthologue in DBs</td>
99
+ <td align="right">'+seqs_number1.to_s+'</td>
100
+ <td align="right">'+'%.2f' % (100*seqs_number1.to_f/total_seqs.to_f).to_s+' %</td>
101
+ </tr>'
102
+ stats_file.puts ' <tr>
103
+ <td align="left">Putative New Genes</td>
104
+ <td align="right">'+new_genes.to_s+'</td>
105
+ <td align="right">'+'%.2f' % (100*new_genes.to_f/total_seqs.to_f).to_s+' %</td>
106
+ </tr>'
107
+ stats_file.puts ' <tr>
108
+ <td align="left">ncRNAs</td>
109
+ <td align="right">'+ncrna_total.to_s+'</td>
110
+ <td align="right">'+'%.2f' % (100*ncrna_total.to_f/total_seqs.to_f).to_s+' %</td>
111
+ </tr>'
112
+ stats_file.puts ' <tr>
113
+ <td align="left">Unknown</td>
114
+ <td align="right">'+tcode_array[2][0].to_s+'</td>
115
+ <td align="right">'+'%.2f' % (100*tcode_array[2][0].to_f/total_seqs.to_f).to_s+' %</td>
116
+ </tr>'
117
+ stats_file.puts ' <tr>
118
+ <td align="left">Total</td>
119
+ <td align="right">'+total_uni.to_s+'</td>
120
+ <td align="right">'+'%.2f' % (100*total_uni.to_f/total_seqs.to_f).to_s+' %</td>
121
+ </tr>
122
+ </table>'
100
123
 
101
- def html_code
102
- html_head = '<html>
103
- <head>
104
- <title>FLN Annotation Summary</title>
105
- </head>
106
-
107
- <body bgcolor="#FFFFFF">
108
- <center>
109
- <h1 ALIGN="center">
110
- Full-LengtherNEXT
111
- <br/>
112
- Annotation summary
113
- </h1>
114
- <h2 align="center">'
115
-
116
- html_1 = '
117
- <center>
118
- <table border=1>
119
- <tr>
120
- <th>Ortologue found</th>
121
- <th>Sequences found</th>
122
- <th>%</th>
123
- <th>Different IDs</th>
124
- <th>&gt;200 bp</th>
125
- <th>&lt;200 bp</th>
126
- <th>&gt;500 bp</th>
127
- <th>&lt;500 bp</th>
124
+ # imprimimos la tabla Database Usage --------------------------------------------------------------------------------------------
125
+ stats_file.puts html_db
126
+ db_names=["UserDB", "SwissProt", "TrEMBL"]
127
+ total_db = 0
128
+
129
+ for i in 0..db_usage.length-1 do i
130
+ total_db += db_usage[i]
131
+ stats_file.puts ' <tr>
132
+ <td align="left">'+db_names[i].to_s+'</td>
133
+ <td align="right">'+db_usage[i].to_s+'</td>
134
+ <td align="right">'+'%.2f' % (100*db_usage[i].to_f/total_seqs.to_f).to_s+' %</td>
128
135
  </tr>'
136
+ end
137
+ no_db = seqs_number2 + ncrna_total.to_i
138
+ stats_file.puts ' <tr>
139
+ <td align="left">None</td>
140
+ <td align="right">'+no_db.to_s+'</td>
141
+ <td align="right">'+'%.2f' % (100*no_db.to_f/total_seqs.to_f).to_s+' %</td>
142
+ </tr>'
143
+ total_db += no_db
144
+ stats_file.puts ' <tr>
145
+ <td align="left">Total</td>
146
+ <td align="right">'+total_db.to_s+'</td>
147
+ <td align="right">'+'%.2f' % (100*total_db.to_f/total_seqs.to_f).to_s+' %</td>
148
+ </tr>
149
+ </table>'
129
150
 
130
- html_2= ' <br/>
131
- <table border=1>
132
- <tr>
133
- <th>Status</th>
134
- <th>Total</th>
135
- <th>%</th>
136
- <th>UserDB</th>
137
- <th>SwissProt</th>
138
- <th>TrEMBL</th>
151
+ # imprimimos la tabla Report guiding assembly quality -------------------------------------------------------------
152
+ stats_file.puts html_as
153
+ stats_file.puts ' <tr>
154
+ <td align="left">Unigenes</td>
155
+ <td align="right">'+total_seqs.to_s+'</td>
156
+ <td align="right">'+'%.2f' % (100*total_seqs.to_f/total_seqs.to_f).to_s+' %</td>
139
157
  </tr>'
140
-
141
- html_3= ' </table>
142
- <br/>
143
- <table border=1>
144
- <tr>
145
- <th>Status</th>
146
- <th>Total</th>
147
- <th>%</th>
148
- <th>&gt;200 bp</th>
149
- <th>&lt;200 bp</th>
150
- <th>&gt;500 bp</th>
151
- <th>&lt;500 bp</th>
158
+ stats_file.puts ' <tr>
159
+ <td align="left">Unigenes >500pb</td>
160
+ <td align="right">'+uni_500.to_s+'</td>
161
+ <td align="right">'+'%.2f' % (100*uni_500.to_f/total_seqs.to_f).to_s+' %</td>
162
+ </tr>'
163
+ stats_file.puts ' <tr>
164
+ <td align="left">Unigenes >200pb</td>
165
+ <td align="right">'+uni_200.to_s+'</td>
166
+ <td align="right">'+'%.2f' % (100*uni_200.to_f/total_seqs.to_f).to_s+' %</td>
167
+ </tr>'
168
+ stats_file.puts ' <tr>
169
+ <td align="left">Longest unigene</td>
170
+ <td align="right">'+longest_one.to_s+'</td>
171
+ <td align="right">-</td>
172
+ </tr>'
173
+ stats_file.puts ' <tr>
174
+ <td align="left">With orthologue <sup>1</sup></td>
175
+ <td align="right">'+seqs_number1.to_s+'</td>
176
+ <td align="right">'+'%.2f' % (100*seqs_number1.to_f/total_seqs.to_f).to_s+' %</td>
177
+ </tr>'
178
+ stats_file.puts ' <tr>
179
+ <td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Different orthologue IDs</td>
180
+ <td align="right">'+seq_uniq.to_s+'</td>
181
+ <td align="right">'+'%.2f' % (100*seq_uniq.to_f/seqs_number1.to_f).to_s+' %</td>
152
182
  </tr>'
183
+ stats_file.puts ' <tr>
184
+ <td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Complete transcripts</td>
185
+ <td align="right">'+status_array[0][0].to_s+'</td>
186
+ <td align="right">'+'%.2f' % (100*status_array[0][0].to_f/seqs_number1.to_f).to_s+' %</td>
187
+ </tr>'
188
+ stats_file.puts ' <tr>
189
+ <td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Different complete transcripts</td>
190
+ <td align="right">'+complete_uniq.to_s+'</td>
191
+ <td align="right">'+'%.2f' % (100*complete_uniq.to_f/seqs_number1.to_f).to_s+' %</td>
192
+ </tr>'
193
+ stats_file.puts ' <tr>
194
+ <td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Misassembled</td>
195
+ <td align="right">'+error_1_num.to_s+'</td>
196
+ <td align="right">'+'%.2f' % (100*error_1_num.to_f/seqs_number1.to_f).to_s+' %</td>
197
+ </tr>'
198
+ stats_file.puts ' <tr>
199
+ <td align="left">Without orthologue <sup>1</sup></td>
200
+ <td align="right">'+no_db.to_s+'</td>
201
+ <td align="right">'+'%.2f' % (100*seqs_number2.to_f/total_seqs.to_f).to_s+' %</td>
202
+ </tr>'
203
+ stats_file.puts ' <tr>
204
+ <td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Coding</td>
205
+ <td align="right">'+tcode_array[0][0].to_s+'</td>
206
+ <td align="right">'+'%.2f' % (100*tcode_array[0][0].to_f/no_db.to_f).to_s+' %</td>
207
+ </tr>'
208
+ stats_file.puts ' <tr>
209
+ <td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Putative Coding</td>
210
+ <td align="right">'+tcode_array[1][0].to_s+'</td>
211
+ <td align="right">'+'%.2f' % (100*tcode_array[1][0].to_f/no_db.to_f).to_s+' %</td>
212
+ </tr>'
213
+ stats_file.puts ' <tr>
214
+ <td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Putative ncRNA</td>
215
+ <td align="right">'+ncrna_total.to_s+'</td>
216
+ <td align="right">'+'%.2f' % (100*ncrna_total.to_f/no_db.to_f).to_s+' %</td>
217
+ </tr>'
218
+ stats_file.puts ' <tr>
219
+ <td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Unknown (all)</td>
220
+ <td align="right">'+tcode_array[2][0].to_s+'</td>
221
+ <td align="right">'+'%.2f' % (100*tcode_array[2][0].to_f/no_db.to_f).to_s+' %</td>
222
+ </tr>'
223
+ stats_file.puts ' <tr>
224
+ <td align="left">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Unknown < 200bp</td>
225
+ <td align="right">'+unk_200.to_s+'</td>
226
+ <td align="right">'+'%.2f' % (100*unk_200.to_f/no_db.to_f).to_s+' %</td>
227
+ </tr>
228
+ </table>
229
+ <sup>1</sup> Percents for subclassifications of this category were calculated using this line as 100% reference.'
153
230
 
154
- html_4 = ' </body>
155
- </html>'
231
+
156
232
 
157
- return [html_head, html_1, html_2, html_3, html_4]
158
233
 
234
+ end
235
+ stats_file.puts html_end
236
+
237
+ stats_file.close
159
238
  end
160
239
 
161
240
 
162
- def stats_my_db(db_name, array)
163
241
 
164
- if (db_name !~ /^sp_/) && (db_name !~ /^tr_/)
165
- array[1] += 1
166
- elsif (db_name =~ /^sp_/)
167
- array[2] += 1
168
- elsif (db_name =~ /^tr_/)
169
- array[3] += 1
242
+
243
+ def html_code
244
+ html_head = '<html>
245
+ <head>
246
+ <title>FLN Summary</title>
247
+ </head>
248
+
249
+ <body bgcolor="#FFFFFF">
250
+ <center>
251
+ <h1 align="center">
252
+ Full-LengtherNEXT Summary
253
+ </h1>'
254
+
255
+
256
+ html_1 = '
257
+ <h2 align="center">
258
+ Status report
259
+ </h2>
260
+
261
+ <table border="2" cellspacing="0" cellpadding="2">
262
+ <tr>
263
+ <th colspan="2">Status</th>
264
+ <th>Unigenes</th>
265
+ <th>%</th>
266
+ </tr>'
267
+
268
+ html_2= '
269
+ <h2 align="center">
270
+ Unigene report
271
+ </h2>
272
+
273
+ <table border="2" cellspacing="0" cellpadding="2">
274
+ <tr>
275
+ <th></th>
276
+ <th>Unigenes</th>
277
+ <th>%</th>
278
+ </tr>'
279
+
280
+ html_3= '
281
+ <h2 align="center">
282
+ Database usage
283
+ </h2>
284
+
285
+ <table border="2" cellspacing="0" cellpadding="2">
286
+ <tr>
287
+ <th></th>
288
+ <th>Unigenes</th>
289
+ <th>%</th>
290
+ </tr>'
291
+
292
+ html_4= '
293
+ <h2 align="center">
294
+ Report guiding assembly quality
295
+ </h2>
296
+
297
+ <table border="2" cellspacing="0" cellpadding="2">
298
+ <tr>
299
+ <th></th>
300
+ <th>Unigenes</th>
301
+ <th>%</th>
302
+ </tr>'
303
+
304
+ html_5 = ' </body>
305
+ </html>'
306
+
307
+ return [html_head, html_1, html_2, html_3, html_4, html_5]
308
+
170
309
  end
171
310
 
172
- return array
173
- end
174
311
 
312
+ def annotation_stats
175
313
 
176
- def annotation_stats
314
+ seqs_number = 0
315
+ array_of_all_accs = []
316
+ array_of_complete_accs = []
317
+ error_1_num = 0
318
+ uni_500 = 0
319
+ uni_200 = 0
320
+ longest_one = 0
177
321
 
178
- seqs_number = 0
179
- array_of_all_accs = []
180
- array_of_complete_accs = []
181
- error_1_num = 0
182
-
183
- # >200, <200, >500, <500
184
- seq_length_stats = [0,0,0,0]
185
-
186
- # >200, <200, >500, <500
187
- complete_seq_length_stats = [0,0,0,0]
188
-
189
- status_array = []
190
- # total, userdb, swissprotdb, trembl, status
191
- complete = [0,0,0,0,'Complete']
192
- putative_complete = [0,0,0,0,'Putative Complete']
193
- c_terminus = [0,0,0,0,'C-terminus']
194
- putative_c_terminus = [0,0,0,0,'Putative C-terminus']
195
- n_terminus = [0,0,0,0,'N-terminus']
196
- putative_n_terminus = [0,0,0,0,'Putative N-terminus']
197
- internal = [0,0,0,0,'Internal']
198
- cod_seq = [0,0,0,0,'Misassembled']
199
-
200
-
201
- File.open('fln_results/annotations.txt').each do |line|
202
- line.chomp!
203
- (name,fasta_length,acc,db_name,status,kk1,kk2,kk3,kk4,kk5,msgs) = line.split("\t")
204
-
205
- if (line !~ /^Query_id\t/) && (!line.empty?)
206
- seqs_number += 1
207
- array_of_all_accs.push acc
208
- # -------------------------------------------------------------------------
209
- if (fasta_length.to_i >= 200)
210
- seq_length_stats[0] += 1
211
- # seqs_longer_200 += 1
212
- else
213
- seq_length_stats[1] += 1
214
- # seqs_shorter_200 += 1
215
- end
216
- if (fasta_length.to_i >= 500)
217
- seq_length_stats[2] += 1
218
- # seqs_longer_500 += 1
219
- else
220
- seq_length_stats[3] += 1
221
- # seqs_shorter_500 += 1
222
- end
223
- # -------------------------------------------------------------------------
224
- if (msgs =~ /ERROR#1/)
225
- error_1_num += 1
226
- end
227
- # -------------------------------------------------------------------------
228
- if (status == 'Complete')
229
- complete[0] += 1
230
- array_of_complete_accs.push acc
231
- complete = stats_my_db(db_name, complete)
232
-
322
+ status_array = []
323
+ # total, status
324
+ complete = [0,'Complete']
325
+ putative_complete = [0,'Putative Complete']
326
+ c_terminus = [0,'C-terminus']
327
+ putative_c_terminus = [0,'Putative C-terminus']
328
+ n_terminus = [0,'N-terminus']
329
+ putative_n_terminus = [0,'Putative N-terminus']
330
+ internal = [0,'Internal']
331
+ cod_seq = [0,'Misassembled']
332
+
333
+ #userdb, SwissProt, TrEMBL
334
+ db_usage = [0,0,0]
335
+
336
+ File.open('fln_results/dbannotated.txt').each do |line|
337
+ line.chomp!
338
+ (name,fasta_length,acc,db_name,status,kk1,kk2,kk3,kk4,kk5,msgs) = line.split("\t")
339
+
340
+ if (line !~ /^Query_id\t/) && (!line.empty?)
341
+ seqs_number += 1
342
+ if (fasta_length.to_i > longest_one)
343
+ longest_one = fasta_length.to_i
344
+ end
345
+ array_of_all_accs.push acc
346
+
347
+ if (db_name !~ /^sp_/) && (db_name !~ /^tr_/)
348
+ db_usage[0] += 1
349
+ elsif (db_name =~ /^sp_/)
350
+ db_usage[1] += 1
351
+ elsif (db_name =~ /^tr_/)
352
+ db_usage[2] += 1
353
+ end
354
+
355
+ # -------------------------------------------------------------------------
233
356
  if (fasta_length.to_i >= 200)
234
- complete_seq_length_stats[0] += 1
235
- # complete_longer_200 += 1
236
- else
237
- complete_seq_length_stats[1] += 1
238
- # complete_shorter_200 += 1
357
+ uni_200 += 1
239
358
  end
240
-
241
359
  if (fasta_length.to_i >= 500)
242
- complete_seq_length_stats[2] += 1
243
- # complete_longer_500 += 1
244
- else
245
- complete_seq_length_stats[3] += 1
246
- # complete_shorter_500 += 1
360
+ uni_500 += 1
361
+ end
362
+ # -------------------------------------------------------------------------
363
+ if (msgs =~ /ERROR#1/)
364
+ error_1_num += 1
247
365
  end
248
-
249
- elsif (status == 'Putative Complete')
250
- putative_complete[0] += 1
251
- putative_complete = stats_my_db(db_name, putative_complete)
252
- elsif (status == 'C-terminus')
253
- c_terminus[0] += 1
254
- c_terminus = stats_my_db(db_name, c_terminus)
255
- elsif (status == 'N-terminus')
256
- n_terminus[0] += 1
257
- n_terminus = stats_my_db(db_name, n_terminus)
258
- elsif (status == 'Putative C-terminus')
259
- putative_c_terminus[0] += 1
260
- putative_c_terminus = stats_my_db(db_name, putative_c_terminus)
261
- elsif (status == 'Putative N-terminus')
262
- putative_n_terminus[0] += 1
263
- putative_n_terminus = stats_my_db(db_name, putative_n_terminus)
264
- elsif (status == 'Internal')
265
- internal[0] += 1
266
- internal = stats_my_db(db_name, internal)
267
- elsif (status == 'Coding Seq')
268
- cod_seq[0] += 1
269
- cod_seq = stats_my_db(db_name, cod_seq)
366
+ # -------------------------------------------------------------------------
367
+ if (status == 'Complete')
368
+ complete[0] += 1
369
+ array_of_complete_accs.push acc
370
+
371
+ elsif (status == 'Putative Complete')
372
+ putative_complete[0] += 1
373
+ elsif (status == 'C-terminus')
374
+ c_terminus[0] += 1
375
+ elsif (status == 'N-terminus')
376
+ n_terminus[0] += 1
377
+ elsif (status == 'Putative C-terminus')
378
+ putative_c_terminus[0] += 1
379
+ elsif (status == 'Putative N-terminus')
380
+ putative_n_terminus[0] += 1
381
+ elsif (status == 'Internal')
382
+ internal[0] += 1
383
+ elsif (status == 'Misassembled')
384
+ cod_seq[0] += 1
385
+ end
386
+ # -------------------------------------------------------------------------
270
387
  end
271
- # -------------------------------------------------------------------------
388
+
272
389
  end
273
390
 
391
+ status_array = [complete, putative_complete, c_terminus, putative_c_terminus, n_terminus, putative_n_terminus, internal, cod_seq]
392
+
393
+ return [status_array, db_usage, seqs_number, error_1_num, array_of_all_accs.uniq.count, array_of_complete_accs.uniq.count, uni_500, uni_200, longest_one]
274
394
  end
275
-
276
- status_array = [complete, putative_complete, c_terminus, putative_c_terminus, n_terminus, putative_n_terminus, internal, cod_seq]
277
-
278
- return [status_array, seqs_number, error_1_num, array_of_all_accs.uniq.count, array_of_complete_accs.uniq.count, seq_length_stats, complete_seq_length_stats]
279
- end
280
395
 
281
396
 
282
- def testcode_stats
397
+ def testcode_stats
283
398
 
284
- seqs_number = 0
285
-
286
- # >200, <200, >500, <500
287
- all_tcode_stats = [0,0,0,0]
399
+ seqs_number = 0
400
+ unk_200 = 0
401
+ uni_500 = 0
402
+ uni_200 = 0
403
+ longest_one = 0
288
404
 
289
- # >200, <200, >500, <500, total, status
290
- coding_length_stats = [0,0,0,0,0,'Coding']
291
- p_coding_length_stats = [0,0,0,0,0,'Putative Coding']
292
- unknown_length_stats = [0,0,0,0,0,'Unknown']
405
+ # total, status
406
+ coding_stats = [0,'Coding']
407
+ p_coding_stats = [0,'Putative Coding']
408
+ unknown_stats = [0,'Unknown']
293
409
 
294
- File.open('fln_results/tcode_result.txt').each do |line|
295
- line.chomp!
296
- (name,fasta_length,acc,db_name,status) = line.split("\t")
410
+ File.open('fln_results/new_coding.txt').each do |line|
411
+ line.chomp!
412
+ (name,fasta_length,acc,db_name,status) = line.split("\t")
297
413
 
298
- if (line !~ /^Query_id\t/) && (!line.empty?)
299
- seqs_number += 1
300
-
301
- if (fasta_length.to_i >= 200)
302
- all_tcode_stats[0] += 1
303
-
304
- if (status == 'coding')
305
- coding_length_stats[4] += 1
306
- coding_length_stats[0] += 1
307
- elsif (status == 'putative_coding')
308
- p_coding_length_stats[4] += 1
309
- p_coding_length_stats[0] += 1
310
- elsif (status == 'unknown')
311
- unknown_length_stats[4] += 1
312
- unknown_length_stats[0] += 1
414
+ if (line !~ /^Query_id\t/) && (!line.empty?)
415
+ seqs_number += 1
416
+
417
+ if (fasta_length.to_i > longest_one)
418
+ longest_one = fasta_length.to_i
313
419
  end
314
- else
315
- all_tcode_stats[1] += 1
316
-
317
- if (status == 'coding')
318
- coding_length_stats[4] += 1
319
- coding_length_stats[1] += 1
320
- elsif (status == 'putative_coding')
321
- p_coding_length_stats[4] += 1
322
- p_coding_length_stats[1] += 1
323
- elsif (status == 'unknown')
324
- unknown_length_stats[4] += 1
325
- unknown_length_stats[1] += 1
420
+
421
+ # -------------------------------------------------------------------------
422
+ if (fasta_length.to_i >= 200)
423
+ uni_200 += 1
326
424
  end
327
- end
328
- if (fasta_length.to_i >= 500)
329
- all_tcode_stats[2] += 1
330
-
331
- if (status == 'coding')
332
- coding_length_stats[2] += 1
333
- elsif (status == 'putative_coding')
334
- p_coding_length_stats[2] += 1
335
- elsif (status == 'unknown')
336
- unknown_length_stats[2] += 1
425
+ if (fasta_length.to_i >= 500)
426
+ uni_500 += 1
337
427
  end
338
- else
339
- all_tcode_stats[3] += 1
340
-
428
+ # -------------------------------------------------------------------------
429
+
430
+ if (fasta_length.to_i < 200)
431
+ if (status == 'unknown')
432
+ unk_200 += 1
433
+ end
434
+ end
435
+
341
436
  if (status == 'coding')
342
- coding_length_stats[3] += 1
437
+ coding_stats[0] += 1
343
438
  elsif (status == 'putative_coding')
344
- p_coding_length_stats[3] += 1
439
+ p_coding_stats[0] += 1
345
440
  elsif (status == 'unknown')
346
- unknown_length_stats[3] += 1
441
+ unknown_stats[0] += 1
347
442
  end
443
+
348
444
  end
349
-
445
+
350
446
  end
351
447
 
448
+ status_array = [coding_stats, p_coding_stats, unknown_stats]
449
+
450
+ return [status_array, seqs_number, unk_200, uni_500, uni_200, longest_one]
352
451
  end
353
-
354
- status_array = [coding_length_stats, p_coding_length_stats, unknown_length_stats]
355
452
 
356
- return [status_array, seqs_number, all_tcode_stats, coding_length_stats, unknown_length_stats]
357
- end
358
453
 
359
- def ncrna_stats
454
+ def ncrna_stats
360
455
 
361
- # >200, <200, >500, <500, total
362
- ncrna_array = [0,0,0,0,0]
456
+ uni_500 = 0
457
+ uni_200 = 0
458
+ nc_total = 0
459
+ longest_one = 0
363
460
 
364
- File.open('fln_results/nc_rna.txt').each do |line|
365
- line.chomp!
366
- (name,fasta_length,acc,db_name,status) = line.split("\t")
461
+ File.open('fln_results/nc_rnas.txt').each do |line|
462
+ line.chomp!
463
+ (name,fasta_length,acc,db_name,status) = line.split("\t")
464
+
465
+ if (status == 'Putative ncRNA')
466
+
467
+ if (fasta_length.to_i > longest_one)
468
+ longest_one = fasta_length.to_i
469
+ end
470
+ # -------------------------------------------------------------------------
471
+ if (fasta_length.to_i >= 200)
472
+ uni_200 += 1
473
+ end
474
+ if (fasta_length.to_i >= 500)
475
+ uni_500 += 1
476
+ end
477
+ # -------------------------------------------------------------------------
478
+
479
+ nc_total += 1
367
480
 
368
- if (status == 'Putative ncRNA')
369
- ncrna_array[4] += 1
370
-
371
- if (fasta_length.to_i >= 200)
372
- ncrna_array[0] += 1
373
- else
374
- ncrna_array[1] += 1
375
- end
376
- if (fasta_length.to_i >= 500)
377
- ncrna_array[2] += 1
378
- else
379
- ncrna_array[3] += 1
380
481
  end
381
482
  end
483
+
484
+ return [nc_total, uni_500, uni_200, longest_one]
382
485
  end
383
-
384
- return ncrna_array
385
- end
386
-
486
+
487
+
387
488
  end
@@ -22,16 +22,16 @@ class MyWorkerManager < ScbiMapreduce::WorkManager
22
22
  @@chunk_size=chunk_size
23
23
  @@options = options
24
24
 
25
- @@annotation_file = File.open("fln_results/annotations.txt", 'w')
25
+ @@annotation_file = File.open("fln_results/dbannotated.txt", 'w')
26
26
  @@annotation_file.puts file_head
27
27
 
28
28
  @@alignment_file = File.open("fln_results/alignments.txt", 'w')
29
29
  @@prot_file = File.open("fln_results/proteins.fasta", 'w')
30
30
  @@nts_file = File.open("fln_results/nt_seq.txt", 'w')
31
- @@tcode_file=File.open("fln_results/tcode_result.txt", 'w')
31
+ @@tcode_file=File.open("fln_results/new_coding.txt", 'w')
32
32
  @@tcode_file.puts file_head
33
33
 
34
- @@nc_rna_file = File.open("fln_results/nc_rna.txt", 'w')
34
+ @@nc_rna_file = File.open("fln_results/nc_rnas.txt", 'w')
35
35
  @@nc_rna_file.puts file_head
36
36
 
37
37
  # @@error_fasta_file = File.open("fln_results/error_seqs.fasta", 'w')
@@ -11,7 +11,7 @@ module NcRna
11
11
  q=blast_query
12
12
 
13
13
  if (!q.hits[0].nil?) # There is match in blast.
14
- nc_annotations = "#{q.query_def}\t#{seq.seq_fasta.length}\t#{q.hits[0].acc}\tncRNA\tPutative ncRNA\t\t#{q.hits[0].e_val}\t#{q.hits[0].ident}\t\t\t\t#{q.hits[0].q_frame}\t#{q.hits[0].q_beg}\t#{q.hits[0].q_end}\t#{q.hits[0].s_beg.to_i}\t#{q.hits[0].s_end.to_i}\t#{q.hits[0].definition}\t"
14
+ nc_annotations = "#{q.query_def}\t#{seq.fasta_length}\t#{q.hits[0].acc}\tncRNA\tPutative ncRNA\t\t#{q.hits[0].e_val}\t#{q.hits[0].ident}\t\t\t\t#{q.hits[0].q_frame}\t#{q.hits[0].q_beg}\t#{q.hits[0].q_end}\t#{q.hits[0].s_beg.to_i}\t#{q.hits[0].s_end.to_i}\t#{q.hits[0].definition}\t"
15
15
  seq.annotate(:ncrna,nc_annotations,true)
16
16
  else
17
17
  unknown_annot = seq.get_annotations(:tcode_unknown).first
@@ -3,11 +3,13 @@ require 'orf'
3
3
 
4
4
  class Sequence
5
5
 
6
- attr_accessor :seq_name,:seq_fasta,:seq_qual,:orfs,:sec_desc
6
+ attr_accessor :seq_name,:seq_fasta,:seq_qual,:orfs,:sec_desc,:fasta_length
7
7
 
8
8
  def initialize(seq_name,seq_fasta,seq_qual='')
9
+ fasta_ori = seq_fasta.dup
9
10
  @seq_name=seq_name
10
11
  @seq_fasta = seq_fasta
12
+ @fasta_length = fasta_ori.length
11
13
  change_degenerated_nt!
12
14
  @seq_qual = ''
13
15
  @sec_desc = ''
@@ -18,7 +18,7 @@ class TestCode
18
18
  protein = ''
19
19
  p_long = 0
20
20
 
21
- if (seq.seq_fasta.length < 200)
21
+ if (seq.fasta_length < 200)
22
22
  ref_name = seq.seq_name
23
23
  ref_code = 0.0
24
24
  ref_frame = 0
@@ -26,7 +26,7 @@ class TestCode
26
26
  ref_orf = ''
27
27
  ref_msgs = 'Sequence length < 200 nt'
28
28
 
29
- seq.annotate(:tcode_unknown,"#{ref_name}\t#{seq.seq_fasta.length}\t\ttestcode\t#{ref_status}\t#{ref_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
29
+ seq.annotate(:tcode_unknown,"#{ref_name}\t#{seq.fasta_length}\t\ttestcode\t#{ref_status}\t#{ref_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
30
30
  # seq.annotate(:tcode,"#{ref_name}\t#{seq.seq_fasta.length}\t\ttestcode\t#{ref_status}\t#{ref_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
31
31
  else
32
32
 
@@ -45,9 +45,9 @@ class TestCode
45
45
  # see add_region filter
46
46
  (name,t_code,status,ref_start,ref_end,ref_frame,orf,ref_msgs,stop_before_start,more_than_one_frame) = t_code(seq)
47
47
  if (status == :unknown)
48
- seq.annotate(:tcode_unknown,"#{name}\t#{seq.seq_fasta.length}\t\ttestcode\t#{status}\t#{t_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
48
+ seq.annotate(:tcode_unknown,"#{name}\t#{seq.fasta_length}\t\ttestcode\t#{status}\t#{t_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
49
49
  else
50
- seq.annotate(:tcode,"#{name}\t#{seq.seq_fasta.length}\t\ttestcode\t#{status}\t#{t_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
50
+ seq.annotate(:tcode,"#{name}\t#{seq.fasta_length}\t\ttestcode\t#{status}\t#{t_code}\t\t\t\t\t#{ref_msgs}\t#{ref_frame}\t#{ref_start}\t#{ref_end}\t\t\t\t",true)
51
51
  end
52
52
 
53
53
  # if (ref_msgs.nil?)
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: full_lengther_next
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.5
5
+ version: 0.0.6
6
6
  platform: ruby
7
7
  authors:
8
8
  - Noe Fernandez & Dario Guerrero
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2012-03-09 00:00:00 Z
13
+ date: 2012-04-16 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: xml-simple