big_simon 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -1
- data/Gemfile.lock +1 -1
- data/exe/big_simon +4 -2
- data/lib/big_simon/runners.rb +21 -4
- data/lib/big_simon/utils.rb +5 -1
- data/lib/big_simon/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 624fd8e70d43c1a556b9c846e599dd928cd0c081
|
4
|
+
data.tar.gz: 456b7aec02cd33c6d0145d144fb83504daa0ef07
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 431d7b1f0f44af912b42adf458df8a15ff9b60139b5c6c27318e442b00a55f94b289b59e4518f9005b52d5b26de712ea2bd142c8a152558a98c47e4ede7a2a82
|
7
|
+
data.tar.gz: 6282c6ce73c86f4276934760c4b9decf2d758d962b5c43b7d84048e420fda41eee97d9c0fe1a681479f93daee16a1545f79fee56aba41e0fd0d06eec0debf9ab
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
data/exe/big_simon
CHANGED
@@ -73,8 +73,10 @@ tmpdir_host = File.join tmpdir, "host"
|
|
73
73
|
mean_scaled_scores_fname = File.join outdir, "scores_scaled.mean.txt"
|
74
74
|
|
75
75
|
|
76
|
-
name_map_virus, all_ids_virus = BigSimon::Utils.set_up_tmp_dirs virus_fnames, tmpdir_virus, "virus"
|
77
|
-
name_map_host, all_ids_host = BigSimon::Utils.set_up_tmp_dirs host_fnames, tmpdir_host, "host"
|
76
|
+
name_map_virus, all_ids_virus, vir_seq_lengths = BigSimon::Utils.set_up_tmp_dirs virus_fnames, tmpdir_virus, "virus"
|
77
|
+
name_map_host, all_ids_host, host_seq_lengths = BigSimon::Utils.set_up_tmp_dirs host_fnames, tmpdir_host, "host"
|
78
|
+
|
79
|
+
all_seq_lengths = vir_seq_lengths.merge host_seq_lengths
|
78
80
|
|
79
81
|
wish_outf = BigSimon::Runners.wish BigSimon::WISH, tmpdir_virus, tmpdir_host, tmpdir, threads
|
80
82
|
vhm_outf = BigSimon::Runners.vir_host_matcher BigSimon::VHM, tmpdir_virus, tmpdir_host, tmpdir
|
data/lib/big_simon/runners.rb
CHANGED
@@ -6,7 +6,7 @@ module BigSimon
|
|
6
6
|
class Runners
|
7
7
|
|
8
8
|
# @note To match the other things, you'd like them to be key'd on the file name.
|
9
|
-
def self.mummer exe, vir_dir, host_dir, outdir, threads
|
9
|
+
def self.mummer exe, vir_dir, host_dir, outdir, threads, all_seq_lengths
|
10
10
|
klass = Class.new.extend Rya::CoreExtensions::Math
|
11
11
|
FileUtils.mkdir_p outdir
|
12
12
|
|
@@ -80,10 +80,17 @@ module BigSimon
|
|
80
80
|
ary = line.strip.split " "
|
81
81
|
|
82
82
|
host = ary[0].sub(/___reverse$/, "").strip
|
83
|
-
score = ary[3].to_i
|
84
83
|
|
85
84
|
Rya::AbortIf.assert hit_table[virus].has_key?(host)
|
86
85
|
|
86
|
+
Rya::AbortIf.assert all_seq_lengths[virus]
|
87
|
+
Rya::AbortIf.assert all_seq_lengths[host]
|
88
|
+
|
89
|
+
combined_seq_length = all_seq_lengths[virus] + all_seq_lengths[host]
|
90
|
+
|
91
|
+
score = ary[3].to_i / combined_seq_length * 1000
|
92
|
+
|
93
|
+
|
87
94
|
# unless hit_table[virus].has_key? host
|
88
95
|
# hit_table[virus][host] = -1
|
89
96
|
# end
|
@@ -213,7 +220,7 @@ module BigSimon
|
|
213
220
|
# @note Assumes that the files end with *.fa
|
214
221
|
# @note Assumes that the file names match the IDs. This SHOULD be taken care of by the big_simon program.
|
215
222
|
# @todo assert that fname thing matches sequence ID name.
|
216
|
-
def self.homology vir_dir, host_dir, outdir, threads
|
223
|
+
def self.homology vir_dir, host_dir, outdir, threads, all_seq_lengths
|
217
224
|
FileUtils.mkdir_p outdir
|
218
225
|
|
219
226
|
host_orfs = File.join outdir, "host_orfs.homology"
|
@@ -272,6 +279,13 @@ module BigSimon
|
|
272
279
|
|
273
280
|
Rya::AbortIf.assert blast_table.has_key?(vir_id), "blast_table: got #{vir_id} should have been #{vir_simple_fname}"
|
274
281
|
|
282
|
+
Rya::AbortIf.assert all_seq_lengths[vir_id]
|
283
|
+
Rya::AbortIf.assert all_seq_lengths[host_id]
|
284
|
+
|
285
|
+
combined_seq_length = all_seq_lengths[vir_id] + all_seq_lengths[host_id]
|
286
|
+
score = score / combined_seq_length.to_f * 1000
|
287
|
+
|
288
|
+
|
275
289
|
blast_table[vir_id][host_id] += score
|
276
290
|
end
|
277
291
|
|
@@ -309,7 +323,10 @@ module BigSimon
|
|
309
323
|
collated_blast_table[vir_id] = []
|
310
324
|
|
311
325
|
host_simple_names.each do |host_id|
|
312
|
-
|
326
|
+
|
327
|
+
|
328
|
+
combined_seq_length = all_seq_lengths[vir_id] + all_seq_lengths[host_id]
|
329
|
+
scaled_score = klass.scale host_scores[host_id].to_f, 0, max_score, 1, 0
|
313
330
|
|
314
331
|
host_table = { host: host_id, score: host_scores[host_id], scaled_score: scaled_score }
|
315
332
|
collated_blast_table[vir_id] << host_table
|
data/lib/big_simon/utils.rb
CHANGED
@@ -74,6 +74,8 @@ draw.heatmap("#{in_fname}", "#{out_fname}")
|
|
74
74
|
def self.set_up_tmp_dirs fastas, tmpdir, which
|
75
75
|
Object::FileUtils.mkdir_p tmpdir
|
76
76
|
|
77
|
+
seq_lengths = {}
|
78
|
+
|
77
79
|
name_map = {}
|
78
80
|
all_ids = Set.new
|
79
81
|
|
@@ -92,6 +94,8 @@ draw.heatmap("#{in_fname}", "#{out_fname}")
|
|
92
94
|
|
93
95
|
outfname = File.join tmpdir, "#{new_id}.fa"
|
94
96
|
|
97
|
+
seq_lengths[new_id] = rec.seq.length
|
98
|
+
|
95
99
|
File.open(outfname, "w") do |f|
|
96
100
|
f.puts ">#{new_id}\n#{rec.seq}" # TODO HERE
|
97
101
|
end
|
@@ -99,7 +103,7 @@ draw.heatmap("#{in_fname}", "#{out_fname}")
|
|
99
103
|
end
|
100
104
|
end
|
101
105
|
|
102
|
-
[name_map, all_ids]
|
106
|
+
[name_map, all_ids, seq_lengths]
|
103
107
|
end
|
104
108
|
|
105
109
|
def self.strip_suffix fname
|
data/lib/big_simon/version.rb
CHANGED