big_simon 0.2.0 → 0.2.1
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -1
- data/Gemfile.lock +1 -1
- data/exe/big_simon +4 -2
- data/lib/big_simon/runners.rb +21 -4
- data/lib/big_simon/utils.rb +5 -1
- data/lib/big_simon/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 624fd8e70d43c1a556b9c846e599dd928cd0c081
|
4
|
+
data.tar.gz: 456b7aec02cd33c6d0145d144fb83504daa0ef07
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 431d7b1f0f44af912b42adf458df8a15ff9b60139b5c6c27318e442b00a55f94b289b59e4518f9005b52d5b26de712ea2bd142c8a152558a98c47e4ede7a2a82
|
7
|
+
data.tar.gz: 6282c6ce73c86f4276934760c4b9decf2d758d962b5c43b7d84048e420fda41eee97d9c0fe1a681479f93daee16a1545f79fee56aba41e0fd0d06eec0debf9ab
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
data/exe/big_simon
CHANGED
@@ -73,8 +73,10 @@ tmpdir_host = File.join tmpdir, "host"
|
|
73
73
|
mean_scaled_scores_fname = File.join outdir, "scores_scaled.mean.txt"
|
74
74
|
|
75
75
|
|
76
|
-
name_map_virus, all_ids_virus = BigSimon::Utils.set_up_tmp_dirs virus_fnames, tmpdir_virus, "virus"
|
77
|
-
name_map_host, all_ids_host = BigSimon::Utils.set_up_tmp_dirs host_fnames, tmpdir_host, "host"
|
76
|
+
name_map_virus, all_ids_virus, vir_seq_lengths = BigSimon::Utils.set_up_tmp_dirs virus_fnames, tmpdir_virus, "virus"
|
77
|
+
name_map_host, all_ids_host, host_seq_lengths = BigSimon::Utils.set_up_tmp_dirs host_fnames, tmpdir_host, "host"
|
78
|
+
|
79
|
+
all_seq_lengths = vir_seq_lengths.merge host_seq_lengths
|
78
80
|
|
79
81
|
wish_outf = BigSimon::Runners.wish BigSimon::WISH, tmpdir_virus, tmpdir_host, tmpdir, threads
|
80
82
|
vhm_outf = BigSimon::Runners.vir_host_matcher BigSimon::VHM, tmpdir_virus, tmpdir_host, tmpdir
|
data/lib/big_simon/runners.rb
CHANGED
@@ -6,7 +6,7 @@ module BigSimon
|
|
6
6
|
class Runners
|
7
7
|
|
8
8
|
# @note To match the other things, you'd like them to be key'd on the file name.
|
9
|
-
def self.mummer exe, vir_dir, host_dir, outdir, threads
|
9
|
+
def self.mummer exe, vir_dir, host_dir, outdir, threads, all_seq_lengths
|
10
10
|
klass = Class.new.extend Rya::CoreExtensions::Math
|
11
11
|
FileUtils.mkdir_p outdir
|
12
12
|
|
@@ -80,10 +80,17 @@ module BigSimon
|
|
80
80
|
ary = line.strip.split " "
|
81
81
|
|
82
82
|
host = ary[0].sub(/___reverse$/, "").strip
|
83
|
-
score = ary[3].to_i
|
84
83
|
|
85
84
|
Rya::AbortIf.assert hit_table[virus].has_key?(host)
|
86
85
|
|
86
|
+
Rya::AbortIf.assert all_seq_lengths[virus]
|
87
|
+
Rya::AbortIf.assert all_seq_lengths[host]
|
88
|
+
|
89
|
+
combined_seq_length = all_seq_lengths[virus] + all_seq_lengths[host]
|
90
|
+
|
91
|
+
score = ary[3].to_i / combined_seq_length * 1000
|
92
|
+
|
93
|
+
|
87
94
|
# unless hit_table[virus].has_key? host
|
88
95
|
# hit_table[virus][host] = -1
|
89
96
|
# end
|
@@ -213,7 +220,7 @@ module BigSimon
|
|
213
220
|
# @note Assumes that the files end with *.fa
|
214
221
|
# @note Assumes that the file names match the IDs. This SHOULD be taken care of by the big_simon program.
|
215
222
|
# @todo assert that fname thing matches sequence ID name.
|
216
|
-
def self.homology vir_dir, host_dir, outdir, threads
|
223
|
+
def self.homology vir_dir, host_dir, outdir, threads, all_seq_lengths
|
217
224
|
FileUtils.mkdir_p outdir
|
218
225
|
|
219
226
|
host_orfs = File.join outdir, "host_orfs.homology"
|
@@ -272,6 +279,13 @@ module BigSimon
|
|
272
279
|
|
273
280
|
Rya::AbortIf.assert blast_table.has_key?(vir_id), "blast_table: got #{vir_id} should have been #{vir_simple_fname}"
|
274
281
|
|
282
|
+
Rya::AbortIf.assert all_seq_lengths[vir_id]
|
283
|
+
Rya::AbortIf.assert all_seq_lengths[host_id]
|
284
|
+
|
285
|
+
combined_seq_length = all_seq_lengths[vir_id] + all_seq_lengths[host_id]
|
286
|
+
score = score / combined_seq_length.to_f * 1000
|
287
|
+
|
288
|
+
|
275
289
|
blast_table[vir_id][host_id] += score
|
276
290
|
end
|
277
291
|
|
@@ -309,7 +323,10 @@ module BigSimon
|
|
309
323
|
collated_blast_table[vir_id] = []
|
310
324
|
|
311
325
|
host_simple_names.each do |host_id|
|
312
|
-
|
326
|
+
|
327
|
+
|
328
|
+
combined_seq_length = all_seq_lengths[vir_id] + all_seq_lengths[host_id]
|
329
|
+
scaled_score = klass.scale host_scores[host_id].to_f, 0, max_score, 1, 0
|
313
330
|
|
314
331
|
host_table = { host: host_id, score: host_scores[host_id], scaled_score: scaled_score }
|
315
332
|
collated_blast_table[vir_id] << host_table
|
data/lib/big_simon/utils.rb
CHANGED
@@ -74,6 +74,8 @@ draw.heatmap("#{in_fname}", "#{out_fname}")
|
|
74
74
|
def self.set_up_tmp_dirs fastas, tmpdir, which
|
75
75
|
Object::FileUtils.mkdir_p tmpdir
|
76
76
|
|
77
|
+
seq_lengths = {}
|
78
|
+
|
77
79
|
name_map = {}
|
78
80
|
all_ids = Set.new
|
79
81
|
|
@@ -92,6 +94,8 @@ draw.heatmap("#{in_fname}", "#{out_fname}")
|
|
92
94
|
|
93
95
|
outfname = File.join tmpdir, "#{new_id}.fa"
|
94
96
|
|
97
|
+
seq_lengths[new_id] = rec.seq.length
|
98
|
+
|
95
99
|
File.open(outfname, "w") do |f|
|
96
100
|
f.puts ">#{new_id}\n#{rec.seq}" # TODO HERE
|
97
101
|
end
|
@@ -99,7 +103,7 @@ draw.heatmap("#{in_fname}", "#{out_fname}")
|
|
99
103
|
end
|
100
104
|
end
|
101
105
|
|
102
|
-
[name_map, all_ids]
|
106
|
+
[name_map, all_ids, seq_lengths]
|
103
107
|
end
|
104
108
|
|
105
109
|
def self.strip_suffix fname
|
data/lib/big_simon/version.rb
CHANGED