big_simon 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 95ca94aff107e2545dd26ab671c2ed125cb89164
4
- data.tar.gz: 8cb9098b4462d6702ecd692e7261abb9c1c7c26a
3
+ metadata.gz: 624fd8e70d43c1a556b9c846e599dd928cd0c081
4
+ data.tar.gz: 456b7aec02cd33c6d0145d144fb83504daa0ef07
5
5
  SHA512:
6
- metadata.gz: be890e07e1ca248da8e5b80637b25d00c20185d4710229cc9c72d564abc930d4eb734ad53f42842a479827e64205ee1f53f5691b6527d03b9cc41b9b8656ae85
7
- data.tar.gz: ac30f597de91303e3b8040ecfde48f98d56c343aadfa3a1085d77fbb55692fb055569734470812144d716e73ee8c94265c732a07462bd8490c2b937572961062
6
+ metadata.gz: 431d7b1f0f44af912b42adf458df8a15ff9b60139b5c6c27318e442b00a55f94b289b59e4518f9005b52d5b26de712ea2bd142c8a152558a98c47e4ede7a2a82
7
+ data.tar.gz: 6282c6ce73c86f4276934760c4b9decf2d758d962b5c43b7d84048e420fda41eee97d9c0fe1a681479f93daee16a1545f79fee56aba41e0fd0d06eec0debf9ab
data/.gitignore CHANGED
@@ -38,4 +38,6 @@ spec/test_files/homology_files/output/*homology*
38
38
 
39
39
  pec/test_files/mummer_files/output/mummer_out.txt
40
40
 
41
- tmp.prodigal.stdin.5129
41
+ tmp.prodigal.stdin.5129
42
+
43
+ spec/test_files/mummer_files/output/
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- big_simon (0.2.0)
4
+ big_simon (0.2.1)
5
5
  parallel
6
6
  parse_fasta (~> 2.5, >= 2.5.2)
7
7
  rya (~> 0.4.0)
data/exe/big_simon CHANGED
@@ -73,8 +73,10 @@ tmpdir_host = File.join tmpdir, "host"
73
73
  mean_scaled_scores_fname = File.join outdir, "scores_scaled.mean.txt"
74
74
 
75
75
 
76
- name_map_virus, all_ids_virus = BigSimon::Utils.set_up_tmp_dirs virus_fnames, tmpdir_virus, "virus"
77
- name_map_host, all_ids_host = BigSimon::Utils.set_up_tmp_dirs host_fnames, tmpdir_host, "host"
76
+ name_map_virus, all_ids_virus, vir_seq_lengths = BigSimon::Utils.set_up_tmp_dirs virus_fnames, tmpdir_virus, "virus"
77
+ name_map_host, all_ids_host, host_seq_lengths = BigSimon::Utils.set_up_tmp_dirs host_fnames, tmpdir_host, "host"
78
+
79
+ all_seq_lengths = vir_seq_lengths.merge host_seq_lengths
78
80
 
79
81
  wish_outf = BigSimon::Runners.wish BigSimon::WISH, tmpdir_virus, tmpdir_host, tmpdir, threads
80
82
  vhm_outf = BigSimon::Runners.vir_host_matcher BigSimon::VHM, tmpdir_virus, tmpdir_host, tmpdir
@@ -6,7 +6,7 @@ module BigSimon
6
6
  class Runners
7
7
 
8
8
  # @note To match the other things, you'd like them to be key'd on the file name.
9
- def self.mummer exe, vir_dir, host_dir, outdir, threads
9
+ def self.mummer exe, vir_dir, host_dir, outdir, threads, all_seq_lengths
10
10
  klass = Class.new.extend Rya::CoreExtensions::Math
11
11
  FileUtils.mkdir_p outdir
12
12
 
@@ -80,10 +80,17 @@ module BigSimon
80
80
  ary = line.strip.split " "
81
81
 
82
82
  host = ary[0].sub(/___reverse$/, "").strip
83
- score = ary[3].to_i
84
83
 
85
84
  Rya::AbortIf.assert hit_table[virus].has_key?(host)
86
85
 
86
+ Rya::AbortIf.assert all_seq_lengths[virus]
87
+ Rya::AbortIf.assert all_seq_lengths[host]
88
+
89
+ combined_seq_length = all_seq_lengths[virus] + all_seq_lengths[host]
90
+
91
+ score = ary[3].to_i / combined_seq_length * 1000
92
+
93
+
87
94
  # unless hit_table[virus].has_key? host
88
95
  # hit_table[virus][host] = -1
89
96
  # end
@@ -213,7 +220,7 @@ module BigSimon
213
220
  # @note Assumes that the files end with *.fa
214
221
  # @note Assumes that the file names match the IDs. This SHOULD be taken care of by the big_simon program.
215
222
  # @todo assert that fname thing matches sequence ID name.
216
- def self.homology vir_dir, host_dir, outdir, threads
223
+ def self.homology vir_dir, host_dir, outdir, threads, all_seq_lengths
217
224
  FileUtils.mkdir_p outdir
218
225
 
219
226
  host_orfs = File.join outdir, "host_orfs.homology"
@@ -272,6 +279,13 @@ module BigSimon
272
279
 
273
280
  Rya::AbortIf.assert blast_table.has_key?(vir_id), "blast_table: got #{vir_id} should have been #{vir_simple_fname}"
274
281
 
282
+ Rya::AbortIf.assert all_seq_lengths[vir_id]
283
+ Rya::AbortIf.assert all_seq_lengths[host_id]
284
+
285
+ combined_seq_length = all_seq_lengths[vir_id] + all_seq_lengths[host_id]
286
+ score = score / combined_seq_length.to_f * 1000
287
+
288
+
275
289
  blast_table[vir_id][host_id] += score
276
290
  end
277
291
 
@@ -309,7 +323,10 @@ module BigSimon
309
323
  collated_blast_table[vir_id] = []
310
324
 
311
325
  host_simple_names.each do |host_id|
312
- scaled_score = klass.scale host_scores[host_id], 0, max_score, 1, 0
326
+
327
+
328
+ combined_seq_length = all_seq_lengths[vir_id] + all_seq_lengths[host_id]
329
+ scaled_score = klass.scale host_scores[host_id].to_f, 0, max_score, 1, 0
313
330
 
314
331
  host_table = { host: host_id, score: host_scores[host_id], scaled_score: scaled_score }
315
332
  collated_blast_table[vir_id] << host_table
@@ -74,6 +74,8 @@ draw.heatmap("#{in_fname}", "#{out_fname}")
74
74
  def self.set_up_tmp_dirs fastas, tmpdir, which
75
75
  Object::FileUtils.mkdir_p tmpdir
76
76
 
77
+ seq_lengths = {}
78
+
77
79
  name_map = {}
78
80
  all_ids = Set.new
79
81
 
@@ -92,6 +94,8 @@ draw.heatmap("#{in_fname}", "#{out_fname}")
92
94
 
93
95
  outfname = File.join tmpdir, "#{new_id}.fa"
94
96
 
97
+ seq_lengths[new_id] = rec.seq.length
98
+
95
99
  File.open(outfname, "w") do |f|
96
100
  f.puts ">#{new_id}\n#{rec.seq}" # TODO HERE
97
101
  end
@@ -99,7 +103,7 @@ draw.heatmap("#{in_fname}", "#{out_fname}")
99
103
  end
100
104
  end
101
105
 
102
- [name_map, all_ids]
106
+ [name_map, all_ids, seq_lengths]
103
107
  end
104
108
 
105
109
  def self.strip_suffix fname
@@ -1,5 +1,5 @@
1
1
  module BigSimon
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
 
4
4
  COPYRIGHT = "2018 Ryan Moore"
5
5
  CONTACT = "moorer@udel.edu"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: big_simon
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore