big_simon 0.2.0 → 0.2.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 95ca94aff107e2545dd26ab671c2ed125cb89164
4
- data.tar.gz: 8cb9098b4462d6702ecd692e7261abb9c1c7c26a
3
+ metadata.gz: 624fd8e70d43c1a556b9c846e599dd928cd0c081
4
+ data.tar.gz: 456b7aec02cd33c6d0145d144fb83504daa0ef07
5
5
  SHA512:
6
- metadata.gz: be890e07e1ca248da8e5b80637b25d00c20185d4710229cc9c72d564abc930d4eb734ad53f42842a479827e64205ee1f53f5691b6527d03b9cc41b9b8656ae85
7
- data.tar.gz: ac30f597de91303e3b8040ecfde48f98d56c343aadfa3a1085d77fbb55692fb055569734470812144d716e73ee8c94265c732a07462bd8490c2b937572961062
6
+ metadata.gz: 431d7b1f0f44af912b42adf458df8a15ff9b60139b5c6c27318e442b00a55f94b289b59e4518f9005b52d5b26de712ea2bd142c8a152558a98c47e4ede7a2a82
7
+ data.tar.gz: 6282c6ce73c86f4276934760c4b9decf2d758d962b5c43b7d84048e420fda41eee97d9c0fe1a681479f93daee16a1545f79fee56aba41e0fd0d06eec0debf9ab
data/.gitignore CHANGED
@@ -38,4 +38,6 @@ spec/test_files/homology_files/output/*homology*
38
38
 
39
39
  pec/test_files/mummer_files/output/mummer_out.txt
40
40
 
41
- tmp.prodigal.stdin.5129
41
+ tmp.prodigal.stdin.5129
42
+
43
+ spec/test_files/mummer_files/output/
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- big_simon (0.2.0)
4
+ big_simon (0.2.1)
5
5
  parallel
6
6
  parse_fasta (~> 2.5, >= 2.5.2)
7
7
  rya (~> 0.4.0)
data/exe/big_simon CHANGED
@@ -73,8 +73,10 @@ tmpdir_host = File.join tmpdir, "host"
73
73
  mean_scaled_scores_fname = File.join outdir, "scores_scaled.mean.txt"
74
74
 
75
75
 
76
- name_map_virus, all_ids_virus = BigSimon::Utils.set_up_tmp_dirs virus_fnames, tmpdir_virus, "virus"
77
- name_map_host, all_ids_host = BigSimon::Utils.set_up_tmp_dirs host_fnames, tmpdir_host, "host"
76
+ name_map_virus, all_ids_virus, vir_seq_lengths = BigSimon::Utils.set_up_tmp_dirs virus_fnames, tmpdir_virus, "virus"
77
+ name_map_host, all_ids_host, host_seq_lengths = BigSimon::Utils.set_up_tmp_dirs host_fnames, tmpdir_host, "host"
78
+
79
+ all_seq_lengths = vir_seq_lengths.merge host_seq_lengths
78
80
 
79
81
  wish_outf = BigSimon::Runners.wish BigSimon::WISH, tmpdir_virus, tmpdir_host, tmpdir, threads
80
82
  vhm_outf = BigSimon::Runners.vir_host_matcher BigSimon::VHM, tmpdir_virus, tmpdir_host, tmpdir
@@ -6,7 +6,7 @@ module BigSimon
6
6
  class Runners
7
7
 
8
8
  # @note To match the other things, you'd like them to be key'd on the file name.
9
- def self.mummer exe, vir_dir, host_dir, outdir, threads
9
+ def self.mummer exe, vir_dir, host_dir, outdir, threads, all_seq_lengths
10
10
  klass = Class.new.extend Rya::CoreExtensions::Math
11
11
  FileUtils.mkdir_p outdir
12
12
 
@@ -80,10 +80,17 @@ module BigSimon
80
80
  ary = line.strip.split " "
81
81
 
82
82
  host = ary[0].sub(/___reverse$/, "").strip
83
- score = ary[3].to_i
84
83
 
85
84
  Rya::AbortIf.assert hit_table[virus].has_key?(host)
86
85
 
86
+ Rya::AbortIf.assert all_seq_lengths[virus]
87
+ Rya::AbortIf.assert all_seq_lengths[host]
88
+
89
+ combined_seq_length = all_seq_lengths[virus] + all_seq_lengths[host]
90
+
91
+ score = ary[3].to_i / combined_seq_length * 1000
92
+
93
+
87
94
  # unless hit_table[virus].has_key? host
88
95
  # hit_table[virus][host] = -1
89
96
  # end
@@ -213,7 +220,7 @@ module BigSimon
213
220
  # @note Assumes that the files end with *.fa
214
221
  # @note Assumes that the file names match the IDs. This SHOULD be taken care of by the big_simon program.
215
222
  # @todo assert that fname thing matches sequence ID name.
216
- def self.homology vir_dir, host_dir, outdir, threads
223
+ def self.homology vir_dir, host_dir, outdir, threads, all_seq_lengths
217
224
  FileUtils.mkdir_p outdir
218
225
 
219
226
  host_orfs = File.join outdir, "host_orfs.homology"
@@ -272,6 +279,13 @@ module BigSimon
272
279
 
273
280
  Rya::AbortIf.assert blast_table.has_key?(vir_id), "blast_table: got #{vir_id} should have been #{vir_simple_fname}"
274
281
 
282
+ Rya::AbortIf.assert all_seq_lengths[vir_id]
283
+ Rya::AbortIf.assert all_seq_lengths[host_id]
284
+
285
+ combined_seq_length = all_seq_lengths[vir_id] + all_seq_lengths[host_id]
286
+ score = score / combined_seq_length.to_f * 1000
287
+
288
+
275
289
  blast_table[vir_id][host_id] += score
276
290
  end
277
291
 
@@ -309,7 +323,10 @@ module BigSimon
309
323
  collated_blast_table[vir_id] = []
310
324
 
311
325
  host_simple_names.each do |host_id|
312
- scaled_score = klass.scale host_scores[host_id], 0, max_score, 1, 0
326
+
327
+
328
+ combined_seq_length = all_seq_lengths[vir_id] + all_seq_lengths[host_id]
329
+ scaled_score = klass.scale host_scores[host_id].to_f, 0, max_score, 1, 0
313
330
 
314
331
  host_table = { host: host_id, score: host_scores[host_id], scaled_score: scaled_score }
315
332
  collated_blast_table[vir_id] << host_table
@@ -74,6 +74,8 @@ draw.heatmap("#{in_fname}", "#{out_fname}")
74
74
  def self.set_up_tmp_dirs fastas, tmpdir, which
75
75
  Object::FileUtils.mkdir_p tmpdir
76
76
 
77
+ seq_lengths = {}
78
+
77
79
  name_map = {}
78
80
  all_ids = Set.new
79
81
 
@@ -92,6 +94,8 @@ draw.heatmap("#{in_fname}", "#{out_fname}")
92
94
 
93
95
  outfname = File.join tmpdir, "#{new_id}.fa"
94
96
 
97
+ seq_lengths[new_id] = rec.seq.length
98
+
95
99
  File.open(outfname, "w") do |f|
96
100
  f.puts ">#{new_id}\n#{rec.seq}" # TODO HERE
97
101
  end
@@ -99,7 +103,7 @@ draw.heatmap("#{in_fname}", "#{out_fname}")
99
103
  end
100
104
  end
101
105
 
102
- [name_map, all_ids]
106
+ [name_map, all_ids, seq_lengths]
103
107
  end
104
108
 
105
109
  def self.strip_suffix fname
@@ -1,5 +1,5 @@
1
1
  module BigSimon
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
 
4
4
  COPYRIGHT = "2018 Ryan Moore"
5
5
  CONTACT = "moorer@udel.edu"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: big_simon
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore