miga-base 0.7.10.2 → 0.7.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: f3b350bb3e5dce2f0f8d0006d7f0693ff746c157006890a1250f30d71d89a523
4
- data.tar.gz: 1d94e39c61bfe191388d309eeb992b995350f6191dc74d5c7b15ae9741bcca09
2
+ SHA1:
3
+ metadata.gz: 964b803e09887476153335589fad3d5aee2861ea
4
+ data.tar.gz: ee0d34a4a7c8569d39bea4855a6054c57de78c81
5
5
  SHA512:
6
- metadata.gz: f2a47ca016873723f030c9435db7daa81c819a3f19c65ed7614c21d8d68d556af0dd7313b79d2678508a12762ecd36f7bd922f5cf45640b383c6aaadde91cdfe
7
- data.tar.gz: e8218114c0e89d80eab2b6cf7eba5a1de6f26b7d27e3560af1da12fdfc9037f3557a5994e1b982cff35635cea3d23e18ef234758ef8e12b86fb56f782a41ef89
6
+ metadata.gz: b84efbc94fbfc848b85ae7c3cca7e26f2baea74c5f066a63cf1ede7fb26ed6c634f4c2e2cbe1c2752e54624cf48d6597b933474d4b4d6083cb505a3b17063cf5
7
+ data.tar.gz: 0c7cfc6a149cab5fa1cfc5e386de789f84c132d227ad8ebbbc42dee2b076010bfe06b6697e69efe00a6639283c472f9ca3793a1e113506e691e50a182cfee2c6
@@ -0,0 +1,214 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/cli/action'
4
+
5
+ # Action: miga browse
6
+ class MiGA::Cli::Action::Browse < MiGA::Cli::Action
7
+ def parse_cli
8
+ cli.parse do |opt|
9
+ cli.defaults = { open: true }
10
+ cli.opt_object(opt, [:project])
11
+ end
12
+ end
13
+
14
+ def perform
15
+ p = cli.load_project
16
+ create_empty_page(p)
17
+ generate_project_page(p)
18
+ say 'Creating dataset pages'
19
+ cli.load_project.each_dataset do |d|
20
+ generate_dataset_page(p, d)
21
+ end
22
+ generate_datasets_index(p)
23
+ say "Open in your browser: #{File.join(p.path, 'index.html')}"
24
+ end
25
+
26
+ private
27
+
28
+ ##
29
+ # Create an empty page with necessary assets for project +p+
30
+ def create_empty_page(p)
31
+ say 'Creating project page'
32
+ FileUtils.mkdir_p(browse_file(p, '.'))
33
+ %w[favicon-32.png style.css].each do |i|
34
+ FileUtils.cp(template_file(i), browse_file(p, i))
35
+ end
36
+ write_file(p, 'about.html') do
37
+ build_from_template('about.html', citation: MiGA::MiGA.CITATION)
38
+ end
39
+ end
40
+
41
+ ##
42
+ # Create landing page for project +p+
43
+ def generate_project_page(p)
44
+ # Redirect page
45
+ write_file(p, '../index.html') { build_from_template('redirect.html') }
46
+
47
+ # Summaries
48
+ summaries = Dir["#{p.path}/*.tsv"].map do |i|
49
+ b = File.basename(i)
50
+ "<li><a href='../#{b}'>#{b}</a></li>"
51
+ end.join('')
52
+
53
+ # Project index page
54
+ data = {
55
+ project_active: 'active',
56
+ information: format_metadata(p),
57
+ summaries: summaries.empty? ? 'None' : "<ul>#{summaries}</ul>",
58
+ results: format_results(p)
59
+ }
60
+ write_file(p, 'index.html') { build_from_template('index.html', data) }
61
+ end
62
+
63
+ ##
64
+ # Create page for dataset +d+ within project +p+
65
+ def generate_dataset_page(p, d)
66
+ data = {
67
+ unmiga_name: d.name.unmiga_name,
68
+ information: format_metadata(d),
69
+ results: format_results(d)
70
+ }
71
+ write_file(p, "d_#{d.name}.html") do
72
+ build_from_template('dataset.html', data)
73
+ end
74
+ end
75
+
76
+ ##
77
+ # Create pages for reference and query dataset indexes
78
+ def generate_datasets_index(p)
79
+ say 'Creating index pages'
80
+ data = format_dataset_index(p)
81
+ data.each do |k, v|
82
+ write_file(p, "#{k}_datasets.html") do
83
+ v[:list] = 'None' if v[:list] == ''
84
+ build_from_template(
85
+ 'datasets.html',
86
+ v.merge(:"#{k}_datasets_active" => 'active')
87
+ )
88
+ end
89
+ end
90
+ end
91
+
92
+ def format_dataset_index(p)
93
+ data = {
94
+ ref: { type_name: 'Reference', list: '' },
95
+ qry: { type_name: 'Query', list: '' }
96
+ }
97
+ p.each_dataset do |d|
98
+ data[d.ref? ? :ref : :qry][:list] +=
99
+ "<li><a href='d_#{d.name}.html'>#{d.name.unmiga_name}</a></li>"
100
+ end
101
+ data
102
+ end
103
+
104
+ ##
105
+ # Format +obj+ metadata as a table
106
+ def format_metadata(obj)
107
+ '<table class="table table-sm table-responsive">' +
108
+ obj.metadata.data.map do |k, v|
109
+ case k
110
+ when /^run_/, :plugins, :user
111
+ next
112
+ when :web_assembly_gz
113
+ v = "<a href='#{v}'>#{v[0..50]}...</a>"
114
+ when :datasets
115
+ v = v.size
116
+ end
117
+ "<tr><td class='text-right pr-4'><b>#{format_name(k)}</b></td>" \
118
+ "<td>#{v}</td></tr>"
119
+ end.compact.join('') +
120
+ '</table>'
121
+ end
122
+
123
+ ##
124
+ # Format +obj+ results as cards
125
+ def format_results(obj)
126
+ o = ''
127
+ obj.each_result do |key, res|
128
+ links = format_result_links(res)
129
+ stats = format_result_stats(res)
130
+ next unless links || stats
131
+ name = format_name(key)
132
+ url_doc =
133
+ 'http://manual.microbial-genomes.org/part5/workflow#' +
134
+ key.to_s.tr('_', '-')
135
+ o += <<~CARD
136
+ <div class="col-md-6 mb-4">
137
+ <h3>#{name}</h3>
138
+ <div class='border-left p-3'>
139
+ #{stats}
140
+ #{links}
141
+ </div>
142
+ <div class='border-top p-2 bg-light'>
143
+ <a target=_blank href="#{url_doc}" class='p-2'>Learn more</a>
144
+ </div>
145
+ </div>
146
+ CARD
147
+ end
148
+ "<div class='row'>#{o}</div>"
149
+ end
150
+
151
+ def format_name(str)
152
+ str
153
+ .to_s.unmiga_name
154
+ .sub(/^./, &:upcase)
155
+ .gsub(/(Aai|Ani|Ogs|Cds|Ssu| db$| ssu )/, &:upcase)
156
+ .sub(/Haai/, 'hAAI')
157
+ .sub(/Mytaxa/, 'MyTaxa')
158
+ .sub(/ pvalue$/, ' p-value')
159
+ .sub(/contigs$/, 'Contigs')
160
+ end
161
+
162
+ def format_result_links(res)
163
+ links = []
164
+ res.each_file do |key, _|
165
+ name = format_name(key)
166
+ links << "<a href='../#{res.file_path(key, true)}'>#{name}</a><br/>"
167
+ end
168
+ links.empty? ? nil : links.join('')
169
+ end
170
+
171
+ def format_result_stats(res)
172
+ res.stats.map do |k, v|
173
+ v = [v, ''] unless v.is_a? Array
174
+ v[0] = ('%.3g' % v[0]) if v[0].is_a? Float
175
+ "<b>#{format_name(k)}:</b> #{v[0]}#{v[1]}<br/>"
176
+ end.join('') + '<br/>' unless res.stats.empty?
177
+ end
178
+
179
+ ##
180
+ # Write +file+ within the browse folder of project +p+ using the passed
181
+ # block output as content
182
+ def write_file(p, file)
183
+ File.open(browse_file(p, file), 'w') { |fh| fh.print yield }
184
+ end
185
+
186
+ ##
187
+ # Generate content from the +template+ file and the hash +data+, and embed
188
+ # it in the layout page if +layout+ is true
189
+ def build_from_template(template, data = {}, layout = true)
190
+ cont = File.read(template_file(template)).miga_variables(data)
191
+ return cont unless layout
192
+
193
+ build_from_template(
194
+ 'layout.html',
195
+ data.merge(content: cont, project_name: cli.load_project.name),
196
+ false
197
+ )
198
+ end
199
+
200
+ ##
201
+ # Path to the template browse file
202
+ def template_file(file)
203
+ File.join(
204
+ MiGA::MiGA.root_path,
205
+ 'lib', 'miga', 'cli', 'action', 'browse', file
206
+ )
207
+ end
208
+
209
+ ##
210
+ # Path to the browse file in the project
211
+ def browse_file(p, file)
212
+ File.join(p.path, 'browse', file)
213
+ end
214
+ end
@@ -0,0 +1,31 @@
1
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">About MiGA</h1>
2
+ <p>
3
+ MiGA is developed and maintained by
4
+ <a href='https://rodriguez-r.com/'>Luis M. Rodriguez-R</a>.
5
+
6
+ The MiGA codebase is
7
+ <a href='http://code.microbial-genomes.org/miga'>freely available</a> under the
8
+ terms of the
9
+ <a href='http://code.microbial-genomes.org/miga/blob/master/LICENSE'>Artistic License 2.0</a>.
10
+ </p>
11
+
12
+ <p>
13
+ MiGA is the result of a collaboration between the
14
+ <a href='http://enve-omics.gatech.edu/'>Kostas Lab</a>
15
+ (<a href='http://www.gatech.edu/'>Georgia Institute of Technology</a>) and the
16
+ <a href='http://rdp.cme.msu.edu/'>RDP team</a>
17
+ (<a href='http://cme.msu.edu/'>Center for Microbial Ecology</a>,
18
+ <a href='https://msu.edu/'>Michigan State University</a>).
19
+ The MiGA project is funded by the
20
+ <a href='http://nsf.gov/'>US National Science Foundation</a>
21
+ (Awards <a href='http://nsf.gov/awardsearch/showAward?AWD_ID=1356288'>#1356288</a> &amp;
22
+ <a href='https://xras.xsede.org/public/requests/31162-XSEDE-MCB190042-1190572'>#MCB190042</a>).
23
+ </p>
24
+
25
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Citation</h1>
26
+ If you use MiGA in your work, consider citing:
27
+ <blockquote class='border-left p-3'>
28
+ {{citation}}
29
+ </blockquote>
30
+
31
+
@@ -0,0 +1,5 @@
1
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">{{unmiga_name}}</h1>
2
+ {{information}}
3
+
4
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Results</h1>
5
+ {{results}}
@@ -0,0 +1,3 @@
1
+ <li class="nav-item">
2
+ <a class="nav-link" href="ds_{{name}}.html">{{unmiga_name}}</a>
3
+ </li>
@@ -0,0 +1,4 @@
1
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">{{type_name}} Datasets</h1>
2
+ <ul>
3
+ {{list}}
4
+ </ul>
@@ -0,0 +1,8 @@
1
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Project Information</h1>
2
+ {{information}}
3
+
4
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Summaries</h1>
5
+ {{summaries}}
6
+
7
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Project Results</h1>
8
+ {{results}}
@@ -0,0 +1,57 @@
1
+ <!doctype html>
2
+ <head>
3
+ <meta charset="utf-8">
4
+ <title>MiGA | {{project_name}}</title>
5
+
6
+ <!-- Remote assets -->
7
+ <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.0/css/bootstrap.min.css" integrity="sha384-9aIt2nRpC12Uk9gS9baDl411NQApFmC26EwAOH8WgZl5MYYxFfc+NcPb1dKGj7Sk" crossorigin="anonymous">
8
+ <script src="https://code.jquery.com/jquery-3.5.1.slim.min.js" integrity="sha384-DfXdz2htPH0lsSSs5nCTpuj/zy4C+OGpamoFVy38MVBnE+IbbVYUew+OrCXaRkfj" crossorigin="anonymous"></script>
9
+ <script src="https://cdn.jsdelivr.net/npm/popper.js@1.16.0/dist/umd/popper.min.js" integrity="sha384-Q6E9RHvbIyZFJoft+2mJbHaEWldlvI9IOYy5n3zV9zzTtmI3UksdQRVvoxMfooAo" crossorigin="anonymous"></script>
10
+ <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.5.0/js/bootstrap.min.js" integrity="sha384-OgVRvuATP1z7JjHLkuOU7Xw704+h835Lr+6QL9UvYjZE3Ipu6Tp75j7Bh/kR0JKI" crossorigin="anonymous"></script>
11
+
12
+ <!-- Local assets -->
13
+ <link href="style.css" rel="stylesheet">
14
+ <link rel="icon" href="favicon-32.png" sizes="32x32" type="image/png">
15
+ </head>
16
+ <body>
17
+ <nav class="navbar navbar-dark sticky-top bg-dark flex-md-nowrap p-0 shadow">
18
+ <a class="navbar-brand col-md-12 col-lg-12 mr-0 px-3"
19
+ href="index.html">MiGA | {{project_name}}</a>
20
+ <button class="navbar-toggler position-absolute d-md-none collapsed"
21
+ type="button" data-toggle="collapse" data-target="#sidebarMenu"
22
+ aria-controls="sidebarMenu" aria-expanded="false"
23
+ aria-label="Toggle navigation">
24
+ <span class="navbar-toggler-icon"></span>
25
+ </button>
26
+ </nav>
27
+ <div class="container-fluid">
28
+ <div class="row">
29
+ <nav id="sidebarMenu" class="col-md-3 col-lg-2 d-md-block bg-light sidebar collapse">
30
+ <div class="sidebar-sticky pt-3">
31
+ <ul class="nav flex-column">
32
+ <li class="nav-item">
33
+ <a class="nav-link {{project_active}}" href="index.html">Project</a>
34
+ </li>
35
+ <li class="nav-item">
36
+ <a class="nav-link {{ref_datasets_active}}"
37
+ href="ref_datasets.html">Reference datasets</a>
38
+ </li>
39
+ <li class="nav-item">
40
+ <a class="nav-link {{qry_datasets_active}}"
41
+ href="qry_datasets.html">Query datasets</a>
42
+ </li>
43
+ <li class="nav-item border-top mt-4">
44
+ <a class="nav-link {{about_miga_active}}"
45
+ href="about.html">About MiGA</a>
46
+ </li>
47
+ </ul>
48
+ </div>
49
+ </nav>
50
+
51
+ <main role="main" class="col-md-9 ml-sm-auto col-lg-10 px-md-4">
52
+ {{content}}
53
+ </main>
54
+ </div>
55
+ </div>
56
+ </body>
57
+
@@ -0,0 +1,11 @@
1
+ <!doctype html>
2
+ <head>
3
+ <title>MiGA Project</title>
4
+ <meta http-equiv = "refresh" content = "1; url = browse/index.html" />
5
+ </head>
6
+ <body>
7
+ <div style='font-size:200%; margin-top: 5em; text-align: center;'>
8
+ Redirecting to <a href='browse/index.html'>Project page</a>...
9
+ </div>
10
+ </body>
11
+
@@ -0,0 +1,97 @@
1
+ body {
2
+ font-size: .875rem;
3
+ }
4
+
5
+ /*
6
+ * Sidebar
7
+ */
8
+
9
+ .sidebar {
10
+ position: fixed;
11
+ top: 0;
12
+ bottom: 0;
13
+ left: 0;
14
+ z-index: 100; /* Behind the navbar */
15
+ padding: 48px 0 0; /* Height of navbar */
16
+ box-shadow: inset -1px 0 0 rgba(0, 0, 0, .1);
17
+ }
18
+
19
+ @media (max-width: 767.98px) {
20
+ .sidebar {
21
+ top: 3rem;
22
+ }
23
+ }
24
+
25
+ .sidebar-sticky {
26
+ position: relative;
27
+ top: 0;
28
+ height: calc(100vh - 48px);
29
+ padding-top: .5rem;
30
+ overflow-x: hidden;
31
+ overflow-y: auto; /* Scrollable contents if viewport is shorter than content. */
32
+ }
33
+
34
+ @supports ((position: -webkit-sticky) or (position: sticky)) {
35
+ .sidebar-sticky {
36
+ position: -webkit-sticky;
37
+ position: sticky;
38
+ }
39
+ }
40
+
41
+ .sidebar .nav-link {
42
+ font-weight: 500;
43
+ color: #333;
44
+ }
45
+
46
+ .sidebar .nav-link .feather {
47
+ margin-right: 4px;
48
+ color: #999;
49
+ }
50
+
51
+ .sidebar .nav-link.active {
52
+ color: #007bff;
53
+ }
54
+
55
+ .sidebar .nav-link:hover .feather,
56
+ .sidebar .nav-link.active .feather {
57
+ color: inherit;
58
+ }
59
+
60
+ .sidebar-heading {
61
+ font-size: .75rem;
62
+ text-transform: uppercase;
63
+ }
64
+
65
+ /*
66
+ * Navbar
67
+ */
68
+
69
+ .navbar-brand {
70
+ padding-top: .75rem;
71
+ padding-bottom: .75rem;
72
+ font-size: 1rem;
73
+ background-color: rgba(0, 0, 0, .25);
74
+ box-shadow: inset -1px 0 0 rgba(0, 0, 0, .25);
75
+ }
76
+
77
+ .navbar .navbar-toggler {
78
+ top: .25rem;
79
+ right: 1rem;
80
+ }
81
+
82
+ .navbar .form-control {
83
+ padding: .75rem 1rem;
84
+ border-width: 0;
85
+ border-radius: 0;
86
+ }
87
+
88
+ .form-control-dark {
89
+ color: #fff;
90
+ background-color: rgba(255, 255, 255, .1);
91
+ border-color: rgba(255, 255, 255, .1);
92
+ }
93
+
94
+ .form-control-dark:focus {
95
+ border-color: transparent;
96
+ box-shadow: 0 0 0 3px rgba(255, 255, 255, .25);
97
+ }
@@ -27,10 +27,13 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
27
27
  '--threshold FLOAT', Float,
28
28
  "Metric threshold (%) to dereplicate. By default: #{cli[:threshold]}"
29
29
  ) { |v| cli[:threshold] = v }
30
+ opt.on(
31
+ '--quality',
32
+ 'Use genome with highest quality as clade representatives (default)'
33
+ ) { |v| cli[:criterion] = :quality }
30
34
  opt.on(
31
35
  '--medoids',
32
- 'Use medoids as clade representatives',
33
- 'By default: Use genome with the highest quality'
36
+ 'Use medoids as clade representatives'
34
37
  ) { |v| cli[:criterion] = :medoids }
35
38
  opt.on(
36
39
  '--no-collection',
@@ -47,12 +50,18 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
47
50
 
48
51
  def perform
49
52
  # Input data
50
- p = create_project(:assembly,
51
- { run_project_stats: false, run_clades: false,
52
- gsp_metric: cli[:metric], :"gsp_#{cli[:metric]}" => cli[:threshold] },
53
- { run_mytaxa_scan: false, run_ssu: false })
53
+ p = create_project(
54
+ :assembly,
55
+ {
56
+ run_project_stats: false,
57
+ run_clades: false,
58
+ gsp_metric: cli[:metric],
59
+ :"gsp_#{cli[:metric]}" => cli[:threshold]
60
+ },
61
+ { run_mytaxa_scan: false, run_ssu: false }
62
+ )
54
63
  unless cli[:threshold] >= 0.0 && cli[:threshold] <= 100.0
55
- raise "The threshold of identity must be in the range [0,100]"
64
+ raise 'The threshold of identity must be in the range [0,100]'
56
65
  end
57
66
 
58
67
  # Run
@@ -65,8 +74,8 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
65
74
  private
66
75
 
67
76
  def dereplicate(p)
68
- cli.say "Extracting genomospecies clades"
69
- r = p.result(:clade_finding) or raise "Result unavailable: run failed"
77
+ cli.say 'Extracting genomospecies clades'
78
+ r = p.result(:clade_finding) or raise 'Result unavailable: run failed'
70
79
  c_f = r.file_path(:clades_gsp) or raise 'Result incomplete: run failed'
71
80
  clades = File.readlines(c_f).map { |i| i.chomp.split("\t") }
72
81
  rep = representatives(p)
@@ -87,7 +96,7 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
87
96
  end
88
97
 
89
98
  def representatives(p)
90
- cli.say "Identifying representatives"
99
+ cli.say 'Identifying representatives'
91
100
  f = File.expand_path('representatives.txt', cli[:outdir])
92
101
  if cli[:criterion] == :medoids
93
102
  FileUtils.cp(p.result(:clade_finding).file_path(:medoids_gsp), f)
@@ -38,7 +38,7 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
38
38
  end
39
39
  if cli[:key].nil?
40
40
  r[:stats].each do |k, v|
41
- k_n = k == :g_c_content ? 'G+C content' : k.to_s.unmiga_name.capitalize
41
+ k_n = k.to_s.unmiga_name.sub(/^./, &:upcase)
42
42
  cli.puts "#{k_n}: #{v.is_a?(Array) ? v.join(' ') : v}"
43
43
  end
44
44
  else
@@ -169,6 +169,7 @@ module MiGA::Cli::Action::Wf
169
169
  '--tab', '--ref', '--active'
170
170
  ])
171
171
  end
172
+ call_cli(['browse', '-P', cli[:outdir]])
172
173
  end
173
174
 
174
175
  def cleanup
@@ -11,39 +11,40 @@ module MiGA::Cli::Base
11
11
  preproc_wf: 'Preprocess input genomes or metagenomes',
12
12
  index_wf: 'Generate distance indexing of input genomes',
13
13
  # Projects
14
- new: 'Creates an empty MiGA project',
15
- about: 'Displays information about a MiGA project',
16
- doctor: 'Performs consistency checks on a MiGA project',
17
- get_db: 'Downloads a pre-indexed database',
14
+ new: 'Create an empty MiGA project',
15
+ about: 'Display information about a MiGA project',
16
+ doctor: 'Perform consistency checks on a MiGA project',
17
+ get_db: 'Download a pre-indexed database',
18
+ browse: 'Explore a project locally using a web browser',
18
19
  # Datasets
19
- add: 'Creates a dataset in a MiGA project',
20
- get: 'Downloads a dataset from public databases into a MiGA project',
21
- ncbi_get: 'Downloads all genomes in a taxon from NCBI into a MiGA project',
22
- rm: 'Removes a dataset from an MiGA project',
23
- find: 'Finds unregistered datasets based on result files',
20
+ add: 'Create a dataset in a MiGA project',
21
+ get: 'Download a dataset from public databases into a MiGA project',
22
+ ncbi_get: 'Download all genomes in a taxon from NCBI into a MiGA project',
23
+ rm: 'Remove a dataset from an MiGA project',
24
+ find: 'Find unregistered datasets based on result files',
24
25
  ln: 'Link datasets (including results) from one project to another',
25
- ls: 'Lists all registered datasets in an MiGA project',
26
- archive: 'Generates a tar-ball with all files from select datasets',
26
+ ls: 'List all registered datasets in an MiGA project',
27
+ archive: 'Generate a tar-ball with all files from select datasets',
27
28
  # Results
28
- add_result: 'Registers a result',
29
- stats: 'Extracts statistics for the given result',
30
- files: 'Lists registered files from the results of a dataset or project',
31
- run: 'Executes locally one step analysis producing the given result',
32
- summary: 'Generates a summary table for the statistics of all datasets',
33
- next_step: 'Returns the next task to run in a dataset or project',
29
+ add_result: 'Register a result',
30
+ stats: 'Extract statistics for the given result',
31
+ files: 'List registered files from the results of a dataset or project',
32
+ run: 'Execute locally one step analysis producing the given result',
33
+ summary: 'Generate a summary table for the statistics of all datasets',
34
+ next_step: 'Return the next task to run in a dataset or project',
34
35
  # Objects (Datasets or Projects)
35
- edit: 'Edits the metadata of a dataset or project',
36
+ edit: 'Edit the metadata of a dataset or project',
36
37
  # System
37
38
  init: 'Initialize MiGA to process new projects',
38
- daemon: 'Controls the daemon of a MiGA project',
39
- lair: 'Controls groups of daemons for several MiGA projects',
40
- date: 'Returns the current date in standard MiGA format',
41
- console: 'Opens an IRB console with MiGA',
39
+ daemon: 'Control the daemon of a MiGA project',
40
+ lair: 'Control groups of daemons for several MiGA projects',
41
+ date: 'Return the current date in standard MiGA format',
42
+ console: 'Open an IRB console with MiGA',
42
43
  # Taxonomy
43
- tax_set: 'Registers taxonomic information for datasets',
44
- tax_test: 'Returns test of taxonomic distributions for query datasets',
45
- tax_index: 'Creates a taxonomy-indexed list of the datasets',
46
- tax_dist: 'Estimates distributions of distance by taxonomy',
44
+ tax_set: 'Register taxonomic information for datasets',
45
+ tax_test: 'Return test of taxonomic distributions for query datasets',
46
+ tax_index: 'Create a taxonomy-indexed list of the datasets',
47
+ tax_dist: 'Estimate distributions of distance by taxonomy',
47
48
  }
48
49
 
49
50
  @@TASK_ALIAS = {
@@ -68,15 +68,20 @@ module MiGA::Common::Format
68
68
  # a FastA or FastQ file (supports gzipped files). The +format+ must be a
69
69
  # Symbol, one of +:fasta+ or +:fastq+. Additional estimations can be
70
70
  # controlled via the +opts+ Hash. Supported options include:
71
- # - +:n50+: If true, it also returns the N50 and the median (in bp)
72
- # - +:gc+: If true, it also returns the G+C content (in %)
73
- # - +:x+: If true, it also returns the undetermined bases content (in %)
71
+ # - +:n50+: Include the N50 and the median (in bp)
72
+ # - +:gc+: Include the G+C content (in %)
73
+ # - +:x+: Include the undetermined bases content (in %)
74
+ # - +:skew+: Include G-C and A-T sequence skew (in %; forces gc: true).
75
+ # See definition used here in DOI:10.1177/117693430700300006
74
76
  def seqs_length(file, format, opts = {})
77
+ opts[:gc] = true if opts[:skew]
75
78
  fh = file =~ /\.gz/ ? Zlib::GzipReader.open(file) : File.open(file, 'r')
76
79
  l = []
77
80
  gc = 0
78
81
  xn = 0
79
- i = 0 # <- Zlib::GzipReader doesn't set `$.`
82
+ t = 0
83
+ c = 0
84
+ i = 0 # <- Zlib::GzipReader doesn't set `$.`
80
85
  fh.each_line do |ln|
81
86
  i += 1
82
87
  if (format == :fasta and ln =~ /^>/) or
@@ -86,6 +91,10 @@ module MiGA::Common::Format
86
91
  l[l.size - 1] += ln.chomp.size
87
92
  gc += ln.scan(/[GCgc]/).count if opts[:gc]
88
93
  xn += ln.scan(/[XNxn]/).count if opts[:x]
94
+ if opts[:skew]
95
+ t += ln.scan(/[Tt]/).count
96
+ c += ln.scan(/[Cc]/).count
97
+ end
89
98
  end
90
99
  end
91
100
  fh.close
@@ -97,6 +106,12 @@ module MiGA::Common::Format
97
106
  o[:sd] = Math.sqrt o[:var]
98
107
  o[:gc] = 100.0 * gc / o[:tot] if opts[:gc]
99
108
  o[:x] = 100.0 * xn / o[:tot] if opts[:x]
109
+ if opts[:skew]
110
+ at = o[:tot] - gc
111
+ o[:at_skew] = 100.0 * (2 * t - at) / at
112
+ o[:gc_skew] = 100.0 * (2 * c - gc) / gc
113
+ end
114
+
100
115
  if opts[:n50]
101
116
  l.sort!
102
117
  thr = o[:tot] / 2
@@ -132,9 +147,14 @@ class String
132
147
  end
133
148
 
134
149
  ##
135
- # Replace underscores by spaces or dots (depending on context).
150
+ # Replace underscores by spaces or other symbols depending on context
136
151
  def unmiga_name
137
- gsub(/_(str|sp|subsp|pv)__/, '_\\1._').tr('_', ' ')
152
+ gsub(/_(str|sp|subsp|pv)__/, '_\\1._')
153
+ .gsub(/g_c_(content)/, 'G+C \\1')
154
+ .gsub(/g_c_(skew)/, 'G-C \\1')
155
+ .gsub(/a_t_(skew)/, 'A-T \\1')
156
+ .gsub(/x_content/, &:capitalize)
157
+ .tr('_', ' ')
138
158
  end
139
159
 
140
160
  ##
@@ -91,7 +91,8 @@ class MiGA::Daemon < MiGA::MiGA
91
91
  flush!
92
92
  if (loop_i % 12).zero?
93
93
  purge!
94
- recalculate_status!
94
+ # TEMPORARILY DISABLED:
95
+ # recalculate_status!
95
96
  end
96
97
  save_status
97
98
  sleep(latency)
@@ -66,7 +66,7 @@ module MiGA::Dataset::Base
66
66
  @@PREPROCESSING_TASKS = [
67
67
  :raw_reads, :trimmed_reads, :read_quality, :trimmed_fasta,
68
68
  :assembly, :cds, :essential_genes, :ssu, :mytaxa, :mytaxa_scan,
69
- :distances, :taxonomy, :stats
69
+ :taxonomy, :distances, :stats
70
70
  ]
71
71
 
72
72
  ##
@@ -77,7 +77,7 @@ module MiGA::Dataset::Base
77
77
  ##
78
78
  # Tasks to be executed only in datasets that are not multi-organism. These
79
79
  # tasks are ignored for multi-organism datasets or for unknown types.
80
- @@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :distances, :taxonomy]
80
+ @@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :taxonomy, :distances]
81
81
  @@_ONLY_NONMULTI_TASKS_H = Hash[@@ONLY_NONMULTI_TASKS.map { |i| [i, true] }]
82
82
 
83
83
  ##
@@ -81,20 +81,22 @@ class MiGA::Result < MiGA::MiGA
81
81
  end
82
82
 
83
83
  ##
84
- # Directory containing the result
85
- def dir
86
- File.dirname(path)
84
+ # Directory containing the result. By default it is an absolute path; if
85
+ # +relative+ is true, it is the path relative to the parent project
86
+ def dir(relative = false)
87
+ relative ? relative_dir : File.dirname(path)
87
88
  end
88
89
 
89
90
  ##
90
- # Absolute path to the file(s) defined by symbol +k+
91
- def file_path(k)
91
+ # Absolute path to the file(s) defined by symbol +k+, or relative
92
+ # path if +relative+ is true
93
+ def file_path(k, relative = false)
92
94
  k = k.to_sym
93
95
  f = self[:files].nil? ? nil : self[:files][k]
94
96
  return nil if f.nil?
95
- return File.expand_path(f, dir) unless f.is_a? Array
97
+ return File.join(dir(relative), f) unless f.is_a? Array
96
98
 
97
- f.map { |fi| File.expand_path(fi, dir) }
99
+ f.map { |fi| File.join(dir(relative), fi) }
98
100
  end
99
101
 
100
102
  ##
@@ -17,32 +17,45 @@ module MiGA::Result::Stats
17
17
  self[:stats]
18
18
  end
19
19
 
20
+ ##
21
+ # Access the stats entry of results
22
+ def stats
23
+ self[:stats]
24
+ end
25
+
20
26
  private
21
27
 
22
28
  def compute_stats_raw_reads
23
29
  stats = {}
30
+ seq_opts = { gc: true, x: true, skew: true }
24
31
  if self[:files][:pair1].nil?
25
- s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, gc: true, x: true)
32
+ s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, seq_opts)
26
33
  stats = {
27
34
  reads: s[:n],
28
35
  length_average: [s[:avg], 'bp'],
29
36
  length_standard_deviation: [s[:sd], 'bp'],
30
37
  g_c_content: [s[:gc], '%'],
31
- x_content: [s[:x], '%']
38
+ x_content: [s[:x], '%'],
39
+ g_c_skew: [s[:gc_skew], '%'],
40
+ a_t_skew: [s[:at_skew], '%']
32
41
  }
33
42
  else
34
- s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, gc: true, x: true)
35
- s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, gc: true, x: true)
43
+ s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, seq_opts)
44
+ s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, seq_opts)
36
45
  stats = {
37
46
  read_pairs: s1[:n],
38
47
  forward_length_average: [s1[:avg], 'bp'],
39
48
  forward_length_standard_deviation: [s1[:sd], 'bp'],
40
49
  forward_g_c_content: [s1[:gc], '%'],
41
50
  forward_x_content: [s1[:x], '%'],
51
+ forward_g_c_skew: [s1[:gc_skew], '%'],
52
+ forward_a_t_skew: [s1[:at_skew], '%'],
42
53
  reverse_length_average: [s2[:avg], 'bp'],
43
54
  reverse_length_standard_deviation: [s2[:sd], 'bp'],
44
55
  reverse_g_c_content: [s2[:gc], '%'],
45
- reverse_x_content: [s2[:x], '%']
56
+ reverse_x_content: [s2[:x], '%'],
57
+ reverse_g_c_skew: [s2[:gc_skew], '%'],
58
+ reverse_a_t_skew: [s2[:at_skew], '%']
46
59
  }
47
60
  end
48
61
  stats
@@ -50,19 +63,22 @@ module MiGA::Result::Stats
50
63
 
51
64
  def compute_stats_trimmed_fasta
52
65
  f = self[:files][:coupled].nil? ? file_path(:single) : file_path(:coupled)
53
- s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true)
66
+ s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true, skew: true)
54
67
  {
55
68
  reads: s[:n],
56
69
  length_average: [s[:avg], 'bp'],
57
70
  length_standard_deviation: [s[:sd], 'bp'],
58
71
  g_c_content: [s[:gc], '%'],
59
- x_content: [s[:x], '%']
72
+ x_content: [s[:x], '%'],
73
+ g_c_skew: [s[:gc_skew], '%'],
74
+ a_t_skew: [s[:at_skew], '%']
60
75
  }
61
76
  end
62
77
 
63
78
  def compute_stats_assembly
64
79
  s = MiGA::MiGA.seqs_length(
65
- file_path(:largecontigs), :fasta, n50: true, gc: true, x: true
80
+ file_path(:largecontigs), :fasta,
81
+ n50: true, gc: true, x: true, skew: true
66
82
  )
67
83
  {
68
84
  contigs: s[:n],
@@ -70,7 +86,9 @@ module MiGA::Result::Stats
70
86
  total_length: [s[:tot], 'bp'],
71
87
  longest_sequence: [s[:max], 'bp'],
72
88
  g_c_content: [s[:gc], '%'],
73
- x_content: [s[:x], '%']
89
+ x_content: [s[:x], '%'],
90
+ g_c_skew: [s[:gc_skew], '%'],
91
+ a_t_skew: [s[:at_skew], '%']
74
92
  }
75
93
  end
76
94
 
@@ -133,7 +151,7 @@ module MiGA::Result::Stats
133
151
  source.save
134
152
 
135
153
  # Inactivate low-quality datasets
136
- min_qual = (project.metadata[:min_qual] || 50)
154
+ min_qual = (project.metadata[:min_qual] || 25)
137
155
  if min_qual != 'no' && stats[:quality] < min_qual
138
156
  source.inactivate! 'Low quality genome'
139
157
  end
@@ -8,7 +8,7 @@ module MiGA
8
8
  # - Float representing the major.minor version.
9
9
  # - Integer representing gem releases of the current version.
10
10
  # - Integer representing minor changes that require new version number.
11
- VERSION = [0.7, 10, 2]
11
+ VERSION = [0.7, 13, 0]
12
12
 
13
13
  ##
14
14
  # Nickname for the current major.minor version.
@@ -16,7 +16,7 @@ module MiGA
16
16
 
17
17
  ##
18
18
  # Date of the current gem release.
19
- VERSION_DATE = Date.new(2020, 6, 30)
19
+ VERSION_DATE = Date.new(2020, 7, 31)
20
20
 
21
21
  ##
22
22
  # Reference of MiGA.
@@ -13,17 +13,20 @@ echo -n "" > miga-project.log
13
13
  DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
14
14
 
15
15
  # Extract values
16
- echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
17
- for i in $DS ; do
18
- echo "SELECT CASE WHEN omega!=0 THEN 'AAI' ELSE 'hAAI_AAI' END," \
19
- " seq1, seq2, aai, sd, n, omega from aai;" \
20
- | sqlite3 "$i.db" | tr "\\|" "\\t" >>miga-project.txt
21
- echo "$i" >> miga-project.log
22
- done
16
+ rm -f miga-project.txt
17
+ (
18
+ echo "metric a b value sd n omega" | tr " " "\\t"
19
+ for i in $DS ; do
20
+ echo "SELECT CASE WHEN omega!=0 THEN 'AAI' ELSE 'hAAI_AAI' END," \
21
+ " seq1, seq2, aai, sd, n, omega from aai;" \
22
+ | sqlite3 "$i.db" | tr "\\|" "\\t"
23
+ echo "$i" >> miga-project.log
24
+ done
25
+ ) | gzip -9c > miga-project.txt.gz
23
26
 
24
27
  # R-ify
25
28
  echo "
26
- aai <- read.table('miga-project.txt', sep='\\t', h=T, as.is=TRUE);
29
+ aai <- read.table(gzfile('miga-project.txt.gz'), sep='\\t', h=T, as.is=TRUE);
27
30
  save(aai, file='miga-project.Rdata');
28
31
  if(sum(aai[,'a'] != aai[,'b']) > 0){
29
32
  h <- hist(aai[aai[,'a'] != aai[,'b'], 'value'], breaks=100, plot=FALSE);
@@ -35,9 +38,6 @@ if(sum(aai[,'a'] != aai[,'b']) > 0){
35
38
  }
36
39
  " | R --vanilla
37
40
 
38
- # Gzip
39
- gzip -9 -f miga-project.txt
40
-
41
41
  # Finalize
42
42
  miga date > "miga-project.done"
43
43
  miga add_result -P "$PROJECT" -r "$SCRIPT" -f
@@ -13,16 +13,19 @@ echo -n "" > miga-project.log
13
13
  DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
14
14
 
15
15
  # Extract values
16
- echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
17
- for i in $DS ; do
18
- echo "SELECT 'ANI', seq1, seq2, ani, sd, n, omega from ani ;" \
19
- | sqlite3 "$i.db" | tr "\\|" "\\t" >>miga-project.txt
20
- echo "$i" >> miga-project.log
21
- done
16
+ rm -f miga-project.txt
17
+ (
18
+ echo "metric a b value sd n omega" | tr " " "\\t"
19
+ for i in $DS ; do
20
+ echo "SELECT 'ANI', seq1, seq2, ani, sd, n, omega from ani ;" \
21
+ | sqlite3 "$i.db" | tr "\\|" "\\t"
22
+ echo "$i" >> miga-project.log
23
+ done
24
+ ) | gzip -9c > miga-project.txt.gz
22
25
 
23
26
  # R-ify
24
27
  echo "
25
- ani <- read.table('miga-project.txt', sep='\\t', h=T, as.is=TRUE);
28
+ ani <- read.table(gzfile('miga-project.txt.gz'), sep='\\t', h=T, as.is=TRUE);
26
29
  save(ani, file='miga-project.Rdata');
27
30
  if(sum(ani[,'a'] != ani[,'b']) > 0){
28
31
  h <- hist(ani[ani[,'a'] != ani[,'b'], 'value'], breaks=100, plot=FALSE);
@@ -34,9 +37,6 @@ if(sum(ani[,'a'] != ani[,'b']) > 0){
34
37
  }
35
38
  " | R --vanilla
36
39
 
37
- # Gzip
38
- gzip -9 -f miga-project.txt
39
-
40
40
  # Finalize
41
41
  miga date > "miga-project.done"
42
42
  miga add_result -P "$PROJECT" -r "$SCRIPT" -f
@@ -20,7 +20,6 @@ fi
20
20
  TYPE=$(miga ls -P "$PROJECT" -D "$DATASET" -m type | cut -f 2)
21
21
  case "$TYPE" in
22
22
  metagenome|virome)
23
- $CMD -p meta
24
23
  prodigal -a "${DATASET}.faa" -d "${DATASET}.fna" -o "${DATASET}.gff3" \
25
24
  -f gff -q -i "../05.assembly/${DATASET}.LargeContigs.fna" -p meta
26
25
  ;;
@@ -17,30 +17,30 @@ echo -n "" > miga-project.log
17
17
  DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
18
18
 
19
19
  # Extract values
20
- echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
21
- for i in $DS ; do
22
- echo "SELECT 'hAAI', seq1, seq2, aai, sd, n, omega from aai ;" \
23
- | sqlite3 "$i.db" | tr "\\|" "\\t" >>miga-project.txt
24
- echo "$i" >> miga-project.log
25
- done
20
+ rm -f miga-project.txt
21
+ (
22
+ echo "metric a b value sd n omega" | tr " " "\\t"
23
+ for i in $DS ; do
24
+ echo "SELECT 'hAAI', seq1, seq2, aai, sd, n, omega from aai ;" \
25
+ | sqlite3 "$i.db" | tr "\\|" "\\t"
26
+ echo "$i" >> miga-project.log
27
+ done
28
+ ) | gzip -9c > miga-project.txt.gz
26
29
 
27
30
  # R-ify
28
31
  echo "
29
- haai <- read.table('miga-project.txt', sep='\\t', h=T, as.is=TRUE);
32
+ haai <- read.table(gzfile('miga-project.txt.gz'), sep='\\t', h=T, as.is=TRUE);
30
33
  save(haai, file='miga-project.Rdata');
31
34
  if(sum(haai[,'a'] != haai[,'b']) > 0){
32
35
  h <- hist(haai[haai[,'a'] != haai[,'b'], 'value'], breaks=100, plot=FALSE);
33
36
  write.table(
34
37
  cbind(h[['breaks']][-length(h[['breaks']])],
35
- h[['breaks']][-1],h[['counts']]),
38
+ h[['breaks']][-1], h[['counts']]),
36
39
  file='miga-project.hist', quote=FALSE, sep='\\t',
37
40
  col.names=FALSE, row.names=FALSE);
38
41
  }
39
42
  " | R --vanilla
40
43
 
41
- # Gzip
42
- gzip -9 -f miga-project.txt
43
-
44
44
  # Finalize
45
45
  miga date > "miga-project.done"
46
46
  miga add_result -P "$PROJECT" -r "$SCRIPT" -f
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.10.2
4
+ version: 0.7.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-30 00:00:00.000000000 Z
11
+ date: 2020-07-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -118,6 +118,16 @@ files:
118
118
  - lib/miga/cli/action/add.rb
119
119
  - lib/miga/cli/action/add_result.rb
120
120
  - lib/miga/cli/action/archive.rb
121
+ - lib/miga/cli/action/browse.rb
122
+ - lib/miga/cli/action/browse/about.html
123
+ - lib/miga/cli/action/browse/dataset.html
124
+ - lib/miga/cli/action/browse/dataset_menu_item.html
125
+ - lib/miga/cli/action/browse/datasets.html
126
+ - lib/miga/cli/action/browse/favicon-32.png
127
+ - lib/miga/cli/action/browse/index.html
128
+ - lib/miga/cli/action/browse/layout.html
129
+ - lib/miga/cli/action/browse/redirect.html
130
+ - lib/miga/cli/action/browse/style.css
121
131
  - lib/miga/cli/action/classify_wf.rb
122
132
  - lib/miga/cli/action/console.rb
123
133
  - lib/miga/cli/action/daemon.rb
@@ -550,7 +560,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
550
560
  - !ruby/object:Gem::Version
551
561
  version: '0'
552
562
  requirements: []
553
- rubygems_version: 3.1.2
563
+ rubyforge_project:
564
+ rubygems_version: 2.5.2.3
554
565
  signing_key:
555
566
  specification_version: 4
556
567
  summary: MiGA