miga-base 0.7.10.2 → 0.7.13.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: f3b350bb3e5dce2f0f8d0006d7f0693ff746c157006890a1250f30d71d89a523
4
- data.tar.gz: 1d94e39c61bfe191388d309eeb992b995350f6191dc74d5c7b15ae9741bcca09
2
+ SHA1:
3
+ metadata.gz: 964b803e09887476153335589fad3d5aee2861ea
4
+ data.tar.gz: ee0d34a4a7c8569d39bea4855a6054c57de78c81
5
5
  SHA512:
6
- metadata.gz: f2a47ca016873723f030c9435db7daa81c819a3f19c65ed7614c21d8d68d556af0dd7313b79d2678508a12762ecd36f7bd922f5cf45640b383c6aaadde91cdfe
7
- data.tar.gz: e8218114c0e89d80eab2b6cf7eba5a1de6f26b7d27e3560af1da12fdfc9037f3557a5994e1b982cff35635cea3d23e18ef234758ef8e12b86fb56f782a41ef89
6
+ metadata.gz: b84efbc94fbfc848b85ae7c3cca7e26f2baea74c5f066a63cf1ede7fb26ed6c634f4c2e2cbe1c2752e54624cf48d6597b933474d4b4d6083cb505a3b17063cf5
7
+ data.tar.gz: 0c7cfc6a149cab5fa1cfc5e386de789f84c132d227ad8ebbbc42dee2b076010bfe06b6697e69efe00a6639283c472f9ca3793a1e113506e691e50a182cfee2c6
@@ -0,0 +1,214 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/cli/action'
4
+
5
+ # Action: miga browse
6
##
# CLI action that renders a project as a set of static HTML pages inside the
# project's +browse+ folder, using the template files located by
# +template_file+
class MiGA::Cli::Action::Browse < MiGA::Cli::Action
  ##
  # Define the command-line interface: sets the default +open: true+ and
  # registers the +:project+ object option
  def parse_cli
    cli.parse do |opt|
      cli.defaults = { open: true }
      cli.opt_object(opt, [:project])
    end
  end

  ##
  # Build all pages: static assets and about page, project landing page, one
  # page per dataset, and the reference/query dataset indexes
  def perform
    p = cli.load_project
    create_empty_page(p)
    generate_project_page(p)
    say 'Creating dataset pages'
    # Reuse the project loaded above rather than loading it a second time
    p.each_dataset { |d| generate_dataset_page(p, d) }
    generate_datasets_index(p)
    say "Open in your browser: #{File.join(p.path, 'index.html')}"
  end

  private

  ##
  # Create an empty page with necessary assets for project +p+: makes the
  # browse folder, copies the static assets, and writes +about.html+
  def create_empty_page(p)
    say 'Creating project page'
    FileUtils.mkdir_p(browse_file(p, '.'))
    %w[favicon-32.png style.css].each do |asset|
      FileUtils.cp(template_file(asset), browse_file(p, asset))
    end
    write_file(p, 'about.html') do
      build_from_template('about.html', citation: MiGA::MiGA.CITATION)
    end
  end

  ##
  # Create landing page for project +p+, plus a redirect page written one
  # level above the browse folder (+../index.html+)
  def generate_project_page(p)
    # Redirect page
    write_file(p, '../index.html') { build_from_template('redirect.html') }

    # Summaries: one list item per *.tsv file found in the project path
    summaries = Dir["#{p.path}/*.tsv"].map do |tsv|
      base = File.basename(tsv)
      "<li><a href='../#{base}'>#{base}</a></li>"
    end.join

    # Project index page
    data = {
      project_active: 'active',
      information: format_metadata(p),
      summaries: summaries.empty? ? 'None' : "<ul>#{summaries}</ul>",
      results: format_results(p)
    }
    write_file(p, 'index.html') { build_from_template('index.html', data) }
  end

  ##
  # Create page for dataset +d+ within project +p+, written as
  # +d_<name>.html+ in the browse folder
  def generate_dataset_page(p, d)
    data = {
      unmiga_name: d.name.unmiga_name,
      information: format_metadata(d),
      results: format_results(d)
    }
    write_file(p, "d_#{d.name}.html") do
      build_from_template('dataset.html', data)
    end
  end

  ##
  # Create pages for reference and query dataset indexes of project +p+
  # (+ref_datasets.html+ and +qry_datasets.html+)
  def generate_datasets_index(p)
    say 'Creating index pages'
    format_dataset_index(p).each do |k, v|
      # An index without datasets displays 'None' instead of an empty list
      v[:list] = 'None' if v[:list] == ''
      write_file(p, "#{k}_datasets.html") do
        build_from_template(
          'datasets.html',
          v.merge(:"#{k}_datasets_active" => 'active')
        )
      end
    end
  end

  ##
  # Returns a Hash with keys +:ref+ and +:qry+, each a Hash with the
  # +:type_name+ to display and the +:list+ of dataset links (a String of
  # +<li>+ items, empty if there are no datasets of that kind in +p+)
  def format_dataset_index(p)
    items = { ref: [], qry: [] }
    p.each_dataset do |d|
      items[d.ref? ? :ref : :qry] <<
        "<li><a href='d_#{d.name}.html'>#{d.name.unmiga_name}</a></li>"
    end
    {
      ref: { type_name: 'Reference', list: items[:ref].join },
      qry: { type_name: 'Query', list: items[:qry].join }
    }
  end

  ##
  # Format +obj+ metadata as a table. Keys matching +run_*+, +:plugins+, and
  # +:user+ are omitted, +:web_assembly_gz+ is rendered as a link, and
  # +:datasets+ is reported as a count
  def format_metadata(obj)
    rows = obj.metadata.data.map do |k, v|
      case k
      when /^run_/, :plugins, :user
        next # Yields nil, dropped by +compact+ below
      when :web_assembly_gz
        v = "<a href='#{v}'>#{v[0..50]}...</a>"
      when :datasets
        v = v.size
      end
      "<tr><td class='text-right pr-4'><b>#{format_name(k)}</b></td>" \
        "<td>#{v}</td></tr>"
    end
    '<table class="table table-sm table-responsive">' +
      rows.compact.join +
      '</table>'
  end

  ##
  # Format +obj+ results as cards, one per result that has at least one file
  # link or one statistic, all wrapped in a single row
  def format_results(obj)
    cards = []
    obj.each_result do |key, res|
      links = format_result_links(res)
      stats = format_result_stats(res)
      next unless links || stats

      name = format_name(key)
      url_doc =
        'http://manual.microbial-genomes.org/part5/workflow#' +
        key.to_s.tr('_', '-')
      cards << <<~CARD
        <div class="col-md-6 mb-4">
          <h3>#{name}</h3>
          <div class='border-left p-3'>
            #{stats}
            #{links}
          </div>
          <div class='border-top p-2 bg-light'>
            <a target=_blank href="#{url_doc}" class='p-2'>Learn more</a>
          </div>
        </div>
      CARD
    end
    "<div class='row'>#{cards.join}</div>"
  end

  ##
  # Turn the key +str+ into a display name: applies +unmiga_name+, upcases
  # the first character, and fixes the case of known abbreviations
  def format_name(str)
    str
      .to_s.unmiga_name
      .sub(/^./, &:upcase)
      .gsub(/(Aai|Ani|Ogs|Cds|Ssu| db$| ssu )/, &:upcase)
      .sub(/Haai/, 'hAAI')
      .sub(/Mytaxa/, 'MyTaxa')
      .sub(/ pvalue$/, ' p-value')
      .sub(/contigs$/, 'Contigs')
  end

  ##
  # Format the files of result +res+ as links relative to the browse folder.
  # Returns nil if the result has no files
  def format_result_links(res)
    links = []
    res.each_file do |key, _|
      name = format_name(key)
      links << "<a href='../#{res.file_path(key, true)}'>#{name}</a><br/>"
    end
    links.empty? ? nil : links.join
  end

  ##
  # Format the statistics of result +res+ as one line per entry. Each value
  # may be a scalar or an Array of value and unit; Float values are shown
  # with three significant digits. Returns nil if there are no statistics
  def format_result_stats(res)
    stats = res.stats
    return nil if stats.nil? || stats.empty?

    lines = stats.map do |k, v|
      # Work on local copies so the stored statistics are left untouched
      value, unit = v.is_a?(Array) ? v : [v, '']
      value = ('%.3g' % value) if value.is_a? Float
      "<b>#{format_name(k)}:</b> #{value}#{unit}<br/>"
    end
    lines.join + '<br/>'
  end

  ##
  # Write +file+ within the browse folder of project +p+ using the passed
  # block output as content
  def write_file(p, file)
    File.open(browse_file(p, file), 'w') { |fh| fh.print yield }
  end

  ##
  # Use a +template+ file to generate content with a hash of +data+ over the
  # layout page if +layout+ is true. With a layout, the rendered template is
  # passed to +layout.html+ as +content+ together with the project name
  def build_from_template(template, data = {}, layout = true)
    cont = File.read(template_file(template)).miga_variables(data)
    return cont unless layout

    build_from_template(
      'layout.html',
      data.merge(content: cont, project_name: cli.load_project.name),
      false
    )
  end

  ##
  # Path to the template browse file
  def template_file(file)
    File.join(
      MiGA::MiGA.root_path,
      'lib', 'miga', 'cli', 'action', 'browse', file
    )
  end

  ##
  # Path to the browse file in the project
  def browse_file(p, file)
    File.join(p.path, 'browse', file)
  end
end
@@ -0,0 +1,31 @@
1
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">About MiGA</h1>
2
+ <p>
3
+ MiGA is developed and maintained by
4
+ <a href='https://rodriguez-r.com/'>Luis M. Rodriguez-R</a>.
5
+
6
+ The MiGA codebase is
7
+ <a href='http://code.microbial-genomes.org/miga'>freely available</a> under the
8
+ terms of the
9
+ <a href='http://code.microbial-genomes.org/miga/blob/master/LICENSE'>Artistic License 2.0</a>.
10
+ </p>
11
+
12
+ <p>
13
+ MiGA is the result of a collaboration between the
14
+ <a href='http://enve-omics.gatech.edu/'>Kostas Lab</a>
15
+ (<a href='http://www.gatech.edu/'>Georgia Institute of Technology</a>) and the
16
+ <a href='http://rdp.cme.msu.edu/'>RDP team</a>
17
+ (<a href='http://cme.msu.edu/'>Center for Microbial Ecology</a>,
18
+ <a href='https://msu.edu/'>Michigan State University</a>).
19
+ The MiGA project is funded by the
20
+ <a href='http://nsf.gov/'>US National Science Foundation</a>
21
+ (Awards <a href='http://nsf.gov/awardsearch/showAward?AWD_ID=1356288'>#1356288</a> &amp;
22
+ <a href='https://xras.xsede.org/public/requests/31162-XSEDE-MCB190042-1190572'>#MCB190042</a>).
23
+ </p>
24
+
25
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Citation</h1>
26
+ If you use MiGA in your work, consider citing:
27
+ <blockquote class='border-left p-3'>
28
+ {{citation}}
29
+ </blockquote>
30
+
31
+
@@ -0,0 +1,5 @@
1
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">{{unmiga_name}}</h1>
2
+ {{information}}
3
+
4
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Results</h1>
5
+ {{results}}
@@ -0,0 +1,3 @@
1
+ <li class="nav-item">
2
+ <a class="nav-link" href="d_{{name}}.html">{{unmiga_name}}</a>
3
+ </li>
@@ -0,0 +1,4 @@
1
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">{{type_name}} Datasets</h1>
2
+ <ul>
3
+ {{list}}
4
+ </ul>
@@ -0,0 +1,8 @@
1
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Project Information</h1>
2
+ {{information}}
3
+
4
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Summaries</h1>
5
+ {{summaries}}
6
+
7
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Project Results</h1>
8
+ {{results}}
@@ -0,0 +1,57 @@
1
+ <!doctype html>
2
+ <head>
3
+ <meta charset="utf-8">
4
+ <title>MiGA | {{project_name}}</title>
5
+
6
+ <!-- Remote assets -->
7
+ <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.0/css/bootstrap.min.css" integrity="sha384-9aIt2nRpC12Uk9gS9baDl411NQApFmC26EwAOH8WgZl5MYYxFfc+NcPb1dKGj7Sk" crossorigin="anonymous">
8
+ <script src="https://code.jquery.com/jquery-3.5.1.slim.min.js" integrity="sha384-DfXdz2htPH0lsSSs5nCTpuj/zy4C+OGpamoFVy38MVBnE+IbbVYUew+OrCXaRkfj" crossorigin="anonymous"></script>
9
+ <script src="https://cdn.jsdelivr.net/npm/popper.js@1.16.0/dist/umd/popper.min.js" integrity="sha384-Q6E9RHvbIyZFJoft+2mJbHaEWldlvI9IOYy5n3zV9zzTtmI3UksdQRVvoxMfooAo" crossorigin="anonymous"></script>
10
+ <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.5.0/js/bootstrap.min.js" integrity="sha384-OgVRvuATP1z7JjHLkuOU7Xw704+h835Lr+6QL9UvYjZE3Ipu6Tp75j7Bh/kR0JKI" crossorigin="anonymous"></script>
11
+
12
+ <!-- Local assets -->
13
+ <link href="style.css" rel="stylesheet">
14
+ <link rel="icon" href="favicon-32.png" sizes="32x32" type="image/png">
15
+ </head>
16
+ <body>
17
+ <nav class="navbar navbar-dark sticky-top bg-dark flex-md-nowrap p-0 shadow">
18
+ <a class="navbar-brand col-md-12 col-lg-12 mr-0 px-3"
19
+ href="index.html">MiGA | {{project_name}}</a>
20
+ <button class="navbar-toggler position-absolute d-md-none collapsed"
21
+ type="button" data-toggle="collapse" data-target="#sidebarMenu"
22
+ aria-controls="sidebarMenu" aria-expanded="false"
23
+ aria-label="Toggle navigation">
24
+ <span class="navbar-toggler-icon"></span>
25
+ </button>
26
+ </nav>
27
+ <div class="container-fluid">
28
+ <div class="row">
29
+ <nav id="sidebarMenu" class="col-md-3 col-lg-2 d-md-block bg-light sidebar collapse">
30
+ <div class="sidebar-sticky pt-3">
31
+ <ul class="nav flex-column">
32
+ <li class="nav-item">
33
+ <a class="nav-link {{project_active}}" href="index.html">Project</a>
34
+ </li>
35
+ <li class="nav-item">
36
+ <a class="nav-link {{ref_datasets_active}}"
37
+ href="ref_datasets.html">Reference datasets</a>
38
+ </li>
39
+ <li class="nav-item">
40
+ <a class="nav-link {{qry_datasets_active}}"
41
+ href="qry_datasets.html">Query datasets</a>
42
+ </li>
43
+ <li class="nav-item border-top mt-4">
44
+ <a class="nav-link {{about_miga_active}}"
45
+ href="about.html">About MiGA</a>
46
+ </li>
47
+ </ul>
48
+ </div>
49
+ </nav>
50
+
51
+ <main role="main" class="col-md-9 ml-sm-auto col-lg-10 px-md-4">
52
+ {{content}}
53
+ </main>
54
+ </div>
55
+ </div>
56
+ </body>
57
+
@@ -0,0 +1,11 @@
1
+ <!doctype html>
2
+ <head>
3
+ <title>MiGA Project</title>
4
+ <meta http-equiv = "refresh" content = "1; url = browse/index.html" />
5
+ </head>
6
+ <body>
7
+ <div style='font-size:200%; margin-top: 5em; text-align: center;'>
8
+ Redirecting to <a href='browse/index.html'>Project page</a>...
9
+ </div>
10
+ </body>
11
+
@@ -0,0 +1,97 @@
1
+ body {
2
+ font-size: .875rem;
3
+ }
4
+
5
+ /*
6
+ * Sidebar
7
+ */
8
+
9
+ .sidebar {
10
+ position: fixed;
11
+ top: 0;
12
+ bottom: 0;
13
+ left: 0;
14
+ z-index: 100; /* Behind the navbar */
15
+ padding: 48px 0 0; /* Height of navbar */
16
+ box-shadow: inset -1px 0 0 rgba(0, 0, 0, .1);
17
+ }
18
+
19
+ @media (max-width: 767.98px) {
20
+ .sidebar {
21
+ top: 3rem;
22
+ }
23
+ }
24
+
25
+ .sidebar-sticky {
26
+ position: relative;
27
+ top: 0;
28
+ height: calc(100vh - 48px);
29
+ padding-top: .5rem;
30
+ overflow-x: hidden;
31
+ overflow-y: auto; /* Scrollable contents if viewport is shorter than content. */
32
+ }
33
+
34
+ @supports ((position: -webkit-sticky) or (position: sticky)) {
35
+ .sidebar-sticky {
36
+ position: -webkit-sticky;
37
+ position: sticky;
38
+ }
39
+ }
40
+
41
+ .sidebar .nav-link {
42
+ font-weight: 500;
43
+ color: #333;
44
+ }
45
+
46
+ .sidebar .nav-link .feather {
47
+ margin-right: 4px;
48
+ color: #999;
49
+ }
50
+
51
+ .sidebar .nav-link.active {
52
+ color: #007bff;
53
+ }
54
+
55
+ .sidebar .nav-link:hover .feather,
56
+ .sidebar .nav-link.active .feather {
57
+ color: inherit;
58
+ }
59
+
60
+ .sidebar-heading {
61
+ font-size: .75rem;
62
+ text-transform: uppercase;
63
+ }
64
+
65
+ /*
66
+ * Navbar
67
+ */
68
+
69
+ .navbar-brand {
70
+ padding-top: .75rem;
71
+ padding-bottom: .75rem;
72
+ font-size: 1rem;
73
+ background-color: rgba(0, 0, 0, .25);
74
+ box-shadow: inset -1px 0 0 rgba(0, 0, 0, .25);
75
+ }
76
+
77
+ .navbar .navbar-toggler {
78
+ top: .25rem;
79
+ right: 1rem;
80
+ }
81
+
82
+ .navbar .form-control {
83
+ padding: .75rem 1rem;
84
+ border-width: 0;
85
+ border-radius: 0;
86
+ }
87
+
88
+ .form-control-dark {
89
+ color: #fff;
90
+ background-color: rgba(255, 255, 255, .1);
91
+ border-color: rgba(255, 255, 255, .1);
92
+ }
93
+
94
+ .form-control-dark:focus {
95
+ border-color: transparent;
96
+ box-shadow: 0 0 0 3px rgba(255, 255, 255, .25);
97
+ }
@@ -27,10 +27,13 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
27
27
  '--threshold FLOAT', Float,
28
28
  "Metric threshold (%) to dereplicate. By default: #{cli[:threshold]}"
29
29
  ) { |v| cli[:threshold] = v }
30
+ opt.on(
31
+ '--quality',
32
+ 'Use genome with highest quality as clade representatives (default)'
33
+ ) { |v| cli[:criterion] = :quality }
30
34
  opt.on(
31
35
  '--medoids',
32
- 'Use medoids as clade representatives',
33
- 'By default: Use genome with the highest quality'
36
+ 'Use medoids as clade representatives'
34
37
  ) { |v| cli[:criterion] = :medoids }
35
38
  opt.on(
36
39
  '--no-collection',
@@ -47,12 +50,18 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
47
50
 
48
51
  def perform
49
52
  # Input data
50
- p = create_project(:assembly,
51
- { run_project_stats: false, run_clades: false,
52
- gsp_metric: cli[:metric], :"gsp_#{cli[:metric]}" => cli[:threshold] },
53
- { run_mytaxa_scan: false, run_ssu: false })
53
+ p = create_project(
54
+ :assembly,
55
+ {
56
+ run_project_stats: false,
57
+ run_clades: false,
58
+ gsp_metric: cli[:metric],
59
+ :"gsp_#{cli[:metric]}" => cli[:threshold]
60
+ },
61
+ { run_mytaxa_scan: false, run_ssu: false }
62
+ )
54
63
  unless cli[:threshold] >= 0.0 && cli[:threshold] <= 100.0
55
- raise "The threshold of identity must be in the range [0,100]"
64
+ raise 'The threshold of identity must be in the range [0,100]'
56
65
  end
57
66
 
58
67
  # Run
@@ -65,8 +74,8 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
65
74
  private
66
75
 
67
76
  def dereplicate(p)
68
- cli.say "Extracting genomospecies clades"
69
- r = p.result(:clade_finding) or raise "Result unavailable: run failed"
77
+ cli.say 'Extracting genomospecies clades'
78
+ r = p.result(:clade_finding) or raise 'Result unavailable: run failed'
70
79
  c_f = r.file_path(:clades_gsp) or raise 'Result incomplete: run failed'
71
80
  clades = File.readlines(c_f).map { |i| i.chomp.split("\t") }
72
81
  rep = representatives(p)
@@ -87,7 +96,7 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
87
96
  end
88
97
 
89
98
  def representatives(p)
90
- cli.say "Identifying representatives"
99
+ cli.say 'Identifying representatives'
91
100
  f = File.expand_path('representatives.txt', cli[:outdir])
92
101
  if cli[:criterion] == :medoids
93
102
  FileUtils.cp(p.result(:clade_finding).file_path(:medoids_gsp), f)
@@ -38,7 +38,7 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
38
38
  end
39
39
  if cli[:key].nil?
40
40
  r[:stats].each do |k, v|
41
- k_n = k == :g_c_content ? 'G+C content' : k.to_s.unmiga_name.capitalize
41
+ k_n = k.to_s.unmiga_name.sub(/^./, &:upcase)
42
42
  cli.puts "#{k_n}: #{v.is_a?(Array) ? v.join(' ') : v}"
43
43
  end
44
44
  else
@@ -169,6 +169,7 @@ module MiGA::Cli::Action::Wf
169
169
  '--tab', '--ref', '--active'
170
170
  ])
171
171
  end
172
+ call_cli(['browse', '-P', cli[:outdir]])
172
173
  end
173
174
 
174
175
  def cleanup
@@ -11,39 +11,40 @@ module MiGA::Cli::Base
11
11
  preproc_wf: 'Preprocess input genomes or metagenomes',
12
12
  index_wf: 'Generate distance indexing of input genomes',
13
13
  # Projects
14
- new: 'Creates an empty MiGA project',
15
- about: 'Displays information about a MiGA project',
16
- doctor: 'Performs consistency checks on a MiGA project',
17
- get_db: 'Downloads a pre-indexed database',
14
+ new: 'Create an empty MiGA project',
15
+ about: 'Display information about a MiGA project',
16
+ doctor: 'Perform consistency checks on a MiGA project',
17
+ get_db: 'Download a pre-indexed database',
18
+ browse: 'Explore a project locally using a web browser',
18
19
  # Datasets
19
- add: 'Creates a dataset in a MiGA project',
20
- get: 'Downloads a dataset from public databases into a MiGA project',
21
- ncbi_get: 'Downloads all genomes in a taxon from NCBI into a MiGA project',
22
- rm: 'Removes a dataset from an MiGA project',
23
- find: 'Finds unregistered datasets based on result files',
20
+ add: 'Create a dataset in a MiGA project',
21
+ get: 'Download a dataset from public databases into a MiGA project',
22
+ ncbi_get: 'Download all genomes in a taxon from NCBI into a MiGA project',
23
+ rm: 'Remove a dataset from an MiGA project',
24
+ find: 'Find unregistered datasets based on result files',
24
25
  ln: 'Link datasets (including results) from one project to another',
25
- ls: 'Lists all registered datasets in an MiGA project',
26
- archive: 'Generates a tar-ball with all files from select datasets',
26
+ ls: 'List all registered datasets in an MiGA project',
27
+ archive: 'Generate a tar-ball with all files from select datasets',
27
28
  # Results
28
- add_result: 'Registers a result',
29
- stats: 'Extracts statistics for the given result',
30
- files: 'Lists registered files from the results of a dataset or project',
31
- run: 'Executes locally one step analysis producing the given result',
32
- summary: 'Generates a summary table for the statistics of all datasets',
33
- next_step: 'Returns the next task to run in a dataset or project',
29
+ add_result: 'Register a result',
30
+ stats: 'Extract statistics for the given result',
31
+ files: 'List registered files from the results of a dataset or project',
32
+ run: 'Execute locally one step analysis producing the given result',
33
+ summary: 'Generate a summary table for the statistics of all datasets',
34
+ next_step: 'Return the next task to run in a dataset or project',
34
35
  # Objects (Datasets or Projects)
35
- edit: 'Edits the metadata of a dataset or project',
36
+ edit: 'Edit the metadata of a dataset or project',
36
37
  # System
37
38
  init: 'Initialize MiGA to process new projects',
38
- daemon: 'Controls the daemon of a MiGA project',
39
- lair: 'Controls groups of daemons for several MiGA projects',
40
- date: 'Returns the current date in standard MiGA format',
41
- console: 'Opens an IRB console with MiGA',
39
+ daemon: 'Control the daemon of a MiGA project',
40
+ lair: 'Control groups of daemons for several MiGA projects',
41
+ date: 'Return the current date in standard MiGA format',
42
+ console: 'Open an IRB console with MiGA',
42
43
  # Taxonomy
43
- tax_set: 'Registers taxonomic information for datasets',
44
- tax_test: 'Returns test of taxonomic distributions for query datasets',
45
- tax_index: 'Creates a taxonomy-indexed list of the datasets',
46
- tax_dist: 'Estimates distributions of distance by taxonomy',
44
+ tax_set: 'Register taxonomic information for datasets',
45
+ tax_test: 'Return test of taxonomic distributions for query datasets',
46
+ tax_index: 'Create a taxonomy-indexed list of the datasets',
47
+ tax_dist: 'Estimate distributions of distance by taxonomy',
47
48
  }
48
49
 
49
50
  @@TASK_ALIAS = {
@@ -68,15 +68,20 @@ module MiGA::Common::Format
68
68
  # a FastA or FastQ file (supports gzipped files). The +format+ must be a
69
69
  # Symbol, one of +:fasta+ or +:fastq+. Additional estimations can be
70
70
  # controlled via the +opts+ Hash. Supported options include:
71
- # - +:n50+: If true, it also returns the N50 and the median (in bp)
72
- # - +:gc+: If true, it also returns the G+C content (in %)
73
- # - +:x+: If true, it also returns the undetermined bases content (in %)
71
+ # - +:n50+: Include the N50 and the median (in bp)
72
+ # - +:gc+: Include the G+C content (in %)
73
+ # - +:x+: Include the undetermined bases content (in %)
74
+ # - +:skew+: Include G-C and A-T sequence skew (in %; forces gc: true).
75
+ # See definition used here in DOI:10.1177/117693430700300006
74
76
  def seqs_length(file, format, opts = {})
77
+ opts[:gc] = true if opts[:skew]
75
78
  fh = file =~ /\.gz/ ? Zlib::GzipReader.open(file) : File.open(file, 'r')
76
79
  l = []
77
80
  gc = 0
78
81
  xn = 0
79
- i = 0 # <- Zlib::GzipReader doesn't set `$.`
82
+ t = 0
83
+ c = 0
84
+ i = 0 # <- Zlib::GzipReader doesn't set `$.`
80
85
  fh.each_line do |ln|
81
86
  i += 1
82
87
  if (format == :fasta and ln =~ /^>/) or
@@ -86,6 +91,10 @@ module MiGA::Common::Format
86
91
  l[l.size - 1] += ln.chomp.size
87
92
  gc += ln.scan(/[GCgc]/).count if opts[:gc]
88
93
  xn += ln.scan(/[XNxn]/).count if opts[:x]
94
+ if opts[:skew]
95
+ t += ln.scan(/[Tt]/).count
96
+ c += ln.scan(/[Cc]/).count
97
+ end
89
98
  end
90
99
  end
91
100
  fh.close
@@ -97,6 +106,12 @@ module MiGA::Common::Format
97
106
  o[:sd] = Math.sqrt o[:var]
98
107
  o[:gc] = 100.0 * gc / o[:tot] if opts[:gc]
99
108
  o[:x] = 100.0 * xn / o[:tot] if opts[:x]
109
+ if opts[:skew]
110
+ at = o[:tot] - gc
111
+ o[:at_skew] = 100.0 * (2 * t - at) / at
112
+ o[:gc_skew] = 100.0 * (2 * c - gc) / gc
113
+ end
114
+
100
115
  if opts[:n50]
101
116
  l.sort!
102
117
  thr = o[:tot] / 2
@@ -132,9 +147,14 @@ class String
132
147
  end
133
148
 
134
149
  ##
135
- # Replace underscores by spaces or dots (depending on context).
150
+ # Replace underscores by spaces or other symbols depending on context
136
151
  def unmiga_name
137
- gsub(/_(str|sp|subsp|pv)__/, '_\\1._').tr('_', ' ')
152
+ gsub(/_(str|sp|subsp|pv)__/, '_\\1._')
153
+ .gsub(/g_c_(content)/, 'G+C \\1')
154
+ .gsub(/g_c_(skew)/, 'G-C \\1')
155
+ .gsub(/a_t_(skew)/, 'A-T \\1')
156
+ .gsub(/x_content/, &:capitalize)
157
+ .tr('_', ' ')
138
158
  end
139
159
 
140
160
  ##
@@ -91,7 +91,8 @@ class MiGA::Daemon < MiGA::MiGA
91
91
  flush!
92
92
  if (loop_i % 12).zero?
93
93
  purge!
94
- recalculate_status!
94
+ # TEMPORARILY DISABLED:
95
+ # recalculate_status!
95
96
  end
96
97
  save_status
97
98
  sleep(latency)
@@ -66,7 +66,7 @@ module MiGA::Dataset::Base
66
66
  @@PREPROCESSING_TASKS = [
67
67
  :raw_reads, :trimmed_reads, :read_quality, :trimmed_fasta,
68
68
  :assembly, :cds, :essential_genes, :ssu, :mytaxa, :mytaxa_scan,
69
- :distances, :taxonomy, :stats
69
+ :taxonomy, :distances, :stats
70
70
  ]
71
71
 
72
72
  ##
@@ -77,7 +77,7 @@ module MiGA::Dataset::Base
77
77
  ##
78
78
  # Tasks to be executed only in datasets that are not multi-organism. These
79
79
  # tasks are ignored for multi-organism datasets or for unknown types.
80
- @@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :distances, :taxonomy]
80
+ @@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :taxonomy, :distances]
81
81
  @@_ONLY_NONMULTI_TASKS_H = Hash[@@ONLY_NONMULTI_TASKS.map { |i| [i, true] }]
82
82
 
83
83
  ##
@@ -81,20 +81,22 @@ class MiGA::Result < MiGA::MiGA
81
81
  end
82
82
 
83
83
  ##
84
- # Directory containing the result
85
- def dir
86
- File.dirname(path)
84
+ # Directory containing the result; by default an absolute path, if
85
+ # +relative+ is true returns the path relative to the parent project
86
+ def dir(relative = false)
87
+ relative ? relative_dir : File.dirname(path)
87
88
  end
88
89
 
89
90
  ##
90
- # Absolute path to the file(s) defined by symbol +k+
91
- def file_path(k)
91
+ # Absolute path to the file(s) defined by symbol +k+, or relative
92
+ # path if +relative+ is true
93
+ def file_path(k, relative = false)
92
94
  k = k.to_sym
93
95
  f = self[:files].nil? ? nil : self[:files][k]
94
96
  return nil if f.nil?
95
- return File.expand_path(f, dir) unless f.is_a? Array
97
+ return File.join(dir(relative), f) unless f.is_a? Array
96
98
 
97
- f.map { |fi| File.expand_path(fi, dir) }
99
+ f.map { |fi| File.join(dir(relative), fi) }
98
100
  end
99
101
 
100
102
  ##
@@ -17,32 +17,45 @@ module MiGA::Result::Stats
17
17
  self[:stats]
18
18
  end
19
19
 
20
+ ##
21
+ # Access the stats entry of results
22
+ def stats
23
+ self[:stats]
24
+ end
25
+
20
26
  private
21
27
 
22
28
  def compute_stats_raw_reads
23
29
  stats = {}
30
+ seq_opts = { gc: true, x: true, skew: true }
24
31
  if self[:files][:pair1].nil?
25
- s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, gc: true, x: true)
32
+ s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, seq_opts)
26
33
  stats = {
27
34
  reads: s[:n],
28
35
  length_average: [s[:avg], 'bp'],
29
36
  length_standard_deviation: [s[:sd], 'bp'],
30
37
  g_c_content: [s[:gc], '%'],
31
- x_content: [s[:x], '%']
38
+ x_content: [s[:x], '%'],
39
+ g_c_skew: [s[:gc_skew], '%'],
40
+ a_t_skew: [s[:at_skew], '%']
32
41
  }
33
42
  else
34
- s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, gc: true, x: true)
35
- s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, gc: true, x: true)
43
+ s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, seq_opts)
44
+ s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, seq_opts)
36
45
  stats = {
37
46
  read_pairs: s1[:n],
38
47
  forward_length_average: [s1[:avg], 'bp'],
39
48
  forward_length_standard_deviation: [s1[:sd], 'bp'],
40
49
  forward_g_c_content: [s1[:gc], '%'],
41
50
  forward_x_content: [s1[:x], '%'],
51
+ forward_g_c_skew: [s1[:gc_skew], '%'],
52
+ forward_a_t_skew: [s1[:at_skew], '%'],
42
53
  reverse_length_average: [s2[:avg], 'bp'],
43
54
  reverse_length_standard_deviation: [s2[:sd], 'bp'],
44
55
  reverse_g_c_content: [s2[:gc], '%'],
45
- reverse_x_content: [s2[:x], '%']
56
+ reverse_x_content: [s2[:x], '%'],
57
+ reverse_g_c_skew: [s2[:gc_skew], '%'],
58
+ reverse_a_t_skew: [s2[:at_skew], '%']
46
59
  }
47
60
  end
48
61
  stats
@@ -50,19 +63,22 @@ module MiGA::Result::Stats
50
63
 
51
64
  def compute_stats_trimmed_fasta
52
65
  f = self[:files][:coupled].nil? ? file_path(:single) : file_path(:coupled)
53
- s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true)
66
+ s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true, skew: true)
54
67
  {
55
68
  reads: s[:n],
56
69
  length_average: [s[:avg], 'bp'],
57
70
  length_standard_deviation: [s[:sd], 'bp'],
58
71
  g_c_content: [s[:gc], '%'],
59
- x_content: [s[:x], '%']
72
+ x_content: [s[:x], '%'],
73
+ g_c_skew: [s[:gc_skew], '%'],
74
+ a_t_skew: [s[:at_skew], '%']
60
75
  }
61
76
  end
62
77
 
63
78
  def compute_stats_assembly
64
79
  s = MiGA::MiGA.seqs_length(
65
- file_path(:largecontigs), :fasta, n50: true, gc: true, x: true
80
+ file_path(:largecontigs), :fasta,
81
+ n50: true, gc: true, x: true, skew: true
66
82
  )
67
83
  {
68
84
  contigs: s[:n],
@@ -70,7 +86,9 @@ module MiGA::Result::Stats
70
86
  total_length: [s[:tot], 'bp'],
71
87
  longest_sequence: [s[:max], 'bp'],
72
88
  g_c_content: [s[:gc], '%'],
73
- x_content: [s[:x], '%']
89
+ x_content: [s[:x], '%'],
90
+ g_c_skew: [s[:gc_skew], '%'],
91
+ a_t_skew: [s[:at_skew], '%']
74
92
  }
75
93
  end
76
94
 
@@ -133,7 +151,7 @@ module MiGA::Result::Stats
133
151
  source.save
134
152
 
135
153
  # Inactivate low-quality datasets
136
- min_qual = (project.metadata[:min_qual] || 50)
154
+ min_qual = (project.metadata[:min_qual] || 25)
137
155
  if min_qual != 'no' && stats[:quality] < min_qual
138
156
  source.inactivate! 'Low quality genome'
139
157
  end
@@ -8,7 +8,7 @@ module MiGA
8
8
  # - Float representing the major.minor version.
9
9
  # - Integer representing gem releases of the current version.
10
10
  # - Integer representing minor changes that require new version number.
11
- VERSION = [0.7, 10, 2]
11
+ VERSION = [0.7, 13, 0]
12
12
 
13
13
  ##
14
14
  # Nickname for the current major.minor version.
@@ -16,7 +16,7 @@ module MiGA
16
16
 
17
17
  ##
18
18
  # Date of the current gem release.
19
- VERSION_DATE = Date.new(2020, 6, 30)
19
+ VERSION_DATE = Date.new(2020, 7, 31)
20
20
 
21
21
  ##
22
22
  # Reference of MiGA.
@@ -13,17 +13,20 @@ echo -n "" > miga-project.log
13
13
  DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
14
14
 
15
15
  # Extract values
16
- echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
17
- for i in $DS ; do
18
- echo "SELECT CASE WHEN omega!=0 THEN 'AAI' ELSE 'hAAI_AAI' END," \
19
- " seq1, seq2, aai, sd, n, omega from aai;" \
20
- | sqlite3 "$i.db" | tr "\\|" "\\t" >>miga-project.txt
21
- echo "$i" >> miga-project.log
22
- done
16
+ rm -f miga-project.txt
17
+ (
18
+ echo "metric a b value sd n omega" | tr " " "\\t"
19
+ for i in $DS ; do
20
+ echo "SELECT CASE WHEN omega!=0 THEN 'AAI' ELSE 'hAAI_AAI' END," \
21
+ " seq1, seq2, aai, sd, n, omega from aai;" \
22
+ | sqlite3 "$i.db" | tr "\\|" "\\t"
23
+ echo "$i" >> miga-project.log
24
+ done
25
+ ) | gzip -9c > miga-project.txt.gz
23
26
 
24
27
  # R-ify
25
28
  echo "
26
- aai <- read.table('miga-project.txt', sep='\\t', h=T, as.is=TRUE);
29
+ aai <- read.table(gzfile('miga-project.txt.gz'), sep='\\t', h=T, as.is=TRUE);
27
30
  save(aai, file='miga-project.Rdata');
28
31
  if(sum(aai[,'a'] != aai[,'b']) > 0){
29
32
  h <- hist(aai[aai[,'a'] != aai[,'b'], 'value'], breaks=100, plot=FALSE);
@@ -35,9 +38,6 @@ if(sum(aai[,'a'] != aai[,'b']) > 0){
35
38
  }
36
39
  " | R --vanilla
37
40
 
38
- # Gzip
39
- gzip -9 -f miga-project.txt
40
-
41
41
  # Finalize
42
42
  miga date > "miga-project.done"
43
43
  miga add_result -P "$PROJECT" -r "$SCRIPT" -f
@@ -13,16 +13,19 @@ echo -n "" > miga-project.log
13
13
  DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
14
14
 
15
15
  # Extract values
16
- echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
17
- for i in $DS ; do
18
- echo "SELECT 'ANI', seq1, seq2, ani, sd, n, omega from ani ;" \
19
- | sqlite3 "$i.db" | tr "\\|" "\\t" >>miga-project.txt
20
- echo "$i" >> miga-project.log
21
- done
16
+ rm -f miga-project.txt
17
+ (
18
+ echo "metric a b value sd n omega" | tr " " "\\t"
19
+ for i in $DS ; do
20
+ echo "SELECT 'ANI', seq1, seq2, ani, sd, n, omega from ani ;" \
21
+ | sqlite3 "$i.db" | tr "\\|" "\\t"
22
+ echo "$i" >> miga-project.log
23
+ done
24
+ ) | gzip -9c > miga-project.txt.gz
22
25
 
23
26
  # R-ify
24
27
  echo "
25
- ani <- read.table('miga-project.txt', sep='\\t', h=T, as.is=TRUE);
28
+ ani <- read.table(gzfile('miga-project.txt.gz'), sep='\\t', h=T, as.is=TRUE);
26
29
  save(ani, file='miga-project.Rdata');
27
30
  if(sum(ani[,'a'] != ani[,'b']) > 0){
28
31
  h <- hist(ani[ani[,'a'] != ani[,'b'], 'value'], breaks=100, plot=FALSE);
@@ -34,9 +37,6 @@ if(sum(ani[,'a'] != ani[,'b']) > 0){
34
37
  }
35
38
  " | R --vanilla
36
39
 
37
- # Gzip
38
- gzip -9 -f miga-project.txt
39
-
40
40
  # Finalize
41
41
  miga date > "miga-project.done"
42
42
  miga add_result -P "$PROJECT" -r "$SCRIPT" -f
@@ -20,7 +20,6 @@ fi
20
20
  TYPE=$(miga ls -P "$PROJECT" -D "$DATASET" -m type | cut -f 2)
21
21
  case "$TYPE" in
22
22
  metagenome|virome)
23
- $CMD -p meta
24
23
  prodigal -a "${DATASET}.faa" -d "${DATASET}.fna" -o "${DATASET}.gff3" \
25
24
  -f gff -q -i "../05.assembly/${DATASET}.LargeContigs.fna" -p meta
26
25
  ;;
@@ -17,30 +17,30 @@ echo -n "" > miga-project.log
17
17
  DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
18
18
 
19
19
  # Extract values
20
- echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
21
- for i in $DS ; do
22
- echo "SELECT 'hAAI', seq1, seq2, aai, sd, n, omega from aai ;" \
23
- | sqlite3 "$i.db" | tr "\\|" "\\t" >>miga-project.txt
24
- echo "$i" >> miga-project.log
25
- done
20
+ rm -f miga-project.txt
21
+ (
22
+ echo "metric a b value sd n omega" | tr " " "\\t"
23
+ for i in $DS ; do
24
+ echo "SELECT 'hAAI', seq1, seq2, aai, sd, n, omega from aai ;" \
25
+ | sqlite3 "$i.db" | tr "\\|" "\\t"
26
+ echo "$i" >> miga-project.log
27
+ done
28
+ ) | gzip -9c > miga-project.txt.gz
26
29
 
27
30
  # R-ify
28
31
  echo "
29
- haai <- read.table('miga-project.txt', sep='\\t', h=T, as.is=TRUE);
32
+ haai <- read.table(gzfile('miga-project.txt.gz'), sep='\\t', h=T, as.is=TRUE);
30
33
  save(haai, file='miga-project.Rdata');
31
34
  if(sum(haai[,'a'] != haai[,'b']) > 0){
32
35
  h <- hist(haai[haai[,'a'] != haai[,'b'], 'value'], breaks=100, plot=FALSE);
33
36
  write.table(
34
37
  cbind(h[['breaks']][-length(h[['breaks']])],
35
- h[['breaks']][-1],h[['counts']]),
38
+ h[['breaks']][-1], h[['counts']]),
36
39
  file='miga-project.hist', quote=FALSE, sep='\\t',
37
40
  col.names=FALSE, row.names=FALSE);
38
41
  }
39
42
  " | R --vanilla
40
43
 
41
- # Gzip
42
- gzip -9 -f miga-project.txt
43
-
44
44
  # Finalize
45
45
  miga date > "miga-project.done"
46
46
  miga add_result -P "$PROJECT" -r "$SCRIPT" -f
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.10.2
4
+ version: 0.7.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-30 00:00:00.000000000 Z
11
+ date: 2020-07-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -118,6 +118,16 @@ files:
118
118
  - lib/miga/cli/action/add.rb
119
119
  - lib/miga/cli/action/add_result.rb
120
120
  - lib/miga/cli/action/archive.rb
121
+ - lib/miga/cli/action/browse.rb
122
+ - lib/miga/cli/action/browse/about.html
123
+ - lib/miga/cli/action/browse/dataset.html
124
+ - lib/miga/cli/action/browse/dataset_menu_item.html
125
+ - lib/miga/cli/action/browse/datasets.html
126
+ - lib/miga/cli/action/browse/favicon-32.png
127
+ - lib/miga/cli/action/browse/index.html
128
+ - lib/miga/cli/action/browse/layout.html
129
+ - lib/miga/cli/action/browse/redirect.html
130
+ - lib/miga/cli/action/browse/style.css
121
131
  - lib/miga/cli/action/classify_wf.rb
122
132
  - lib/miga/cli/action/console.rb
123
133
  - lib/miga/cli/action/daemon.rb
@@ -550,7 +560,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
550
560
  - !ruby/object:Gem::Version
551
561
  version: '0'
552
562
  requirements: []
553
- rubygems_version: 3.1.2
563
+ rubyforge_project:
564
+ rubygems_version: 2.5.2.3
554
565
  signing_key:
555
566
  specification_version: 4
556
567
  summary: MiGA