miga-base 0.7.10.0 → 0.7.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 30d61eb5847a28c4d83a43e6e47ff0738bb819218e677b9aa43de158b441e0ae
4
- data.tar.gz: 46338ae15353b71fb6e7eff390c8bb976d1c11fc296bb83f33aab8ffba1a3fa8
3
+ metadata.gz: c57b841cc84841ebfd868af862204179e443d37eb4ec5550ea9ef7df63b9d933
4
+ data.tar.gz: 5512fe8305c2b6193f5d1a5c99e663ef480be32db2e8bdc7087826bdeeae5b35
5
5
  SHA512:
6
- metadata.gz: 488e7888039bb9e08e7c257fdd7cb0cf34340766f73b9b46c28b332072f5f207f5dcfb0df08c98b27f5640158d3a14ae6d0f5ab19d78f3e9e1ef44e381d34e74
7
- data.tar.gz: 992cfc225eeb2f4b8017260e66c67bc9df39a79757ecdb65e01766079e103385259c1eeb447fdc5e6b769990f96256d384dc4adddf184693a3bc94b8323b9a5a
6
+ metadata.gz: 63e095dded680c2a6d1519d8fc8788d4d44cf523fc84b9e67ee453dffb9f05c75e2acafc283b511f6b27638e678d53f7362b056f35d18d5b08893419c7d24302
7
+ data.tar.gz: efac7f7c3d65640b64dc6d9b32e3bd520faebda05a72f1b88a2ee4c11546b5366db1ac42ea296ba73fe1f7873244a9f64791a180c9094a374a7ebbf1c589d491
@@ -0,0 +1,214 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/cli/action'
4
+
5
+ # Action: miga browse
6
+ class MiGA::Cli::Action::Browse < MiGA::Cli::Action
7
+ def parse_cli
8
+ cli.parse do |opt|
9
+ cli.defaults = { open: true }
10
+ cli.opt_object(opt, [:project])
11
+ end
12
+ end
13
+
14
+ def perform
15
+ p = cli.load_project
16
+ create_empty_page(p)
17
+ generate_project_page(p)
18
+ say 'Creating dataset pages'
19
+ cli.load_project.each_dataset do |d|
20
+ generate_dataset_page(p, d)
21
+ end
22
+ generate_datasets_index(p)
23
+ say "Open in your browser: #{File.join(p.path, 'index.html')}"
24
+ end
25
+
26
+ private
27
+
28
+ ##
29
+ # Create an empty page with necessary assets for project +p+
30
+ def create_empty_page(p)
31
+ say 'Creating project page'
32
+ FileUtils.mkdir_p(browse_file(p, '.'))
33
+ %w[favicon-32.png style.css].each do |i|
34
+ FileUtils.cp(template_file(i), browse_file(p, i))
35
+ end
36
+ write_file(p, 'about.html') do
37
+ build_from_template('about.html', citation: MiGA::MiGA.CITATION)
38
+ end
39
+ end
40
+
41
+ ##
42
+ # Create landing page for project +p+
43
+ def generate_project_page(p)
44
+ # Redirect page
45
+ write_file(p, '../index.html') { build_from_template('redirect.html') }
46
+
47
+ # Summaries
48
+ summaries = Dir["#{p.path}/*.tsv"].map do |i|
49
+ b = File.basename(i)
50
+ "<li><a href='../#{b}'>#{b}</a></li>"
51
+ end.join('')
52
+
53
+ # Project index page
54
+ data = {
55
+ project_active: 'active',
56
+ information: format_metadata(p),
57
+ summaries: summaries.empty? ? 'None' : "<ul>#{summaries}</ul>",
58
+ results: format_results(p)
59
+ }
60
+ write_file(p, 'index.html') { build_from_template('index.html', data) }
61
+ end
62
+
63
+ ##
64
+ # Create page for dataset +d+ within project +p+
65
+ def generate_dataset_page(p, d)
66
+ data = {
67
+ unmiga_name: d.name.unmiga_name,
68
+ information: format_metadata(d),
69
+ results: format_results(d)
70
+ }
71
+ write_file(p, "d_#{d.name}.html") do
72
+ build_from_template('dataset.html', data)
73
+ end
74
+ end
75
+
76
+ ##
77
+ # Create pages for reference and query dataset indexes
78
+ def generate_datasets_index(p)
79
+ say 'Creating index pages'
80
+ data = format_dataset_index(p)
81
+ data.each do |k, v|
82
+ write_file(p, "#{k}_datasets.html") do
83
+ v[:list] = 'None' if v[:list] == ''
84
+ build_from_template(
85
+ 'datasets.html',
86
+ v.merge(:"#{k}_datasets_active" => 'active')
87
+ )
88
+ end
89
+ end
90
+ end
91
+
92
+ def format_dataset_index(p)
93
+ data = {
94
+ ref: { type_name: 'Reference', list: '' },
95
+ qry: { type_name: 'Query', list: '' }
96
+ }
97
+ p.each_dataset do |d|
98
+ data[d.ref? ? :ref : :qry][:list] +=
99
+ "<li><a href='d_#{d.name}.html'>#{d.name.unmiga_name}</a></li>"
100
+ end
101
+ data
102
+ end
103
+
104
+ ##
105
+ # Format +obj+ metadata as a table
106
+ def format_metadata(obj)
107
+ '<table class="table table-sm table-responsive">' +
108
+ obj.metadata.data.map do |k, v|
109
+ case k
110
+ when /^run_/, :plugins, :user
111
+ next
112
+ when :web_assembly_gz
113
+ v = "<a href='#{v}'>#{v[0..50]}...</a>"
114
+ when :datasets
115
+ v = v.size
116
+ end
117
+ "<tr><td class='text-right pr-4'><b>#{format_name(k)}</b></td>" \
118
+ "<td>#{v}</td></tr>"
119
+ end.compact.join('') +
120
+ '</table>'
121
+ end
122
+
123
+ ##
124
+ # Format +obj+ results as cards
125
+ def format_results(obj)
126
+ o = ''
127
+ obj.each_result do |key, res|
128
+ links = format_result_links(res)
129
+ stats = format_result_stats(res)
130
+ next unless links || stats
131
+ name = format_name(key)
132
+ url_doc =
133
+ 'http://manual.microbial-genomes.org/part5/workflow#' +
134
+ key.to_s.tr('_', '-')
135
+ o += <<~CARD
136
+ <div class="col-md-6 mb-4">
137
+ <h3>#{name}</h3>
138
+ <div class='border-left p-3'>
139
+ #{stats}
140
+ #{links}
141
+ </div>
142
+ <div class='border-top p-2 bg-light'>
143
+ <a target=_blank href="#{url_doc}" class='p-2'>Learn more</a>
144
+ </div>
145
+ </div>
146
+ CARD
147
+ end
148
+ "<div class='row'>#{o}</div>"
149
+ end
150
+
151
+ def format_name(str)
152
+ str
153
+ .to_s.unmiga_name
154
+ .sub(/^./, &:upcase)
155
+ .gsub(/(Aai|Ani|Ogs|Cds|Ssu| db$| ssu )/, &:upcase)
156
+ .sub(/Haai/, 'hAAI')
157
+ .sub(/Mytaxa/, 'MyTaxa')
158
+ .sub(/ pvalue$/, ' p-value')
159
+ .sub(/contigs$/, 'Contigs')
160
+ end
161
+
162
+ def format_result_links(res)
163
+ links = []
164
+ res.each_file do |key, _|
165
+ name = format_name(key)
166
+ links << "<a href='../#{res.file_path(key, true)}'>#{name}</a><br/>"
167
+ end
168
+ links.empty? ? nil : links.join('')
169
+ end
170
+
171
+ def format_result_stats(res)
172
+ res.stats.map do |k, v|
173
+ v = [v, ''] unless v.is_a? Array
174
+ v[0] = ('%.3g' % v[0]) if v[0].is_a? Float
175
+ "<b>#{format_name(k)}:</b> #{v[0]}#{v[1]}<br/>"
176
+ end.join('') + '<br/>' unless res.stats.empty?
177
+ end
178
+
179
+ ##
180
+ # Write +file+ within the browse folder of project +p+ using the passed
181
+ # block output as content
182
+ def write_file(p, file)
183
+ File.open(browse_file(p, file), 'w') { |fh| fh.print yield }
184
+ end
185
+
186
+ ##
187
+ # Use a +template+ file to generate content with a hash of +data+ over the
188
+ # layout page if +layout+ is true
189
+ def build_from_template(template, data = {}, layout = true)
190
+ cont = File.read(template_file(template)).miga_variables(data)
191
+ return cont unless layout
192
+
193
+ build_from_template(
194
+ 'layout.html',
195
+ data.merge(content: cont, project_name: cli.load_project.name),
196
+ false
197
+ )
198
+ end
199
+
200
+ ##
201
+ # Path to the template browse file
202
+ def template_file(file)
203
+ File.join(
204
+ MiGA::MiGA.root_path,
205
+ 'lib', 'miga', 'cli', 'action', 'browse', file
206
+ )
207
+ end
208
+
209
+ ##
210
+ # Path to the browse file in the project
211
+ def browse_file(p, file)
212
+ File.join(p.path, 'browse', file)
213
+ end
214
+ end
@@ -0,0 +1,31 @@
1
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">About MiGA</h1>
2
+ <p>
3
+ MiGA is developed and maintained by
4
+ <a href='https://rodriguez-r.com/'>Luis M. Rodriguez-R</a>.
5
+
6
+ The MiGA codebase is
7
+ <a href='http://code.microbial-genomes.org/miga'>freely available</a> under the
8
+ terms of the
9
+ <a href='http://code.microbial-genomes.org/miga/blob/master/LICENSE'>Artistic License 2.0</a>.
10
+ </p>
11
+
12
+ <p>
13
+ MiGA is the result of a collaboration between the
14
+ <a href='http://enve-omics.gatech.edu/'>Kostas Lab</a>
15
+ (<a href='http://www.gatech.edu/'>Georgia Institute of Technology</a>) and the
16
+ <a href='http://rdp.cme.msu.edu/'>RDP team</a>
17
+ (<a href='http://cme.msu.edu/'>Center for Microbial Ecology</a>,
18
+ <a href='https://msu.edu/'>Michigan State University</a>).
19
+ The MiGA project is funded by the
20
+ <a href='http://nsf.gov/'>US National Science Foundation</a>
21
+ (Awards <a href='http://nsf.gov/awardsearch/showAward?AWD_ID=1356288'>#1356288</a> &amp;
22
+ <a href='https://xras.xsede.org/public/requests/31162-XSEDE-MCB190042-1190572'>#MCB190042</a>).
23
+ </p>
24
+
25
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Citation</h1>
26
+ If you use MiGA in your work, consider citing:
27
+ <blockquote class='border-left p-3'>
28
+ {{citation}}
29
+ </blockquote>
30
+
31
+
@@ -0,0 +1,5 @@
1
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">{{unmiga_name}}</h1>
2
+ {{information}}
3
+
4
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Results</h1>
5
+ {{results}}
@@ -0,0 +1,3 @@
1
+ <li class="nav-item">
2
+ <a class="nav-link" href="d_{{name}}.html">{{unmiga_name}}</a>
3
+ </li>
@@ -0,0 +1,4 @@
1
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">{{type_name}} Datasets</h1>
2
+ <ul>
3
+ {{list}}
4
+ </ul>
@@ -0,0 +1,8 @@
1
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Project Information</h1>
2
+ {{information}}
3
+
4
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Summaries</h1>
5
+ {{summaries}}
6
+
7
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Project Results</h1>
8
+ {{results}}
@@ -0,0 +1,57 @@
1
+ <!doctype html>
2
+ <head>
3
+ <meta charset="utf-8">
4
+ <title>MiGA | {{project_name}}</title>
5
+
6
+ <!-- Remote assets -->
7
+ <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.0/css/bootstrap.min.css" integrity="sha384-9aIt2nRpC12Uk9gS9baDl411NQApFmC26EwAOH8WgZl5MYYxFfc+NcPb1dKGj7Sk" crossorigin="anonymous">
8
+ <script src="https://code.jquery.com/jquery-3.5.1.slim.min.js" integrity="sha384-DfXdz2htPH0lsSSs5nCTpuj/zy4C+OGpamoFVy38MVBnE+IbbVYUew+OrCXaRkfj" crossorigin="anonymous"></script>
9
+ <script src="https://cdn.jsdelivr.net/npm/popper.js@1.16.0/dist/umd/popper.min.js" integrity="sha384-Q6E9RHvbIyZFJoft+2mJbHaEWldlvI9IOYy5n3zV9zzTtmI3UksdQRVvoxMfooAo" crossorigin="anonymous"></script>
10
+ <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.5.0/js/bootstrap.min.js" integrity="sha384-OgVRvuATP1z7JjHLkuOU7Xw704+h835Lr+6QL9UvYjZE3Ipu6Tp75j7Bh/kR0JKI" crossorigin="anonymous"></script>
11
+
12
+ <!-- Local assets -->
13
+ <link href="style.css" rel="stylesheet">
14
+ <link rel="icon" href="favicon-32.png" sizes="32x32" type="image/png">
15
+ </head>
16
+ <body>
17
+ <nav class="navbar navbar-dark sticky-top bg-dark flex-md-nowrap p-0 shadow">
18
+ <a class="navbar-brand col-md-12 col-lg-12 mr-0 px-3"
19
+ href="index.html">MiGA | {{project_name}}</a>
20
+ <button class="navbar-toggler position-absolute d-md-none collapsed"
21
+ type="button" data-toggle="collapse" data-target="#sidebarMenu"
22
+ aria-controls="sidebarMenu" aria-expanded="false"
23
+ aria-label="Toggle navigation">
24
+ <span class="navbar-toggler-icon"></span>
25
+ </button>
26
+ </nav>
27
+ <div class="container-fluid">
28
+ <div class="row">
29
+ <nav id="sidebarMenu" class="col-md-3 col-lg-2 d-md-block bg-light sidebar collapse">
30
+ <div class="sidebar-sticky pt-3">
31
+ <ul class="nav flex-column">
32
+ <li class="nav-item">
33
+ <a class="nav-link {{project_active}}" href="index.html">Project</a>
34
+ </li>
35
+ <li class="nav-item">
36
+ <a class="nav-link {{ref_datasets_active}}"
37
+ href="ref_datasets.html">Reference datasets</a>
38
+ </li>
39
+ <li class="nav-item">
40
+ <a class="nav-link {{qry_datasets_active}}"
41
+ href="qry_datasets.html">Query datasets</a>
42
+ </li>
43
+ <li class="nav-item border-top mt-4">
44
+ <a class="nav-link {{about_miga_active}}"
45
+ href="about.html">About MiGA</a>
46
+ </li>
47
+ </ul>
48
+ </div>
49
+ </nav>
50
+
51
+ <main role="main" class="col-md-9 ml-sm-auto col-lg-10 px-md-4">
52
+ {{content}}
53
+ </main>
54
+ </div>
55
+ </div>
56
+ </body>
57
+
@@ -0,0 +1,11 @@
1
+ <!doctype html>
2
+ <head>
3
+ <title>MiGA Project</title>
4
+ <meta http-equiv = "refresh" content = "1; url = browse/index.html" />
5
+ </head>
6
+ <body>
7
+ <div style='font-size:200%; margin-top: 5em; text-align: center;'>
8
+ Redirecting to <a href='browse/index.html'>Project page</a>...
9
+ </div>
10
+ </body>
11
+
@@ -0,0 +1,97 @@
1
+ body {
2
+ font-size: .875rem;
3
+ }
4
+
5
+ /*
6
+ * Sidebar
7
+ */
8
+
9
+ .sidebar {
10
+ position: fixed;
11
+ top: 0;
12
+ bottom: 0;
13
+ left: 0;
14
+ z-index: 100; /* Behind the navbar */
15
+ padding: 48px 0 0; /* Height of navbar */
16
+ box-shadow: inset -1px 0 0 rgba(0, 0, 0, .1);
17
+ }
18
+
19
+ @media (max-width: 767.98px) {
20
+ .sidebar {
21
+ top: 3rem;
22
+ }
23
+ }
24
+
25
+ .sidebar-sticky {
26
+ position: relative;
27
+ top: 0;
28
+ height: calc(100vh - 48px);
29
+ padding-top: .5rem;
30
+ overflow-x: hidden;
31
+ overflow-y: auto; /* Scrollable contents if viewport is shorter than content. */
32
+ }
33
+
34
+ @supports ((position: -webkit-sticky) or (position: sticky)) {
35
+ .sidebar-sticky {
36
+ position: -webkit-sticky;
37
+ position: sticky;
38
+ }
39
+ }
40
+
41
+ .sidebar .nav-link {
42
+ font-weight: 500;
43
+ color: #333;
44
+ }
45
+
46
+ .sidebar .nav-link .feather {
47
+ margin-right: 4px;
48
+ color: #999;
49
+ }
50
+
51
+ .sidebar .nav-link.active {
52
+ color: #007bff;
53
+ }
54
+
55
+ .sidebar .nav-link:hover .feather,
56
+ .sidebar .nav-link.active .feather {
57
+ color: inherit;
58
+ }
59
+
60
+ .sidebar-heading {
61
+ font-size: .75rem;
62
+ text-transform: uppercase;
63
+ }
64
+
65
+ /*
66
+ * Navbar
67
+ */
68
+
69
+ .navbar-brand {
70
+ padding-top: .75rem;
71
+ padding-bottom: .75rem;
72
+ font-size: 1rem;
73
+ background-color: rgba(0, 0, 0, .25);
74
+ box-shadow: inset -1px 0 0 rgba(0, 0, 0, .25);
75
+ }
76
+
77
+ .navbar .navbar-toggler {
78
+ top: .25rem;
79
+ right: 1rem;
80
+ }
81
+
82
+ .navbar .form-control {
83
+ padding: .75rem 1rem;
84
+ border-width: 0;
85
+ border-radius: 0;
86
+ }
87
+
88
+ .form-control-dark {
89
+ color: #fff;
90
+ background-color: rgba(255, 255, 255, .1);
91
+ border-color: rgba(255, 255, 255, .1);
92
+ }
93
+
94
+ .form-control-dark:focus {
95
+ border-color: transparent;
96
+ box-shadow: 0 0 0 3px rgba(255, 255, 255, .25);
97
+ }
@@ -19,6 +19,10 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
19
19
  'Use Average Amino Acid Identity (AAI) as genome similarity metric',
20
20
  'By default: Use Average Nucleotide Identity (ANI)'
21
21
  ) { cli[:metric] = :aai }
22
+ opt.on(
23
+ '--ani',
24
+ 'Use Average Nucleotide Identity (ANI) as similarity metric (default)'
25
+ ) { cli[:metric] = :ani }
22
26
  opt.on(
23
27
  '--threshold FLOAT', Float,
24
28
  "Metric threshold (%) to dereplicate. By default: #{cli[:threshold]}"
@@ -38,7 +38,7 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
38
38
  end
39
39
  if cli[:key].nil?
40
40
  r[:stats].each do |k, v|
41
- k_n = k == :g_c_content ? 'G+C content' : k.to_s.unmiga_name.capitalize
41
+ k_n = k.to_s.unmiga_name.sub(/^./, &:upcase)
42
42
  cli.puts "#{k_n}: #{v.is_a?(Array) ? v.join(' ') : v}"
43
43
  end
44
44
  else
@@ -169,6 +169,7 @@ module MiGA::Cli::Action::Wf
169
169
  '--tab', '--ref', '--active'
170
170
  ])
171
171
  end
172
+ call_cli(['browse', '-P', cli[:outdir]])
172
173
  end
173
174
 
174
175
  def cleanup
@@ -11,39 +11,40 @@ module MiGA::Cli::Base
11
11
  preproc_wf: 'Preprocess input genomes or metagenomes',
12
12
  index_wf: 'Generate distance indexing of input genomes',
13
13
  # Projects
14
- new: 'Creates an empty MiGA project',
15
- about: 'Displays information about a MiGA project',
16
- doctor: 'Performs consistency checks on a MiGA project',
17
- get_db: 'Downloads a pre-indexed database',
14
+ new: 'Create an empty MiGA project',
15
+ about: 'Display information about a MiGA project',
16
+ doctor: 'Perform consistency checks on a MiGA project',
17
+ get_db: 'Download a pre-indexed database',
18
+ browse: 'Explore a project locally using a web browser',
18
19
  # Datasets
19
- add: 'Creates a dataset in a MiGA project',
20
- get: 'Downloads a dataset from public databases into a MiGA project',
21
- ncbi_get: 'Downloads all genomes in a taxon from NCBI into a MiGA project',
22
- rm: 'Removes a dataset from an MiGA project',
23
- find: 'Finds unregistered datasets based on result files',
20
+ add: 'Create a dataset in a MiGA project',
21
+ get: 'Download a dataset from public databases into a MiGA project',
22
+ ncbi_get: 'Download all genomes in a taxon from NCBI into a MiGA project',
23
+ rm: 'Remove a dataset from an MiGA project',
24
+ find: 'Find unregistered datasets based on result files',
24
25
  ln: 'Link datasets (including results) from one project to another',
25
- ls: 'Lists all registered datasets in an MiGA project',
26
- archive: 'Generates a tar-ball with all files from select datasets',
26
+ ls: 'List all registered datasets in an MiGA project',
27
+ archive: 'Generate a tar-ball with all files from select datasets',
27
28
  # Results
28
- add_result: 'Registers a result',
29
- stats: 'Extracts statistics for the given result',
30
- files: 'Lists registered files from the results of a dataset or project',
31
- run: 'Executes locally one step analysis producing the given result',
32
- summary: 'Generates a summary table for the statistics of all datasets',
33
- next_step: 'Returns the next task to run in a dataset or project',
29
+ add_result: 'Register a result',
30
+ stats: 'Extract statistics for the given result',
31
+ files: 'List registered files from the results of a dataset or project',
32
+ run: 'Execute locally one step analysis producing the given result',
33
+ summary: 'Generate a summary table for the statistics of all datasets',
34
+ next_step: 'Return the next task to run in a dataset or project',
34
35
  # Objects (Datasets or Projects)
35
- edit: 'Edits the metadata of a dataset or project',
36
+ edit: 'Edit the metadata of a dataset or project',
36
37
  # System
37
38
  init: 'Initialize MiGA to process new projects',
38
- daemon: 'Controls the daemon of a MiGA project',
39
- lair: 'Controls groups of daemons for several MiGA projects',
40
- date: 'Returns the current date in standard MiGA format',
41
- console: 'Opens an IRB console with MiGA',
39
+ daemon: 'Control the daemon of a MiGA project',
40
+ lair: 'Control groups of daemons for several MiGA projects',
41
+ date: 'Return the current date in standard MiGA format',
42
+ console: 'Open an IRB console with MiGA',
42
43
  # Taxonomy
43
- tax_set: 'Registers taxonomic information for datasets',
44
- tax_test: 'Returns test of taxonomic distributions for query datasets',
45
- tax_index: 'Creates a taxonomy-indexed list of the datasets',
46
- tax_dist: 'Estimates distributions of distance by taxonomy',
44
+ tax_set: 'Register taxonomic information for datasets',
45
+ tax_test: 'Return test of taxonomic distributions for query datasets',
46
+ tax_index: 'Create a taxonomy-indexed list of the datasets',
47
+ tax_dist: 'Estimate distributions of distance by taxonomy',
47
48
  }
48
49
 
49
50
  @@TASK_ALIAS = {
@@ -68,15 +68,20 @@ module MiGA::Common::Format
68
68
  # a FastA or FastQ file (supports gzipped files). The +format+ must be a
69
69
  # Symbol, one of +:fasta+ or +:fastq+. Additional estimations can be
70
70
  # controlled via the +opts+ Hash. Supported options include:
71
- # - +:n50+: If true, it also returns the N50 and the median (in bp)
72
- # - +:gc+: If true, it also returns the G+C content (in %)
73
- # - +:x+: If true, it also returns the undetermined bases content (in %)
71
+ # - +:n50+: Include the N50 and the median (in bp)
72
+ # - +:gc+: Include the G+C content (in %)
73
+ # - +:x+: Include the undetermined bases content (in %)
74
+ # - +:skew+: Include G-C and A-T sequence skew (in %; forces gc: true).
75
+ # See definition used here in DOI:10.1177/117693430700300006
74
76
  def seqs_length(file, format, opts = {})
77
+ opts[:gc] = true if opts[:skew]
75
78
  fh = file =~ /\.gz/ ? Zlib::GzipReader.open(file) : File.open(file, 'r')
76
79
  l = []
77
80
  gc = 0
78
81
  xn = 0
79
- i = 0 # <- Zlib::GzipReader doesn't set `$.`
82
+ t = 0
83
+ c = 0
84
+ i = 0 # <- Zlib::GzipReader doesn't set `$.`
80
85
  fh.each_line do |ln|
81
86
  i += 1
82
87
  if (format == :fasta and ln =~ /^>/) or
@@ -86,6 +91,10 @@ module MiGA::Common::Format
86
91
  l[l.size - 1] += ln.chomp.size
87
92
  gc += ln.scan(/[GCgc]/).count if opts[:gc]
88
93
  xn += ln.scan(/[XNxn]/).count if opts[:x]
94
+ if opts[:skew]
95
+ t += ln.scan(/[Tt]/).count
96
+ c += ln.scan(/[Cc]/).count
97
+ end
89
98
  end
90
99
  end
91
100
  fh.close
@@ -97,6 +106,12 @@ module MiGA::Common::Format
97
106
  o[:sd] = Math.sqrt o[:var]
98
107
  o[:gc] = 100.0 * gc / o[:tot] if opts[:gc]
99
108
  o[:x] = 100.0 * xn / o[:tot] if opts[:x]
109
+ if opts[:skew]
110
+ at = o[:tot] - gc
111
+ o[:at_skew] = 100.0 * (2 * t - at) / at
112
+ o[:gc_skew] = 100.0 * (2 * c - gc) / gc
113
+ end
114
+
100
115
  if opts[:n50]
101
116
  l.sort!
102
117
  thr = o[:tot] / 2
@@ -132,9 +147,14 @@ class String
132
147
  end
133
148
 
134
149
  ##
135
- # Replace underscores by spaces or dots (depending on context).
150
+ # Replace underscores by spaces or other symbols depending on context
136
151
  def unmiga_name
137
- gsub(/_(str|sp|subsp|pv)__/, '_\\1._').tr('_', ' ')
152
+ gsub(/_(str|sp|subsp|pv)__/, '_\\1._')
153
+ .gsub(/g_c_(content)/, 'G+C \\1')
154
+ .gsub(/g_c_(skew)/, 'G-C \\1')
155
+ .gsub(/a_t_(skew)/, 'A-T \\1')
156
+ .gsub(/x_content/, &:capitalize)
157
+ .tr('_', ' ')
138
158
  end
139
159
 
140
160
  ##
@@ -185,7 +185,7 @@ class MiGA::Daemon < MiGA::MiGA
185
185
  return if project.dataset_names.empty?
186
186
 
187
187
  # Double-check if all datasets are ready
188
- return unless project.done_preprocessing?(false)
188
+ return unless project.done_preprocessing?
189
189
 
190
190
  # Queue project-level job
191
191
  to_run = project.next_task(nil, false)
@@ -66,7 +66,7 @@ module MiGA::Dataset::Base
66
66
  @@PREPROCESSING_TASKS = [
67
67
  :raw_reads, :trimmed_reads, :read_quality, :trimmed_fasta,
68
68
  :assembly, :cds, :essential_genes, :ssu, :mytaxa, :mytaxa_scan,
69
- :distances, :taxonomy, :stats
69
+ :taxonomy, :distances, :stats
70
70
  ]
71
71
 
72
72
  ##
@@ -77,7 +77,7 @@ module MiGA::Dataset::Base
77
77
  ##
78
78
  # Tasks to be executed only in datasets that are not multi-organism. These
79
79
  # tasks are ignored for multi-organism datasets or for unknown types.
80
- @@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :distances, :taxonomy]
80
+ @@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :taxonomy, :distances]
81
81
  @@_ONLY_NONMULTI_TASKS_H = Hash[@@ONLY_NONMULTI_TASKS.map { |i| [i, true] }]
82
82
 
83
83
  ##
@@ -26,15 +26,24 @@ module MiGA::Dataset::Result
26
26
  # The values are symbols:
27
27
  # - empty: the dataset has no data
28
28
  # - inactive: the dataset is inactive
29
+ # - upstream: the task is upstream from dataset's input
29
30
  # - force: forced to ignore by metadata
30
31
  # - project: incompatible project
31
32
  # - noref: incompatible dataset, only for reference
32
33
  # - multi: incompatible dataset, only for multi
33
34
  # - nonmulti: incompatible dataset, only for nonmulti
35
+ # - complete: the task is already complete
34
36
  # - execute: do not ignore, execute the task
35
37
  def why_ignore(task)
36
- if !active?
38
+ if !get_result(task).nil?
39
+ :complete
40
+ elsif !active?
37
41
  :inactive
42
+ elsif first_preprocessing.nil?
43
+ :empty
44
+ elsif @@PREPROCESSING_TASKS.index(task) <
45
+ @@PREPROCESSING_TASKS.index(first_preprocessing)
46
+ :upstream
38
47
  elsif !metadata["run_#{task}"].nil?
39
48
  metadata["run_#{task}"] ? :execute : :force
40
49
  elsif task == :taxonomy && project.metadata[:ref_project].nil?
@@ -56,7 +65,7 @@ module MiGA::Dataset::Result
56
65
  # initial input. Passes +save+ to #add_result.
57
66
  def first_preprocessing(save = false)
58
67
  @first_processing ||= @@PREPROCESSING_TASKS.find do |t|
59
- !ignore_task?(t) && !add_result(t, save).nil?
68
+ !add_result(t, save).nil?
60
69
  end
61
70
  end
62
71
 
@@ -121,17 +130,12 @@ module MiGA::Dataset::Result
121
130
  # - complete: a task with registered results
122
131
  # - pending: a task queued to be performed
123
132
  def result_status(task)
124
- if first_preprocessing.nil?
125
- :ignore_empty
126
- elsif !get_result(task).nil?
127
- :complete
128
- elsif @@PREPROCESSING_TASKS.index(task) <
129
- @@PREPROCESSING_TASKS.index(first_preprocessing)
130
- :-
131
- elsif ignore_task?(task)
132
- :"ignore_#{why_ignore task}"
133
- else
134
- :pending
133
+ reason = why_ignore(task)
134
+ case reason
135
+ when :upstream; :-
136
+ when :execute; :pending
137
+ when :complete; :complete
138
+ else; :"ignore_#{reason}"
135
139
  end
136
140
  end
137
141
 
@@ -111,7 +111,7 @@ class MiGA::Lair < MiGA::MiGA
111
111
 
112
112
  yield(project)
113
113
  elsif Dir.exist? f
114
- each_project(f) { |project| yield(project) }
114
+ each_project(f) { |p| yield(p) }
115
115
  end
116
116
  end
117
117
  end
@@ -134,12 +134,10 @@ module MiGA::Project::Dataset
134
134
  ##
135
135
  # Are all the datasets in the project preprocessed? Save intermediate results
136
136
  # if +save+ (until the first incomplete dataset is reached).
137
- def done_preprocessing?(save = true)
138
- dataset_names.each do |dn|
139
- ds = dataset(dn)
140
- return false if ds.is_ref? and not ds.done_preprocessing?(save)
137
+ def done_preprocessing?(save = false)
138
+ !each_dataset.any? do |d|
139
+ d.ref? && d.active? && !d.done_preprocessing?(save)
141
140
  end
142
- true
143
141
  end
144
142
 
145
143
  ##
@@ -81,20 +81,22 @@ class MiGA::Result < MiGA::MiGA
81
81
  end
82
82
 
83
83
  ##
84
- # Directory containing the result
85
- def dir
86
- File.dirname(path)
84
+ # Directory containing the result; by default an absolute path, if
85
+ # +relative+ is true returns the path relative to the parent project
86
+ def dir(relative = false)
87
+ relative ? relative_dir : File.dirname(path)
87
88
  end
88
89
 
89
90
  ##
90
- # Absolute path to the file(s) defined by symbol +k+
91
- def file_path(k)
91
+ # Absolute path to the file(s) defined by symbol +k+, or relative
92
+ # path if +relative+ is true
93
+ def file_path(k, relative = false)
92
94
  k = k.to_sym
93
95
  f = self[:files].nil? ? nil : self[:files][k]
94
96
  return nil if f.nil?
95
- return File.expand_path(f, dir) unless f.is_a? Array
97
+ return File.join(dir(relative), f) unless f.is_a? Array
96
98
 
97
- f.map { |fi| File.expand_path(fi, dir) }
99
+ f.map { |fi| File.join(dir(relative), fi) }
98
100
  end
99
101
 
100
102
  ##
@@ -17,32 +17,45 @@ module MiGA::Result::Stats
17
17
  self[:stats]
18
18
  end
19
19
 
20
+ ##
21
+ # Access the stats entry of results
22
+ def stats
23
+ self[:stats]
24
+ end
25
+
20
26
  private
21
27
 
22
28
  def compute_stats_raw_reads
23
29
  stats = {}
30
+ seq_opts = { gc: true, x: true, skew: true }
24
31
  if self[:files][:pair1].nil?
25
- s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, gc: true, x: true)
32
+ s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, seq_opts)
26
33
  stats = {
27
34
  reads: s[:n],
28
35
  length_average: [s[:avg], 'bp'],
29
36
  length_standard_deviation: [s[:sd], 'bp'],
30
37
  g_c_content: [s[:gc], '%'],
31
- x_content: [s[:x], '%']
38
+ x_content: [s[:x], '%'],
39
+ g_c_skew: [s[:gc_skew], '%'],
40
+ a_t_skew: [s[:at_skew], '%']
32
41
  }
33
42
  else
34
- s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, gc: true, x: true)
35
- s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, gc: true, x: true)
43
+ s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, seq_opts)
44
+ s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, seq_opts)
36
45
  stats = {
37
46
  read_pairs: s1[:n],
38
47
  forward_length_average: [s1[:avg], 'bp'],
39
48
  forward_length_standard_deviation: [s1[:sd], 'bp'],
40
49
  forward_g_c_content: [s1[:gc], '%'],
41
50
  forward_x_content: [s1[:x], '%'],
51
+ forward_g_c_skew: [s1[:gc_skew], '%'],
52
+ forward_a_t_skew: [s1[:at_skew], '%'],
42
53
  reverse_length_average: [s2[:avg], 'bp'],
43
54
  reverse_length_standard_deviation: [s2[:sd], 'bp'],
44
55
  reverse_g_c_content: [s2[:gc], '%'],
45
- reverse_x_content: [s2[:x], '%']
56
+ reverse_x_content: [s2[:x], '%'],
57
+ reverse_g_c_skew: [s2[:gc_skew], '%'],
58
+ reverse_a_t_skew: [s2[:at_skew], '%']
46
59
  }
47
60
  end
48
61
  stats
@@ -50,19 +63,22 @@ module MiGA::Result::Stats
50
63
 
51
64
  def compute_stats_trimmed_fasta
52
65
  f = self[:files][:coupled].nil? ? file_path(:single) : file_path(:coupled)
53
- s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true)
66
+ s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true, skew: true)
54
67
  {
55
68
  reads: s[:n],
56
69
  length_average: [s[:avg], 'bp'],
57
70
  length_standard_deviation: [s[:sd], 'bp'],
58
71
  g_c_content: [s[:gc], '%'],
59
- x_content: [s[:x], '%']
72
+ x_content: [s[:x], '%'],
73
+ g_c_skew: [s[:gc_skew], '%'],
74
+ a_t_skew: [s[:at_skew], '%']
60
75
  }
61
76
  end
62
77
 
63
78
  def compute_stats_assembly
64
79
  s = MiGA::MiGA.seqs_length(
65
- file_path(:largecontigs), :fasta, n50: true, gc: true, x: true
80
+ file_path(:largecontigs), :fasta,
81
+ n50: true, gc: true, x: true, skew: true
66
82
  )
67
83
  {
68
84
  contigs: s[:n],
@@ -70,7 +86,9 @@ module MiGA::Result::Stats
70
86
  total_length: [s[:tot], 'bp'],
71
87
  longest_sequence: [s[:max], 'bp'],
72
88
  g_c_content: [s[:gc], '%'],
73
- x_content: [s[:x], '%']
89
+ x_content: [s[:x], '%'],
90
+ g_c_skew: [s[:gc_skew], '%'],
91
+ a_t_skew: [s[:at_skew], '%']
74
92
  }
75
93
  end
76
94
 
@@ -133,9 +151,9 @@ module MiGA::Result::Stats
133
151
  source.save
134
152
 
135
153
  # Inactivate low-quality datasets
136
- min_qual = (project.metadata[:min_qual] || 50)
154
+ min_qual = (project.metadata[:min_qual] || 25)
137
155
  if min_qual != 'no' && stats[:quality] < min_qual
138
- source.inactivate! 'Low genome quality'
156
+ source.inactivate! 'Low quality genome'
139
157
  end
140
158
  end
141
159
  stats
@@ -8,7 +8,7 @@ module MiGA
8
8
  # - Float representing the major.minor version.
9
9
  # - Integer representing gem releases of the current version.
10
10
  # - Integer representing minor changes that require new version number.
11
- VERSION = [0.7, 10, 0]
11
+ VERSION = [0.7, 12, 2]
12
12
 
13
13
  ##
14
14
  # Nickname for the current major.minor version.
@@ -16,7 +16,7 @@ module MiGA
16
16
 
17
17
  ##
18
18
  # Date of the current gem release.
19
- VERSION_DATE = Date.new(2020, 6, 29)
19
+ VERSION_DATE = Date.new(2020, 7, 24)
20
20
 
21
21
  ##
22
22
  # Reference of MiGA.
@@ -20,7 +20,6 @@ fi
20
20
  TYPE=$(miga ls -P "$PROJECT" -D "$DATASET" -m type | cut -f 2)
21
21
  case "$TYPE" in
22
22
  metagenome|virome)
23
- $CMD -p meta
24
23
  prodigal -a "${DATASET}.faa" -d "${DATASET}.fna" -o "${DATASET}.gff3" \
25
24
  -f gff -q -i "../05.assembly/${DATASET}.LargeContigs.fna" -p meta
26
25
  ;;
@@ -108,7 +108,7 @@ class ProjectTest < Test::Unit::TestCase
108
108
  d1 = p1.add_dataset('BAH')
109
109
  assert_not_predicate(p1, :done_preprocessing?)
110
110
  FileUtils.touch(File.join(p1.path, 'data', '90.stats', "#{d1.name}.done"))
111
- assert_predicate(p1, :done_preprocessing?)
111
+ assert { p1.done_preprocessing? true }
112
112
  assert_nil(p1.next_inclade)
113
113
  p1.metadata[:type] = :clade
114
114
  assert_equal(:subclades, p1.next_inclade)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.10.0
4
+ version: 0.7.12.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-29 00:00:00.000000000 Z
11
+ date: 2020-07-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -118,6 +118,16 @@ files:
118
118
  - lib/miga/cli/action/add.rb
119
119
  - lib/miga/cli/action/add_result.rb
120
120
  - lib/miga/cli/action/archive.rb
121
+ - lib/miga/cli/action/browse.rb
122
+ - lib/miga/cli/action/browse/about.html
123
+ - lib/miga/cli/action/browse/dataset.html
124
+ - lib/miga/cli/action/browse/dataset_menu_item.html
125
+ - lib/miga/cli/action/browse/datasets.html
126
+ - lib/miga/cli/action/browse/favicon-32.png
127
+ - lib/miga/cli/action/browse/index.html
128
+ - lib/miga/cli/action/browse/layout.html
129
+ - lib/miga/cli/action/browse/redirect.html
130
+ - lib/miga/cli/action/browse/style.css
121
131
  - lib/miga/cli/action/classify_wf.rb
122
132
  - lib/miga/cli/action/console.rb
123
133
  - lib/miga/cli/action/daemon.rb