miga-base 0.7.10.0 → 0.7.12.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 30d61eb5847a28c4d83a43e6e47ff0738bb819218e677b9aa43de158b441e0ae
4
- data.tar.gz: 46338ae15353b71fb6e7eff390c8bb976d1c11fc296bb83f33aab8ffba1a3fa8
3
+ metadata.gz: c57b841cc84841ebfd868af862204179e443d37eb4ec5550ea9ef7df63b9d933
4
+ data.tar.gz: 5512fe8305c2b6193f5d1a5c99e663ef480be32db2e8bdc7087826bdeeae5b35
5
5
  SHA512:
6
- metadata.gz: 488e7888039bb9e08e7c257fdd7cb0cf34340766f73b9b46c28b332072f5f207f5dcfb0df08c98b27f5640158d3a14ae6d0f5ab19d78f3e9e1ef44e381d34e74
7
- data.tar.gz: 992cfc225eeb2f4b8017260e66c67bc9df39a79757ecdb65e01766079e103385259c1eeb447fdc5e6b769990f96256d384dc4adddf184693a3bc94b8323b9a5a
6
+ metadata.gz: 63e095dded680c2a6d1519d8fc8788d4d44cf523fc84b9e67ee453dffb9f05c75e2acafc283b511f6b27638e678d53f7362b056f35d18d5b08893419c7d24302
7
+ data.tar.gz: efac7f7c3d65640b64dc6d9b32e3bd520faebda05a72f1b88a2ee4c11546b5366db1ac42ea296ba73fe1f7873244a9f64791a180c9094a374a7ebbf1c589d491
@@ -0,0 +1,214 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'miga/cli/action'
4
+
5
+ # Action: miga browse
6
+ class MiGA::Cli::Action::Browse < MiGA::Cli::Action
7
+ def parse_cli
8
+ cli.parse do |opt|
9
+ cli.defaults = { open: true }
10
+ cli.opt_object(opt, [:project])
11
+ end
12
+ end
13
+
14
+ def perform
15
+ p = cli.load_project
16
+ create_empty_page(p)
17
+ generate_project_page(p)
18
+ say 'Creating dataset pages'
19
+ cli.load_project.each_dataset do |d|
20
+ generate_dataset_page(p, d)
21
+ end
22
+ generate_datasets_index(p)
23
+ say "Open in your browser: #{File.join(p.path, 'index.html')}"
24
+ end
25
+
26
+ private
27
+
28
+ ##
29
+ # Create an empty page with necessary assets for project +p+
30
+ def create_empty_page(p)
31
+ say 'Creating project page'
32
+ FileUtils.mkdir_p(browse_file(p, '.'))
33
+ %w[favicon-32.png style.css].each do |i|
34
+ FileUtils.cp(template_file(i), browse_file(p, i))
35
+ end
36
+ write_file(p, 'about.html') do
37
+ build_from_template('about.html', citation: MiGA::MiGA.CITATION)
38
+ end
39
+ end
40
+
41
+ ##
42
+ # Create landing page for project +p+
43
+ def generate_project_page(p)
44
+ # Redirect page
45
+ write_file(p, '../index.html') { build_from_template('redirect.html') }
46
+
47
+ # Summaries
48
+ summaries = Dir["#{p.path}/*.tsv"].map do |i|
49
+ b = File.basename(i)
50
+ "<li><a href='../#{b}'>#{b}</a></li>"
51
+ end.join('')
52
+
53
+ # Project index page
54
+ data = {
55
+ project_active: 'active',
56
+ information: format_metadata(p),
57
+ summaries: summaries.empty? ? 'None' : "<ul>#{summaries}</ul>",
58
+ results: format_results(p)
59
+ }
60
+ write_file(p, 'index.html') { build_from_template('index.html', data) }
61
+ end
62
+
63
+ ##
64
+ # Create page for dataset +d+ within project +p+
65
+ def generate_dataset_page(p, d)
66
+ data = {
67
+ unmiga_name: d.name.unmiga_name,
68
+ information: format_metadata(d),
69
+ results: format_results(d)
70
+ }
71
+ write_file(p, "d_#{d.name}.html") do
72
+ build_from_template('dataset.html', data)
73
+ end
74
+ end
75
+
76
+ ##
77
+ # Create pages for reference and query dataset indexes
78
+ def generate_datasets_index(p)
79
+ say 'Creating index pages'
80
+ data = format_dataset_index(p)
81
+ data.each do |k, v|
82
+ write_file(p, "#{k}_datasets.html") do
83
+ v[:list] = 'None' if v[:list] == ''
84
+ build_from_template(
85
+ 'datasets.html',
86
+ v.merge(:"#{k}_datasets_active" => 'active')
87
+ )
88
+ end
89
+ end
90
+ end
91
+
92
+ def format_dataset_index(p)
93
+ data = {
94
+ ref: { type_name: 'Reference', list: '' },
95
+ qry: { type_name: 'Query', list: '' }
96
+ }
97
+ p.each_dataset do |d|
98
+ data[d.ref? ? :ref : :qry][:list] +=
99
+ "<li><a href='d_#{d.name}.html'>#{d.name.unmiga_name}</a></li>"
100
+ end
101
+ data
102
+ end
103
+
104
+ ##
105
+ # Format +obj+ metadata as a table
106
+ def format_metadata(obj)
107
+ '<table class="table table-sm table-responsive">' +
108
+ obj.metadata.data.map do |k, v|
109
+ case k
110
+ when /^run_/, :plugins, :user
111
+ next
112
+ when :web_assembly_gz
113
+ v = "<a href='#{v}'>#{v[0..50]}...</a>"
114
+ when :datasets
115
+ v = v.size
116
+ end
117
+ "<tr><td class='text-right pr-4'><b>#{format_name(k)}</b></td>" \
118
+ "<td>#{v}</td></tr>"
119
+ end.compact.join('') +
120
+ '</table>'
121
+ end
122
+
123
+ ##
124
+ # Format +obj+ results as cards
125
+ def format_results(obj)
126
+ o = ''
127
+ obj.each_result do |key, res|
128
+ links = format_result_links(res)
129
+ stats = format_result_stats(res)
130
+ next unless links || stats
131
+ name = format_name(key)
132
+ url_doc =
133
+ 'http://manual.microbial-genomes.org/part5/workflow#' +
134
+ key.to_s.tr('_', '-')
135
+ o += <<~CARD
136
+ <div class="col-md-6 mb-4">
137
+ <h3>#{name}</h3>
138
+ <div class='border-left p-3'>
139
+ #{stats}
140
+ #{links}
141
+ </div>
142
+ <div class='border-top p-2 bg-light'>
143
+ <a target=_blank href="#{url_doc}" class='p-2'>Learn more</a>
144
+ </div>
145
+ </div>
146
+ CARD
147
+ end
148
+ "<div class='row'>#{o}</div>"
149
+ end
150
+
151
+ def format_name(str)
152
+ str
153
+ .to_s.unmiga_name
154
+ .sub(/^./, &:upcase)
155
+ .gsub(/(Aai|Ani|Ogs|Cds|Ssu| db$| ssu )/, &:upcase)
156
+ .sub(/Haai/, 'hAAI')
157
+ .sub(/Mytaxa/, 'MyTaxa')
158
+ .sub(/ pvalue$/, ' p-value')
159
+ .sub(/contigs$/, 'Contigs')
160
+ end
161
+
162
+ def format_result_links(res)
163
+ links = []
164
+ res.each_file do |key, _|
165
+ name = format_name(key)
166
+ links << "<a href='../#{res.file_path(key, true)}'>#{name}</a><br/>"
167
+ end
168
+ links.empty? ? nil : links.join('')
169
+ end
170
+
171
+ def format_result_stats(res)
172
+ res.stats.map do |k, v|
173
+ v = [v, ''] unless v.is_a? Array
174
+ v[0] = ('%.3g' % v[0]) if v[0].is_a? Float
175
+ "<b>#{format_name(k)}:</b> #{v[0]}#{v[1]}<br/>"
176
+ end.join('') + '<br/>' unless res.stats.empty?
177
+ end
178
+
179
+ ##
180
+ # Write +file+ within the browse folder of project +p+ using the passed
181
+ # block output as content
182
+ def write_file(p, file)
183
+ File.open(browse_file(p, file), 'w') { |fh| fh.print yield }
184
+ end
185
+
186
+ ##
187
+ # Use a +template+ file to generate content with a hash of +data+ over the
188
+ # layout page if +layout+ is true
189
+ def build_from_template(template, data = {}, layout = true)
190
+ cont = File.read(template_file(template)).miga_variables(data)
191
+ return cont unless layout
192
+
193
+ build_from_template(
194
+ 'layout.html',
195
+ data.merge(content: cont, project_name: cli.load_project.name),
196
+ false
197
+ )
198
+ end
199
+
200
+ ##
201
+ # Path to the template browse file
202
+ def template_file(file)
203
+ File.join(
204
+ MiGA::MiGA.root_path,
205
+ 'lib', 'miga', 'cli', 'action', 'browse', file
206
+ )
207
+ end
208
+
209
+ ##
210
+ # Path to the browse file in the project
211
+ def browse_file(p, file)
212
+ File.join(p.path, 'browse', file)
213
+ end
214
+ end
@@ -0,0 +1,31 @@
1
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">About MiGA</h1>
2
+ <p>
3
+ MiGA is developed and maintained by
4
+ <a href='https://rodriguez-r.com/'>Luis M. Rodriguez-R</a>.
5
+
6
+ The MiGA codebase is
7
+ <a href='http://code.microbial-genomes.org/miga'>freely available</a> under the
8
+ terms of the
9
+ <a href='http://code.microbial-genomes.org/miga/blob/master/LICENSE'>Artistic License 2.0</a>.
10
+ </p>
11
+
12
+ <p>
13
+ MiGA is the result of a collaboration between the
14
+ <a href='http://enve-omics.gatech.edu/'>Kostas Lab</a>
15
+ (<a href='http://www.gatech.edu/'>Georgia Institute of Technology</a>) and the
16
+ <a href='http://rdp.cme.msu.edu/'>RDP team</a>
17
+ (<a href='http://cme.msu.edu/'>Center for Microbial Ecology</a>,
18
+ <a href='https://msu.edu/'>Michigan State University</a>).
19
+ The MiGA project is funded by the
20
+ <a href='http://nsf.gov/'>US National Science Foundation</a>
21
+ (Awards <a href='http://nsf.gov/awardsearch/showAward?AWD_ID=1356288'>#1356288</a> &amp;
22
+ <a href='https://xras.xsede.org/public/requests/31162-XSEDE-MCB190042-1190572'>#MCB190042</a>).
23
+ </p>
24
+
25
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Citation</h1>
26
+ If you use MiGA in your work, consider citing:
27
+ <blockquote class='border-left p-3'>
28
+ {{citation}}
29
+ </blockquote>
30
+
31
+
@@ -0,0 +1,5 @@
1
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">{{unmiga_name}}</h1>
2
+ {{information}}
3
+
4
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Results</h1>
5
+ {{results}}
@@ -0,0 +1,3 @@
1
+ <li class="nav-item">
2
+ <a class="nav-link" href="d_{{name}}.html">{{unmiga_name}}</a>
3
+ </li>
@@ -0,0 +1,4 @@
1
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">{{type_name}} Datasets</h1>
2
+ <ul>
3
+ {{list}}
4
+ </ul>
@@ -0,0 +1,8 @@
1
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Project Information</h1>
2
+ {{information}}
3
+
4
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Summaries</h1>
5
+ {{summaries}}
6
+
7
+ <h1 class="h2 border-bottom pt-3 pb-2 mb-3">Project Results</h1>
8
+ {{results}}
@@ -0,0 +1,57 @@
1
+ <!doctype html>
2
+ <head>
3
+ <meta charset="utf-8">
4
+ <title>MiGA | {{project_name}}</title>
5
+
6
+ <!-- Remote assets -->
7
+ <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.0/css/bootstrap.min.css" integrity="sha384-9aIt2nRpC12Uk9gS9baDl411NQApFmC26EwAOH8WgZl5MYYxFfc+NcPb1dKGj7Sk" crossorigin="anonymous">
8
+ <script src="https://code.jquery.com/jquery-3.5.1.slim.min.js" integrity="sha384-DfXdz2htPH0lsSSs5nCTpuj/zy4C+OGpamoFVy38MVBnE+IbbVYUew+OrCXaRkfj" crossorigin="anonymous"></script>
9
+ <script src="https://cdn.jsdelivr.net/npm/popper.js@1.16.0/dist/umd/popper.min.js" integrity="sha384-Q6E9RHvbIyZFJoft+2mJbHaEWldlvI9IOYy5n3zV9zzTtmI3UksdQRVvoxMfooAo" crossorigin="anonymous"></script>
10
+ <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.5.0/js/bootstrap.min.js" integrity="sha384-OgVRvuATP1z7JjHLkuOU7Xw704+h835Lr+6QL9UvYjZE3Ipu6Tp75j7Bh/kR0JKI" crossorigin="anonymous"></script>
11
+
12
+ <!-- Local assets -->
13
+ <link href="style.css" rel="stylesheet">
14
+ <link rel="icon" href="favicon-32.png" sizes="32x32" type="image/png">
15
+ </head>
16
+ <body>
17
+ <nav class="navbar navbar-dark sticky-top bg-dark flex-md-nowrap p-0 shadow">
18
+ <a class="navbar-brand col-md-12 col-lg-12 mr-0 px-3"
19
+ href="index.html">MiGA | {{project_name}}</a>
20
+ <button class="navbar-toggler position-absolute d-md-none collapsed"
21
+ type="button" data-toggle="collapse" data-target="#sidebarMenu"
22
+ aria-controls="sidebarMenu" aria-expanded="false"
23
+ aria-label="Toggle navigation">
24
+ <span class="navbar-toggler-icon"></span>
25
+ </button>
26
+ </nav>
27
+ <div class="container-fluid">
28
+ <div class="row">
29
+ <nav id="sidebarMenu" class="col-md-3 col-lg-2 d-md-block bg-light sidebar collapse">
30
+ <div class="sidebar-sticky pt-3">
31
+ <ul class="nav flex-column">
32
+ <li class="nav-item">
33
+ <a class="nav-link {{project_active}}" href="index.html">Project</a>
34
+ </li>
35
+ <li class="nav-item">
36
+ <a class="nav-link {{ref_datasets_active}}"
37
+ href="ref_datasets.html">Reference datasets</a>
38
+ </li>
39
+ <li class="nav-item">
40
+ <a class="nav-link {{qry_datasets_active}}"
41
+ href="qry_datasets.html">Query datasets</a>
42
+ </li>
43
+ <li class="nav-item border-top mt-4">
44
+ <a class="nav-link {{about_miga_active}}"
45
+ href="about.html">About MiGA</a>
46
+ </li>
47
+ </ul>
48
+ </div>
49
+ </nav>
50
+
51
+ <main role="main" class="col-md-9 ml-sm-auto col-lg-10 px-md-4">
52
+ {{content}}
53
+ </main>
54
+ </div>
55
+ </div>
56
+ </body>
57
+
@@ -0,0 +1,11 @@
1
+ <!doctype html>
2
+ <head>
3
+ <title>MiGA Project</title>
4
+ <meta http-equiv = "refresh" content = "1; url = browse/index.html" />
5
+ </head>
6
+ <body>
7
+ <div style='font-size:200%; margin-top: 5em; text-align: center;'>
8
+ Redirecting to <a href='browse/index.html'>Project page</a>...
9
+ </div>
10
+ </body>
11
+
@@ -0,0 +1,97 @@
1
+ body {
2
+ font-size: .875rem;
3
+ }
4
+
5
+ /*
6
+ * Sidebar
7
+ */
8
+
9
+ .sidebar {
10
+ position: fixed;
11
+ top: 0;
12
+ bottom: 0;
13
+ left: 0;
14
+ z-index: 100; /* Behind the navbar */
15
+ padding: 48px 0 0; /* Height of navbar */
16
+ box-shadow: inset -1px 0 0 rgba(0, 0, 0, .1);
17
+ }
18
+
19
+ @media (max-width: 767.98px) {
20
+ .sidebar {
21
+ top: 3rem;
22
+ }
23
+ }
24
+
25
+ .sidebar-sticky {
26
+ position: relative;
27
+ top: 0;
28
+ height: calc(100vh - 48px);
29
+ padding-top: .5rem;
30
+ overflow-x: hidden;
31
+ overflow-y: auto; /* Scrollable contents if viewport is shorter than content. */
32
+ }
33
+
34
+ @supports ((position: -webkit-sticky) or (position: sticky)) {
35
+ .sidebar-sticky {
36
+ position: -webkit-sticky;
37
+ position: sticky;
38
+ }
39
+ }
40
+
41
+ .sidebar .nav-link {
42
+ font-weight: 500;
43
+ color: #333;
44
+ }
45
+
46
+ .sidebar .nav-link .feather {
47
+ margin-right: 4px;
48
+ color: #999;
49
+ }
50
+
51
+ .sidebar .nav-link.active {
52
+ color: #007bff;
53
+ }
54
+
55
+ .sidebar .nav-link:hover .feather,
56
+ .sidebar .nav-link.active .feather {
57
+ color: inherit;
58
+ }
59
+
60
+ .sidebar-heading {
61
+ font-size: .75rem;
62
+ text-transform: uppercase;
63
+ }
64
+
65
+ /*
66
+ * Navbar
67
+ */
68
+
69
+ .navbar-brand {
70
+ padding-top: .75rem;
71
+ padding-bottom: .75rem;
72
+ font-size: 1rem;
73
+ background-color: rgba(0, 0, 0, .25);
74
+ box-shadow: inset -1px 0 0 rgba(0, 0, 0, .25);
75
+ }
76
+
77
+ .navbar .navbar-toggler {
78
+ top: .25rem;
79
+ right: 1rem;
80
+ }
81
+
82
+ .navbar .form-control {
83
+ padding: .75rem 1rem;
84
+ border-width: 0;
85
+ border-radius: 0;
86
+ }
87
+
88
+ .form-control-dark {
89
+ color: #fff;
90
+ background-color: rgba(255, 255, 255, .1);
91
+ border-color: rgba(255, 255, 255, .1);
92
+ }
93
+
94
+ .form-control-dark:focus {
95
+ border-color: transparent;
96
+ box-shadow: 0 0 0 3px rgba(255, 255, 255, .25);
97
+ }
@@ -19,6 +19,10 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
19
19
  'Use Average Amino Acid Identity (AAI) as genome similarity metric',
20
20
  'By default: Use Average Nucleotide Identity (ANI)'
21
21
  ) { cli[:metric] = :aai }
22
+ opt.on(
23
+ '--ani',
24
+ 'Use Average Nucleotide Identity (ANI) as similarity metric (default)'
25
+ ) { cli[:metric] = :ani }
22
26
  opt.on(
23
27
  '--threshold FLOAT', Float,
24
28
  "Metric threshold (%) to dereplicate. By default: #{cli[:threshold]}"
@@ -38,7 +38,7 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
38
38
  end
39
39
  if cli[:key].nil?
40
40
  r[:stats].each do |k, v|
41
- k_n = k == :g_c_content ? 'G+C content' : k.to_s.unmiga_name.capitalize
41
+ k_n = k.to_s.unmiga_name.sub(/^./, &:upcase)
42
42
  cli.puts "#{k_n}: #{v.is_a?(Array) ? v.join(' ') : v}"
43
43
  end
44
44
  else
@@ -169,6 +169,7 @@ module MiGA::Cli::Action::Wf
169
169
  '--tab', '--ref', '--active'
170
170
  ])
171
171
  end
172
+ call_cli(['browse', '-P', cli[:outdir]])
172
173
  end
173
174
 
174
175
  def cleanup
@@ -11,39 +11,40 @@ module MiGA::Cli::Base
11
11
  preproc_wf: 'Preprocess input genomes or metagenomes',
12
12
  index_wf: 'Generate distance indexing of input genomes',
13
13
  # Projects
14
- new: 'Creates an empty MiGA project',
15
- about: 'Displays information about a MiGA project',
16
- doctor: 'Performs consistency checks on a MiGA project',
17
- get_db: 'Downloads a pre-indexed database',
14
+ new: 'Create an empty MiGA project',
15
+ about: 'Display information about a MiGA project',
16
+ doctor: 'Perform consistency checks on a MiGA project',
17
+ get_db: 'Download a pre-indexed database',
18
+ browse: 'Explore a project locally using a web browser',
18
19
  # Datasets
19
- add: 'Creates a dataset in a MiGA project',
20
- get: 'Downloads a dataset from public databases into a MiGA project',
21
- ncbi_get: 'Downloads all genomes in a taxon from NCBI into a MiGA project',
22
- rm: 'Removes a dataset from an MiGA project',
23
- find: 'Finds unregistered datasets based on result files',
20
+ add: 'Create a dataset in a MiGA project',
21
+ get: 'Download a dataset from public databases into a MiGA project',
22
+ ncbi_get: 'Download all genomes in a taxon from NCBI into a MiGA project',
23
+ rm: 'Remove a dataset from an MiGA project',
24
+ find: 'Find unregistered datasets based on result files',
24
25
  ln: 'Link datasets (including results) from one project to another',
25
- ls: 'Lists all registered datasets in an MiGA project',
26
- archive: 'Generates a tar-ball with all files from select datasets',
26
+ ls: 'List all registered datasets in an MiGA project',
27
+ archive: 'Generate a tar-ball with all files from select datasets',
27
28
  # Results
28
- add_result: 'Registers a result',
29
- stats: 'Extracts statistics for the given result',
30
- files: 'Lists registered files from the results of a dataset or project',
31
- run: 'Executes locally one step analysis producing the given result',
32
- summary: 'Generates a summary table for the statistics of all datasets',
33
- next_step: 'Returns the next task to run in a dataset or project',
29
+ add_result: 'Register a result',
30
+ stats: 'Extract statistics for the given result',
31
+ files: 'List registered files from the results of a dataset or project',
32
+ run: 'Execute locally one step analysis producing the given result',
33
+ summary: 'Generate a summary table for the statistics of all datasets',
34
+ next_step: 'Return the next task to run in a dataset or project',
34
35
  # Objects (Datasets or Projects)
35
- edit: 'Edits the metadata of a dataset or project',
36
+ edit: 'Edit the metadata of a dataset or project',
36
37
  # System
37
38
  init: 'Initialize MiGA to process new projects',
38
- daemon: 'Controls the daemon of a MiGA project',
39
- lair: 'Controls groups of daemons for several MiGA projects',
40
- date: 'Returns the current date in standard MiGA format',
41
- console: 'Opens an IRB console with MiGA',
39
+ daemon: 'Control the daemon of a MiGA project',
40
+ lair: 'Control groups of daemons for several MiGA projects',
41
+ date: 'Return the current date in standard MiGA format',
42
+ console: 'Open an IRB console with MiGA',
42
43
  # Taxonomy
43
- tax_set: 'Registers taxonomic information for datasets',
44
- tax_test: 'Returns test of taxonomic distributions for query datasets',
45
- tax_index: 'Creates a taxonomy-indexed list of the datasets',
46
- tax_dist: 'Estimates distributions of distance by taxonomy',
44
+ tax_set: 'Register taxonomic information for datasets',
45
+ tax_test: 'Return test of taxonomic distributions for query datasets',
46
+ tax_index: 'Create a taxonomy-indexed list of the datasets',
47
+ tax_dist: 'Estimate distributions of distance by taxonomy',
47
48
  }
48
49
 
49
50
  @@TASK_ALIAS = {
@@ -68,15 +68,20 @@ module MiGA::Common::Format
68
68
  # a FastA or FastQ file (supports gzipped files). The +format+ must be a
69
69
  # Symbol, one of +:fasta+ or +:fastq+. Additional estimations can be
70
70
  # controlled via the +opts+ Hash. Supported options include:
71
- # - +:n50+: If true, it also returns the N50 and the median (in bp)
72
- # - +:gc+: If true, it also returns the G+C content (in %)
73
- # - +:x+: If true, it also returns the undetermined bases content (in %)
71
+ # - +:n50+: Include the N50 and the median (in bp)
72
+ # - +:gc+: Include the G+C content (in %)
73
+ # - +:x+: Include the undetermined bases content (in %)
74
+ # - +:skew+: Include G-C and A-T sequence skew (in %; forces gc: true).
75
+ # See definition used here in DOI:10.1177/117693430700300006
74
76
  def seqs_length(file, format, opts = {})
77
+ opts[:gc] = true if opts[:skew]
75
78
  fh = file =~ /\.gz/ ? Zlib::GzipReader.open(file) : File.open(file, 'r')
76
79
  l = []
77
80
  gc = 0
78
81
  xn = 0
79
- i = 0 # <- Zlib::GzipReader doesn't set `$.`
82
+ t = 0
83
+ c = 0
84
+ i = 0 # <- Zlib::GzipReader doesn't set `$.`
80
85
  fh.each_line do |ln|
81
86
  i += 1
82
87
  if (format == :fasta and ln =~ /^>/) or
@@ -86,6 +91,10 @@ module MiGA::Common::Format
86
91
  l[l.size - 1] += ln.chomp.size
87
92
  gc += ln.scan(/[GCgc]/).count if opts[:gc]
88
93
  xn += ln.scan(/[XNxn]/).count if opts[:x]
94
+ if opts[:skew]
95
+ t += ln.scan(/[Tt]/).count
96
+ c += ln.scan(/[Cc]/).count
97
+ end
89
98
  end
90
99
  end
91
100
  fh.close
@@ -97,6 +106,12 @@ module MiGA::Common::Format
97
106
  o[:sd] = Math.sqrt o[:var]
98
107
  o[:gc] = 100.0 * gc / o[:tot] if opts[:gc]
99
108
  o[:x] = 100.0 * xn / o[:tot] if opts[:x]
109
+ if opts[:skew]
110
+ at = o[:tot] - gc
111
+ o[:at_skew] = 100.0 * (2 * t - at) / at
112
+ o[:gc_skew] = 100.0 * (2 * c - gc) / gc
113
+ end
114
+
100
115
  if opts[:n50]
101
116
  l.sort!
102
117
  thr = o[:tot] / 2
@@ -132,9 +147,14 @@ class String
132
147
  end
133
148
 
134
149
  ##
135
- # Replace underscores by spaces or dots (depending on context).
150
+ # Replace underscores by spaces or other symbols depending on context
136
151
  def unmiga_name
137
- gsub(/_(str|sp|subsp|pv)__/, '_\\1._').tr('_', ' ')
152
+ gsub(/_(str|sp|subsp|pv)__/, '_\\1._')
153
+ .gsub(/g_c_(content)/, 'G+C \\1')
154
+ .gsub(/g_c_(skew)/, 'G-C \\1')
155
+ .gsub(/a_t_(skew)/, 'A-T \\1')
156
+ .gsub(/x_content/, &:capitalize)
157
+ .tr('_', ' ')
138
158
  end
139
159
 
140
160
  ##
@@ -185,7 +185,7 @@ class MiGA::Daemon < MiGA::MiGA
185
185
  return if project.dataset_names.empty?
186
186
 
187
187
  # Double-check if all datasets are ready
188
- return unless project.done_preprocessing?(false)
188
+ return unless project.done_preprocessing?
189
189
 
190
190
  # Queue project-level job
191
191
  to_run = project.next_task(nil, false)
@@ -66,7 +66,7 @@ module MiGA::Dataset::Base
66
66
  @@PREPROCESSING_TASKS = [
67
67
  :raw_reads, :trimmed_reads, :read_quality, :trimmed_fasta,
68
68
  :assembly, :cds, :essential_genes, :ssu, :mytaxa, :mytaxa_scan,
69
- :distances, :taxonomy, :stats
69
+ :taxonomy, :distances, :stats
70
70
  ]
71
71
 
72
72
  ##
@@ -77,7 +77,7 @@ module MiGA::Dataset::Base
77
77
  ##
78
78
  # Tasks to be executed only in datasets that are not multi-organism. These
79
79
  # tasks are ignored for multi-organism datasets or for unknown types.
80
- @@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :distances, :taxonomy]
80
+ @@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :taxonomy, :distances]
81
81
  @@_ONLY_NONMULTI_TASKS_H = Hash[@@ONLY_NONMULTI_TASKS.map { |i| [i, true] }]
82
82
 
83
83
  ##
@@ -26,15 +26,24 @@ module MiGA::Dataset::Result
26
26
  # The values are symbols:
27
27
  # - empty: the dataset has no data
28
28
  # - inactive: the dataset is inactive
29
+ # - upstream: the task is upstream from dataset's input
29
30
  # - force: forced to ignore by metadata
30
31
  # - project: incompatible project
31
32
  # - noref: incompatible dataset, only for reference
32
33
  # - multi: incompatible dataset, only for multi
33
34
  # - nonmulti: incompatible dataset, only for nonmulti
35
+ # - complete: the task is already complete
34
36
  # - execute: do not ignore, execute the task
35
37
  def why_ignore(task)
36
- if !active?
38
+ if !get_result(task).nil?
39
+ :complete
40
+ elsif !active?
37
41
  :inactive
42
+ elsif first_preprocessing.nil?
43
+ :empty
44
+ elsif @@PREPROCESSING_TASKS.index(task) <
45
+ @@PREPROCESSING_TASKS.index(first_preprocessing)
46
+ :upstream
38
47
  elsif !metadata["run_#{task}"].nil?
39
48
  metadata["run_#{task}"] ? :execute : :force
40
49
  elsif task == :taxonomy && project.metadata[:ref_project].nil?
@@ -56,7 +65,7 @@ module MiGA::Dataset::Result
56
65
  # initial input. Passes +save+ to #add_result.
57
66
  def first_preprocessing(save = false)
58
67
  @first_processing ||= @@PREPROCESSING_TASKS.find do |t|
59
- !ignore_task?(t) && !add_result(t, save).nil?
68
+ !add_result(t, save).nil?
60
69
  end
61
70
  end
62
71
 
@@ -121,17 +130,12 @@ module MiGA::Dataset::Result
121
130
  # - complete: a task with registered results
122
131
  # - pending: a task queued to be performed
123
132
  def result_status(task)
124
- if first_preprocessing.nil?
125
- :ignore_empty
126
- elsif !get_result(task).nil?
127
- :complete
128
- elsif @@PREPROCESSING_TASKS.index(task) <
129
- @@PREPROCESSING_TASKS.index(first_preprocessing)
130
- :-
131
- elsif ignore_task?(task)
132
- :"ignore_#{why_ignore task}"
133
- else
134
- :pending
133
+ reason = why_ignore(task)
134
+ case reason
135
+ when :upstream; :-
136
+ when :execute; :pending
137
+ when :complete; :complete
138
+ else; :"ignore_#{reason}"
135
139
  end
136
140
  end
137
141
 
@@ -111,7 +111,7 @@ class MiGA::Lair < MiGA::MiGA
111
111
 
112
112
  yield(project)
113
113
  elsif Dir.exist? f
114
- each_project(f) { |project| yield(project) }
114
+ each_project(f) { |p| yield(p) }
115
115
  end
116
116
  end
117
117
  end
@@ -134,12 +134,10 @@ module MiGA::Project::Dataset
134
134
  ##
135
135
  # Are all the active reference datasets in the project preprocessed? Save intermediate results
136
136
  # if +save+ (until the first incomplete dataset is reached).
137
- def done_preprocessing?(save = true)
138
- dataset_names.each do |dn|
139
- ds = dataset(dn)
140
- return false if ds.is_ref? and not ds.done_preprocessing?(save)
137
+ def done_preprocessing?(save = false)
138
+ !each_dataset.any? do |d|
139
+ d.ref? && d.active? && !d.done_preprocessing?(save)
141
140
  end
142
- true
143
141
  end
144
142
 
145
143
  ##
@@ -81,20 +81,22 @@ class MiGA::Result < MiGA::MiGA
81
81
  end
82
82
 
83
83
  ##
84
- # Directory containing the result
85
- def dir
86
- File.dirname(path)
84
+ # Directory containing the result; by default an absolute path, if
85
+ # +relative+ is true returns the path relative to the parent project
86
+ def dir(relative = false)
87
+ relative ? relative_dir : File.dirname(path)
87
88
  end
88
89
 
89
90
  ##
90
- # Absolute path to the file(s) defined by symbol +k+
91
- def file_path(k)
91
+ # Absolute path to the file(s) defined by symbol +k+, or relative
92
+ # path if +relative+ is true
93
+ def file_path(k, relative = false)
92
94
  k = k.to_sym
93
95
  f = self[:files].nil? ? nil : self[:files][k]
94
96
  return nil if f.nil?
95
- return File.expand_path(f, dir) unless f.is_a? Array
97
+ return File.join(dir(relative), f) unless f.is_a? Array
96
98
 
97
- f.map { |fi| File.expand_path(fi, dir) }
99
+ f.map { |fi| File.join(dir(relative), fi) }
98
100
  end
99
101
 
100
102
  ##
@@ -17,32 +17,45 @@ module MiGA::Result::Stats
17
17
  self[:stats]
18
18
  end
19
19
 
20
+ ##
21
+ # Access the stats entry of results
22
+ def stats
23
+ self[:stats]
24
+ end
25
+
20
26
  private
21
27
 
22
28
  def compute_stats_raw_reads
23
29
  stats = {}
30
+ seq_opts = { gc: true, x: true, skew: true }
24
31
  if self[:files][:pair1].nil?
25
- s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, gc: true, x: true)
32
+ s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, seq_opts)
26
33
  stats = {
27
34
  reads: s[:n],
28
35
  length_average: [s[:avg], 'bp'],
29
36
  length_standard_deviation: [s[:sd], 'bp'],
30
37
  g_c_content: [s[:gc], '%'],
31
- x_content: [s[:x], '%']
38
+ x_content: [s[:x], '%'],
39
+ g_c_skew: [s[:gc_skew], '%'],
40
+ a_t_skew: [s[:at_skew], '%']
32
41
  }
33
42
  else
34
- s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, gc: true, x: true)
35
- s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, gc: true, x: true)
43
+ s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, seq_opts)
44
+ s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, seq_opts)
36
45
  stats = {
37
46
  read_pairs: s1[:n],
38
47
  forward_length_average: [s1[:avg], 'bp'],
39
48
  forward_length_standard_deviation: [s1[:sd], 'bp'],
40
49
  forward_g_c_content: [s1[:gc], '%'],
41
50
  forward_x_content: [s1[:x], '%'],
51
+ forward_g_c_skew: [s1[:gc_skew], '%'],
52
+ forward_a_t_skew: [s1[:at_skew], '%'],
42
53
  reverse_length_average: [s2[:avg], 'bp'],
43
54
  reverse_length_standard_deviation: [s2[:sd], 'bp'],
44
55
  reverse_g_c_content: [s2[:gc], '%'],
45
- reverse_x_content: [s2[:x], '%']
56
+ reverse_x_content: [s2[:x], '%'],
57
+ reverse_g_c_skew: [s2[:gc_skew], '%'],
58
+ reverse_a_t_skew: [s2[:at_skew], '%']
46
59
  }
47
60
  end
48
61
  stats
@@ -50,19 +63,22 @@ module MiGA::Result::Stats
50
63
 
51
64
  def compute_stats_trimmed_fasta
52
65
  f = self[:files][:coupled].nil? ? file_path(:single) : file_path(:coupled)
53
- s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true)
66
+ s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true, skew: true)
54
67
  {
55
68
  reads: s[:n],
56
69
  length_average: [s[:avg], 'bp'],
57
70
  length_standard_deviation: [s[:sd], 'bp'],
58
71
  g_c_content: [s[:gc], '%'],
59
- x_content: [s[:x], '%']
72
+ x_content: [s[:x], '%'],
73
+ g_c_skew: [s[:gc_skew], '%'],
74
+ a_t_skew: [s[:at_skew], '%']
60
75
  }
61
76
  end
62
77
 
63
78
  def compute_stats_assembly
64
79
  s = MiGA::MiGA.seqs_length(
65
- file_path(:largecontigs), :fasta, n50: true, gc: true, x: true
80
+ file_path(:largecontigs), :fasta,
81
+ n50: true, gc: true, x: true, skew: true
66
82
  )
67
83
  {
68
84
  contigs: s[:n],
@@ -70,7 +86,9 @@ module MiGA::Result::Stats
70
86
  total_length: [s[:tot], 'bp'],
71
87
  longest_sequence: [s[:max], 'bp'],
72
88
  g_c_content: [s[:gc], '%'],
73
- x_content: [s[:x], '%']
89
+ x_content: [s[:x], '%'],
90
+ g_c_skew: [s[:gc_skew], '%'],
91
+ a_t_skew: [s[:at_skew], '%']
74
92
  }
75
93
  end
76
94
 
@@ -133,9 +151,9 @@ module MiGA::Result::Stats
133
151
  source.save
134
152
 
135
153
  # Inactivate low-quality datasets
136
- min_qual = (project.metadata[:min_qual] || 50)
154
+ min_qual = (project.metadata[:min_qual] || 25)
137
155
  if min_qual != 'no' && stats[:quality] < min_qual
138
- source.inactivate! 'Low genome quality'
156
+ source.inactivate! 'Low quality genome'
139
157
  end
140
158
  end
141
159
  stats
@@ -8,7 +8,7 @@ module MiGA
8
8
  # - Float representing the major.minor version.
9
9
  # - Integer representing gem releases of the current version.
10
10
  # - Integer representing minor changes that require new version number.
11
- VERSION = [0.7, 10, 0]
11
+ VERSION = [0.7, 12, 2]
12
12
 
13
13
  ##
14
14
  # Nickname for the current major.minor version.
@@ -16,7 +16,7 @@ module MiGA
16
16
 
17
17
  ##
18
18
  # Date of the current gem release.
19
- VERSION_DATE = Date.new(2020, 6, 29)
19
+ VERSION_DATE = Date.new(2020, 7, 24)
20
20
 
21
21
  ##
22
22
  # Reference of MiGA.
@@ -20,7 +20,6 @@ fi
20
20
  TYPE=$(miga ls -P "$PROJECT" -D "$DATASET" -m type | cut -f 2)
21
21
  case "$TYPE" in
22
22
  metagenome|virome)
23
- $CMD -p meta
24
23
  prodigal -a "${DATASET}.faa" -d "${DATASET}.fna" -o "${DATASET}.gff3" \
25
24
  -f gff -q -i "../05.assembly/${DATASET}.LargeContigs.fna" -p meta
26
25
  ;;
@@ -108,7 +108,7 @@ class ProjectTest < Test::Unit::TestCase
108
108
  d1 = p1.add_dataset('BAH')
109
109
  assert_not_predicate(p1, :done_preprocessing?)
110
110
  FileUtils.touch(File.join(p1.path, 'data', '90.stats', "#{d1.name}.done"))
111
- assert_predicate(p1, :done_preprocessing?)
111
+ assert { p1.done_preprocessing? true }
112
112
  assert_nil(p1.next_inclade)
113
113
  p1.metadata[:type] = :clade
114
114
  assert_equal(:subclades, p1.next_inclade)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.10.0
4
+ version: 0.7.12.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-29 00:00:00.000000000 Z
11
+ date: 2020-07-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -118,6 +118,16 @@ files:
118
118
  - lib/miga/cli/action/add.rb
119
119
  - lib/miga/cli/action/add_result.rb
120
120
  - lib/miga/cli/action/archive.rb
121
+ - lib/miga/cli/action/browse.rb
122
+ - lib/miga/cli/action/browse/about.html
123
+ - lib/miga/cli/action/browse/dataset.html
124
+ - lib/miga/cli/action/browse/dataset_menu_item.html
125
+ - lib/miga/cli/action/browse/datasets.html
126
+ - lib/miga/cli/action/browse/favicon-32.png
127
+ - lib/miga/cli/action/browse/index.html
128
+ - lib/miga/cli/action/browse/layout.html
129
+ - lib/miga/cli/action/browse/redirect.html
130
+ - lib/miga/cli/action/browse/style.css
121
131
  - lib/miga/cli/action/classify_wf.rb
122
132
  - lib/miga/cli/action/console.rb
123
133
  - lib/miga/cli/action/daemon.rb