miga-base 1.3.8.1 → 1.3.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/miga/cli/action/add_result.rb +22 -1
- data/lib/miga/cli/action/browse/about.html +4 -2
- data/lib/miga/cli/action/doctor.rb +1 -1
- data/lib/miga/cli/action/download/gtdb.rb +1 -1
- data/lib/miga/cli/action/download/ncbi.rb +43 -68
- data/lib/miga/cli/action/download/seqcode.rb +1 -2
- data/lib/miga/cli/action/ncbi_get.rb +1 -8
- data/lib/miga/cli/action/wf.rb +15 -6
- data/lib/miga/cli/objects_helper.rb +3 -0
- data/lib/miga/cli/opt_helper.rb +8 -2
- data/lib/miga/common/net.rb +100 -18
- data/lib/miga/dataset/base.rb +40 -12
- data/lib/miga/dataset/hooks.rb +8 -0
- data/lib/miga/dataset/result/ignore.rb +14 -2
- data/lib/miga/dataset/type.rb +51 -0
- data/lib/miga/dataset.rb +3 -22
- data/lib/miga/json.rb +9 -0
- data/lib/miga/project/base.rb +15 -9
- data/lib/miga/project.rb +7 -1
- data/lib/miga/remote_dataset/base.rb +117 -36
- data/lib/miga/remote_dataset/download.rb +121 -54
- data/lib/miga/remote_dataset.rb +34 -13
- data/lib/miga/result/stats.rb +2 -0
- data/lib/miga/result/versions.rb +23 -0
- data/lib/miga/result.rb +7 -1
- data/lib/miga/taxonomy/base.rb +3 -2
- data/lib/miga/version.rb +2 -2
- data/scripts/assembly.bash +15 -1
- data/scripts/cds.bash +9 -3
- data/scripts/distances.bash +103 -5
- data/scripts/essential_genes.bash +14 -1
- data/scripts/mytaxa.bash +18 -3
- data/scripts/mytaxa_scan.bash +16 -3
- data/scripts/read_quality.bash +6 -2
- data/scripts/ssu.bash +19 -1
- data/scripts/stats.bash +9 -3
- data/scripts/taxonomy.bash +98 -2
- data/scripts/trimmed_fasta.bash +10 -2
- data/scripts/trimmed_reads.bash +26 -6
- data/test/dataset_test.rb +17 -2
- data/test/hook_test.rb +3 -2
- data/test/net_test.rb +21 -5
- data/test/project_test.rb +13 -0
- data/test/remote_dataset_test.rb +106 -7
- data/test/result_test.rb +47 -21
- data/test/taxonomy_test.rb +9 -3
- data/utils/distance/runner.rb +3 -1
- data/utils/distances.rb +1 -1
- data/utils/subclades.R +15 -8
- metadata +4 -2
data/lib/miga/dataset/hooks.rb
CHANGED
@@ -15,6 +15,7 @@ require 'miga/common/hooks'
|
|
15
15
|
# Supported hooks:
|
16
16
|
# - run_lambda(lambda, args...)
|
17
17
|
# - recalculate_status()
|
18
|
+
# - check_type()
|
18
19
|
# - clear_run_counts()
|
19
20
|
# - run_cmd(cmd)
|
20
21
|
# Internal hooks:
|
@@ -27,6 +28,7 @@ module MiGA::Dataset::Hooks
|
|
27
28
|
def default_hooks
|
28
29
|
{
|
29
30
|
on_create: [[:recalculate_status]],
|
31
|
+
on_save: [[:check_type]],
|
30
32
|
on_activate: [[:clear_run_counts], [:recalculate_status]],
|
31
33
|
on_inactivate: [[:recalculate_status]],
|
32
34
|
on_result_ready: [[:_pull_result_hooks]],
|
@@ -51,6 +53,12 @@ module MiGA::Dataset::Hooks
|
|
51
53
|
recalculate_status
|
52
54
|
end
|
53
55
|
|
56
|
+
##
|
57
|
+
# Ensure that the dataset type exists and is compatible with the project type
|
58
|
+
def hook_check_type(_hook_args, _event_args)
|
59
|
+
check_type
|
60
|
+
end
|
61
|
+
|
54
62
|
##
|
55
63
|
# Run +cmd+ in the command-line with {{variables}}:
|
56
64
|
# dataset, project, project_name, miga, object (if defined for the event)
|
@@ -17,10 +17,14 @@ module MiGA::Dataset::Result::Ignore
|
|
17
17
|
# - project: incompatible project
|
18
18
|
# - noref: incompatible dataset, only for reference
|
19
19
|
# - multi: incompatible dataset, only for multi
|
20
|
+
# - nomarkers: incompatible dataset, only for markers
|
20
21
|
# - nonmulti: incompatible dataset, only for nonmulti
|
21
22
|
# - complete: the task is already complete
|
22
23
|
def ignore_reasons
|
23
|
-
%i[
|
24
|
+
%i[
|
25
|
+
empty inactive upstream force project
|
26
|
+
noref multi nonmulti nomarkers complete
|
27
|
+
]
|
24
28
|
end
|
25
29
|
|
26
30
|
##
|
@@ -91,9 +95,15 @@ module MiGA::Dataset::Result::Ignore
|
|
91
95
|
ignore_by_type?(task, :nonmulti)
|
92
96
|
end
|
93
97
|
|
98
|
+
##
|
99
|
+
# Ignore +task+ because it's not a markers dataset
|
100
|
+
def ignore_nomarkers?(task)
|
101
|
+
ignore_by_type?(task, :nomarkers)
|
102
|
+
end
|
103
|
+
|
94
104
|
##
|
95
105
|
# Ignore +task+ by +type+ of dataset, one of: +:noref+, +:multi+, or
|
96
|
-
# +:nonmulti+
|
106
|
+
# +:nonmulti+, +:nomarkers+
|
97
107
|
def ignore_by_type?(task, type)
|
98
108
|
return false if force_task?(task)
|
99
109
|
|
@@ -105,6 +115,8 @@ module MiGA::Dataset::Result::Ignore
|
|
105
115
|
[:multi?, self.class.ONLY_MULTI_TASKS]
|
106
116
|
when :nonmulti
|
107
117
|
[:nonmulti?, self.class.ONLY_NONMULTI_TASKS]
|
118
|
+
when :nomarkers
|
119
|
+
[:markers?, self.class.EXCLUDE_NOMARKER_TASKS]
|
108
120
|
else
|
109
121
|
raise "Unexpected error, unknown type reason: #{type}"
|
110
122
|
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
##
|
2
|
+
# Helper module including specific functions for dataset type
|
3
|
+
module MiGA::Dataset::Type
|
4
|
+
##
|
5
|
+
# Get the type of dataset as Symbol
|
6
|
+
def type
|
7
|
+
metadata[:type]
|
8
|
+
end
|
9
|
+
|
10
|
+
##
|
11
|
+
# Is this dataset known to be multi-organism?
|
12
|
+
def multi?
|
13
|
+
self.class.KNOWN_TYPES.dig(type, :multi)
|
14
|
+
end
|
15
|
+
|
16
|
+
##
|
17
|
+
# Is this dataset known to be single-organism?
|
18
|
+
def nonmulti?
|
19
|
+
y = self.class.KNOWN_TYPES.dig(type, :multi)
|
20
|
+
y.nil? ? nil : !y
|
21
|
+
end
|
22
|
+
|
23
|
+
##
|
24
|
+
# Are universal marker genes expected to be found in this dataset?
|
25
|
+
def markers?
|
26
|
+
self.class.KNOWN_TYPES.dig(type, :markers)
|
27
|
+
end
|
28
|
+
|
29
|
+
##
|
30
|
+
# Check that the dataset type is defined, known, and compatible with the
|
31
|
+
# project type and raise an exception if any of these checks fail
|
32
|
+
#
|
33
|
+
# If the dataset type is +:empty+, it returns +false+ without raising an
|
34
|
+
# exception; otherwise it returns +true+ (provided that none of the checks fail)
|
35
|
+
def check_type
|
36
|
+
raise MiGA::Error.new('Undefined dataset type') unless type
|
37
|
+
return false if type == :empty
|
38
|
+
|
39
|
+
unless self.class.KNOWN_TYPES[type]
|
40
|
+
raise MiGA::Error.new("Unknown dataset type: #{type}")
|
41
|
+
end
|
42
|
+
unless self.class.KNOWN_TYPES[type][:project_types].include? project.type
|
43
|
+
raise MiGA::Error.new(
|
44
|
+
"Dataset type (#{type}) incompatible with project (#{project.type})"
|
45
|
+
)
|
46
|
+
end
|
47
|
+
|
48
|
+
true
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
data/lib/miga/dataset.rb
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
require 'miga/metadata'
|
7
7
|
require 'miga/dataset/result'
|
8
8
|
require 'miga/dataset/status'
|
9
|
+
require 'miga/dataset/type'
|
9
10
|
require 'miga/dataset/hooks'
|
10
11
|
|
11
12
|
# This library is only required by +#closest_relatives+, so it is now
|
@@ -18,6 +19,7 @@ require 'miga/dataset/hooks'
|
|
18
19
|
class MiGA::Dataset < MiGA::MiGA
|
19
20
|
include MiGA::Dataset::Result
|
20
21
|
include MiGA::Dataset::Status
|
22
|
+
include MiGA::Dataset::Type
|
21
23
|
include MiGA::Dataset::Hooks
|
22
24
|
|
23
25
|
# Class-level
|
@@ -56,6 +58,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
56
58
|
name.to_s
|
57
59
|
@project, @name, @metadata = project, name, nil
|
58
60
|
metadata[:ref] = is_ref
|
61
|
+
metadata[:type] ||= :empty
|
59
62
|
@metadata_future = [
|
60
63
|
File.join(project.path, 'metadata', "#{name}.json"),
|
61
64
|
metadata
|
@@ -89,12 +92,6 @@ class MiGA::Dataset < MiGA::MiGA
|
|
89
92
|
# +Project+ interface
|
90
93
|
alias :save! :save
|
91
94
|
|
92
|
-
##
|
93
|
-
# Get the type of dataset as Symbol
|
94
|
-
def type
|
95
|
-
metadata[:type]
|
96
|
-
end
|
97
|
-
|
98
95
|
##
|
99
96
|
# Delete the dataset with all its contents (including results) and returns
|
100
97
|
# nil
|
@@ -146,22 +143,6 @@ class MiGA::Dataset < MiGA::MiGA
|
|
146
143
|
!metadata[:ref]
|
147
144
|
end
|
148
145
|
|
149
|
-
##
|
150
|
-
# Is this dataset known to be multi-organism?
|
151
|
-
def multi?
|
152
|
-
return false if metadata[:type].nil? || @@KNOWN_TYPES[type].nil?
|
153
|
-
|
154
|
-
@@KNOWN_TYPES[type][:multi]
|
155
|
-
end
|
156
|
-
|
157
|
-
##
|
158
|
-
# Is this dataset known to be single-organism?
|
159
|
-
def nonmulti?
|
160
|
-
return false if metadata[:type].nil? || @@KNOWN_TYPES[type].nil?
|
161
|
-
|
162
|
-
!@@KNOWN_TYPES[type][:multi]
|
163
|
-
end
|
164
|
-
|
165
146
|
##
|
166
147
|
# Is this dataset active?
|
167
148
|
def active?
|
data/lib/miga/json.rb
CHANGED
@@ -69,5 +69,14 @@ class MiGA::Json < MiGA::MiGA
|
|
69
69
|
File.open(path, 'w') { |fh| fh.print y } unless path.nil?
|
70
70
|
y
|
71
71
|
end
|
72
|
+
|
73
|
+
##
|
74
|
+
# Generates and returns plain JSON to represent +obj+.
|
75
|
+
# If +path+ is passed, it saves the JSON in that file.
|
76
|
+
def generate_plain(obj, path = nil)
|
77
|
+
y = JSON.generate(obj)
|
78
|
+
File.open(path, 'w') { |fh| fh.print y } unless path.nil?
|
79
|
+
y
|
80
|
+
end
|
72
81
|
end
|
73
82
|
end
|
data/lib/miga/project/base.rb
CHANGED
@@ -89,32 +89,36 @@ module MiGA::Project::Base
|
|
89
89
|
@@KNOWN_TYPES = {
|
90
90
|
mixed: {
|
91
91
|
description: 'Mixed collection of genomes, metagenomes, and viromes',
|
92
|
-
single: true, multi: true
|
92
|
+
single: true, multi: true, markers: true
|
93
93
|
},
|
94
94
|
genomes: {
|
95
95
|
description: 'Collection of genomes',
|
96
|
-
single: true, multi: false
|
96
|
+
single: true, multi: false, markers: true
|
97
97
|
},
|
98
98
|
clade: {
|
99
99
|
description: 'Collection of closely-related genomes (ANI >= 90%)',
|
100
|
-
single: true, multi: false
|
100
|
+
single: true, multi: false, markers: true
|
101
101
|
},
|
102
102
|
metagenomes: {
|
103
103
|
description: 'Collection of metagenomes and/or viromes',
|
104
|
-
single: false, multi: true
|
104
|
+
single: false, multi: true, markers: true
|
105
|
+
},
|
106
|
+
plasmids: {
|
107
|
+
description: 'Collection of plasmids',
|
108
|
+
single: true, multi: false, markers: false
|
105
109
|
}
|
106
110
|
}
|
107
111
|
|
108
112
|
##
|
109
113
|
# Project-wide distance estimations
|
110
|
-
@@DISTANCE_TASKS = [
|
111
|
-
|
112
|
-
|
114
|
+
@@DISTANCE_TASKS = %i[
|
115
|
+
project_stats haai_distances aai_distances ani_distances
|
116
|
+
clade_finding
|
113
117
|
]
|
114
118
|
|
115
119
|
##
|
116
120
|
# Project-wide tasks for :clade projects
|
117
|
-
@@INCLADE_TASKS = [
|
121
|
+
@@INCLADE_TASKS = %i[subclades ogs]
|
118
122
|
|
119
123
|
##
|
120
124
|
# Options supported by projects
|
@@ -131,7 +135,9 @@ module MiGA::Project::Base
|
|
131
135
|
},
|
132
136
|
haai_p: {
|
133
137
|
desc: 'Value of aai.rb -p on hAAI', type: String,
|
134
|
-
default: proc { |project|
|
138
|
+
default: proc { |project|
|
139
|
+
project.clade? || !project.markers? ? 'no' : 'fastaai'
|
140
|
+
},
|
135
141
|
in: %w[blast+ blast blat diamond fastaai no]
|
136
142
|
},
|
137
143
|
aai_p: {
|
data/lib/miga/project.rb
CHANGED
@@ -98,7 +98,7 @@ class MiGA::Project < MiGA::MiGA
|
|
98
98
|
##
|
99
99
|
# Is this a clade project?
|
100
100
|
def clade?
|
101
|
-
|
101
|
+
%i[clade plasmids].include? type
|
102
102
|
end
|
103
103
|
|
104
104
|
##
|
@@ -115,6 +115,12 @@ class MiGA::Project < MiGA::MiGA
|
|
115
115
|
# Same as multi? For backward compatibility
|
116
116
|
alias is_multi? multi?
|
117
117
|
|
118
|
+
##
|
119
|
+
# Does the project support the use of universal markers?
|
120
|
+
def markers?
|
121
|
+
@@KNOWN_TYPES[type][:markers]
|
122
|
+
end
|
123
|
+
|
118
124
|
##
|
119
125
|
# Is this project active? Currently a dummy function, returns
|
120
126
|
# always true.
|
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'open-uri'
|
2
1
|
require 'cgi'
|
3
2
|
|
4
3
|
class MiGA::RemoteDataset < MiGA::MiGA
|
@@ -10,13 +9,24 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
10
9
|
end
|
11
10
|
end
|
12
11
|
|
12
|
+
def uri_safe_join(*parts)
|
13
|
+
safe = parts.map { |i| i.is_a?(Array) ? i.join(',') : i.to_s }
|
14
|
+
last = safe.pop
|
15
|
+
safe.map! { |i| i[-1] == '/' ? i : "#{i}/" }
|
16
|
+
safe << last
|
17
|
+
URI::join(*safe)
|
18
|
+
end
|
19
|
+
|
13
20
|
module MiGA::RemoteDataset::Base
|
14
|
-
@@
|
15
|
-
@@
|
16
|
-
@@
|
17
|
-
@@
|
18
|
-
@@
|
19
|
-
|
21
|
+
@@_NCBI_DATASETS = 'https://api.ncbi.nlm.nih.gov/datasets/v2alpha/'
|
22
|
+
@@_EUTILS = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
|
23
|
+
@@_EBI_API = 'https://www.ebi.ac.uk/Tools/'
|
24
|
+
@@_GTDB_API = 'https://api.gtdb.ecogenomic.org/'
|
25
|
+
@@_SEQCODE_API = 'https://disc-genomics.uibk.ac.at/seqcode/'
|
26
|
+
@@_EUTILS_BUILD = lambda { |service, q|
|
27
|
+
q[:api_key] = ENV['NCBI_API_KEY'] if ENV['NCBI_API_KEY']
|
28
|
+
uri_safe_join(@@_EUTILS, "#{service}.fcgi")
|
29
|
+
.tap { |uri| uri.query = URI.encode_www_form(q) }
|
20
30
|
}
|
21
31
|
|
22
32
|
##
|
@@ -25,15 +35,13 @@ module MiGA::RemoteDataset::Base
|
|
25
35
|
# supported keys as Symbol:
|
26
36
|
# - +:dbs+ => Hash with keys being the database name and the values a Hash of
|
27
37
|
# properties such as +stage+, +format+, +map_to+, and +getter+.
|
28
|
-
# - +
|
29
|
-
#
|
30
|
-
# Additional parameters can be passed to certain functions using the +extra+
|
31
|
-
# option.
|
38
|
+
# - +uri+ => Function producing a parsed URI object, accepting one parameter:
|
39
|
+
# a Hash of options.
|
32
40
|
# - +method+ => Method used to query the URL. Only +:rest+ and +:net+ are
|
33
41
|
# currently supported.
|
34
|
-
# - +api_key+ => A lambda function that takes a URL as input and returns the
|
35
|
-
# URL to be downloaded with an API Key (if available).
|
36
42
|
# - +map_to_universe+ => Universe where results map to. Currently unsupported.
|
43
|
+
# - +scheme+ => Function returning the scheme used as a String (ftp, http,
|
44
|
+
# https). Mandatory if method is :net.
|
37
45
|
@@UNIVERSE = {
|
38
46
|
web: {
|
39
47
|
dbs: {
|
@@ -41,13 +49,18 @@ module MiGA::RemoteDataset::Base
|
|
41
49
|
assembly_gz: { stage: :assembly, format: :fasta_gz },
|
42
50
|
text: { stage: :metadata, format: :text }
|
43
51
|
},
|
44
|
-
|
52
|
+
uri: lambda { |opts| URI.parse(opts[:ids][0]) },
|
53
|
+
scheme: lambda { |opts| opts[:ids][0].split(':', 2)[0] },
|
45
54
|
method: :net
|
46
55
|
},
|
47
56
|
ebi: {
|
48
57
|
dbs: { embl: { stage: :assembly, format: :fasta } },
|
49
|
-
|
50
|
-
|
58
|
+
uri: lambda do |opts|
|
59
|
+
uri_safe_join(
|
60
|
+
@@_EBI_API, 'dbfetch', 'dbfetch', opts[:db], opts[:ids], opts[:format]
|
61
|
+
)
|
62
|
+
end,
|
63
|
+
method: :get
|
51
64
|
},
|
52
65
|
gtdb: {
|
53
66
|
dbs: {
|
@@ -56,15 +69,18 @@ module MiGA::RemoteDataset::Base
|
|
56
69
|
# The 'taxon' namespace actually returns a list of genomes (+format+)
|
57
70
|
taxon: {
|
58
71
|
stage: :metadata, format: :genomes, map_to: [:assembly],
|
59
|
-
extra:
|
72
|
+
extra: { sp_reps_only: false }
|
60
73
|
},
|
61
74
|
# The 'genome' namespace actually returns the taxonomy (+format+)
|
62
75
|
genome: { stage: :metadata, format: 'taxon-history' }
|
63
76
|
},
|
64
|
-
|
65
|
-
|
77
|
+
uri: lambda do |opts|
|
78
|
+
uri_safe_join(@@_GTDB_API, opts[:db], opts[:ids], opts[:format])
|
79
|
+
.tap { |uri| uri.query = URI.encode_www_form(opts[:extra]) }
|
80
|
+
end,
|
81
|
+
method: :get,
|
66
82
|
map_to_universe: :ncbi,
|
67
|
-
headers: '
|
83
|
+
headers: lambda { |_opts| { 'Accept' => 'application/json' } }
|
68
84
|
},
|
69
85
|
seqcode: {
|
70
86
|
dbs: {
|
@@ -74,8 +90,11 @@ module MiGA::RemoteDataset::Base
|
|
74
90
|
# This is the list of type genomes
|
75
91
|
:'type-genomes' => { stage: :metadata, format: :json }
|
76
92
|
},
|
77
|
-
|
78
|
-
|
93
|
+
uri: lambda do |opts|
|
94
|
+
uri_safe_join(@@_SEQCODE_API, "#{opts[:db]}.json")
|
95
|
+
.tap { |uri| uri.query = URI.encode_www_form(opts[:extra]) }
|
96
|
+
end,
|
97
|
+
method: :get,
|
79
98
|
map_to_universe: :ncbi
|
80
99
|
},
|
81
100
|
ncbi: {
|
@@ -84,9 +103,12 @@ module MiGA::RemoteDataset::Base
|
|
84
103
|
assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
|
85
104
|
taxonomy: { stage: :metadata, format: :xml }
|
86
105
|
},
|
87
|
-
|
88
|
-
|
89
|
-
|
106
|
+
uri: lambda do |opts|
|
107
|
+
@@_EUTILS_BUILD[:efetch,
|
108
|
+
db: opts[:db], id: opts[:ids], rettype: opts[:format], retmode: :text
|
109
|
+
]
|
110
|
+
end,
|
111
|
+
method: :get
|
90
112
|
},
|
91
113
|
ncbi_map: {
|
92
114
|
dbs: {
|
@@ -95,22 +117,81 @@ module MiGA::RemoteDataset::Base
|
|
95
117
|
},
|
96
118
|
biosample: { stage: :metadata, map_to: [:assembly], format: :json }
|
97
119
|
},
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
120
|
+
uri: lambda do |opts|
|
121
|
+
@@_EUTILS_BUILD[:elink, {
|
122
|
+
dbfrom: opts[:db], id: opts[:ids], retmode: opts[:format]
|
123
|
+
}.merge(opts[:extra])]
|
124
|
+
end,
|
125
|
+
method: :get,
|
126
|
+
map_to_universe: :ncbi
|
102
127
|
},
|
103
128
|
ncbi_summary: {
|
104
129
|
dbs: { assembly: { stage: :metadata, format: :json } },
|
105
|
-
|
106
|
-
|
107
|
-
|
130
|
+
uri: lambda do |opts|
|
131
|
+
@@_EUTILS_BUILD[:esummary,
|
132
|
+
db: opts[:db], id: opts[:ids], retmode: opts[:format]
|
133
|
+
]
|
134
|
+
end,
|
135
|
+
method: :get
|
108
136
|
},
|
109
137
|
ncbi_search: {
|
110
|
-
dbs: {
|
111
|
-
|
112
|
-
|
113
|
-
|
138
|
+
dbs: {
|
139
|
+
assembly: { stage: :metadata, format: :json },
|
140
|
+
taxonomy: { stage: :metadata, format: :json }
|
141
|
+
},
|
142
|
+
uri: lambda do |opts|
|
143
|
+
@@_EUTILS_BUILD[:esearch,
|
144
|
+
db: opts[:db], term: opts[:ids], retmode: opts[:format]
|
145
|
+
]
|
146
|
+
end,
|
147
|
+
method: :get
|
148
|
+
},
|
149
|
+
ncbi_datasets_download: {
|
150
|
+
dbs: { genome: { stage: :assembly, format: :zip } },
|
151
|
+
uri: lambda do |opts|
|
152
|
+
q = { include_annotation_type: 'GENOME_FASTA' }
|
153
|
+
uri_safe_join(
|
154
|
+
@@_NCBI_DATASETS, opts[:db], :accession, opts[:ids], :download
|
155
|
+
).tap { |uri| uri.query = URI.encode_www_form(q) }
|
156
|
+
end,
|
157
|
+
method: :get,
|
158
|
+
headers: lambda do |opts|
|
159
|
+
{}.tap do |h|
|
160
|
+
h['Accept'] = 'application/zip' if opts[:format] == :zip
|
161
|
+
h['api-key'] = ENV['NCBI_API_KEY'] if ENV['NCBI_API_KEY']
|
162
|
+
end
|
163
|
+
end
|
164
|
+
},
|
165
|
+
ncbi_datasets: {
|
166
|
+
dbs: {
|
167
|
+
genome: {
|
168
|
+
stage: :metadata, format: :json, extra: { action: 'dataset_report' }
|
169
|
+
}
|
170
|
+
},
|
171
|
+
uri: lambda do |opts|
|
172
|
+
uri_safe_join(@@_NCBI_DATASETS, opts[:db], opts[:extra][:action])
|
173
|
+
end,
|
174
|
+
payload: lambda do |opts|
|
175
|
+
query = opts[:ids][0]
|
176
|
+
q = {
|
177
|
+
filters: {
|
178
|
+
assembly_version: 'current',
|
179
|
+
exclude_paired_reports: true
|
180
|
+
}.merge(query[:filters] || {}),
|
181
|
+
page_size: query[:page_size] || 1_000,
|
182
|
+
returned_content: 'COMPLETE'
|
183
|
+
}
|
184
|
+
q[:page_token] = query[:page_token] if query[:page_token]
|
185
|
+
q[:taxons] = query[:taxons] if query[:taxons]
|
186
|
+
MiGA::Json.generate_plain(q)
|
187
|
+
end,
|
188
|
+
headers: lambda do |opts|
|
189
|
+
{}.tap do |h|
|
190
|
+
h['api-key'] = ENV['NCBI_API_KEY'] if ENV['NCBI_API_KEY']
|
191
|
+
h['Content-Type'] = 'application/json' if opts[:format] == :json
|
192
|
+
end
|
193
|
+
end,
|
194
|
+
method: :post
|
114
195
|
}
|
115
196
|
}
|
116
197
|
end
|