genelastic 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genelastic/api/.env +4 -0
- genelastic/api/cli_start_api.py +18 -0
- genelastic/api/errors.py +52 -0
- genelastic/api/extends/example.py +0 -6
- genelastic/api/extends/example.yml +0 -0
- genelastic/api/routes.py +313 -181
- genelastic/api/server.py +34 -26
- genelastic/api/settings.py +5 -9
- genelastic/api/specification.yml +512 -0
- genelastic/common/__init__.py +0 -39
- genelastic/common/cli.py +100 -0
- genelastic/common/elastic.py +374 -46
- genelastic/common/exceptions.py +34 -2
- genelastic/common/server.py +59 -0
- genelastic/common/types.py +1 -14
- genelastic/import_data/__init__.py +0 -27
- genelastic/import_data/checker.py +99 -0
- genelastic/import_data/checker_observer.py +13 -0
- genelastic/import_data/cli/__init__.py +0 -0
- genelastic/import_data/cli/cli_check.py +136 -0
- genelastic/import_data/cli/gen_data.py +143 -0
- genelastic/import_data/cli/import_data.py +346 -0
- genelastic/import_data/cli/info.py +247 -0
- genelastic/import_data/{cli_integrity.py → cli/integrity.py} +29 -7
- genelastic/import_data/cli/validate.py +146 -0
- genelastic/import_data/collect.py +185 -0
- genelastic/import_data/constants.py +136 -11
- genelastic/import_data/import_bundle.py +102 -59
- genelastic/import_data/import_bundle_factory.py +70 -149
- genelastic/import_data/importers/__init__.py +0 -0
- genelastic/import_data/importers/importer_base.py +131 -0
- genelastic/import_data/importers/importer_factory.py +85 -0
- genelastic/import_data/importers/importer_types.py +223 -0
- genelastic/import_data/logger.py +2 -1
- genelastic/import_data/models/__init__.py +0 -0
- genelastic/import_data/models/analyses.py +178 -0
- genelastic/import_data/models/analysis.py +144 -0
- genelastic/import_data/models/data_file.py +110 -0
- genelastic/import_data/models/process.py +45 -0
- genelastic/import_data/models/processes.py +84 -0
- genelastic/import_data/models/tags.py +170 -0
- genelastic/import_data/models/unique_list.py +109 -0
- genelastic/import_data/models/validate.py +26 -0
- genelastic/import_data/patterns.py +90 -0
- genelastic/import_data/random_bundle.py +79 -54
- genelastic/import_data/resolve.py +157 -0
- genelastic/ui/.env +1 -0
- genelastic/ui/cli_start_ui.py +20 -0
- genelastic/ui/routes.py +333 -0
- genelastic/ui/server.py +9 -82
- genelastic/ui/settings.py +2 -6
- genelastic/ui/static/cea-cnrgh.ico +0 -0
- genelastic/ui/static/cea.ico +0 -0
- genelastic/ui/static/layout.ico +0 -0
- genelastic/ui/static/novaseq6000.png +0 -0
- genelastic/ui/static/style.css +430 -0
- genelastic/ui/static/ui.js +458 -0
- genelastic/ui/templates/analyses.html +98 -0
- genelastic/ui/templates/analysis_detail.html +44 -0
- genelastic/ui/templates/bi_process_detail.html +129 -0
- genelastic/ui/templates/bi_processes.html +116 -0
- genelastic/ui/templates/explorer.html +356 -0
- genelastic/ui/templates/home.html +207 -0
- genelastic/ui/templates/layout.html +153 -0
- genelastic/ui/templates/version.html +21 -0
- genelastic/ui/templates/wet_process_detail.html +131 -0
- genelastic/ui/templates/wet_processes.html +116 -0
- genelastic-0.9.0.dist-info/METADATA +686 -0
- genelastic-0.9.0.dist-info/RECORD +76 -0
- genelastic-0.9.0.dist-info/WHEEL +4 -0
- genelastic-0.9.0.dist-info/entry_points.txt +10 -0
- genelastic-0.9.0.dist-info/licenses/LICENSE +519 -0
- genelastic/import_data/analyses.py +0 -69
- genelastic/import_data/analysis.py +0 -205
- genelastic/import_data/bi_process.py +0 -27
- genelastic/import_data/bi_processes.py +0 -49
- genelastic/import_data/cli_gen_data.py +0 -116
- genelastic/import_data/cli_import.py +0 -379
- genelastic/import_data/cli_info.py +0 -256
- genelastic/import_data/cli_validate.py +0 -54
- genelastic/import_data/data_file.py +0 -87
- genelastic/import_data/filename_pattern.py +0 -57
- genelastic/import_data/tags.py +0 -123
- genelastic/import_data/wet_process.py +0 -28
- genelastic/import_data/wet_processes.py +0 -53
- genelastic-0.7.0.dist-info/METADATA +0 -105
- genelastic-0.7.0.dist-info/RECORD +0 -40
- genelastic-0.7.0.dist-info/WHEEL +0 -5
- genelastic-0.7.0.dist-info/entry_points.txt +0 -6
- genelastic-0.7.0.dist-info/top_level.txt +0 -1
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import subprocess
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
import uvicorn
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def start_dev_server(
    app_module: str,
    args: argparse.Namespace,
    reload_includes: list[str] | None = None,
) -> None:
    """Start the development server using Uvicorn.

    The server runs with auto-reload enabled so code changes are picked up
    without a manual restart.

    :param app_module: The import string of the application module to serve.
    :param args: The parsed command line arguments; ``host``, ``port`` and
        ``log_level`` are read from it.
    :param reload_includes: Optional extra file globs to watch for reload,
        in addition to Uvicorn's defaults.
    """
    # None sentinel instead of a mutable default: a shared default list
    # would be reused across calls.
    if reload_includes is None:
        reload_includes = []

    uvicorn.run(
        app_module,
        host=args.host,
        port=args.port,
        log_level=args.log_level,
        reload=True,
        reload_includes=reload_includes,
    )
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def build_gunicorn_command(
    app_module: str, args: argparse.Namespace
) -> list[str]:
    """Build the Gunicorn command line used by :func:`start_prod_server`.

    :param app_module: The import string of the application module to serve.
    :param args: The parsed command line arguments; ``workers``,
        ``log_level``, ``host``, ``port``, ``log_file`` and
        ``access_logfile`` are read from it.
    :return: The full command, suitable for :func:`subprocess.run`.
    """
    cmd = [
        sys.executable,
        "-m",
        "gunicorn",
        "-k",
        "uvicorn.workers.UvicornWorker",
        "--workers",
        str(args.workers),
        "--log-level",
        args.log_level,
        "-b",
        f"{args.host}:{args.port}",
        "--capture-output",
        app_module,
    ]

    # Optional log destinations are appended only when provided so that
    # Gunicorn falls back to its own defaults otherwise.
    if args.log_file:
        cmd.extend(["--log-file", args.log_file])

    if args.access_logfile:
        cmd.extend(["--access-logfile", args.access_logfile])

    return cmd


def start_prod_server(app_module: str, args: argparse.Namespace) -> None:
    """Start the production server using Gunicorn.

    It will spawn one primary process and ``args.workers`` worker
    processes using the Uvicorn worker class.

    :param app_module: The import string of the application module to serve.
    :param args: The parsed command line arguments.
    :raises subprocess.CalledProcessError: If gunicorn exits with a non-zero status code.
    """
    cmd = build_gunicorn_command(app_module, args)
    subprocess.run(cmd, check=True)  # noqa: S603
|
genelastic/common/types.py
CHANGED
|
@@ -4,20 +4,7 @@ import typing
|
|
|
4
4
|
Bucket: typing.TypeAlias = dict[str, dict[typing.Any, typing.Any]]
|
|
5
5
|
BundleDict: typing.TypeAlias = dict[str, typing.Any]
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
WetProcessesData: typing.TypeAlias = dict[str, str | int | float]
|
|
9
|
-
BioInfoProcessData: typing.TypeAlias = dict[str, str | list[str]]
|
|
10
|
-
|
|
11
|
-
AnalysisDocument: typing.TypeAlias = dict[str, str | None | AnalysisMetaData]
|
|
12
|
-
MetadataDocument: typing.TypeAlias = dict[
|
|
13
|
-
str, int | str | list[typing.Any | None]
|
|
14
|
-
]
|
|
15
|
-
ProcessDocument: typing.TypeAlias = (
|
|
16
|
-
dict[str, str] | WetProcessesData | BioInfoProcessData
|
|
17
|
-
)
|
|
18
|
-
BulkItems: typing.TypeAlias = list[
|
|
19
|
-
dict[str, str | MetadataDocument | AnalysisDocument | ProcessDocument]
|
|
20
|
-
]
|
|
7
|
+
Metadata: typing.TypeAlias = dict[str, str | int]
|
|
21
8
|
|
|
22
9
|
# Types related to random bundle generation.
|
|
23
10
|
RandomBiProcessData: typing.TypeAlias = dict[str, str | list[dict[str, str]]]
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
"""Genelastic package for importing Genomic data into Elasticsearch."""
|
|
2
|
-
|
|
3
|
-
from .analysis import Analysis
|
|
4
|
-
from .import_bundle import ImportBundle
|
|
5
|
-
from .import_bundle_factory import (
|
|
6
|
-
load_import_bundle_file,
|
|
7
|
-
make_import_bundle_from_files,
|
|
8
|
-
)
|
|
9
|
-
from .random_bundle import (
|
|
10
|
-
RandomAnalysis,
|
|
11
|
-
RandomBiProcess,
|
|
12
|
-
RandomBundle,
|
|
13
|
-
RandomWetProcess,
|
|
14
|
-
)
|
|
15
|
-
from .tags import Tags
|
|
16
|
-
|
|
17
|
-
__all__ = [
|
|
18
|
-
"Analysis",
|
|
19
|
-
"ImportBundle",
|
|
20
|
-
"RandomAnalysis",
|
|
21
|
-
"RandomBiProcess",
|
|
22
|
-
"RandomBundle",
|
|
23
|
-
"RandomWetProcess",
|
|
24
|
-
"Tags",
|
|
25
|
-
"load_import_bundle_file",
|
|
26
|
-
"make_import_bundle_from_files",
|
|
27
|
-
]
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from genelastic.common.elastic import ElasticQueryConn
|
|
4
|
+
from genelastic.import_data.checker_observer import CheckerObserver
|
|
5
|
+
from genelastic.import_data.models.analyses import Analyses
|
|
6
|
+
from genelastic.import_data.models.processes import Processes
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger("genelastic")
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Checker:
    """Validate coherence between YAML metadata and Elasticsearch,
    using a project-specific observer mechanism.
    """

    def __init__(self, es: ElasticQueryConn, *, strict: bool = False) -> None:
        """Initialize the Checker.

        Args:
            es: Elasticsearch connection instance.
            strict: Treat ES-only entries as errors when True.
        """
        self.es = es
        self.strict = strict
        self.errors_detected = False
        self._observers: list[CheckerObserver] = []

    def attach(self, observer: CheckerObserver) -> None:
        """Register an observer to receive Checker notifications."""
        self._observers.append(observer)

    def detach(self, observer: CheckerObserver) -> None:
        """Unregister an observer so it no longer receives notifications."""
        self._observers.remove(observer)

    def _notify_missing(self, label: str, missing: list[str]) -> None:
        """Mark the run as failed and forward missing IDs to observers."""
        self.errors_detected = True
        for observer in self._observers:
            observer.notify_missing(label, missing)

    def _notify_extra(self, label: str, extra: list[str]) -> None:
        """Mark the run as failed and forward extra IDs to observers."""
        self.errors_detected = True
        for observer in self._observers:
            observer.notify_extra(label, extra)

    def _check_generic(
        self, label: str, ids_yaml: set[str], ids_es: set[str]
    ) -> None:
        """Compare YAML IDs vs Elasticsearch IDs for a given entity type."""
        logger.info("Checking %s...", label)

        missing = sorted(ids_yaml - ids_es)
        extra = sorted(ids_es - ids_yaml)

        if missing:
            logger.error("Missing %s in ES: %s", label, missing)
            self._notify_missing(label, missing)

        # Extra IDs are only an error in strict mode.
        if extra and self.strict:
            logger.error(
                "%s in ES but missing from YAML: %s",
                label.capitalize(),
                extra,
            )
            self._notify_extra(label, extra)
        elif extra:
            logger.info("Extra %s ignored (non-strict mode).", label)

        # Success summary: only emitted when nothing was missing.
        if not missing:
            if self.strict:
                if not extra:
                    logger.info("OK ✓ All %s match exactly.", label)
            else:
                logger.info("OK ✓ YAML %s present (extra ignored).", label)

    def check_analyses(self, analyses: Analyses) -> None:
        """Check analysis IDs between YAML and Elasticsearch."""
        ids_yaml = {analysis.id for analysis in analyses}
        ids_es = set(
            self.es.get_field_values(self.es.data_files_index, "analysis_id")
        )
        self._check_generic("analyses", ids_yaml, ids_es)

    def check_wet_processes(self, processes: Processes) -> None:
        """Check wet process IDs between YAML and Elasticsearch."""
        self._check_generic(
            "wet processes",
            set(processes.keys()),
            set(self.es.get_field_values(self.es.wet_processes_index, "proc_id")),
        )

    def check_bi_processes(self, processes: Processes) -> None:
        """Check biological process IDs between YAML and Elasticsearch."""
        self._check_generic(
            "bi processes",
            set(processes.keys()),
            set(self.es.get_field_values(self.es.bi_processes_index, "proc_id")),
        )
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from typing import Protocol
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class CheckerObserver(Protocol):
    """Protocol for classes observing Checker events."""

    def notify_missing(self, label: str, missing: list[str]) -> None:
        """Called when expected IDs are missing in Elasticsearch."""

    def notify_extra(self, label: str, extra: list[str]) -> None:
        """Called when unexpected IDs exist in Elasticsearch."""
|
|
File without changes
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import logging
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from genelastic.common.cli import (
|
|
7
|
+
add_es_connection_args,
|
|
8
|
+
add_verbose_control_args,
|
|
9
|
+
add_version_arg,
|
|
10
|
+
)
|
|
11
|
+
from genelastic.common.elastic import ElasticQueryConn
|
|
12
|
+
from genelastic.import_data.checker import Checker
|
|
13
|
+
from genelastic.import_data.import_bundle_factory import (
|
|
14
|
+
make_import_bundle_from_files,
|
|
15
|
+
)
|
|
16
|
+
from genelastic.import_data.logger import configure_logging
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger("genelastic")
|
|
19
|
+
logging.getLogger("elastic_transport").setLevel(logging.WARNING)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class CLICheckObserver:
    """Observer used by the CLI to log Checker errors."""

    def __init__(self) -> None:
        # Reuse the module-level "genelastic" logger for all notifications.
        self._logger = logger

    def _report(self, template: str, label: str, ids: list[str]) -> None:
        """Log one checker finding at ERROR level."""
        self._logger.error(template, label, ids)

    def notify_missing(self, label: str, missing: list[str]) -> None:
        """Handle missing IDs by logging an error."""
        self._report("[CHECKER] Missing %s in ES: %s", label, missing)

    def notify_extra(self, label: str, extra: list[str]) -> None:
        """Handle extra IDs by logging an error."""
        self._report("[CHECKER] Extra %s in ES: %s", label, extra)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def read_args() -> argparse.Namespace:
    """Parse the command line arguments of the check CLI."""
    parser = argparse.ArgumentParser(
        description="Check database coherency against one or more YAML bundles.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Shared option groups (version, verbosity, ES connection).
    add_version_arg(parser)
    add_verbose_control_args(parser)
    add_es_connection_args(parser)

    parser.add_argument(
        "files",
        type=Path,
        nargs="+",
        help="Paths to YAML bundle files to validate.",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help=(
            "Enable strict mode: also report entries present in Elasticsearch "
            "but missing from YAML bundles."
        ),
    )

    # The entity-selection switches all share the same shape, so they are
    # declared in a table and registered in one pass.
    entity_flags = (
        ("-A", "--check-analyses", "Check only analyses coherence."),
        ("-W", "--check-wet", "Check only wet processes coherence."),
        ("-B", "--check-bi", "Check only biological processes coherence."),
        ("-X", "--all", "Check all entities (analyses, wet processes and bi processes)."),
    )
    for short_flag, long_flag, help_text in entity_flags:
        parser.add_argument(short_flag, long_flag, action="store_true", help=help_text)

    return parser.parse_args()
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def main() -> None:
    """Entry point of the check CLI."""
    args = read_args()
    configure_logging(args.verbose)

    logger.info(
        "Connecting to Elasticsearch at https://%s:%s ...",
        args.es_host,
        args.es_port,
    )

    es = ElasticQueryConn(
        f"https://{args.es_host}:{args.es_port}",
        args.es_cert_fp,
        args.es_index_prefix,
        basic_auth=(args.es_usr, args.es_pwd),
    )

    import_bundle = make_import_bundle_from_files(args.files)

    checker = Checker(es, strict=args.strict)
    checker.attach(CLICheckObserver())

    # When no explicit selection flag is given, fall back to checking
    # every entity type.
    selected_any = args.check_analyses or args.check_wet or args.check_bi
    run_all = args.all or not selected_any

    if run_all or args.check_analyses:
        checker.check_analyses(import_bundle.analyses)
    if run_all or args.check_wet:
        checker.check_wet_processes(import_bundle.wet_processes)
    if run_all or args.check_bi:
        checker.check_bi_processes(import_bundle.bi_processes)

    # Non-zero exit status signals detected incoherencies to the caller.
    sys.exit(1 if checker.errors_detected else 0)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import logging
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from biophony import DEFAULT_RATE, MutSimParams
|
|
6
|
+
|
|
7
|
+
from genelastic.common.cli import add_verbose_control_args, add_version_arg
|
|
8
|
+
from genelastic.import_data.logger import configure_logging
|
|
9
|
+
from genelastic.import_data.random_bundle import (
|
|
10
|
+
RandomBundle,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger("genelastic")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def read_args() -> argparse.Namespace:
    """Read arguments from the command line."""
    parser = argparse.ArgumentParser(
        description="Random bundle generator. "
        "A bundle is a YAML file format used to import genetic data into an Elasticsearch database. "
        "It can contain one or more analyses; "
        "each analysis including metadata, references to "
        "a wet lab and bioinformatics process "
        "and paths to a VCF file and optionally to a coverage file.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        allow_abbrev=False,
    )
    add_version_arg(parser)
    add_verbose_control_args(parser)

    parser.add_argument(
        "output_dir",
        help="Path where analyses VCF and coverage files will be generated.",
        type=Path,
    )
    parser.add_argument("--log-file", help="Path to a log file.")
    parser.add_argument(
        "-n",
        "--chrom-nb",
        type=int,
        default=5,
        help="Number of chromosomes to include in the generated VCF file.",
    )
    parser.add_argument(
        "-o",
        "--output-bundle",
        default=None,
        help="Path where the YAML bundle file will be written. "
        "If no path is provided, the bundle is written to stdout.",
        type=Path,
    )
    parser.add_argument(
        "-l",
        "--sequence-length",
        type=int,
        default=2000,
        help="Sequence length (number of nucleotides) generated for each chromosome.",
    )
    parser.add_argument(
        "-c",
        "--coverage",
        action="store_true",
        help="Generate a coverage file for each analysis.",
    )
    parser.add_argument(
        "-a",
        "--analyses",
        help="Number of analyses to generate. "
        "Each analysis will reference a wet lab and bioinformatics process, "
        "a VCF file and optionally a coverage file.",
        default=1,
        type=int,
    )
    parser.add_argument(
        "-p",
        "--processes",
        help="Number of wet lab and bioinformatics processes to generate.",
        default=1,
        type=int,
    )

    # The three mutation-rate options only differ by name and help text,
    # so they are registered from a small table.
    rate_flags = (
        ("-s", "--snp-rate", "Generated VCF SNP rate."),
        ("-i", "--ins-rate", "Generated VCF insertion rate."),
        ("-d", "--del-rate", "Generated VCF deletion rate."),
    )
    for short_flag, long_flag, help_text in rate_flags:
        parser.add_argument(
            short_flag,
            long_flag,
            help=help_text,
            type=float,
            default=DEFAULT_RATE,
        )

    return parser.parse_args()
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def main() -> None:
    """Entry point of the gen-data script."""
    args = read_args()
    output_dir = args.output_dir.resolve()

    # Validate inputs up-front, before any logging or generation work.
    if not output_dir.is_dir():
        msg = f"ERROR: '{output_dir}' does not exist or is not a directory."
        raise SystemExit(msg)

    if args.analyses < 1:
        raise SystemExit("Analyses count must be at least 1.")

    if args.processes < 1:
        raise SystemExit("Processes count must be at least 1.")

    # Configure logging.
    configure_logging(args.verbose, log_file=args.log_file)
    logger.debug("Arguments: %s", args)

    mutation_params = MutSimParams(
        snp_rate=args.snp_rate,
        ins_rate=args.ins_rate,
        del_rate=args.del_rate,
    )
    bundle = RandomBundle(
        output_dir,
        args.analyses,
        args.processes,
        args.chrom_nb,
        args.sequence_length,
        mutation_params,
        do_gen_coverage=args.coverage,
    )
    # Write the bundle to the given path, or to stdout when no path is set.
    bundle.to_yaml(args.output_bundle)


if __name__ == "__main__":
    main()
|