pmotools 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pmotools/__init__.py +26 -0
- pmotools/cli.py +374 -0
- pmotools/pmo_builder/__init__.py +0 -0
- pmotools/pmo_builder/json_convert_utils.py +5 -0
- pmotools/pmo_builder/merge_to_pmo.py +209 -0
- pmotools/pmo_builder/metatable_to_pmo.py +443 -0
- pmotools/pmo_builder/mhap_table_to_pmo.py +413 -0
- pmotools/pmo_builder/panel_information_to_pmo.py +385 -0
- pmotools/pmo_engine/__init__.py +0 -0
- pmotools/pmo_engine/pmo_checker.py +64 -0
- pmotools/pmo_engine/pmo_processor.py +1487 -0
- pmotools/pmo_engine/pmo_reader.py +541 -0
- pmotools/pmo_engine/pmo_writer.py +52 -0
- pmotools/schemas/portable_microhaplotype_object_v0.1.0.schema.json +1822 -0
- pmotools/scripts/__init__.py +0 -0
- pmotools/scripts/convertors_to_pmo/__init__.py +0 -0
- pmotools/scripts/convertors_to_pmo/excel_meta_to_json_meta.py +54 -0
- pmotools/scripts/convertors_to_pmo/microhaplotype_table_to_json_file.py +102 -0
- pmotools/scripts/convertors_to_pmo/terra_amp_output_to_json.py +192 -0
- pmotools/scripts/convertors_to_pmo/text_meta_to_json_meta.py +54 -0
- pmotools/scripts/extract_info_from_pmo/__init__.py +0 -0
- pmotools/scripts/extract_info_from_pmo/count_library_samples_per_target.py +69 -0
- pmotools/scripts/extract_info_from_pmo/count_specimen_meta.py +69 -0
- pmotools/scripts/extract_info_from_pmo/count_targets_per_library_sample.py +69 -0
- pmotools/scripts/extract_info_from_pmo/extract_insert_of_panels.py +78 -0
- pmotools/scripts/extract_info_from_pmo/extract_refseq_of_inserts_of_panels.py +43 -0
- pmotools/scripts/extract_info_from_pmo/list_bioinformatics_run_names.py +43 -0
- pmotools/scripts/extract_info_from_pmo/list_library_sample_names_per_specimen_name.py +60 -0
- pmotools/scripts/extract_info_from_pmo/list_specimen_meta_fields.py +60 -0
- pmotools/scripts/extractors_from_pmo/__init__.py +0 -0
- pmotools/scripts/extractors_from_pmo/extract_allele_table.py +153 -0
- pmotools/scripts/extractors_from_pmo/extract_pmo_with_read_filter.py +52 -0
- pmotools/scripts/extractors_from_pmo/extract_pmo_with_select_library_sample_names.py +61 -0
- pmotools/scripts/extractors_from_pmo/extract_pmo_with_select_specimen_names.py +57 -0
- pmotools/scripts/extractors_from_pmo/extract_pmo_with_select_targets.py +57 -0
- pmotools/scripts/extractors_from_pmo/extract_pmo_with_selected_meta.py +63 -0
- pmotools/scripts/pmo_utils/__init__.py +0 -0
- pmotools/scripts/pmo_utils/combine_pmos.py +57 -0
- pmotools/scripts/pmo_utils/validate_pmo.py +47 -0
- pmotools/utils/__init__.py +0 -0
- pmotools/utils/color_text.py +153 -0
- pmotools/utils/schema_loader.py +29 -0
- pmotools/utils/small_utils.py +399 -0
- pmotools-0.1.0.dist-info/METADATA +794 -0
- pmotools-0.1.0.dist-info/RECORD +47 -0
- pmotools-0.1.0.dist-info/WHEEL +4 -0
- pmotools-0.1.0.dist-info/entry_points.txt +3 -0
pmotools/__init__.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# pmotools/__init__.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
# Resolve the distribution-metadata helpers. Only a failed import should
# trigger the fallback, so the guard catches ImportError rather than every
# exception (which could hide unrelated errors).
try:
    # Python 3.8+
    from importlib.metadata import version, PackageNotFoundError
except ImportError:  # pragma: no cover
    # Very old Pythons can fallback to pkg_resources if you ever needed it
    from pkg_resources import get_distribution as _gd  # type: ignore

    class PackageNotFoundError(Exception):
        """Raised by the fallback ``version`` when the lookup fails."""

    def version(pkg: str) -> str:  # type: ignore
        """Return the version string that pkg_resources reports for *pkg*.

        :param pkg: distribution name to look up
        :return: the ``version`` attribute of the resolved distribution
        :raises PackageNotFoundError: if the pkg_resources lookup fails for
            any reason (the original error is chained as the cause)
        """
        try:
            return _gd(pkg).version
        except Exception as e:  # noqa: BLE001
            # Include the package name so the failure is self-describing.
            raise PackageNotFoundError(pkg) from e


try:
    # Use the installed distribution name (matches [project].name)
    __version__ = version("pmotools")
except PackageNotFoundError:
    # When running from a source tree without being installed
    __version__ = "0+local"
|
pmotools/cli.py
ADDED
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Callable, Dict, Tuple
|
|
6
|
+
|
|
7
|
+
from pmotools import __version__
|
|
8
|
+
from pmotools.utils.color_text import ColorText as CT
|
|
9
|
+
|
|
10
|
+
# convertors_to_pmo
|
|
11
|
+
from pmotools.scripts.convertors_to_pmo.text_meta_to_json_meta import (
|
|
12
|
+
text_meta_to_json_meta,
|
|
13
|
+
)
|
|
14
|
+
from pmotools.scripts.convertors_to_pmo.excel_meta_to_json_meta import (
|
|
15
|
+
excel_meta_to_json_meta,
|
|
16
|
+
)
|
|
17
|
+
from pmotools.scripts.convertors_to_pmo.microhaplotype_table_to_json_file import (
|
|
18
|
+
microhaplotype_table_to_json_file,
|
|
19
|
+
)
|
|
20
|
+
from pmotools.scripts.convertors_to_pmo.terra_amp_output_to_json import (
|
|
21
|
+
terra_amp_output_to_json,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
# extractors_from_pmo
|
|
25
|
+
from pmotools.scripts.extractors_from_pmo.extract_pmo_with_selected_meta import (
|
|
26
|
+
extract_pmo_with_selected_meta,
|
|
27
|
+
)
|
|
28
|
+
from pmotools.scripts.extractors_from_pmo.extract_pmo_with_select_specimen_names import (
|
|
29
|
+
extract_pmo_with_select_specimen_names,
|
|
30
|
+
)
|
|
31
|
+
from pmotools.scripts.extractors_from_pmo.extract_pmo_with_select_library_sample_names import (
|
|
32
|
+
extract_pmo_with_select_library_sample_names,
|
|
33
|
+
)
|
|
34
|
+
from pmotools.scripts.extractors_from_pmo.extract_pmo_with_select_targets import (
|
|
35
|
+
extract_pmo_with_select_targets,
|
|
36
|
+
)
|
|
37
|
+
from pmotools.scripts.extractors_from_pmo.extract_pmo_with_read_filter import (
|
|
38
|
+
extract_pmo_with_read_filter,
|
|
39
|
+
)
|
|
40
|
+
from pmotools.scripts.extractors_from_pmo.extract_allele_table import (
|
|
41
|
+
extract_for_allele_table,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
# pmo_utils
|
|
45
|
+
from pmotools.scripts.pmo_utils.combine_pmos import combine_pmos
|
|
46
|
+
from pmotools.scripts.pmo_utils.validate_pmo import validate_pmo
|
|
47
|
+
|
|
48
|
+
# extract_info_from_pmo
|
|
49
|
+
from pmotools.scripts.extract_info_from_pmo.list_library_sample_names_per_specimen_name import (
|
|
50
|
+
list_library_sample_names_per_specimen_name,
|
|
51
|
+
)
|
|
52
|
+
from pmotools.scripts.extract_info_from_pmo.list_specimen_meta_fields import (
|
|
53
|
+
list_specimen_meta_fields,
|
|
54
|
+
)
|
|
55
|
+
from pmotools.scripts.extract_info_from_pmo.list_bioinformatics_run_names import (
|
|
56
|
+
list_bioinformatics_run_names,
|
|
57
|
+
)
|
|
58
|
+
from pmotools.scripts.extract_info_from_pmo.count_specimen_meta import (
|
|
59
|
+
count_specimen_meta,
|
|
60
|
+
)
|
|
61
|
+
from pmotools.scripts.extract_info_from_pmo.count_targets_per_library_sample import (
|
|
62
|
+
count_targets_per_library_sample,
|
|
63
|
+
)
|
|
64
|
+
from pmotools.scripts.extract_info_from_pmo.count_library_samples_per_target import (
|
|
65
|
+
count_library_samples_per_target,
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
# panel info subset
|
|
69
|
+
from pmotools.scripts.extract_info_from_pmo.extract_insert_of_panels import (
|
|
70
|
+
extract_insert_of_panels,
|
|
71
|
+
)
|
|
72
|
+
from pmotools.scripts.extract_info_from_pmo.extract_refseq_of_inserts_of_panels import (
|
|
73
|
+
extract_refseq_of_inserts_of_panels,
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass(frozen=True)
class PmoCommand:
    """Immutable registry record: a CLI entry point plus its one-line help."""

    # Callable invoked with no arguments when the command is dispatched.
    func: Callable[[], None]
    # Short description printed next to the command name in listings.
    help: str
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# Command catalog: group name -> {command name -> PmoCommand}.
# The outer keys are the group headers printed by the listing helpers; the
# inner keys are the command names users type on the command line.
REGISTRY: Dict[str, Dict[str, PmoCommand]] = {
    # Converters that produce JSON from tabular / external inputs.
    "convertors_to_json": {
        "text_meta_to_json_meta": PmoCommand(
            text_meta_to_json_meta, "Convert text file meta to JSON Meta"
        ),
        "excel_meta_to_json_meta": PmoCommand(
            excel_meta_to_json_meta, "Convert Excel file meta to JSON Meta"
        ),
        "microhaplotype_table_to_json_file": PmoCommand(
            microhaplotype_table_to_json_file,
            "Convert microhaplotype table to a JSON file",
        ),
        "terra_amp_output_to_json": PmoCommand(
            terra_amp_output_to_json, "Convert Terra output to JSON sequence table"
        ),
    },
    # Commands that extract a subset or a derived table from a PMO.
    "extractors_from_pmo": {
        "extract_pmo_with_selected_meta": PmoCommand(
            extract_pmo_with_selected_meta,
            "Extract samples + haplotypes using selected meta",
        ),
        "extract_pmo_with_select_specimen_names": PmoCommand(
            extract_pmo_with_select_specimen_names,
            "Extract specific samples from the specimens table",
        ),
        "extract_pmo_with_select_library_sample_names": PmoCommand(
            extract_pmo_with_select_library_sample_names,
            "Extract experiment sample names from experiment_info table",
        ),
        "extract_pmo_with_select_targets": PmoCommand(
            extract_pmo_with_select_targets, "Extract specific targets"
        ),
        "extract_pmo_with_read_filter": PmoCommand(
            extract_pmo_with_read_filter, "Extract with a read filter"
        ),
        # The command name differs from the imported function name here.
        "extract_allele_table": PmoCommand(
            extract_for_allele_table,
            "Extract allele tables for tools like dcifer or moire",
        ),
        "extract_insert_of_panels": PmoCommand(
            extract_insert_of_panels, "Extract inserts of panels from a PMO"
        ),
        "extract_refseq_of_inserts_of_panels": PmoCommand(
            extract_refseq_of_inserts_of_panels,
            "Extract ref_seq of panel inserts from a PMO",
        ),
    },
    # Commands that operate on more than one PMO at a time.
    "working_with_multiple_pmos": {
        "combine_pmos": PmoCommand(
            combine_pmos, "Combine multiple PMOs of the same panel"
        ),
    },
    # Listing and counting helpers.
    "extract_basic_info_from_pmo": {
        "list_library_sample_names_per_specimen_name": PmoCommand(
            list_library_sample_names_per_specimen_name,
            "List experiment_sample_ids per specimen_id",
        ),
        "list_specimen_meta_fields": PmoCommand(
            list_specimen_meta_fields,
            "List specimen meta fields in the specimen_info section",
        ),
        "list_bioinformatics_run_names": PmoCommand(
            list_bioinformatics_run_names,
            "List all tar_amp_bioinformatics_info_ids in a PMO",
        ),
        "count_specimen_meta": PmoCommand(
            count_specimen_meta, "Count values of selected specimen meta fields"
        ),
        "count_targets_per_library_sample": PmoCommand(
            count_targets_per_library_sample, "Count number of targets per sample"
        ),
        "count_library_samples_per_target": PmoCommand(
            count_library_samples_per_target, "Count number of samples per target"
        ),
    },
    # Schema validation.
    "validation": {
        "validate_pmo": PmoCommand(
            validate_pmo, "Validate a PMO file against a JSON Schema"
        )
    },
}
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _iter_all_commands():
    """Yield ``(group, command_name, help_text)`` for every REGISTRY entry.

    Groups and commands are visited in REGISTRY's own insertion order.
    """
    for group in REGISTRY:
        group_commands = REGISTRY[group]
        for name in group_commands:
            yield group, name, group_commands[name].help
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _print_catalog_plain():
    """
    Write the command catalog to stdout without any color codes.

    Each command is one line of three tab-separated fields, in the order
    command name, group name, help text.
    """
    import sys

    for group, name, cmdhelp in _iter_all_commands():
        row = "{}\t{}\t{}\n".format(name, group, cmdhelp)
        sys.stdout.write(row)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _print_catalog() -> None:
    """Write the banner followed by every group and its commands to stdout.

    The banner carries the package version; each group name is rendered with
    ``CT.boldBlue`` and followed by one tab-indented line per command and a
    blank line.
    """
    import sys

    banner_prefix = (
        f"pmotools-python v{__version__} - A suite of tools for interacting with "
    )
    format_name = CT.boldGreen("Portable Microhaplotype Object (PMO)")
    sys.stdout.write(banner_prefix + format_name + " file format\n\n")
    sys.stdout.write("Available functions organized by groups are\n")

    for group in REGISTRY:
        sys.stdout.write(CT.boldBlue(group) + "\n")
        for name, cmd in REGISTRY[group].items():
            sys.stdout.write(f"\t{name} - {cmd.help}\n")
        sys.stdout.write("\n")
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _print_group(group: str) -> int:
    """Write one group's commands to stdout and return an exit status.

    :param group: group name to look up in REGISTRY
    :return: 0 when the group exists; 2 when it does not, in which case a
        "did not find" message and the full catalog are printed instead
    """
    import sys

    commands = REGISTRY.get(group)
    if commands is None:
        not_found = CT.boldRed("Did not find group ") + CT.boldWhite(group) + "\n\n"
        sys.stdout.write(not_found)
        _print_catalog()
        return 2

    sys.stdout.write(CT.boldBlue(group) + "\n")
    for name in commands:
        sys.stdout.write(f"\t{name} - {commands[name].help}\n")
    sys.stdout.write("\n")
    return 0
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def _print_bash_completion():
    """Write a bash completion script for ``pmotools-python`` to stdout.

    The script text is a single raw string, so backslashes and ``${...}``
    reach bash unchanged. It completes, in order: the command name (first
    word) from ``--list-plain`` output, then ``-``-prefixed flags scraped
    from the leaf command's ``-h`` output, then file names.
    """
    # NOTE: this uses --list-plain to avoid ANSI color parsing and be stable.
    # NOTE(review): the first sed substitution replaces each matching line
    # with capture group \1 only, so for a help line listing several flag
    # spellings just the first one is offered - confirm this is intended.
    # NOTE(review): `prev` is assigned in the script but not read afterwards.
    script = r"""# bash completion for pmotools-python
# add the below to your ~/.bash_completion

_pmotools_python_complete()
{
local cur prev
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"

# 1) Completing the command name (1st arg): list all commands
if [[ ${COMP_CWORD} -eq 1 ]]; then
# Our CLI prints machine-friendly list via --list-plain:
# "<command>\t<group>\t<help>"
local lines cmds
lines="$(${COMP_WORDS[0]} --list-plain 2>/dev/null)"
cmds="$(printf '%s\n' "${lines}" | awk -F'\t' '{print $1}')"
COMPREPLY=( $(compgen -W "${cmds}" -- "${cur}") )
return 0
fi

# 2) Completing flags for a leaf command: scrape leaf -h
if [[ "${cur}" == -* ]]; then
local helps opts
helps="$(${COMP_WORDS[0]} ${COMP_WORDS[1]} -h 2>/dev/null)"
# Pull out flag tokens and split comma-separated forms
opts="$(printf '%s\n' "${helps}" \
| sed -n 's/^[[:space:]]\{0,\}\(-[-[:alnum:]][-[:alnum:]]*\)\(, *-[[:alnum:]][-[:alnum:]]*\)\{0,\}.*/\1/p' \
| sed 's/, / /g')"
COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") )
return 0
fi

# 3) Otherwise, fall back to filename completion for positional args
COMPREPLY=( $(compgen -f -- "${cur}") )
return 0
}

complete -F _pmotools_python_complete pmotools-python
"""
    import sys

    sys.stdout.write(script)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _build_parser() -> (
    Tuple[argparse.ArgumentParser, Dict[str, Tuple[str, PmoCommand]]]
):
    """
    Assemble the flat command-line interface:
        pmotools-python <command> [args...]

    Every REGISTRY command becomes a sub-parser created with
    ``add_help=False`` whose defaults carry the handler, group and command
    name.

    :return: ``(parser, command_index)`` where ``command_index`` maps each
        command name to ``(group, PmoCommand)``
    :raises RuntimeError: if the same command name appears in two groups
    """
    pmo_label = CT.boldGreen('Portable Microhaplotype Object (PMO)')
    description = (
        f"pmotools-python v{__version__} – A suite of tools for interacting with "
        f"{pmo_label} files"
    )
    parser = argparse.ArgumentParser(
        prog="pmotools-python",
        description=description,
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # Top-level options, registered in this order.
    top_level_options = (
        (
            ("--list-plain",),
            # keep it hidden; for completion script
            {"action": "store_true", "help": argparse.SUPPRESS},
        ),
        (
            ("--bash-completion",),
            {
                "action": "store_true",
                "help": "Print bash completion script for pmotools-python",
            },
        ),
        (
            ("-V", "--version"),
            {"action": "version", "version": f"%(prog)s {__version__}"},
        ),
        (
            ("--list",),
            {
                "nargs": "?",
                "const": "__ALL__",
                "metavar": "[group]",
                "help": "List all commands, or only those within a specific group",
            },
        ),
    )
    for flags, options in top_level_options:
        parser.add_argument(*flags, **options)

    subparsers = parser.add_subparsers(
        title="Commands", dest="command", metavar="<command>"
    )

    command_index: Dict[str, Tuple[str, PmoCommand]] = {}
    for group, commands in REGISTRY.items():
        for cmd_name, cmd in commands.items():
            # Hard fail early if duplicate command names exist across groups
            if cmd_name in command_index:
                raise RuntimeError(
                    f"Duplicate command name detected: '{cmd_name}'. "
                    f"Please rename one of the commands or add an alias."
                )
            leaf = subparsers.add_parser(
                cmd_name,
                help=f"{cmd.help} [{group}]",
                description=f"{cmd.help} (group: {group})",
                add_help=False,
            )
            leaf.set_defaults(_handler=cmd.func, _group=group, _cmd_name=cmd_name)
            command_index[cmd_name] = (group, cmd)

    return parser, command_index
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse the command line and run the requested action.

    Utility flags (``--bash-completion``, ``--list-plain``, ``--list``) are
    handled first. With no command the catalog is printed. Otherwise the
    command's handler is called with ``sys.argv`` temporarily replaced by the
    leaf program name plus every argument the top-level parser did not
    recognise.

    :param argv: argument list handed to ``parse_known_args``
    :return: 0, or the value returned by ``_print_group`` for ``--list GROUP``
    """
    import sys

    parser, _command_index = _build_parser()
    args, passthrough = parser.parse_known_args(argv)

    if getattr(args, "bash_completion", False):
        _print_bash_completion()
        return 0

    if getattr(args, "list_plain", False):
        _print_catalog_plain()
        return 0

    requested_group = getattr(args, "list", None)
    if requested_group:
        if requested_group != "__ALL__":
            return _print_group(requested_group)
        _print_catalog()
        return 0

    # No command provided: show the catalog
    if not getattr(args, "command", None):
        _print_catalog()
        return 0

    # Dispatch to the leaf and forward remaining args to its own argparse
    handler = getattr(args, "_handler", None)
    if handler is None:
        parser.error("No handler bound for this command (internal error).")

    leaf_prog = f"pmotools-python {getattr(args, '_cmd_name', 'unknown')}"
    saved_argv = sys.argv[:]
    sys.argv = [leaf_prog, *passthrough]
    try:
        handler()
    finally:
        # Always restore the caller's argv, even if the handler raises.
        sys.argv = saved_argv

    return 0
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
# When executed as a script, exit with the status code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
|
|
File without changes
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
from datetime import date
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def merge_to_pmo(
    specimen_info: list,
    library_sample_info: list,
    sequencing_info: list,
    panel_info: dict,
    mhap_info: dict,
    bioinfo_method_info: list,
    bioinfo_run_info: list,
    project_info: list,
):
    """
    Merge the individual PMO components into one PMO, replacing
    cross-table names with indices.

    :param specimen_info (list): the specimens within this project
    :param library_sample_info (list): the library samples within this project
    :param sequencing_info (list): the sequencing info for this project
    :param panel_info (dict): the panel and target information for this project
    :param mhap_info (dict): the microhaplotypes within this project, both detected and representative
    :param bioinfo_method_info (list): the bioinformatics pipeline/methods used to generate the amplicon analysis
    :param bioinfo_run_info (list): the runtime info for the bioinformatics pipeline
    :param project_info (list): the information about the projects stored in this PMO

    :return: a dict holding the header, the six list sections, and every
        top-level key of ``panel_info`` and ``mhap_info``
    """
    # Copy the inputs before the name replacement mutates them: list entries
    # are copied one level deep, the two dicts via a JSON round trip.
    specimen_info = [dict(row) for row in specimen_info]
    library_sample_info = [dict(row) for row in library_sample_info]
    sequencing_info = [dict(row) for row in sequencing_info]
    bioinfo_method_info = [dict(row) for row in bioinfo_method_info]
    bioinfo_run_info = [dict(row) for row in bioinfo_run_info]
    project_info = [dict(row) for row in project_info]
    panel_info = json.loads(json.dumps(panel_info))
    mhap_info = json.loads(json.dumps(mhap_info))

    _replace_names_with_IDs(
        specimen_info,
        project_info,
        library_sample_info,
        sequencing_info,
        panel_info,
        mhap_info,
        bioinfo_run_info,
    )

    # Build PMO: fixed sections first, then the panel and microhaplotype keys
    # (later keys override earlier ones on a name clash).
    sections = {
        "pmo_header": _generate_pmo_header(),
        "library_sample_info": library_sample_info,
        "specimen_info": specimen_info,
        "sequencing_info": sequencing_info,
        "bioinformatics_methods_info": bioinfo_method_info,
        "bioinformatics_run_info": bioinfo_run_info,
        "project_info": project_info,
    }
    return {**sections, **panel_info, **mhap_info}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _make_lookup(dict, key):
|
|
69
|
+
lookup = {entry[key]: idx for idx, entry in enumerate(dict)}
|
|
70
|
+
return lookup
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _replace_key_with_id(target_list, reference_list, name_key, id_key, lookup=None):
|
|
74
|
+
"""
|
|
75
|
+
Replaces name_key in target_list with id_key, based on lookup from reference_list.
|
|
76
|
+
"""
|
|
77
|
+
if not lookup:
|
|
78
|
+
lookup = _make_lookup(reference_list, name_key)
|
|
79
|
+
unique_names = set()
|
|
80
|
+
for entry in target_list:
|
|
81
|
+
name = str(entry.pop(name_key))
|
|
82
|
+
unique_names.add(name)
|
|
83
|
+
entry[id_key] = lookup.get(name)
|
|
84
|
+
missing_items = list(unique_names - lookup.keys())
|
|
85
|
+
return missing_items
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _generate_pmo_header():
|
|
89
|
+
today = date.today().isoformat()
|
|
90
|
+
# TODO: update to grab pmo version - will put this in a seperate PR
|
|
91
|
+
pmo_header = {
|
|
92
|
+
"pmo_version": "1.0.0",
|
|
93
|
+
"creation_date": today,
|
|
94
|
+
"generation_method": {
|
|
95
|
+
"program_name": "pmotools-python",
|
|
96
|
+
"program_version": "1.0.0",
|
|
97
|
+
},
|
|
98
|
+
}
|
|
99
|
+
return pmo_header
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _report_missing_IDs(
|
|
103
|
+
missing_projects,
|
|
104
|
+
missing_sequencing,
|
|
105
|
+
missing_specimen,
|
|
106
|
+
missing_panels,
|
|
107
|
+
missing_targets,
|
|
108
|
+
missing_bioinfo_runs,
|
|
109
|
+
missing_libs,
|
|
110
|
+
):
|
|
111
|
+
if any(
|
|
112
|
+
[
|
|
113
|
+
missing_projects,
|
|
114
|
+
missing_sequencing,
|
|
115
|
+
missing_specimen,
|
|
116
|
+
missing_panels,
|
|
117
|
+
missing_targets,
|
|
118
|
+
missing_bioinfo_runs,
|
|
119
|
+
missing_libs,
|
|
120
|
+
]
|
|
121
|
+
):
|
|
122
|
+
error_message = (
|
|
123
|
+
"The following fields were found in one table and not another:\n"
|
|
124
|
+
)
|
|
125
|
+
if missing_projects:
|
|
126
|
+
error_message += f"Project names in Specimen Info not in Project Info: {missing_projects}\n"
|
|
127
|
+
if missing_sequencing:
|
|
128
|
+
error_message += f"Sequencing names in Library Sample Info not in Sequencing Info: {missing_sequencing}\n"
|
|
129
|
+
if missing_specimen:
|
|
130
|
+
error_message += f"Specimen names in Library Sample Info not in Specimen Info: {missing_specimen}\n"
|
|
131
|
+
if missing_panels:
|
|
132
|
+
error_message += f"Panel names in Library Sample Info not in Panel Info: {missing_panels}\n"
|
|
133
|
+
if missing_targets:
|
|
134
|
+
error_message += f"Target names in Representative Microhaplotypes not in Target Info: {missing_targets}\n"
|
|
135
|
+
if missing_bioinfo_runs:
|
|
136
|
+
error_message += f"Bioinformatics run names in Detected Microhaplotypes not in Bioinformatic Run Info: {missing_bioinfo_runs}\n"
|
|
137
|
+
if missing_libs:
|
|
138
|
+
error_message += f"Library Sample names in Detected Microhaplotypes not in Library Sample Info: {missing_libs}\n"
|
|
139
|
+
raise ValueError(error_message)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _replace_names_with_IDs(
    specimen_info,
    project_info,
    library_sample_info,
    sequencing_info,
    panel_info,
    mhap_info,
    bioinfo_run_info,
):
    """Swap every cross-table name for the index of the entry it refers to.

    The target tables are modified in place. All unresolved names are
    collected first and then handed to ``_report_missing_IDs``, which raises
    if any were found.
    """
    # SPECIMEN INFO: project_name -> project_id
    unresolved_projects = _replace_key_with_id(
        specimen_info, project_info, "project_name", "project_id"
    )

    # LIBRARY SAMPLE INFO: sequencing_info_id, specimen_id, panel_id
    unresolved_sequencing = _replace_key_with_id(
        library_sample_info,
        sequencing_info,
        "sequencing_info_name",
        "sequencing_info_id",
    )
    unresolved_specimens = _replace_key_with_id(
        library_sample_info, specimen_info, "specimen_name", "specimen_id"
    )
    unresolved_panels = _replace_key_with_id(
        library_sample_info, panel_info["panel_info"], "panel_name", "panel_id"
    )

    # REP MHAPS: target_name -> target_id
    unresolved_targets = _replace_key_with_id(
        mhap_info["representative_microhaplotypes"]["targets"],
        panel_info["target_info"],
        "target_name",
        "target_id",
    )

    # DETECTED MHAPS: bioinformatics_run_name, then library_sample_name
    unresolved_runs = _replace_key_with_id(
        mhap_info["detected_microhaplotypes"],
        bioinfo_run_info,
        "bioinformatics_run_name",
        "bioinformatics_run_id",
    )
    # Build the library-sample lookup once and reuse it for every run.
    lib_sample_lookup = _make_lookup(library_sample_info, "library_sample_name")
    unresolved_libs = []
    for detected_run in mhap_info["detected_microhaplotypes"]:
        unresolved_libs.extend(
            _replace_key_with_id(
                detected_run["library_samples"],
                library_sample_info,
                "library_sample_name",
                "library_sample_id",
                lookup=lib_sample_lookup,
            )
        )

    # If any names were missing from reference tables error
    _report_missing_IDs(
        unresolved_projects,
        unresolved_sequencing,
        unresolved_specimens,
        unresolved_panels,
        unresolved_targets,
        unresolved_runs,
        unresolved_libs,
    )
|