pmotools 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. pmotools/__init__.py +26 -0
  2. pmotools/cli.py +374 -0
  3. pmotools/pmo_builder/__init__.py +0 -0
  4. pmotools/pmo_builder/json_convert_utils.py +5 -0
  5. pmotools/pmo_builder/merge_to_pmo.py +209 -0
  6. pmotools/pmo_builder/metatable_to_pmo.py +443 -0
  7. pmotools/pmo_builder/mhap_table_to_pmo.py +413 -0
  8. pmotools/pmo_builder/panel_information_to_pmo.py +385 -0
  9. pmotools/pmo_engine/__init__.py +0 -0
  10. pmotools/pmo_engine/pmo_checker.py +64 -0
  11. pmotools/pmo_engine/pmo_processor.py +1487 -0
  12. pmotools/pmo_engine/pmo_reader.py +541 -0
  13. pmotools/pmo_engine/pmo_writer.py +52 -0
  14. pmotools/schemas/portable_microhaplotype_object_v0.1.0.schema.json +1822 -0
  15. pmotools/scripts/__init__.py +0 -0
  16. pmotools/scripts/convertors_to_pmo/__init__.py +0 -0
  17. pmotools/scripts/convertors_to_pmo/excel_meta_to_json_meta.py +54 -0
  18. pmotools/scripts/convertors_to_pmo/microhaplotype_table_to_json_file.py +102 -0
  19. pmotools/scripts/convertors_to_pmo/terra_amp_output_to_json.py +192 -0
  20. pmotools/scripts/convertors_to_pmo/text_meta_to_json_meta.py +54 -0
  21. pmotools/scripts/extract_info_from_pmo/__init__.py +0 -0
  22. pmotools/scripts/extract_info_from_pmo/count_library_samples_per_target.py +69 -0
  23. pmotools/scripts/extract_info_from_pmo/count_specimen_meta.py +69 -0
  24. pmotools/scripts/extract_info_from_pmo/count_targets_per_library_sample.py +69 -0
  25. pmotools/scripts/extract_info_from_pmo/extract_insert_of_panels.py +78 -0
  26. pmotools/scripts/extract_info_from_pmo/extract_refseq_of_inserts_of_panels.py +43 -0
  27. pmotools/scripts/extract_info_from_pmo/list_bioinformatics_run_names.py +43 -0
  28. pmotools/scripts/extract_info_from_pmo/list_library_sample_names_per_specimen_name.py +60 -0
  29. pmotools/scripts/extract_info_from_pmo/list_specimen_meta_fields.py +60 -0
  30. pmotools/scripts/extractors_from_pmo/__init__.py +0 -0
  31. pmotools/scripts/extractors_from_pmo/extract_allele_table.py +153 -0
  32. pmotools/scripts/extractors_from_pmo/extract_pmo_with_read_filter.py +52 -0
  33. pmotools/scripts/extractors_from_pmo/extract_pmo_with_select_library_sample_names.py +61 -0
  34. pmotools/scripts/extractors_from_pmo/extract_pmo_with_select_specimen_names.py +57 -0
  35. pmotools/scripts/extractors_from_pmo/extract_pmo_with_select_targets.py +57 -0
  36. pmotools/scripts/extractors_from_pmo/extract_pmo_with_selected_meta.py +63 -0
  37. pmotools/scripts/pmo_utils/__init__.py +0 -0
  38. pmotools/scripts/pmo_utils/combine_pmos.py +57 -0
  39. pmotools/scripts/pmo_utils/validate_pmo.py +47 -0
  40. pmotools/utils/__init__.py +0 -0
  41. pmotools/utils/color_text.py +153 -0
  42. pmotools/utils/schema_loader.py +29 -0
  43. pmotools/utils/small_utils.py +399 -0
  44. pmotools-0.1.0.dist-info/METADATA +794 -0
  45. pmotools-0.1.0.dist-info/RECORD +47 -0
  46. pmotools-0.1.0.dist-info/WHEEL +4 -0
  47. pmotools-0.1.0.dist-info/entry_points.txt +3 -0
pmotools/__init__.py ADDED
@@ -0,0 +1,26 @@
1
+ # pmotools/__init__.py
2
+ from __future__ import annotations
3
+
try:
    # Python 3.8+ ships importlib.metadata in the standard library.
    from importlib.metadata import PackageNotFoundError, version
except ImportError:  # pragma: no cover
    # Only a missing module should trigger the legacy fallback; any other
    # failure during import is a real error and must not be masked.
    from pkg_resources import get_distribution as _gd  # type: ignore

    class PackageNotFoundError(Exception):
        """Raised when the requested distribution is not installed."""

    def version(pkg: str) -> str:  # type: ignore
        """Return the installed version of *pkg* using pkg_resources."""
        try:
            return _gd(pkg).version
        except Exception as e:  # noqa: BLE001
            # Carry the package name so the error is self-explanatory.
            raise PackageNotFoundError(pkg) from e


try:
    # Use the installed distribution name (matches [project].name)
    __version__ = version("pmotools")
except PackageNotFoundError:
    # When running from a source tree without being installed
    __version__ = "0+local"
pmotools/cli.py ADDED
@@ -0,0 +1,374 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import argparse
4
+ from dataclasses import dataclass
5
+ from typing import Callable, Dict, Tuple
6
+
7
+ from pmotools import __version__
8
+ from pmotools.utils.color_text import ColorText as CT
9
+
10
+ # convertors_to_pmo
11
+ from pmotools.scripts.convertors_to_pmo.text_meta_to_json_meta import (
12
+ text_meta_to_json_meta,
13
+ )
14
+ from pmotools.scripts.convertors_to_pmo.excel_meta_to_json_meta import (
15
+ excel_meta_to_json_meta,
16
+ )
17
+ from pmotools.scripts.convertors_to_pmo.microhaplotype_table_to_json_file import (
18
+ microhaplotype_table_to_json_file,
19
+ )
20
+ from pmotools.scripts.convertors_to_pmo.terra_amp_output_to_json import (
21
+ terra_amp_output_to_json,
22
+ )
23
+
24
+ # extractors_from_pmo
25
+ from pmotools.scripts.extractors_from_pmo.extract_pmo_with_selected_meta import (
26
+ extract_pmo_with_selected_meta,
27
+ )
28
+ from pmotools.scripts.extractors_from_pmo.extract_pmo_with_select_specimen_names import (
29
+ extract_pmo_with_select_specimen_names,
30
+ )
31
+ from pmotools.scripts.extractors_from_pmo.extract_pmo_with_select_library_sample_names import (
32
+ extract_pmo_with_select_library_sample_names,
33
+ )
34
+ from pmotools.scripts.extractors_from_pmo.extract_pmo_with_select_targets import (
35
+ extract_pmo_with_select_targets,
36
+ )
37
+ from pmotools.scripts.extractors_from_pmo.extract_pmo_with_read_filter import (
38
+ extract_pmo_with_read_filter,
39
+ )
40
+ from pmotools.scripts.extractors_from_pmo.extract_allele_table import (
41
+ extract_for_allele_table,
42
+ )
43
+
44
+ # pmo_utils
45
+ from pmotools.scripts.pmo_utils.combine_pmos import combine_pmos
46
+ from pmotools.scripts.pmo_utils.validate_pmo import validate_pmo
47
+
48
+ # extract_info_from_pmo
49
+ from pmotools.scripts.extract_info_from_pmo.list_library_sample_names_per_specimen_name import (
50
+ list_library_sample_names_per_specimen_name,
51
+ )
52
+ from pmotools.scripts.extract_info_from_pmo.list_specimen_meta_fields import (
53
+ list_specimen_meta_fields,
54
+ )
55
+ from pmotools.scripts.extract_info_from_pmo.list_bioinformatics_run_names import (
56
+ list_bioinformatics_run_names,
57
+ )
58
+ from pmotools.scripts.extract_info_from_pmo.count_specimen_meta import (
59
+ count_specimen_meta,
60
+ )
61
+ from pmotools.scripts.extract_info_from_pmo.count_targets_per_library_sample import (
62
+ count_targets_per_library_sample,
63
+ )
64
+ from pmotools.scripts.extract_info_from_pmo.count_library_samples_per_target import (
65
+ count_library_samples_per_target,
66
+ )
67
+
68
+ # panel info subset
69
+ from pmotools.scripts.extract_info_from_pmo.extract_insert_of_panels import (
70
+ extract_insert_of_panels,
71
+ )
72
+ from pmotools.scripts.extract_info_from_pmo.extract_refseq_of_inserts_of_panels import (
73
+ extract_refseq_of_inserts_of_panels,
74
+ )
75
+
76
+
@dataclass(frozen=True)
class PmoCommand:
    """Immutable registry entry pairing a CLI entry point with its help text."""

    # Zero-argument callable invoked when the command is dispatched.
    func: Callable[[], None]
    # One-line description shown in command listings.
    help: str
+
82
+
# Command catalogue: group name -> {command name -> PmoCommand}.
# Insertion order is the order in which groups and commands are listed.
REGISTRY: Dict[str, Dict[str, PmoCommand]] = {
    # Converters from other formats into JSON
    "convertors_to_json": {
        "text_meta_to_json_meta": PmoCommand(
            func=text_meta_to_json_meta,
            help="Convert text file meta to JSON Meta",
        ),
        "excel_meta_to_json_meta": PmoCommand(
            func=excel_meta_to_json_meta,
            help="Convert Excel file meta to JSON Meta",
        ),
        "microhaplotype_table_to_json_file": PmoCommand(
            func=microhaplotype_table_to_json_file,
            help="Convert microhaplotype table to a JSON file",
        ),
        "terra_amp_output_to_json": PmoCommand(
            func=terra_amp_output_to_json,
            help="Convert Terra output to JSON sequence table",
        ),
    },
    # Commands that pull a subset of data out of a PMO
    "extractors_from_pmo": {
        "extract_pmo_with_selected_meta": PmoCommand(
            func=extract_pmo_with_selected_meta,
            help="Extract samples + haplotypes using selected meta",
        ),
        "extract_pmo_with_select_specimen_names": PmoCommand(
            func=extract_pmo_with_select_specimen_names,
            help="Extract specific samples from the specimens table",
        ),
        "extract_pmo_with_select_library_sample_names": PmoCommand(
            func=extract_pmo_with_select_library_sample_names,
            help="Extract experiment sample names from experiment_info table",
        ),
        "extract_pmo_with_select_targets": PmoCommand(
            func=extract_pmo_with_select_targets,
            help="Extract specific targets",
        ),
        "extract_pmo_with_read_filter": PmoCommand(
            func=extract_pmo_with_read_filter,
            help="Extract with a read filter",
        ),
        "extract_allele_table": PmoCommand(
            func=extract_for_allele_table,
            help="Extract allele tables for tools like dcifer or moire",
        ),
        "extract_insert_of_panels": PmoCommand(
            func=extract_insert_of_panels,
            help="Extract inserts of panels from a PMO",
        ),
        "extract_refseq_of_inserts_of_panels": PmoCommand(
            func=extract_refseq_of_inserts_of_panels,
            help="Extract ref_seq of panel inserts from a PMO",
        ),
    },
    # Commands operating on several PMO files at once
    "working_with_multiple_pmos": {
        "combine_pmos": PmoCommand(
            func=combine_pmos,
            help="Combine multiple PMOs of the same panel",
        ),
    },
    # Listing / counting commands
    "extract_basic_info_from_pmo": {
        "list_library_sample_names_per_specimen_name": PmoCommand(
            func=list_library_sample_names_per_specimen_name,
            help="List experiment_sample_ids per specimen_id",
        ),
        "list_specimen_meta_fields": PmoCommand(
            func=list_specimen_meta_fields,
            help="List specimen meta fields in the specimen_info section",
        ),
        "list_bioinformatics_run_names": PmoCommand(
            func=list_bioinformatics_run_names,
            help="List all tar_amp_bioinformatics_info_ids in a PMO",
        ),
        "count_specimen_meta": PmoCommand(
            func=count_specimen_meta,
            help="Count values of selected specimen meta fields",
        ),
        "count_targets_per_library_sample": PmoCommand(
            func=count_targets_per_library_sample,
            help="Count number of targets per sample",
        ),
        "count_library_samples_per_target": PmoCommand(
            func=count_library_samples_per_target,
            help="Count number of samples per target",
        ),
    },
    # Schema validation
    "validation": {
        "validate_pmo": PmoCommand(
            func=validate_pmo,
            help="Validate a PMO file against a JSON Schema",
        ),
    },
}
164
+
165
+
def _iter_all_commands():
    """Yield ``(group, command_name, help_text)`` for every entry in REGISTRY."""
    for group_name in REGISTRY:
        for command_name, command in REGISTRY[group_name].items():
            yield group_name, command_name, command.help
170
+
171
+
def _print_catalog_plain():
    """
    Write every command to stdout in a machine-friendly, colour-free format.

    Each line holds three tab-separated fields: command, group, help.
    One command per line; consumed by the bash completion script.
    """
    import sys

    rows = (
        f"{name}\t{group}\t{cmdhelp}\n"
        for group, name, cmdhelp in _iter_all_commands()
    )
    sys.stdout.writelines(rows)
182
+
183
+
def _print_catalog() -> None:
    """Write the banner followed by every group and its commands to stdout."""
    import sys

    emit = sys.stdout.write

    banner = (
        f"pmotools-python v{__version__} - A suite of tools for interacting with "
        + CT.boldGreen("Portable Microhaplotype Object (PMO)")
        + " file format\n\n"
    )
    emit(banner)
    emit("Available functions organized by groups are\n")

    for group_name, group_commands in REGISTRY.items():
        # Group header in blue, then one tab-indented line per command.
        emit(CT.boldBlue(group_name) + "\n")
        emit(
            "".join(
                f"\t{name} - {entry.help}\n"
                for name, entry in group_commands.items()
            )
        )
        emit("\n")
199
+
200
+
def _print_group(group: str) -> int:
    """
    Write the commands of a single group to stdout.

    Returns 0 when the group exists. For an unknown group, a notice plus the
    full catalogue are written and 2 is returned.
    """
    import sys

    emit = sys.stdout.write
    group_commands = REGISTRY.get(group)

    if group_commands is None:
        emit(CT.boldRed("Did not find group ") + CT.boldWhite(group) + "\n\n")
        _print_catalog()
        return 2

    emit(CT.boldBlue(group) + "\n")
    for name, entry in group_commands.items():
        emit(f"\t{name} - {entry.help}\n")
    emit("\n")
    return 0
217
+
218
+
219
+ def _print_bash_completion():
220
+ # NOTE: this uses --list-plain to avoid ANSI color parsing and be stable.
221
+ script = r"""# bash completion for pmotools-python
222
+ # add the below to your ~/.bash_completion
223
+
224
+ _pmotools_python_complete()
225
+ {
226
+ local cur prev
227
+ COMPREPLY=()
228
+ cur="${COMP_WORDS[COMP_CWORD]}"
229
+ prev="${COMP_WORDS[COMP_CWORD-1]}"
230
+
231
+ # 1) Completing the command name (1st arg): list all commands
232
+ if [[ ${COMP_CWORD} -eq 1 ]]; then
233
+ # Our CLI prints machine-friendly list via --list-plain:
234
+ # "<command>\t<group>\t<help>"
235
+ local lines cmds
236
+ lines="$(${COMP_WORDS[0]} --list-plain 2>/dev/null)"
237
+ cmds="$(printf '%s\n' "${lines}" | awk -F'\t' '{print $1}')"
238
+ COMPREPLY=( $(compgen -W "${cmds}" -- "${cur}") )
239
+ return 0
240
+ fi
241
+
242
+ # 2) Completing flags for a leaf command: scrape leaf -h
243
+ if [[ "${cur}" == -* ]]; then
244
+ local helps opts
245
+ helps="$(${COMP_WORDS[0]} ${COMP_WORDS[1]} -h 2>/dev/null)"
246
+ # Pull out flag tokens and split comma-separated forms
247
+ opts="$(printf '%s\n' "${helps}" \
248
+ | sed -n 's/^[[:space:]]\{0,\}\(-[-[:alnum:]][-[:alnum:]]*\)\(, *-[[:alnum:]][-[:alnum:]]*\)\{0,\}.*/\1/p' \
249
+ | sed 's/, / /g')"
250
+ COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") )
251
+ return 0
252
+ fi
253
+
254
+ # 3) Otherwise, fall back to filename completion for positional args
255
+ COMPREPLY=( $(compgen -f -- "${cur}") )
256
+ return 0
257
+ }
258
+
259
+ complete -F _pmotools_python_complete pmotools-python
260
+ """
261
+ import sys
262
+
263
+ sys.stdout.write(script)
264
+
265
+
def _build_parser() -> (
    Tuple[argparse.ArgumentParser, Dict[str, Tuple[str, PmoCommand]]]
):
    """
    Assemble the flat command-line interface:
        pmotools-python <command> [args...]

    Returns a ``(parser, index)`` pair where ``index`` maps each command name
    to ``(group, PmoCommand)``. Raises ``RuntimeError`` when two groups
    register the same command name.
    """
    pmo_label = CT.boldGreen("Portable Microhaplotype Object (PMO)")
    banner = (
        f"pmotools-python v{__version__} – A suite of tools for interacting with "
        f"{pmo_label} files"
    )

    parser = argparse.ArgumentParser(
        prog="pmotools-python",
        description=banner,
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # Hidden flag; exists only for the completion script.
    parser.add_argument("--list-plain", action="store_true", help=argparse.SUPPRESS)
    parser.add_argument(
        "--bash-completion",
        action="store_true",
        help="Print bash completion script for pmotools-python",
    )
    parser.add_argument(
        "-V",
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
    )
    # With no value, --list falls back to the "__ALL__" sentinel.
    parser.add_argument(
        "--list",
        nargs="?",
        const="__ALL__",
        metavar="[group]",
        help="List all commands, or only those within a specific group",
    )

    leaf_parsers = parser.add_subparsers(
        title="Commands",
        dest="command",
        metavar="<command>",
    )

    index: Dict[str, Tuple[str, PmoCommand]] = {}
    for group_name, group_commands in REGISTRY.items():
        for name, entry in group_commands.items():
            # Command names share one flat namespace, so fail fast on clashes.
            if name in index:
                raise RuntimeError(
                    f"Duplicate command name detected: '{name}'. "
                    f"Please rename one of the commands or add an alias."
                )
            # add_help=False: -h is left for the leaf command's own parser.
            leaf = leaf_parsers.add_parser(
                name,
                help=f"{entry.help} [{group_name}]",
                description=f"{entry.help} (group: {group_name})",
                add_help=False,
            )
            leaf.set_defaults(_handler=entry.func, _group=group_name, _cmd_name=name)
            index[name] = (group_name, entry)

    return parser, index
328
+
329
+
def main(argv: list[str] | None = None) -> int:
    """
    Entry point for the ``pmotools-python`` command line.

    Handles the informational flags itself and otherwise dispatches to the
    selected command, forwarding every unparsed argument to it through
    ``sys.argv``. Returns the process exit code.
    """
    import sys

    parser, _command_index = _build_parser()
    args, leftover = parser.parse_known_args(argv)

    if getattr(args, "bash_completion", False):
        _print_bash_completion()
        return 0

    if getattr(args, "list_plain", False):
        _print_catalog_plain()
        return 0

    requested_group = getattr(args, "list", None)
    if requested_group:
        if requested_group != "__ALL__":
            return _print_group(requested_group)
        _print_catalog()
        return 0

    # No command provided: show the catalog
    if not getattr(args, "command", None):
        _print_catalog()
        return 0

    # Dispatch to the leaf and forward remaining args to its own argparse
    handler = getattr(args, "_handler", None)
    if handler is None:
        parser.error("No handler bound for this command (internal error).")

    command_name = getattr(args, "_cmd_name", "unknown")
    leaf_prog = f"pmotools-python {command_name}"
    saved_argv = list(sys.argv)
    try:
        # The handler takes no parameters, so it is handed its arguments
        # through a temporarily swapped sys.argv.
        sys.argv = [leaf_prog, *leftover]
        handler()
    finally:
        sys.argv = saved_argv

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
File without changes
@@ -0,0 +1,5 @@
def check_additional_columns_exist(df, additional_column_list):
    """
    Ensure every requested additional column is present in ``df.columns``.

    :param df: object exposing a ``columns`` iterable
    :param additional_column_list: column names to look for; a falsy value
        (``None`` or empty) skips the check
    :raises ValueError: if at least one requested column is absent
    """
    if not additional_column_list:
        return

    missing_cols = set(additional_column_list).difference(df.columns)
    if missing_cols:
        raise ValueError(f"Missing additional columns: {missing_cols}")
@@ -0,0 +1,209 @@
1
+ #!/usr/bin/env python3
2
+ from datetime import date
3
+ import json
4
+
5
+
def merge_to_pmo(
    specimen_info: list,
    library_sample_info: list,
    sequencing_info: list,
    panel_info: dict,
    mhap_info: dict,
    bioinfo_method_info: list,
    bioinfo_run_info: list,
    project_info: list,
):
    """
    Merge the individual components into one PMO, replacing names with indices.

    :param specimen_info (list): the specimen entries
    :param library_sample_info (list): the library sample entries
    :param sequencing_info (list): the sequencing info entries
    :param panel_info (dict): panel and target information; its keys are merged into the top level of the PMO
    :param mhap_info (dict): detected and representative microhaplotypes; its keys are merged into the top level of the PMO
    :param bioinfo_method_info (list): the bioinformatics method entries
    :param bioinfo_run_info (list): the bioinformatics run entries
    :param project_info (list): the project entries

    :return: the assembled PMO as a dictionary.
    :raises ValueError: if a name used in one table is absent from the table it refers to
    """

    def _copy_rows(rows):
        # One new dict per entry so renaming keys never touches the caller's data.
        return [dict(row) for row in rows]

    specimen_info = _copy_rows(specimen_info)
    library_sample_info = _copy_rows(library_sample_info)
    sequencing_info = _copy_rows(sequencing_info)
    bioinfo_method_info = _copy_rows(bioinfo_method_info)
    bioinfo_run_info = _copy_rows(bioinfo_run_info)
    project_info = _copy_rows(project_info)
    # The nested structures are duplicated with a JSON round trip.
    panel_info = json.loads(json.dumps(panel_info))
    mhap_info = json.loads(json.dumps(mhap_info))

    _replace_names_with_IDs(
        specimen_info,
        project_info,
        library_sample_info,
        sequencing_info,
        panel_info,
        mhap_info,
        bioinfo_run_info,
    )

    # Build PMO: fixed sections first, then the panel and microhaplotype keys.
    header = _generate_pmo_header()
    return {
        "pmo_header": header,
        "library_sample_info": library_sample_info,
        "specimen_info": specimen_info,
        "sequencing_info": sequencing_info,
        "bioinformatics_methods_info": bioinfo_method_info,
        "bioinformatics_run_info": bioinfo_run_info,
        "project_info": project_info,
        **panel_info,
        **mhap_info,
    }
66
+
67
+
68
+ def _make_lookup(dict, key):
69
+ lookup = {entry[key]: idx for idx, entry in enumerate(dict)}
70
+ return lookup
71
+
72
+
73
+ def _replace_key_with_id(target_list, reference_list, name_key, id_key, lookup=None):
74
+ """
75
+ Replaces name_key in target_list with id_key, based on lookup from reference_list.
76
+ """
77
+ if not lookup:
78
+ lookup = _make_lookup(reference_list, name_key)
79
+ unique_names = set()
80
+ for entry in target_list:
81
+ name = str(entry.pop(name_key))
82
+ unique_names.add(name)
83
+ entry[id_key] = lookup.get(name)
84
+ missing_items = list(unique_names - lookup.keys())
85
+ return missing_items
86
+
87
+
88
+ def _generate_pmo_header():
89
+ today = date.today().isoformat()
90
+ # TODO: update to grab pmo version - will put this in a seperate PR
91
+ pmo_header = {
92
+ "pmo_version": "1.0.0",
93
+ "creation_date": today,
94
+ "generation_method": {
95
+ "program_name": "pmotools-python",
96
+ "program_version": "1.0.0",
97
+ },
98
+ }
99
+ return pmo_header
100
+
101
+
102
+ def _report_missing_IDs(
103
+ missing_projects,
104
+ missing_sequencing,
105
+ missing_specimen,
106
+ missing_panels,
107
+ missing_targets,
108
+ missing_bioinfo_runs,
109
+ missing_libs,
110
+ ):
111
+ if any(
112
+ [
113
+ missing_projects,
114
+ missing_sequencing,
115
+ missing_specimen,
116
+ missing_panels,
117
+ missing_targets,
118
+ missing_bioinfo_runs,
119
+ missing_libs,
120
+ ]
121
+ ):
122
+ error_message = (
123
+ "The following fields were found in one table and not another:\n"
124
+ )
125
+ if missing_projects:
126
+ error_message += f"Project names in Specimen Info not in Project Info: {missing_projects}\n"
127
+ if missing_sequencing:
128
+ error_message += f"Sequencing names in Library Sample Info not in Sequencing Info: {missing_sequencing}\n"
129
+ if missing_specimen:
130
+ error_message += f"Specimen names in Library Sample Info not in Specimen Info: {missing_specimen}\n"
131
+ if missing_panels:
132
+ error_message += f"Panel names in Library Sample Info not in Panel Info: {missing_panels}\n"
133
+ if missing_targets:
134
+ error_message += f"Target names in Representative Microhaplotypes not in Target Info: {missing_targets}\n"
135
+ if missing_bioinfo_runs:
136
+ error_message += f"Bioinformatics run names in Detected Microhaplotypes not in Bioinformatic Run Info: {missing_bioinfo_runs}\n"
137
+ if missing_libs:
138
+ error_message += f"Library Sample names in Detected Microhaplotypes not in Library Sample Info: {missing_libs}\n"
139
+ raise ValueError(error_message)
140
+
141
+
def _replace_names_with_IDs(
    specimen_info,
    project_info,
    library_sample_info,
    sequencing_info,
    panel_info,
    mhap_info,
    bioinfo_run_info,
):
    """
    Swap every cross-table name reference for an index, in place.

    The tables are processed in a fixed order because later lookups read name
    keys that must not have been replaced yet. Once all tables are processed,
    any name that could not be resolved is reported through
    ``_report_missing_IDs``.
    """
    # Specimen info: project_name -> project_id
    unresolved_projects = _replace_key_with_id(
        specimen_info, project_info, "project_name", "project_id"
    )

    # Library sample info: sequencing_info_name, specimen_name, panel_name -> ids
    unresolved_sequencing = _replace_key_with_id(
        library_sample_info,
        sequencing_info,
        "sequencing_info_name",
        "sequencing_info_id",
    )
    unresolved_specimens = _replace_key_with_id(
        library_sample_info, specimen_info, "specimen_name", "specimen_id"
    )
    unresolved_panels = _replace_key_with_id(
        library_sample_info, panel_info["panel_info"], "panel_name", "panel_id"
    )

    # Representative microhaplotypes: target_name -> target_id
    unresolved_targets = _replace_key_with_id(
        mhap_info["representative_microhaplotypes"]["targets"],
        panel_info["target_info"],
        "target_name",
        "target_id",
    )

    # Detected microhaplotypes: bioinformatics_run_name -> bioinformatics_run_id
    detected_runs = mhap_info["detected_microhaplotypes"]
    unresolved_runs = _replace_key_with_id(
        detected_runs,
        bioinfo_run_info,
        "bioinformatics_run_name",
        "bioinformatics_run_id",
    )

    # ... and library_sample_name -> library_sample_id inside each run. The
    # lookup is built once and shared across all runs.
    library_lookup = _make_lookup(library_sample_info, "library_sample_name")
    unresolved_libraries = []
    for run in mhap_info["detected_microhaplotypes"]:
        unresolved_libraries.extend(
            _replace_key_with_id(
                run["library_samples"],
                library_sample_info,
                "library_sample_name",
                "library_sample_id",
                lookup=library_lookup,
            )
        )

    # Raise if any name was missing from its reference table
    _report_missing_IDs(
        unresolved_projects,
        unresolved_sequencing,
        unresolved_specimens,
        unresolved_panels,
        unresolved_targets,
        unresolved_runs,
        unresolved_libraries,
    )