genelastic 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. genelastic/api/.env +4 -0
  2. genelastic/api/cli_start_api.py +18 -0
  3. genelastic/api/errors.py +52 -0
  4. genelastic/api/extends/example.py +0 -6
  5. genelastic/api/extends/example.yml +0 -0
  6. genelastic/api/routes.py +313 -181
  7. genelastic/api/server.py +34 -26
  8. genelastic/api/settings.py +5 -9
  9. genelastic/api/specification.yml +512 -0
  10. genelastic/common/__init__.py +0 -39
  11. genelastic/common/cli.py +100 -0
  12. genelastic/common/elastic.py +374 -46
  13. genelastic/common/exceptions.py +34 -2
  14. genelastic/common/server.py +59 -0
  15. genelastic/common/types.py +1 -14
  16. genelastic/import_data/__init__.py +0 -27
  17. genelastic/import_data/checker.py +99 -0
  18. genelastic/import_data/checker_observer.py +13 -0
  19. genelastic/import_data/cli/__init__.py +0 -0
  20. genelastic/import_data/cli/cli_check.py +136 -0
  21. genelastic/import_data/cli/gen_data.py +143 -0
  22. genelastic/import_data/cli/import_data.py +346 -0
  23. genelastic/import_data/cli/info.py +247 -0
  24. genelastic/import_data/{cli_integrity.py → cli/integrity.py} +29 -7
  25. genelastic/import_data/cli/validate.py +146 -0
  26. genelastic/import_data/collect.py +185 -0
  27. genelastic/import_data/constants.py +136 -11
  28. genelastic/import_data/import_bundle.py +102 -59
  29. genelastic/import_data/import_bundle_factory.py +70 -149
  30. genelastic/import_data/importers/__init__.py +0 -0
  31. genelastic/import_data/importers/importer_base.py +131 -0
  32. genelastic/import_data/importers/importer_factory.py +85 -0
  33. genelastic/import_data/importers/importer_types.py +223 -0
  34. genelastic/import_data/logger.py +2 -1
  35. genelastic/import_data/models/__init__.py +0 -0
  36. genelastic/import_data/models/analyses.py +178 -0
  37. genelastic/import_data/models/analysis.py +144 -0
  38. genelastic/import_data/models/data_file.py +110 -0
  39. genelastic/import_data/models/process.py +45 -0
  40. genelastic/import_data/models/processes.py +84 -0
  41. genelastic/import_data/models/tags.py +170 -0
  42. genelastic/import_data/models/unique_list.py +109 -0
  43. genelastic/import_data/models/validate.py +26 -0
  44. genelastic/import_data/patterns.py +90 -0
  45. genelastic/import_data/random_bundle.py +79 -54
  46. genelastic/import_data/resolve.py +157 -0
  47. genelastic/ui/.env +1 -0
  48. genelastic/ui/cli_start_ui.py +20 -0
  49. genelastic/ui/routes.py +333 -0
  50. genelastic/ui/server.py +9 -82
  51. genelastic/ui/settings.py +2 -6
  52. genelastic/ui/static/cea-cnrgh.ico +0 -0
  53. genelastic/ui/static/cea.ico +0 -0
  54. genelastic/ui/static/layout.ico +0 -0
  55. genelastic/ui/static/novaseq6000.png +0 -0
  56. genelastic/ui/static/style.css +430 -0
  57. genelastic/ui/static/ui.js +458 -0
  58. genelastic/ui/templates/analyses.html +98 -0
  59. genelastic/ui/templates/analysis_detail.html +44 -0
  60. genelastic/ui/templates/bi_process_detail.html +129 -0
  61. genelastic/ui/templates/bi_processes.html +116 -0
  62. genelastic/ui/templates/explorer.html +356 -0
  63. genelastic/ui/templates/home.html +207 -0
  64. genelastic/ui/templates/layout.html +153 -0
  65. genelastic/ui/templates/version.html +21 -0
  66. genelastic/ui/templates/wet_process_detail.html +131 -0
  67. genelastic/ui/templates/wet_processes.html +116 -0
  68. genelastic-0.9.0.dist-info/METADATA +686 -0
  69. genelastic-0.9.0.dist-info/RECORD +76 -0
  70. genelastic-0.9.0.dist-info/WHEEL +4 -0
  71. genelastic-0.9.0.dist-info/entry_points.txt +10 -0
  72. genelastic-0.9.0.dist-info/licenses/LICENSE +519 -0
  73. genelastic/import_data/analyses.py +0 -69
  74. genelastic/import_data/analysis.py +0 -205
  75. genelastic/import_data/bi_process.py +0 -27
  76. genelastic/import_data/bi_processes.py +0 -49
  77. genelastic/import_data/cli_gen_data.py +0 -116
  78. genelastic/import_data/cli_import.py +0 -379
  79. genelastic/import_data/cli_info.py +0 -256
  80. genelastic/import_data/cli_validate.py +0 -54
  81. genelastic/import_data/data_file.py +0 -87
  82. genelastic/import_data/filename_pattern.py +0 -57
  83. genelastic/import_data/tags.py +0 -123
  84. genelastic/import_data/wet_process.py +0 -28
  85. genelastic/import_data/wet_processes.py +0 -53
  86. genelastic-0.7.0.dist-info/METADATA +0 -105
  87. genelastic-0.7.0.dist-info/RECORD +0 -40
  88. genelastic-0.7.0.dist-info/WHEEL +0 -5
  89. genelastic-0.7.0.dist-info/entry_points.txt +0 -6
  90. genelastic-0.7.0.dist-info/top_level.txt +0 -1
@@ -1,6 +1,4 @@
1
- import copy
2
1
  import random
3
- import shutil
4
2
  import sys
5
3
  import tempfile
6
4
  import typing
@@ -8,9 +6,16 @@ from abc import ABC, abstractmethod
8
6
  from pathlib import Path
9
7
 
10
8
  import yaml
11
- from biophony import BioSeqGen, CovGen, Elements, FastaWriter, MutSim
9
+ from biophony import (
10
+ BioSeqGen,
11
+ CovGen,
12
+ Elements,
13
+ FastaWriter,
14
+ MutSim,
15
+ MutSimParams,
16
+ )
12
17
 
13
- from genelastic.common import (
18
+ from genelastic.common.types import (
14
19
  RandomAnalysisData,
15
20
  RandomBiProcessData,
16
21
  RandomWetProcessData,
@@ -164,13 +169,16 @@ class RandomBiProcess(RandomBundleItem):
164
169
 
165
170
  def _generate_steps(self) -> None:
166
171
  steps_count = random.randint(1, 5)
167
- random_steps = copy.deepcopy(random.sample(self.STEPS, steps_count))
168
- for random_step in random_steps:
169
- random_step["version"] = self._generate_version(
170
- random.choice(range(1, 5))
172
+ random_steps = random.sample(self.STEPS, steps_count)
173
+ for rs in random_steps:
174
+ v = self._generate_version(random.choice(range(1, 5)))
175
+ self._steps.append(
176
+ {
177
+ "version": v,
178
+ "name": str(rs["name"]),
179
+ "cmd": random.choice(rs["cmd"]),
180
+ }
171
181
  )
172
- random_step["cmd"] = random.choice(random_step["cmd"])
173
- self._steps.append(random_step)
174
182
 
175
183
  def to_dict(self) -> RandomBiProcessData:
176
184
  """Return the generated bi informatics process as a dictionary."""
@@ -184,25 +192,39 @@ class RandomBiProcess(RandomBundleItem):
184
192
 
185
193
 
186
194
  class RandomAnalysis(RandomBundleItem):
187
- """Generate a random analysis."""
195
+ """Generate a random analysis.
196
+
197
+ :param fasta_dir: Directory where to create the FASTA file used as a basis to generate the analysis VCF file.
198
+ :param output_dir: Directory where the analysis VCF file
199
+ (and coverage file if `do_gen_coverage` is set to True) is generated.
200
+
201
+ :raises RuntimeError: Could not generate a VCF file with the given simulation parameters.
202
+ """
188
203
 
189
204
  def __init__( # noqa: PLR0913
190
205
  self,
191
- folder: Path,
206
+ fasta_dir: Path,
207
+ output_dir: Path,
192
208
  seq_len: int,
193
209
  nb_chrom: int,
194
210
  wet_proc_id: str,
195
211
  bi_proc_id: str,
212
+ sim_params: MutSimParams,
196
213
  *,
197
214
  do_gen_coverage: bool,
198
215
  ) -> None:
199
- self._folder = folder
216
+ self._fasta_dir = fasta_dir
217
+ self._output_dir = output_dir
200
218
  self._seq_len = seq_len
201
219
  self._nb_chrom = nb_chrom
202
- self._sample_name = "HG0003"
203
- self._source = "CNRGH"
204
220
  self._wet_process_id = wet_proc_id
205
221
  self._bi_process_id = bi_proc_id
222
+
223
+ self._sample_name = "HG000" + str(random.randint(1, 9))
224
+ sim_params.sample_name = self._sample_name
225
+ self._sim_params = sim_params
226
+
227
+ self._source = "CNRGH"
206
228
  self._barcode = self._random_alphanum_str(n=6)
207
229
  self._reference_genome = "hg38"
208
230
  self._prefix = (
@@ -215,37 +237,32 @@ class RandomAnalysis(RandomBundleItem):
215
237
  self.gen_cov_file()
216
238
 
217
239
  def _gen_vcf_file(self) -> None:
218
- """Generate a dummy VCF file."""
219
- temp_dir = Path(tempfile.mkdtemp())
220
-
221
- try:
222
- fasta_out_file = temp_dir / "seq.fasta"
223
- vcf_out_file = self._folder / f"{self._prefix}.vcf"
240
+ """Generate a dummy VCF file.
224
241
 
225
- # 1 - Generate a FASTA file and save it to a temporary directory.
226
- gen = BioSeqGen(
227
- elements=Elements(), seqlen=self._seq_len, count=self._nb_chrom
228
- )
229
- with fasta_out_file.open("w", encoding="utf-8") as f:
230
- FastaWriter(f, header=False).write_seqs(gen)
242
+ :raises RuntimeError: The call to `mutation-simulator` returned a non-zero exit status.
243
+ """
244
+ fasta_out_file = self._fasta_dir / "seq.fasta"
245
+ vcf_out_file = self._output_dir / f"{self._prefix}.vcf"
231
246
 
232
- # 2 - Generate a VCF from the previously created FASTA file.
233
- MutSim(
234
- fasta_file=str(fasta_out_file),
235
- vcf_file=str(vcf_out_file),
236
- snp_rate=0.02,
237
- ins_rate=0.01,
238
- del_rate=0.01,
239
- ).run()
247
+ # 1 - Generate a FASTA file and save it to a temporary directory.
248
+ gen = BioSeqGen(
249
+ elements=Elements(), seqlen=self._seq_len, count=self._nb_chrom
250
+ )
251
+ with fasta_out_file.open("w", encoding="utf-8") as f:
252
+ FastaWriter(f, header=False).write_seqs(gen)
240
253
 
241
- finally:
242
- shutil.rmtree(temp_dir)
254
+ # 2 - Generate a VCF from the previously created FASTA file.
255
+ MutSim(
256
+ fasta_file=str(fasta_out_file),
257
+ vcf_file=str(vcf_out_file),
258
+ sim_params=self._sim_params,
259
+ ).run()
243
260
 
244
261
  def gen_cov_file(self) -> None:
245
262
  """Generate a dummy coverage file."""
246
263
  chrom_end = self._seq_len - 1
247
264
 
248
- output_path = self._folder / f"{self._prefix}.cov.tsv"
265
+ output_path = self._output_dir / f"{self._prefix}.cov.tsv"
249
266
  with output_path.open("w", encoding="utf-8") as f:
250
267
  for chrom in range(1, self._nb_chrom + 1):
251
268
  coverage = CovGen(
@@ -280,7 +297,7 @@ class RandomAnalysis(RandomBundleItem):
280
297
  "DUAL228",
281
298
  "DUAL289",
282
299
  ],
283
- "data_path": str(self._folder),
300
+ "data_path": str(self._output_dir),
284
301
  }
285
302
 
286
303
 
@@ -289,15 +306,16 @@ class RandomBundle(RandomBundleItem):
289
306
 
290
307
  def __init__( # noqa: PLR0913
291
308
  self,
292
- folder: Path,
309
+ output_dir: Path,
293
310
  analyses_count: int,
294
311
  processes_count: int,
295
312
  nb_chrom: int,
296
313
  seq_len: int,
314
+ sim_params: MutSimParams,
297
315
  *,
298
316
  do_gen_coverage: bool,
299
317
  ) -> None:
300
- self._folder = folder
318
+ self._output_dir = output_dir
301
319
  self._analyses_count = analyses_count
302
320
  self._processes_count = processes_count
303
321
  self._nb_chrom = nb_chrom
@@ -319,19 +337,26 @@ class RandomBundle(RandomBundleItem):
319
337
  self._bi_processes, self._analyses_count
320
338
  )
321
339
 
322
- self._analyses.extend(
323
- [
324
- RandomAnalysis(
325
- self._folder,
326
- self._seq_len,
327
- self._nb_chrom,
328
- str(self._assigned_wet_processes[i]["proc_id"]),
329
- str(self._assigned_bi_processes[i]["proc_id"]),
330
- do_gen_coverage=self._do_gen_coverage,
331
- ).to_dict()
332
- for i in range(self._analyses_count)
333
- ]
334
- )
340
+ with tempfile.TemporaryDirectory() as fasta_dir:
341
+ try:
342
+ self._analyses.extend(
343
+ [
344
+ RandomAnalysis(
345
+ Path(fasta_dir),
346
+ self._output_dir,
347
+ self._seq_len,
348
+ self._nb_chrom,
349
+ str(self._assigned_wet_processes[i]["proc_id"]),
350
+ str(self._assigned_bi_processes[i]["proc_id"]),
351
+ sim_params,
352
+ do_gen_coverage=self._do_gen_coverage,
353
+ ).to_dict()
354
+ for i in range(self._analyses_count)
355
+ ]
356
+ )
357
+ except RuntimeError as e:
358
+ msg = f"VCF file generation for one analysis failed. {e}"
359
+ raise SystemExit(msg) from None
335
360
 
336
361
  @staticmethod
337
362
  def _assign_processes(
@@ -0,0 +1,157 @@
1
+ import logging
2
+ import re
3
+
4
+ from genelastic.common.exceptions import (
5
+ FilenamePatternResolveError,
6
+ InvalidFilePrefixError,
7
+ )
8
+ from genelastic.common.types import Metadata
9
+ from genelastic.import_data.constants import (
10
+ FILE_SUFFIXES_RE,
11
+ )
12
+ from genelastic.import_data.models.tags import Tags
13
+ from genelastic.import_data.patterns import FilenamePattern
14
+
15
+ logger = logging.getLogger("genelastic")
16
+
17
+
18
def validate_file_prefix(file_prefix: str, tags: Tags) -> None:
    """Check that a filename prefix only uses known tags, each at most once.

    The prefix is rejected when it is empty, when it contains a tag that is
    not present in ``tags``, or when the same tag occurs more than once.

    :param file_prefix: The filename prefix containing tags to validate
        (e.g. ``%S_%F_%W_%B_%D_%R_rep-1``).
    :param tags: The tag definitions. ``tags.search_regex`` is used to locate
        tag occurrences and ``in`` is used to test whether a tag is defined.
    :raises InvalidFilePrefixError: If the file prefix is empty, or contains
        an unknown or duplicated tag.
    """
    if not file_prefix:
        msg = "File prefix is empty."
        raise InvalidFilePrefixError(msg)

    encountered: set[str] = set()

    for occurrence in re.finditer(tags.search_regex, file_prefix):
        tag_name = occurrence.group()

        # Happy path first: a defined tag seen for the first time.
        if tag_name in tags and tag_name not in encountered:
            encountered.add(tag_name)
            continue

        # Positions reported to the user are 1-based and inclusive.
        start, end = occurrence.start() + 1, occurrence.end()

        if tag_name not in tags:
            msg = (
                f"File prefix '{file_prefix}' has an unknown tag "
                f"'{tag_name}' at position {start}-{end}."
            )
        else:
            msg = (
                f"File prefix '{file_prefix}' has a duplicated tag "
                f"'{tag_name}' at position {start}-{end}."
            )
        raise InvalidFilePrefixError(msg)
57
+
58
+
59
def resolve_analysis_id(
    file_prefix: str, tags: Tags, metadata: Metadata
) -> str:
    """Resolve an analysis identifier from a filename prefix and metadata.

    Each tag found in the file prefix (using ``tags.search_regex``) is
    replaced with the string form of its corresponding value from
    ``metadata``. Text that is not a tag is kept unchanged.

    The substitution is done in a single left-to-right pass over the
    original prefix, so a metadata value that happens to look like a tag is
    never expanded a second time, and a tag whose name is a prefix of another
    tag cannot corrupt it.

    :param file_prefix: A filename prefix containing tags
        (e.g. ``%S_%F_%W_%B_%D_%R_rep-1``).
    :param tags: The tag definitions used to map tags to metadata fields
        (``tags[tag]["field"]``).
    :param metadata: A mapping of metadata fields to their values. Every
        field referenced by a tag of the prefix must be present; a missing
        field propagates the lookup error raised by ``metadata``
        (``KeyError`` for a plain dictionary).
    :return: The resolved analysis identifier string where all tags have been
        replaced by their metadata values.
    """

    def _value_for(match: "re.Match[str]") -> str:
        # Map the matched tag to its metadata field, then to its value.
        tag_field = tags[match.group()]["field"]
        return str(metadata[tag_field])

    # A callable replacement is inserted literally (no backslash processing),
    # and already-substituted text is never rescanned.
    return re.sub(tags.search_regex, _value_for, file_prefix)
80
+
81
+
82
def resolve_filename_pattern(
    file_prefix: str,
    tags: Tags,
    metadata: Metadata,
    suffix: str | None = None,
    *,
    strict: bool = False,
) -> FilenamePattern:
    """Build a regex pattern from a filename prefix containing tags.

    Each tag in the file prefix is replaced with a named capturing group.
    The group name corresponds to the metadata field associated with the tag,
    and the group regex is chosen as follows:

    - If the field has a truthy value in ``metadata``, the tag becomes a group
      that matches exactly this value (e.g. ``(?P<sample_name>HG0003)``).
      The value is regex-escaped, so characters such as ``.`` or ``+`` are
      matched literally.
    - Otherwise (value missing, ``None``, empty or otherwise falsy), the tag
      becomes a group that matches the tag's default regex
      (e.g. ``(?P<sample_name>[^_]+)``), unless ``strict=True``,
      in which case a ``FilenamePatternResolveError`` is raised.

    Text of the prefix that is not a tag is inserted into the regex unchanged.
    The pattern is assembled in a single pass over the original prefix, so
    generated group text is never rescanned for tags.

    The resulting pattern is anchored at the start and end of the string,
    includes the optional ``suffix`` if provided, and always appends
    ``FILE_SUFFIXES_RE`` at the end.

    :param file_prefix: A string containing tags that describe the expected
        structure of filenames (e.g. ``%S_%F_%W_%B_%D_%R_rep-1``).
    :param tags: The tag definitions that map tag names to metadata fields
        and default regexes.
    :param metadata: Known metadata values used to restrict tag matches when
        available.
    :param suffix: Optional suffix to append to the regex after replacing
        tags. Trailing ``$`` characters are removed from it.
    :param strict: If True, all tags must have a corresponding value in
        ``metadata``; otherwise a ``FilenamePatternResolveError`` exception is
        raised.
    :raises FilenamePatternResolveError: If ``strict=True`` and some tag fields
        are missing from ``metadata``.
    :return: A ``FilenamePattern`` object built from the resulting regex
        string.
    """
    undefined_fields = []

    # Regex fragments, in order: start anchor, then alternating literal text
    # and named groups taken from the prefix.
    parts = ["^"]
    cursor = 0

    for match in re.finditer(tags.search_regex, file_prefix):
        tag_name = match.group()
        tag_field = tags[tag_name]["field"]
        tag_regex = tags[tag_name]["regex"]

        tag_field_value = metadata.get(tag_field)
        if tag_field_value:
            # Known value: match it literally, not as a regex.
            group_regex = re.escape(str(tag_field_value))
        else:
            if strict:
                undefined_fields.append(tag_field)
            group_regex = tag_regex

        # Literal text between the previous tag and this one, then the group.
        parts.append(file_prefix[cursor : match.start()])
        parts.append(f"(?P<{tag_field}>{group_regex})")
        cursor = match.end()

    # Trailing literal text after the last tag (or the whole prefix if it
    # contains no tag at all).
    parts.append(file_prefix[cursor:])

    if undefined_fields:
        formatted_fields = ", ".join(sorted(undefined_fields))
        msg = (
            f"In file prefix '{file_prefix}': "
            f"no value in metadata found for field(s): {formatted_fields}. "
            f"In single-match mode, "
            f"all fields must have a corresponding value defined."
        )
        raise FilenamePatternResolveError(msg)

    # Finalize the regex: append the optional suffix, then FILE_SUFFIXES_RE
    # to capture allowed file extensions, and the end ($) anchor.
    if suffix:
        # Avoid double anchors if suffix already ends with '$'.
        parts.append(suffix.rstrip("$"))
    parts.append(f"{FILE_SUFFIXES_RE}$")
    return FilenamePattern("".join(parts))
genelastic/ui/.env ADDED
@@ -0,0 +1 @@
1
+ GENUI_API_URL="http://127.0.0.1:8000/api/"
@@ -0,0 +1,20 @@
1
+ from genelastic.common.cli import parse_server_launch_args
2
+ from genelastic.common.server import start_dev_server, start_prod_server
3
+
4
+
5
def main() -> None:
    """Start the UI server in the environment selected on the command line.

    Launch arguments are parsed with ``parse_server_launch_args`` (default
    port 8001). ``dev`` starts the development server, additionally watching
    HTML, JS and CSS files for reload; ``prod`` starts the production server.

    :raises NotImplementedError: If the requested environment is neither
        ``dev`` nor ``prod``.
    """
    app_module = "genelastic.ui.server:app"
    args = parse_server_launch_args("Start UI server.", 8001)

    if args.env == "dev":
        watched = ["*.html", "*.js", "*.css"]
        start_dev_server(app_module, args, reload_includes=watched)
        return

    if args.env == "prod":
        start_prod_server(app_module, args)
        return

    msg = f"Environment '{args.env}' is not implemented."
    raise NotImplementedError(msg)
17
+
18
+
19
+ if __name__ == "__main__":
20
+ main()