genelastic 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. genelastic/api/.env +4 -0
  2. genelastic/api/cli_start_api.py +18 -0
  3. genelastic/api/errors.py +52 -0
  4. genelastic/api/extends/example.py +0 -6
  5. genelastic/api/extends/example.yml +0 -0
  6. genelastic/api/routes.py +313 -181
  7. genelastic/api/server.py +34 -26
  8. genelastic/api/settings.py +5 -9
  9. genelastic/api/specification.yml +512 -0
  10. genelastic/common/__init__.py +0 -39
  11. genelastic/common/cli.py +100 -0
  12. genelastic/common/elastic.py +374 -46
  13. genelastic/common/exceptions.py +34 -2
  14. genelastic/common/server.py +59 -0
  15. genelastic/common/types.py +1 -14
  16. genelastic/import_data/__init__.py +0 -27
  17. genelastic/import_data/checker.py +99 -0
  18. genelastic/import_data/checker_observer.py +13 -0
  19. genelastic/import_data/cli/__init__.py +0 -0
  20. genelastic/import_data/cli/cli_check.py +136 -0
  21. genelastic/import_data/cli/gen_data.py +143 -0
  22. genelastic/import_data/cli/import_data.py +346 -0
  23. genelastic/import_data/cli/info.py +247 -0
  24. genelastic/import_data/{cli_integrity.py → cli/integrity.py} +29 -7
  25. genelastic/import_data/cli/validate.py +146 -0
  26. genelastic/import_data/collect.py +185 -0
  27. genelastic/import_data/constants.py +136 -11
  28. genelastic/import_data/import_bundle.py +102 -59
  29. genelastic/import_data/import_bundle_factory.py +70 -149
  30. genelastic/import_data/importers/__init__.py +0 -0
  31. genelastic/import_data/importers/importer_base.py +131 -0
  32. genelastic/import_data/importers/importer_factory.py +85 -0
  33. genelastic/import_data/importers/importer_types.py +223 -0
  34. genelastic/import_data/logger.py +2 -1
  35. genelastic/import_data/models/__init__.py +0 -0
  36. genelastic/import_data/models/analyses.py +178 -0
  37. genelastic/import_data/models/analysis.py +144 -0
  38. genelastic/import_data/models/data_file.py +110 -0
  39. genelastic/import_data/models/process.py +45 -0
  40. genelastic/import_data/models/processes.py +84 -0
  41. genelastic/import_data/models/tags.py +170 -0
  42. genelastic/import_data/models/unique_list.py +109 -0
  43. genelastic/import_data/models/validate.py +26 -0
  44. genelastic/import_data/patterns.py +90 -0
  45. genelastic/import_data/random_bundle.py +79 -54
  46. genelastic/import_data/resolve.py +157 -0
  47. genelastic/ui/.env +1 -0
  48. genelastic/ui/cli_start_ui.py +20 -0
  49. genelastic/ui/routes.py +333 -0
  50. genelastic/ui/server.py +9 -82
  51. genelastic/ui/settings.py +2 -6
  52. genelastic/ui/static/cea-cnrgh.ico +0 -0
  53. genelastic/ui/static/cea.ico +0 -0
  54. genelastic/ui/static/layout.ico +0 -0
  55. genelastic/ui/static/novaseq6000.png +0 -0
  56. genelastic/ui/static/style.css +430 -0
  57. genelastic/ui/static/ui.js +458 -0
  58. genelastic/ui/templates/analyses.html +98 -0
  59. genelastic/ui/templates/analysis_detail.html +44 -0
  60. genelastic/ui/templates/bi_process_detail.html +129 -0
  61. genelastic/ui/templates/bi_processes.html +116 -0
  62. genelastic/ui/templates/explorer.html +356 -0
  63. genelastic/ui/templates/home.html +207 -0
  64. genelastic/ui/templates/layout.html +153 -0
  65. genelastic/ui/templates/version.html +21 -0
  66. genelastic/ui/templates/wet_process_detail.html +131 -0
  67. genelastic/ui/templates/wet_processes.html +116 -0
  68. genelastic-0.9.0.dist-info/METADATA +686 -0
  69. genelastic-0.9.0.dist-info/RECORD +76 -0
  70. genelastic-0.9.0.dist-info/WHEEL +4 -0
  71. genelastic-0.9.0.dist-info/entry_points.txt +10 -0
  72. genelastic-0.9.0.dist-info/licenses/LICENSE +519 -0
  73. genelastic/import_data/analyses.py +0 -69
  74. genelastic/import_data/analysis.py +0 -205
  75. genelastic/import_data/bi_process.py +0 -27
  76. genelastic/import_data/bi_processes.py +0 -49
  77. genelastic/import_data/cli_gen_data.py +0 -116
  78. genelastic/import_data/cli_import.py +0 -379
  79. genelastic/import_data/cli_info.py +0 -256
  80. genelastic/import_data/cli_validate.py +0 -54
  81. genelastic/import_data/data_file.py +0 -87
  82. genelastic/import_data/filename_pattern.py +0 -57
  83. genelastic/import_data/tags.py +0 -123
  84. genelastic/import_data/wet_process.py +0 -28
  85. genelastic/import_data/wet_processes.py +0 -53
  86. genelastic-0.7.0.dist-info/METADATA +0 -105
  87. genelastic-0.7.0.dist-info/RECORD +0 -40
  88. genelastic-0.7.0.dist-info/WHEEL +0 -5
  89. genelastic-0.7.0.dist-info/entry_points.txt +0 -6
  90. genelastic-0.7.0.dist-info/top_level.txt +0 -1
@@ -1,205 +0,0 @@
1
- import copy
2
- import logging
3
- import re
4
- import typing
5
- from pathlib import Path
6
-
7
- from genelastic.common import AnalysisMetaData
8
-
9
- from .constants import ALLOWED_CATEGORIES
10
- from .data_file import DataFile
11
- from .filename_pattern import FilenamePattern
12
- from .tags import Tags
13
-
14
- logger = logging.getLogger("genelastic")
15
-
16
-
17
class Analysis:
    """One analysis of a bundle: its metadata and the data files attached to it.

    Data files are either listed explicitly (``files``) or discovered on disk
    under the resolved data path. In both cases a file is only considered when
    its category is in ``ALLOWED_CATEGORIES``, and it is reported as matched
    only when its name matches :attr:`filename_regex`.
    """

    def __init__(  # noqa: PLR0913
        self,
        tags: Tags,
        root_dir: str = ".",
        bundle_file: str | None = None,
        file_prefix: str | None = None,
        files: typing.Sequence[str] | None = None,
        data_path: str | None = None,
        **metadata: str | int,
    ) -> None:
        """Store the analysis description.

        Args:
            tags: Tag definitions used to expand ``file_prefix``.
            root_dir: Directory against which a relative ``data_path`` is
                resolved.
            bundle_file: Path of the bundle file declaring this analysis.
            file_prefix: Filename template containing tags; when empty or
                ``None`` a generic filename regex is used instead.
            files: Explicit list of file names, relative to the data path.
                When ``None``, files are looked up on disk.
            data_path: Directory holding the data files; relative paths are
                resolved against ``root_dir``.
            **metadata: Arbitrary metadata fields of the analysis.
        """
        self._bundle_file = Path(bundle_file) if bundle_file else None
        self._file_prefix = file_prefix
        self._files = files
        self._data_path = Analysis._resolve_data_path(
            Path(root_dir), Path(data_path) if data_path else None
        )
        self._tags = tags
        self._metadata: AnalysisMetaData = metadata
        self._categories: set[str] = set()

    @property
    def metadata(self) -> AnalysisMetaData:
        """Return a deep copy of the metadata, so callers cannot mutate ours."""
        return copy.deepcopy(self._metadata)

    @property
    def bundle_file(self) -> Path | None:
        """Return the bundle file path, or ``None`` when none was given."""
        return self._bundle_file

    @property
    def filename_regex(self) -> str:
        """Build the regex that data file names must match.

        Without a file prefix, a generic ``<anything>.<ext>[.gz]`` regex is
        returned. With a file prefix, every tag is replaced by a named group:
        the group matches the metadata value literally when the tag's field
        is present in the metadata, and the tag's own regex otherwise.
        Start/end anchors and the extension group are appended when missing.
        """
        # NOTE(review): this default hard-codes "vcf|cov" while the prefix
        # branch below uses ALLOWED_CATEGORIES - confirm both stay in sync.
        pattern: str = r"^.+\.(?P<ext>vcf|cov)(\.gz)?$"

        if self._file_prefix:
            pattern = self._file_prefix

            # Replace each tag by a named regex group.
            for tag_name, tag_attrs in self._tags.items:
                field = tag_attrs["field"]
                regex = tag_attrs["regex"]

                if field in self._metadata:
                    # A metadata value is data, not a pattern: escape it so
                    # characters such as '.', '+' or '(' match themselves
                    # instead of corrupting the regex.
                    group_body = re.escape(str(self._metadata[field]))
                else:
                    group_body = regex

                pattern = pattern.replace(
                    tag_name, f"(?P<{field}>{group_body})"
                )

            # Warn about anything that still looks like a tag.
            for leftover in re.findall(self._tags.search_regex, pattern):
                logger.warning(
                    "String '%s' in key 'file_prefix' looks like an undefined tag. "
                    "If this string is not a tag, you can ignore this warning.",
                    leftover,
                )

            # Add missing start and end markers.
            if not pattern.startswith("^"):
                pattern = "^" + pattern
            if not pattern.endswith("$"):
                pattern += (
                    r"\.(?P<ext>" + "|".join(ALLOWED_CATEGORIES) + r")(\.gz)?$"
                )
            logger.debug("File regex for %s: %s", self._bundle_file, pattern)

        return pattern

    def get_nb_files(self, cat: str | None = None) -> int:
        """Return the number of matched files, optionally for one category."""
        return len(self.get_file_paths(cat=cat))

    def get_data_files(self, cat: str | None = None) -> list[DataFile]:
        """Return the matched files as ``DataFile`` objects.

        Files for which ``DataFile.make_from_bundle`` raises ``OSError`` or
        ``ValueError`` are logged and left out of the result.
        """
        files = self.get_file_paths(cat=cat)
        filename_pattern = FilenamePattern(self.filename_regex)

        data_files: list[DataFile] = []
        for path in files:
            try:
                data_file = DataFile.make_from_bundle(
                    path=path,
                    bundle_path=self._bundle_file,
                    pattern=filename_pattern,
                )
            except (OSError, ValueError) as e:
                logger.error("Error processing file %s: %s", path, str(e))
            else:
                data_files.append(data_file)

        return data_files

    def get_file_paths(self, cat: str | None = None) -> typing.Sequence[Path]:
        """Return the files whose name matches the filename regex."""
        matched, _, _ = self._do_get_file_paths(cat=cat)
        return matched

    def get_unmatched_file_paths(
        self, cat: str | None = None
    ) -> typing.Sequence[Path]:
        """Return the files whose name does not match the filename regex."""
        _, unmatched, _ = self._do_get_file_paths(cat=cat)
        return unmatched

    def get_all_categories(self) -> set[str]:
        """Return the categories of the matched files."""
        _, _, categories = self._do_get_file_paths()
        return categories

    @staticmethod
    def _resolve_data_path(root_dir: Path, data_path: Path | None) -> Path:
        """Return the data directory, resolving relative paths on ``root_dir``.

        A missing ``data_path`` resolves to ``root_dir`` itself; an absolute
        ``data_path`` is returned unchanged.
        """
        resolved_data_path = Path() if data_path is None else data_path

        if not resolved_data_path.is_absolute():
            resolved_data_path = (root_dir / resolved_data_path).absolute()

        return resolved_data_path

    def _get_files_with_allowed_categories(self) -> dict[Path, str]:
        """Map every candidate file path to its category.

        Explicitly listed files are categorised by their first suffix; files
        without any suffix, or with a suffix outside ``ALLOWED_CATEGORIES``,
        are ignored. When no files are listed, the data path is globbed for
        ``*.<cat>`` and ``*.<cat>.gz`` for each allowed category.
        """
        allowed_files: dict[Path, str] = {}

        if self._files is not None:
            # Files are listed explicitly in the bundle.
            for name in self._files:
                file = Path(self._data_path) / name
                suffixes = file.suffixes
                if not suffixes:
                    # No extension at all: the file cannot belong to any
                    # category (indexing suffixes[0] would raise IndexError).
                    continue
                cat = suffixes[0][1:]
                if cat in ALLOWED_CATEGORIES:
                    allowed_files[file] = cat
        else:
            # Otherwise look for files on disk in the data path.
            for cat in ALLOWED_CATEGORIES:
                glob_res: list[Path] = []
                glob_res.extend(self._data_path.glob(f"*.{cat}"))
                glob_res.extend(self._data_path.glob(f"*.{cat}.gz"))

                for g_file in glob_res:
                    allowed_files[g_file] = cat

        return allowed_files

    def _do_get_file_paths(
        self, cat: str | None = None
    ) -> tuple[typing.Sequence[Path], typing.Sequence[Path], set[str]]:
        """Split candidate files into matched and unmatched ones.

        Args:
            cat: Restrict the search to this category; ``None`` means all
                allowed categories.

        Returns:
            A tuple ``(matched files, unmatched files, categories of the
            matched files)``; both file lists are sorted by path.

        Raises:
            ValueError: If ``cat`` is not one of ``ALLOWED_CATEGORIES``.
        """
        if cat is not None and cat not in ALLOWED_CATEGORIES:
            msg = f"Unknown category {cat}."
            raise ValueError(msg)

        allowed_files = self._get_files_with_allowed_categories()

        if cat is None:
            # No category requested: consider every allowed file.
            files_to_match = allowed_files
        else:
            # Keep only the files of the requested category.
            files_to_match = {
                k: v for k, v in allowed_files.items() if v == cat
            }

        filename_pattern = FilenamePattern(self.filename_regex)
        matching_files: list[Path] = []
        non_matching_files: list[Path] = []
        categories: set[str] = set()

        # Keep the files whose name matches the analysis filename pattern.
        for file, category in sorted(files_to_match.items()):
            if filename_pattern.matches_pattern(file.name):
                matching_files.append(file)
                logger.info("MATCHED file %s.", file)
                categories.add(category)
            else:
                logger.warning("UNMATCHED file %s.", file)
                non_matching_files.append(file)

        return matching_files, non_matching_files, categories
@@ -1,27 +0,0 @@
1
- import copy
2
-
3
- from genelastic.common import BioInfoProcessData
4
-
5
-
6
class BioInfoProcess:
    """A bioinformatics process: an identifier plus free-form data fields."""

    def __init__(
        self,
        proc_id: str,
        bundle_file: str | None = None,
        **data: str | list[str],
    ) -> None:
        """Store the process description.

        Args:
            proc_id: Identifier of the process.
            bundle_file: Bundle file the process was declared in, if any.
            **data: Every other field describing the process.
        """
        self._data: BioInfoProcessData = data
        self._bundle_file = bundle_file
        self._proc_id = proc_id

    @property
    def data(self) -> BioInfoProcessData:
        """Return a deep copy of the process data, safe for callers to mutate."""
        snapshot = copy.deepcopy(self._data)
        return snapshot

    @property
    def id(self) -> str:
        """Return the identifier given at construction."""
        return self._proc_id
@@ -1,49 +0,0 @@
1
- import logging
2
- import typing
3
-
4
- from genelastic.common import BundleDict
5
-
6
- from .bi_process import BioInfoProcess
7
-
8
- logger = logging.getLogger("genelastic")
9
-
10
-
11
class BioInfoProcesses:
    """Container of BioInfoProcess objects, indexed by their ID."""

    def __init__(self) -> None:
        """Create an empty container."""
        self._dict: dict[str, BioInfoProcess] = {}

    def __len__(self) -> int:
        """Return the number of stored processes."""
        return len(self._dict)

    def __getitem__(self, key: str) -> BioInfoProcess:
        """Return the process registered under ``key`` (KeyError if absent)."""
        return self._dict[key]

    def add(self, process: BioInfoProcess) -> None:
        """Register one BioInfoProcess object.

        Raises:
            ValueError: If a process with the same ID is already stored.
        """
        proc_id = process.id
        if proc_id not in self._dict:
            self._dict[proc_id] = process
            return

        msg = f"A bi process with the id '{process.id}' is already present."
        raise ValueError(msg)

    def get_process_ids(self) -> set[str]:
        """Return the set of IDs of the stored processes."""
        return set(self._dict)

    @classmethod
    def from_array_of_dicts(
        cls, arr: typing.Sequence[BundleDict]
    ) -> typing.Self:
        """Build a container from dicts, each expanded into a BioInfoProcess.

        Every dict is passed as keyword arguments to ``BioInfoProcess``;
        duplicate IDs raise ``ValueError`` through :meth:`add`.
        """
        container = cls()
        for description in arr:
            process = BioInfoProcess(**description)
            container.add(process)
        return container
@@ -1,116 +0,0 @@
1
- import argparse
2
- import logging
3
- from pathlib import Path
4
-
5
- from genelastic.common import add_verbose_control_args
6
-
7
- from .logger import configure_logging
8
- from .random_bundle import (
9
- RandomBundle,
10
- )
11
-
12
- logger = logging.getLogger("genelastic")
13
-
14
-
15
def read_args() -> argparse.Namespace:
    """Parse the command line of the random genetics data generator.

    Returns:
        The namespace produced by ``argparse`` for the process arguments.
    """
    parser = argparse.ArgumentParser(
        allow_abbrev=False,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Genetics data random generator.",
    )
    add_verbose_control_args(parser)

    # Options as (flags, keyword arguments), declared in the order they are
    # registered on the parser.
    option_specs = (
        (
            ("-d", "--data-folder"),
            {
                "dest": "data_folder",
                "required": True,
                "help": "Data destination folder.",
                "type": Path,
            },
        ),
        (
            ("--log-file",),
            {"dest": "log_file", "help": "Path to a log file."},
        ),
        (
            ("-n", "--chrom-nb"),
            {
                "dest": "chrom_nb",
                "type": int,
                "default": 5,
                "help": "Number of chromosomes to include in the generated VCF file.",
            },
        ),
        (
            ("-o", "--output-yaml-file"),
            {
                "dest": "output_file",
                "default": None,
                "help": "Output YAML file.",
                "type": Path,
            },
        ),
        (
            ("-s", "--sequence-size"),
            {
                "type": int,
                "default": 2000,
                "help": "Sequence size (number of nucleotides) generated for each chromosome.",
            },
        ),
        (
            ("-c", "--coverage"),
            {
                "action": "store_true",
                "help": "Generate a coverage file for each analysis.",
            },
        ),
        (
            ("-a", "--analyses"),
            {
                "help": "Number of analyses to generate. "
                "Each analysis is composed of a YAML bundle file declaring its wet lab and bioinformatics processes, "
                "a VCF file and optionally a coverage file.",
                "default": 1,
                "type": int,
            },
        ),
        (
            ("-p", "--processes"),
            {
                "help": "Number of Wet Lab and Bioinformatics processes to generate.",
                "default": 1,
                "type": int,
            },
        ),
    )
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)

    return parser.parse_args()
80
-
81
-
82
def main() -> None:
    """Entry point of the gen-data script.

    Validates the command line arguments, configures logging, then generates
    a random bundle and writes it as YAML to the requested output.

    Raises:
        SystemExit: If the data folder is not an existing directory, or if
            the analyses or processes count is lower than 1.
    """
    args = read_args()

    # The destination folder must already exist.
    folder = args.data_folder.resolve()
    if not folder.is_dir():
        raise SystemExit(
            f"ERROR: '{folder}' does not exist or is not a directory."
        )

    # Both counts must be strictly positive; analyses are checked first.
    for count, label in (
        (args.analyses, "Analyses"),
        (args.processes, "Processes"),
    ):
        if count < 1:
            raise SystemExit(f"{label} count must be at least 1.")

    configure_logging(args.verbose, log_file=args.log_file)
    logger.debug("Arguments: %s", args)

    # Generate the bundle, then write it to the YAML output given on the
    # command line (None when the option is omitted).
    bundle = RandomBundle(
        folder,
        args.analyses,
        args.processes,
        args.chrom_nb,
        args.sequence_size,
        do_gen_coverage=args.coverage,
    )
    bundle.to_yaml(args.output_file)


if __name__ == "__main__":
    main()