genelastic 0.6.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. genelastic/api/extends/example.py +2 -3
  2. genelastic/api/routes.py +160 -23
  3. genelastic/api/server.py +30 -22
  4. genelastic/api/settings.py +3 -2
  5. genelastic/common/__init__.py +36 -9
  6. genelastic/common/cli.py +51 -23
  7. genelastic/common/elastic.py +80 -49
  8. genelastic/common/exceptions.py +0 -2
  9. genelastic/common/types.py +20 -15
  10. genelastic/import_data/__init__.py +23 -5
  11. genelastic/import_data/analyses.py +17 -20
  12. genelastic/import_data/analysis.py +69 -65
  13. genelastic/import_data/bi_process.py +7 -5
  14. genelastic/import_data/bi_processes.py +8 -8
  15. genelastic/import_data/cli_gen_data.py +116 -0
  16. genelastic/import_data/cli_import.py +379 -0
  17. genelastic/import_data/{info.py → cli_info.py} +104 -75
  18. genelastic/import_data/cli_integrity.py +384 -0
  19. genelastic/import_data/cli_validate.py +54 -0
  20. genelastic/import_data/constants.py +11 -32
  21. genelastic/import_data/data_file.py +23 -20
  22. genelastic/import_data/filename_pattern.py +26 -32
  23. genelastic/import_data/import_bundle.py +56 -47
  24. genelastic/import_data/import_bundle_factory.py +166 -158
  25. genelastic/import_data/logger.py +22 -18
  26. genelastic/import_data/random_bundle.py +402 -0
  27. genelastic/import_data/tags.py +46 -26
  28. genelastic/import_data/wet_process.py +8 -4
  29. genelastic/import_data/wet_processes.py +13 -8
  30. genelastic/ui/__init__.py +0 -0
  31. genelastic/ui/server.py +87 -0
  32. genelastic/ui/settings.py +11 -0
  33. genelastic-0.7.0.dist-info/METADATA +105 -0
  34. genelastic-0.7.0.dist-info/RECORD +40 -0
  35. {genelastic-0.6.1.dist-info → genelastic-0.7.0.dist-info}/WHEEL +1 -1
  36. genelastic-0.7.0.dist-info/entry_points.txt +6 -0
  37. genelastic/import_data/gen_data.py +0 -194
  38. genelastic/import_data/import_data.py +0 -292
  39. genelastic/import_data/integrity.py +0 -290
  40. genelastic/import_data/validate_data.py +0 -43
  41. genelastic-0.6.1.dist-info/METADATA +0 -41
  42. genelastic-0.6.1.dist-info/RECORD +0 -36
  43. genelastic-0.6.1.dist-info/entry_points.txt +0 -6
  44. {genelastic-0.6.1.dist-info → genelastic-0.7.0.dist-info}/top_level.txt +0 -0
genelastic/import_data/cli_validate.py
@@ -0,0 +1,54 @@
+ import argparse
+ import logging
+ from pathlib import Path
+
+ from schema import SchemaError
+
+ from genelastic.common import add_verbose_control_args
+
+ from .import_bundle_factory import make_import_bundle_from_files
+ from .logger import configure_logging
+
+ logger = logging.getLogger("genelastic")
+
+
+ def read_args() -> argparse.Namespace:
+     """Read arguments from command line."""
+     parser = argparse.ArgumentParser(
+         description="Ensure that YAML files "
+         "follow the genelastic YAML bundle schema.",
+         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+         allow_abbrev=False,
+     )
+     add_verbose_control_args(parser)
+     parser.add_argument(
+         "files",
+         type=Path,
+         nargs="+",
+         default=None,
+         help="YAML files to validate.",
+     )
+     parser.add_argument(
+         "-c",
+         "--check",
+         action="store_true",
+         help="In addition to validating the schema, "
+         "check for undefined referenced processes.",
+     )
+     return parser.parse_args()
+
+
+ def main() -> int:
+     """Entry point of the validate script."""
+     args = read_args()
+     configure_logging(args.verbose)
+
+     try:
+         make_import_bundle_from_files(args.files, check=args.check)
+     except (ValueError, RuntimeError, TypeError, SchemaError) as e:
+         # Catch any exception that can be raised by 'make_import_bundle_from_files'.
+         logger.error(e)
+         return 1
+
+     logger.info("All YAML files respect the genelastic YAML bundle format.")
+     return 0
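For context, a minimal sketch of how this new validator module might be driven programmatically; "run42.yml" and the argv[0] label are invented, and the actual console-script name (declared in entry_points.txt, not shown in this diff) may differ.

```python
# Hypothetical invocation of the new validator; only main() and its 0/1 exit codes
# are taken from the diff above, everything else is made up for illustration.
import sys

from genelastic.import_data.cli_validate import main

sys.argv = ["genelastic-validate", "run42.yml", "--check"]
raise SystemExit(main())  # exits 0 on success, 1 if validation fails
```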
genelastic/import_data/constants.py
@@ -1,11 +1,11 @@
- """
- Module: constants
+ """Module: constants

  This module contains genelastic constants.
  """
+
  import typing

- ALLOWED_CATEGORIES: typing.Final[typing.List[str]] = ['vcf', 'cov']
+ ALLOWED_CATEGORIES: typing.Final[list[str]] = ["vcf", "cov"]

  BUNDLE_CURRENT_VERSION = 3

@@ -13,33 +13,12 @@ DEFAULT_TAG_REGEX = "[^_-]+"
  DEFAULT_TAG_PREFIX = "%"
  DEFAULT_TAG_SUFFIX = ""

- DEFAULT_TAG2FIELD: typing.Final[typing.Dict[str, typing.Dict[str, str]]] = {
-     '%S': {
-         "field": 'sample_name',
-         "regex": DEFAULT_TAG_REGEX
-     },
-     '%F': {
-         "field": 'source',
-         "regex": DEFAULT_TAG_REGEX
-     },
-     '%W': {
-         "field": 'wet_process',
-         "regex": DEFAULT_TAG_REGEX
-     },
-     '%B': {
-         "field": 'bi_process',
-         "regex": DEFAULT_TAG_REGEX
-     },
-     '%D': {
-         "field": 'cov_depth',
-         "regex": DEFAULT_TAG_REGEX
-     },
-     '%A': {
-         "field": 'barcode',
-         "regex": DEFAULT_TAG_REGEX
-     },
-     '%R': {
-         "field": 'reference_genome',
-         "regex": DEFAULT_TAG_REGEX
-     }
+ DEFAULT_TAG2FIELD: typing.Final[dict[str, dict[str, str]]] = {
+     "%S": {"field": "sample_name", "regex": DEFAULT_TAG_REGEX},
+     "%F": {"field": "source", "regex": DEFAULT_TAG_REGEX},
+     "%W": {"field": "wet_process", "regex": DEFAULT_TAG_REGEX},
+     "%B": {"field": "bi_process", "regex": DEFAULT_TAG_REGEX},
+     "%D": {"field": "cov_depth", "regex": DEFAULT_TAG_REGEX},
+     "%A": {"field": "barcode", "regex": DEFAULT_TAG_REGEX},
+     "%R": {"field": "reference_genome", "regex": DEFAULT_TAG_REGEX},
  }
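A rough sketch of how the DEFAULT_TAG2FIELD map above could be expanded into a filename regex with named capture groups. build_filename_regex is a hypothetical helper written only for illustration (the real expansion lives in tags.py and filename_pattern.py), and the template and file name are invented.

```python
# Illustration only: shows how each tag (e.g. '%S') maps to a named capture group.
import re

from genelastic.import_data.constants import DEFAULT_TAG2FIELD


def build_filename_regex(template: str) -> str:
    """Replace each tag in the template by a named capture group for its field."""
    regex = re.escape(template)
    for tag, spec in DEFAULT_TAG2FIELD.items():
        regex = regex.replace(re.escape(tag), f"(?P<{spec['field']}>{spec['regex']})")
    return regex


pattern = build_filename_regex("%S_%W-%B.vcf")
match = re.match(pattern, "sample1_wgs-bwa.vcf")
if match:
    print(match.groupdict())
    # {'sample_name': 'sample1', 'wet_process': 'wgs', 'bi_process': 'bwa'}
```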
genelastic/import_data/data_file.py
@@ -1,5 +1,4 @@
- """
- This module defines the DataFile class, which handles the representation,
+ """This module defines the DataFile class, which handles the representation,
  management, and extraction of metadata for a data file within a data bundle.

  It includes functionality to construct DataFile instances from paths and
@@ -8,45 +7,47 @@ for extracting metadata from filenames using specified patterns.
  """

  import logging
- import os
  import pathlib
- import typing
+ from pathlib import Path

  from genelastic.common import AnalysisMetaData

  from .filename_pattern import FilenamePattern

- logger = logging.getLogger('genelastic')
+ logger = logging.getLogger("genelastic")


  class DataFile:
      """Class for handling a data file and its metadata."""

      # Initializer
-     def __init__(self, path: str, bundle_path: str | None = None,
-                  metadata: typing.Optional[AnalysisMetaData] = None) -> None:
+     def __init__(
+         self,
+         path: Path,
+         bundle_path: Path | None = None,
+         metadata: AnalysisMetaData | None = None,
+     ) -> None:
          self._path = path
          self._bundle_path = bundle_path  # The bundle YAML file in which this
          # file was listed.
          self._metadata = {} if metadata is None else metadata

      def __repr__(self) -> str:
-         return (f"File {self._path}, from bundle {self._bundle_path}"
-                 + f", with metadata {self._metadata}")
+         return f"File {self._path}, from bundle {self._bundle_path}, with metadata {self._metadata}"

      # Get path
      @property
-     def path(self) -> str:
+     def path(self) -> Path:
          """Retrieve the data file path."""
          return self._path

      def exists(self) -> bool:
          """Tests if the associated file exists on disk."""
-         return os.path.isfile(self._path)
+         return self._path.is_file()

      # Get bundle path
      @property
-     def bundle_path(self) -> str | None:
+     def bundle_path(self) -> Path | None:
          """Retrieve the path to the associated data bundle file."""
          return self._bundle_path

@@ -59,20 +60,22 @@ class DataFile:
      # Factory
      @classmethod
      def make_from_bundle(
-             cls,
-             path: str,
-             bundle_path: str | None,
-             pattern: typing.Optional[FilenamePattern] = None) -> 'DataFile':
+         cls,
+         path: Path,
+         bundle_path: Path | None,
+         pattern: FilenamePattern | None = None,
+     ) -> "DataFile":
          """Construct a DataFile instance from a bundle path, file path,
-         and optional filename pattern."""
+         and optional filename pattern.
+         """
          # Make absolute path
-         if not os.path.isabs(path) and not bundle_path is None:
-             path = os.path.join(os.path.dirname(bundle_path), path)
+         if not path.is_absolute() and bundle_path is not None:
+             path = bundle_path.parent / path

          # Extract filename metadata
          metadata = None
          if pattern is not None:
-             metadata = pattern.extract_metadata(os.path.basename(path))
+             metadata = pattern.extract_metadata(path.name)

          if metadata:
              if "ext" not in metadata:
genelastic/import_data/filename_pattern.py
@@ -1,14 +1,11 @@
- """
- This module defines the FilenamePattern class, used to define a filename pattern
+ """This module defines the FilenamePattern class, used to define a filename pattern
  and extract metadata from file names using this pattern.
  """

- import logging
  import re

  from genelastic.common import AnalysisMetaData

- logger = logging.getLogger('genelastic')

  class FilenamePattern:
      """Class for defining a filename pattern.
@@ -18,46 +15,43 @@ class FilenamePattern:

      # Initializer
      def __init__(self, pattern: str) -> None:
-         """
-         Initializes a FilenamePattern instance.
+         """Initializes a FilenamePattern instance.

-         Args:
-             pattern (str): The pattern string used for defining
-                 the filename pattern.
-         """
+         Args:
+             pattern (str): The pattern string used for defining
+                 the filename pattern.
+         """
          self._re = re.compile(pattern)

      def extract_metadata(self, filename: str) -> AnalysisMetaData:
-         """
-         Extracts metadata from the given filename based
-         on the defined pattern.
+         """Extracts metadata from the given filename based
+         on the defined pattern.

-         Args:
-             filename (str): The filename from which metadata
-                 needs to be extracted.
+         Args:
+             filename (str): The filename from which metadata
+                 needs to be extracted.

-         Returns:
-             dict: A dictionary containing the extracted metadata.
+         Returns:
+             dict: A dictionary containing the extracted metadata.

-         Raises:
-             RuntimeError: If parsing of filename fails
-                 with the defined pattern.
-         """
+         Raises:
+             RuntimeError: If parsing of filename fails
+                 with the defined pattern.
+         """
          m = self._re.search(filename)
          if not m:
-             raise RuntimeError(f'Failed parsing filename "{filename}"' +
-                                f'with pattern "{self._re.pattern}".')
+             msg = f'Failed parsing filename "{filename}" with pattern "{self._re.pattern}".'
+             raise RuntimeError(msg)
          return m.groupdict()

      def matches_pattern(self, filename: str) -> bool:
-         """
-         Checks if the given filename matches the defined pattern.
+         """Checks if the given filename matches the defined pattern.

-         Args:
-             filename (str): The filename to be checked.
+         Args:
+             filename (str): The filename to be checked.

-         Returns:
-             bool: True if the filename matches the pattern,
-                 False otherwise.
-         """
+         Returns:
+             bool: True if the filename matches the pattern,
+                 False otherwise.
+         """
          return bool(self._re.match(filename))
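A small illustration of the FilenamePattern behaviour shown above; the pattern and file names are invented.

```python
# Invented example: matches_pattern returns a bool, extract_metadata returns the
# named groups and raises RuntimeError when the filename does not parse.
from genelastic.import_data.filename_pattern import FilenamePattern

fp = FilenamePattern(r"(?P<sample_name>[^_-]+)_(?P<reference_genome>[^_-]+)\.vcf")

print(fp.matches_pattern("NA12878_hg38.vcf"))   # True
print(fp.extract_metadata("NA12878_hg38.vcf"))  # {'sample_name': 'NA12878', 'reference_genome': 'hg38'}

try:
    fp.extract_metadata("unrelated.txt")
except RuntimeError as err:
    print(err)  # Failed parsing filename "unrelated.txt" with pattern "..."
```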
genelastic/import_data/import_bundle.py
@@ -1,5 +1,4 @@
- """
- Module: import_bundle
+ """Module: import_bundle

  This module provides functionality for importing data bundles.
  """
@@ -10,63 +9,68 @@ import typing

  from genelastic.common import BundleDict

+ from .analyses import Analyses
  from .bi_processes import BioInfoProcesses
- from .data_file import DataFile
  from .constants import BUNDLE_CURRENT_VERSION
- from .analyses import Analyses
+ from .data_file import DataFile
  from .tags import Tags
  from .wet_processes import WetProcesses

- logger = logging.getLogger('genelastic')
+ logger = logging.getLogger("genelastic")


  class ImportBundle:
      """Class for handling an import bundle description."""

-     def __init__(self, x: typing.Sequence[BundleDict],
-                  check: bool = False) -> None:
-
-         analyses: typing.List[BundleDict] = []
-         wet_processes: typing.List[BundleDict] = []
-         bi_processes: typing.List[BundleDict] = []
+     def __init__(  # noqa: C901
+         self, x: typing.Sequence[BundleDict], *, check: bool = False
+     ) -> None:
+         analyses: list[BundleDict] = []
+         wet_processes: list[BundleDict] = []
+         bi_processes: list[BundleDict] = []
          tags = Tags(x)

          # Loop on dicts
          for d in x:
              # Check version
-             if 'version' not in d:
-                 raise RuntimeError("No version inside YAML document.")
-             if int(d['version']) != BUNDLE_CURRENT_VERSION:
-                 raise RuntimeError("")
+             if "version" not in d:
+                 msg = "No version inside YAML document."
+                 raise RuntimeError(msg)
+             if int(d["version"]) != BUNDLE_CURRENT_VERSION:
+                 raise RuntimeError

              # Gather all analyses
-             if 'analyses' in d and d['analyses'] is not None:
+             if "analyses" in d and d["analyses"] is not None:
                  # Copy some bundle properties into each analysis
-                 for analysis in d['analyses']:
-                     for key in ['bundle_file', 'root_dir']:
+                 for analysis in d["analyses"]:
+                     for key in ["bundle_file", "root_dir"]:
                          if key in d:
                              analysis[key] = d[key]

                      # Add the tags to use.
-                     analysis['tags'] = tags
+                     analysis["tags"] = tags

-                 analyses.extend(d['analyses'])
+                 analyses.extend(d["analyses"])

              # If some wet processes are defined, copy the bundle file path into each of them.
-             if 'wet_processes' in d and d['wet_processes'] is not None:
-                 for wet_process in d['wet_processes']:
-                     wet_process['bundle_file'] = d['bundle_file']
-                 wet_processes.extend(d['wet_processes'])
+             if "wet_processes" in d and d["wet_processes"] is not None:
+                 for wet_process in d["wet_processes"]:
+                     wet_process["bundle_file"] = d["bundle_file"]
+                 wet_processes.extend(d["wet_processes"])

              # If some bio processes are defined, copy the bundle file path into each of them.
-             if 'bi_processes' in d and d['bi_processes'] is not None:
-                 for bi_process in d['bi_processes']:
-                     bi_process['bundle_file'] = d['bundle_file']
-                 bi_processes.extend(d['bi_processes'])
+             if "bi_processes" in d and d["bi_processes"] is not None:
+                 for bi_process in d["bi_processes"]:
+                     bi_process["bundle_file"] = d["bundle_file"]
+                 bi_processes.extend(d["bi_processes"])

          # Instantiate all objects
-         self._wet_processes: WetProcesses = WetProcesses.from_array_of_dicts(wet_processes)
-         self._bi_processes: BioInfoProcesses = BioInfoProcesses.from_array_of_dicts(bi_processes)
+         self._wet_processes: WetProcesses = WetProcesses.from_array_of_dicts(
+             wet_processes
+         )
+         self._bi_processes: BioInfoProcesses = (
+             BioInfoProcesses.from_array_of_dicts(bi_processes)
+         )
          self._analyses: Analyses = Analyses.from_array_of_dicts(analyses)

          if check:
@@ -79,17 +83,27 @@ class ImportBundle:
          for index, analysis in enumerate(self._analyses):
              analysis_wet_process = analysis.metadata.get("wet_process")

-             if (analysis_wet_process and
-                     analysis_wet_process not in self._wet_processes.get_process_ids()):
-                 sys.exit(f"Analysis at index {index} in file {analysis.bundle_file} "
-                          f"is referencing an undefined wet process: {analysis_wet_process}")
+             if (
+                 analysis_wet_process
+                 and analysis_wet_process
+                 not in self._wet_processes.get_process_ids()
+             ):
+                 sys.exit(
+                     f"Analysis at index {index} in file {analysis.bundle_file} "
+                     f"is referencing an undefined wet process: {analysis_wet_process}"
+                 )

              analysis_bi_process = analysis.metadata.get("bi_process")

-             if (analysis_bi_process and
-                     analysis_bi_process not in self._bi_processes.get_process_ids()):
-                 sys.exit(f"Analysis at index {index} in file {analysis.bundle_file} "
-                          f"is referencing an undefined bi process: {analysis_bi_process}")
+             if (
+                 analysis_bi_process
+                 and analysis_bi_process
+                 not in self._bi_processes.get_process_ids()
+             ):
+                 sys.exit(
+                     f"Analysis at index {index} in file {analysis.bundle_file} "
+                     f"is referencing an undefined bi process: {analysis_bi_process}"
+                 )

      @property
      def analyses(self) -> Analyses:
@@ -111,10 +125,9 @@ class ImportBundle:
          files = self.get_files(cat)
          return len(files)

-     def get_files(self, cat: str | None = None) -> typing.List[DataFile]:
+     def get_files(self, cat: str | None = None) -> list[DataFile]:
          """Returns all files of a category."""
-
-         files: typing.List[DataFile] = []
+         files: list[DataFile] = []

          # Loop on all analyses
          for analysis in self.analyses:
@@ -124,12 +137,8 @@ class ImportBundle:

      def get_nb_matched_files(self) -> int:
          """Get the number of files that match the pattern."""
-
-         return sum(a.get_nb_files()
-                    for a in self.analyses)
+         return sum(a.get_nb_files() for a in self.analyses)

      def get_nb_unmatched_files(self) -> int:
          """Get the number of files that do not match."""
-
-         return sum(len(a.get_unmatched_file_paths())
-                    for a in self.analyses)
+         return sum(len(a.get_unmatched_file_paths()) for a in self.analyses)
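Finally, a sketch of how the ImportBundle accessors above might be consumed, assuming make_import_bundle_from_files (the factory used by cli_validate.py earlier in this diff) returns an ImportBundle; "run42.yml" is an invented bundle file name.

```python
# Illustration only: counts matched/unmatched data files and lists the VCF entries.
from pathlib import Path

from genelastic.import_data.import_bundle_factory import make_import_bundle_from_files

bundle = make_import_bundle_from_files([Path("run42.yml")], check=True)

print(bundle.get_nb_matched_files(), "data files matched their filename pattern")
print(bundle.get_nb_unmatched_files(), "data files did not match")

for data_file in bundle.get_files("vcf"):  # 'vcf' and 'cov' are the allowed categories
    print(data_file.path, "exists" if data_file.exists() else "missing")
```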