genelastic 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genelastic/__init__.py +0 -13
- genelastic/api/__init__.py +0 -0
- genelastic/api/extends/__init__.py +0 -0
- genelastic/api/extends/example.py +6 -0
- genelastic/api/routes.py +221 -0
- genelastic/api/server.py +80 -0
- genelastic/api/settings.py +14 -0
- genelastic/common/__init__.py +39 -0
- genelastic/common/cli.py +63 -0
- genelastic/common/elastic.py +214 -0
- genelastic/common/exceptions.py +4 -0
- genelastic/common/types.py +25 -0
- genelastic/import_data/__init__.py +27 -0
- genelastic/{analyses.py → import_data/analyses.py} +19 -20
- genelastic/{analysis.py → import_data/analysis.py} +71 -66
- genelastic/{bi_process.py → import_data/bi_process.py} +8 -6
- genelastic/{bi_processes.py → import_data/bi_processes.py} +10 -9
- genelastic/import_data/cli_gen_data.py +116 -0
- genelastic/import_data/cli_import.py +379 -0
- genelastic/import_data/cli_info.py +256 -0
- genelastic/import_data/cli_integrity.py +384 -0
- genelastic/import_data/cli_validate.py +54 -0
- genelastic/import_data/constants.py +24 -0
- genelastic/{data_file.py → import_data/data_file.py} +26 -21
- genelastic/import_data/filename_pattern.py +57 -0
- genelastic/{import_bundle.py → import_data/import_bundle.py} +58 -48
- genelastic/import_data/import_bundle_factory.py +298 -0
- genelastic/{logger.py → import_data/logger.py} +22 -18
- genelastic/import_data/random_bundle.py +402 -0
- genelastic/{tags.py → import_data/tags.py} +48 -27
- genelastic/{wet_process.py → import_data/wet_process.py} +8 -4
- genelastic/{wet_processes.py → import_data/wet_processes.py} +15 -9
- genelastic/ui/__init__.py +0 -0
- genelastic/ui/server.py +87 -0
- genelastic/ui/settings.py +11 -0
- genelastic-0.7.0.dist-info/METADATA +105 -0
- genelastic-0.7.0.dist-info/RECORD +40 -0
- {genelastic-0.6.0.dist-info → genelastic-0.7.0.dist-info}/WHEEL +1 -1
- genelastic-0.7.0.dist-info/entry_points.txt +6 -0
- genelastic/common.py +0 -151
- genelastic/constants.py +0 -45
- genelastic/filename_pattern.py +0 -62
- genelastic/gen_data.py +0 -193
- genelastic/import_bundle_factory.py +0 -288
- genelastic/import_data.py +0 -294
- genelastic/info.py +0 -248
- genelastic/integrity.py +0 -324
- genelastic/validate_data.py +0 -41
- genelastic-0.6.0.dist-info/METADATA +0 -36
- genelastic-0.6.0.dist-info/RECORD +0 -25
- genelastic-0.6.0.dist-info/entry_points.txt +0 -6
- {genelastic-0.6.0.dist-info → genelastic-0.7.0.dist-info}/top_level.txt +0 -0
genelastic/gen_data.py
DELETED
|
@@ -1,193 +0,0 @@
|
|
|
1
|
-
# pylint: disable=missing-module-docstring
|
|
2
|
-
import argparse
|
|
3
|
-
import logging
|
|
4
|
-
import os
|
|
5
|
-
import random
|
|
6
|
-
import subprocess # nosec
|
|
7
|
-
import sys
|
|
8
|
-
from typing import Dict, List, Sequence, Collection
|
|
9
|
-
|
|
10
|
-
import yaml
|
|
11
|
-
|
|
12
|
-
from genelastic.common import add_verbose_control_args
|
|
13
|
-
from .logger import configure_logging
|
|
14
|
-
|
|
15
|
-
# Module-level logger shared by the genelastic CLI tools.
logger = logging.getLogger('genelastic')
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def read_args() -> argparse.Namespace:
    # pylint: disable=R0801
    """Parse the command line options of the data generator."""
    arg_parser = argparse.ArgumentParser(
        description='Genetics data random generator.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Register the verbosity options shared across genelastic commands.
    add_verbose_control_args(arg_parser)
    arg_parser.add_argument('-d', '--data-folder', dest='data_folder', required=True,
                            help='Data destination folder.')
    arg_parser.add_argument('--log-file', dest='log_file', help='Path to a log file.')
    arg_parser.add_argument('-n', '--chrom-nb', dest='chrom_nb', type=int, default=5,
                            help='Number of chromosomes to generate.')
    arg_parser.add_argument('-o', '--output-yaml-file', dest='output_file', default='-',
                            help='Output YAML file.')
    arg_parser.add_argument('-s', '--chrom-size', dest='chrom_size', type=int, default=2000,
                            help='Data size (number of nucleotides) for each chromosome.')
    return arg_parser.parse_args()
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def gen_cov_files(folder: str, nb_chrom: int, chrom_sz: int, prefix: str) -> List[str]:
    """Generate dummy coverage files. If an error occurs while generating coverage files, exit."""
    generated: List[str] = []
    last_pos = chrom_sz - 1

    for chrom_idx in range(1, nb_chrom + 1):
        cov_path = os.path.join(folder, f"{prefix}_chr{chrom_idx}_cov.tsv")

        # gen-cov prints the coverage data on its stdout.
        cov_cmd = ["gen-cov", "-c", str(chrom_idx), "-p", f"0-{last_pos}", "-d", "5-15",
                   "-r", "0.1"]

        try:
            # Capture the gen-cov output into the destination file.
            with open(cov_path, "w", encoding="utf-8") as out:
                subprocess.run(cov_cmd, stdout=out, check=True)  # nosec
        except (subprocess.CalledProcessError, FileNotFoundError, OSError) as err:
            logger.error(err)
            sys.exit(1)

        generated.append(cov_path)

    return generated
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def gen_vcf_files(folder: str, nb_chrom: int, chrom_sz: int, prefix: str) -> List[str]:
    """Generate dummy VCF files. If an error occurs while generating VCFs, exit.

    :param folder: Destination folder of the generated VCF files.
    :param nb_chrom: Number of chromosomes (one VCF per chromosome).
    :param chrom_sz: Number of nucleotides per chromosome.
    :param prefix: File name prefix of each generated VCF.
    :return: The list of generated file paths.
    """
    files: List[str] = []
    for chrom in range(1, nb_chrom + 1):
        output_path = os.path.join(folder, f"{prefix}_chr{chrom}.vcf")

        # gen-fasta will output a FASTA to stdout.
        gen_fasta_cmd = ["gen-fasta", "-s", f"chr{chrom}", "-n", str(chrom_sz)]
        # gen-vcf will output a VCF to stdout.
        gen_vcf_cmd = ["gen-vcf", "--snp-rate", "0.02", "--ins-rate", "0.01", "--del-rate", "0.01"]

        try:
            # Pipe the output of gen-fasta to the stdin of gen-vcf.
            with subprocess.Popen(gen_fasta_cmd, stdout=subprocess.PIPE) as gen_fasta_proc:  # nosec
                # Redirect the gen-vcf output to a file.
                with open(output_path, "w", encoding="utf-8") as f:
                    subprocess.run(gen_vcf_cmd,
                                   stdin=gen_fasta_proc.stdout, stdout=f, check=True)  # nosec
            # Bug fix: check=True above only covers gen-vcf. A failing
            # gen-fasta previously went unnoticed and left a truncated or
            # empty VCF behind; surface it as an error like the others.
            if gen_fasta_proc.returncode != 0:
                raise subprocess.CalledProcessError(gen_fasta_proc.returncode, gen_fasta_cmd)
        except (subprocess.CalledProcessError, FileNotFoundError, OSError) as e:
            logger.error(e)
            sys.exit(1)

        files.append(output_path)

    return files
|
87
|
-
|
|
88
|
-
|
|
89
|
-
def gen_name(chars: str = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', n: int = 4) -> str:
    """Generate a random alphanumerical name of length ``n``.

    Characters are drawn from ``chars`` with replacement, so a name may
    repeat a character and ``n`` may exceed ``len(chars)`` (the previous
    ``random.sample`` implementation forbade both).
    Not cryptographically secure; intended for dummy test data only.
    """
    return ''.join(random.choices(chars, k=n))  # nosec
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def gen_data(folder: str, nb_chrom: int, chrom_sz: int) -> Dict[str, int | Sequence[Collection[str]]]:
    """Generate dummy analysis following the V3 YAML schema."""
    # Fixed metadata used to build the data file prefix.
    sample_name = "HG0003"
    source = "CNRGH"
    barcode = gen_name(n=6)
    wet_process = "novaseqxplus-10b"
    bi_process = "dragen-4123"
    reference_genome = "hg38"
    prefix = f'{sample_name}_{source}_{wet_process}_{bi_process}_{barcode}_{reference_genome}'

    # Dummy sequencing (wet lab) process description.
    wet_proc_docs = [{
        "proc_id": "novaseqxplus-10b",
        "manufacturer": "illumina",
        "sequencer": "novaseqxplus",
        "generic_kit": "truseq-illumina",
        "fragmentation": 350,
        "reads_size": 300,
        "input_type": "gdna",
        "amplification": "pcr-free",
        "flowcell_type": "10b",
        "sequencing_type": "wgs",
    }]

    # Dummy bioinformatics pipeline description.
    bi_proc_docs = [{
        "proc_id": "dragen-4123",
        "name": "dragen",
        "pipeline_version": "4.1.2.3",
        "steps": [
            {"name": "basecalling", "cmd": "bclconvert", "version": "3.9.3.2"},
            {"name": "trimming", "cmd": "dragen"},
            {"name": "mapping", "cmd": "dragmap"},
            {"name": "postmapping", "cmd": "dragen", "version": "4.1.23"},
            {"name": "smallvarcalling", "cmd": "dragen", "version": "4.1.23"},
            {"name": "svcalling", "cmd": "dragen", "version": "4.1.23"},
            {"name": "secondary_qc", "cmd": "dragen", "version": "4.1.23"}
        ],
        "sequencing_type": "wgs"
    }]

    # One analysis entry referencing the files generated below.
    analysis_docs = [{
        'file_prefix': '%S_%F_%W_%B_%A_%R_chr[0-9]+',
        'sample_name': sample_name,
        'source': source,
        'barcode': barcode,
        'wet_process': "novaseqxplus-10b",
        'bi_process': "dragen-4123",
        'reference_genome': reference_genome,
        'flowcell': gen_name(n=8),
        'lanes': [random.randint(1, 10)],  # nosec
        'seq_indices': ['DUAL219', 'DUAL222', 'DUAL225', 'DUAL228', 'DUAL289'],
        'qc_comment': "",
        'data_path': folder,
    }]

    # Produce the dummy VCF and coverage files on disk.
    gen_vcf_files(folder, nb_chrom=nb_chrom, chrom_sz=chrom_sz, prefix=prefix)
    gen_cov_files(folder, nb_chrom=nb_chrom, chrom_sz=chrom_sz, prefix=prefix)

    return {
        'version': 3,
        'analyses': analysis_docs,
        'bi_processes': bi_proc_docs,
        'wet_processes': wet_proc_docs
    }
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
# Write the import bundle YAML document.
def write_yaml(file: str, data: Dict[str, int | Sequence[Collection[str]]]) -> None:
    """Write YAML to stdout or in a file."""
    if file == '-':
        # '-' is the conventional marker for standard output.
        print('---')
        yaml.dump(data, sys.stdout)
        return

    with open(file, 'w', encoding="utf-8") as out:
        print('---', file=out)
        yaml.dump(data, out)
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
def main() -> None:
    """Entry point of the gen-data script."""
    # Command line arguments drive everything else.
    args = read_args()

    # Logging must be up before any work is done.
    configure_logging(args.verbose, log_file=args.log_file)
    logger.debug("Arguments: %s", args)

    # Generate the dummy data, then emit the bundle YAML (stdout or file).
    bundle = gen_data(args.data_folder, nb_chrom=args.chrom_nb, chrom_sz=args.chrom_size)
    write_yaml(args.output_file, bundle)
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
# Allow running this module directly as a script.
if __name__ == '__main__':
    main()
|
|
@@ -1,288 +0,0 @@
|
|
|
1
|
-
"""ImportBundle factory module.
|
|
2
|
-
"""
|
|
3
|
-
|
|
4
|
-
import logging
|
|
5
|
-
import os
|
|
6
|
-
import re
|
|
7
|
-
import sys
|
|
8
|
-
import typing
|
|
9
|
-
from yaml.parser import ParserError
|
|
10
|
-
from yaml.scanner import ScannerError
|
|
11
|
-
|
|
12
|
-
import schema # type: ignore[import-untyped]
|
|
13
|
-
import yaml
|
|
14
|
-
from .import_bundle import ImportBundle
|
|
15
|
-
from .common import BundleDict
|
|
16
|
-
from .constants import BUNDLE_CURRENT_VERSION
|
|
17
|
-
|
|
18
|
-
# Module-level logger shared by the genelastic CLI tools.
logger = logging.getLogger('genelastic')
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def validate_tag_char(s: str) -> bool:
    """
    A tag should only contain one special character, excluding the following : (, ), ?, <, >.
    """
    # Exactly one character that is neither a word character nor one of
    # ( ) < > ?. fullmatch consumes the whole string, so any input longer
    # (or shorter) than one character is rejected.
    return re.fullmatch(r"[^\w()<>?]", s) is not None
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def validate_field_chars(s: str) -> bool:
    """
    Fields should only contain word characters.
    A word character is a character a-z, A-Z, 0-9, including _ (underscore).
    """
    # One or more word characters, nothing else.
    return bool(re.match(r"^\w+$", s))
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
# Version 1 bundles: an optional flat list of VCF file paths.
_SCHEMA_V1 = schema.Schema({
    'version': 1,
    schema.Optional("vcf_files"): schema.Or(None, [str])
})

# Version 2 bundles: VCF files grouped under a 'vcf' mapping, with an
# optional file name pattern.
_SCHEMA_V2 = schema.Schema({
    'version': 2,
    schema.Optional("vcf"): {
        schema.Optional('filename_pattern'): str,
        'files': [str]
    }
})

# Version 3 (current) bundles: analyses plus wet-lab and bioinformatics
# process descriptions, and an optional tag-matching configuration.
_SCHEMA_V3 = schema.Schema({
    'version': 3,
    # Each analysis describes one set of data files and its metadata.
    schema.Optional('analyses'):
        schema.Or(None, [
            {
                schema.Optional('file_prefix'): str,
                schema.Optional('files'): [str],
                schema.Optional('sample_name'): str,
                schema.Optional('source'): str,
                schema.Optional('barcode'): str,
                schema.Optional('wet_process'): str,
                schema.Optional('bi_process'): str,
                schema.Optional('reference_genome'): str,
                schema.Optional('flowcell'): str,
                schema.Optional('lanes'): [int],
                schema.Optional('seq_indices'): [str],
                schema.Optional('cov_depth'): int,
                schema.Optional('qc_comment'): str,
                schema.Optional('data_path'): str
            }
        ]),
    # Sequencing (wet lab) process descriptions, referenced by analyses
    # through their 'proc_id'.
    schema.Optional('wet_processes'):
        schema.Or(None, [
            {
                "proc_id": str,
                "manufacturer": str,
                "sequencer": str,
                "generic_kit": str,
                "fragmentation": int,
                "reads_size": int,
                "input_type": str,
                "amplification": str,
                "flowcell_type": str,
                "sequencing_type": str,
                schema.Optional("desc"): str,
                schema.Optional("library_kit"): str,
                schema.Optional("sequencing_kit"): str,
                schema.Optional("error_rate_expected"): float
            }
        ]),
    # Bioinformatics pipeline descriptions, referenced by analyses
    # through their 'proc_id'.
    schema.Optional('bi_processes'):
        schema.Or(None, [
            {
                "proc_id": str,
                "name": str,
                "pipeline_version": str,
                "steps": [
                    {
                        "name": str,
                        "cmd": str,
                        schema.Optional("version"): str,
                        schema.Optional("output"): str,
                    }
                ],
                "sequencing_type": str,
                schema.Optional("desc"): str
            }
        ]),
    # Tag extraction configuration: optional tag delimiters plus a mapping
    # of tag names to the field and regex used to match them.
    schema.Optional('tags'): {
        schema.Optional("format"): {
            schema.Optional("prefix"):
                schema.And(str,
                           validate_tag_char,
                           error="Key 'prefix' should only contain one special character, "
                                 "excluding the following : (, ), ?, <, >."
                           ),
            schema.Optional("suffix"):
                schema.And(str,
                           validate_tag_char,
                           error="Key 'suffix' should only contain one special character, "
                                 "excluding the following : (, ), ?, <, >."
                           ),
        },
        "match": {
            schema.And(str,
                       validate_field_chars,
                       error="Tags listed under the 'match' key should only contain "
                             "word characters. A word character is a character "
                             "a-z, A-Z, 0-9, including _ (underscore)."
                       ): {
                "field": str,
                "regex": str
            }
        }
    }
})
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
def make_import_bundle_from_files(files: typing.List[str], check: bool = False) -> ImportBundle:
    """Create an ImportBundle instance from a list of YAML files."""
    documents = []

    for path in files:
        root = os.path.dirname(path)
        for doc in load_import_bundle_file(path):
            # Bring older documents up to the latest/current bundle version.
            if doc['version'] != BUNDLE_CURRENT_VERSION:
                doc = upgrade_bundle_version(doc, BUNDLE_CURRENT_VERSION)
            # Record where each document came from.
            doc['root_dir'] = root
            doc['bundle_file'] = path
            documents.append(doc)

    return ImportBundle(documents, check)
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
def set_version(x: "BundleDict") -> None:
    """Set version number.

    Deduce the version number from the keys present inside the dictionary.

    :param x: A loaded bundle document; mutated in place when no valid
        'version' key is present.
    :raises ValueError: If an explicit 'version' key is not an integer.
    """

    # Empty doc: nothing to deduce from, assume the current version.
    if len(x) == 0:
        x['version'] = BUNDLE_CURRENT_VERSION

    # Explicit version: only check its type.
    elif 'version' in x:
        if not isinstance(x['version'], int):
            raise ValueError("Version must be an integer.")

    # Version 1: flat file lists at the top level.
    elif 'vcf_files' in x or 'cov_files' in x:
        x['version'] = 1

    # Version 2: files grouped under a 'vcf' mapping. Bug fix:
    # 'filename_pattern' is Optional in _SCHEMA_V2, so its absence must not
    # push the document to the latest version — that made a valid v2 doc
    # without a pattern fail v3 validation on its 'vcf' key.
    elif 'vcf' in x:
        x['version'] = 2

    # Latest version otherwise.
    else:
        x['version'] = BUNDLE_CURRENT_VERSION
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
def validate_doc(x: BundleDict) -> None:
    """Validate the dictionary using its corresponding schema.
    """

    # The schemas are module-level globals named after their version.
    version = x['version']
    bundle_schema = globals().get('_SCHEMA_V' + str(version))
    if bundle_schema is None:
        raise ValueError(f"Unknown version \"{version}\" for import bundle file.")

    bundle_schema.validate(x)
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
def load_import_bundle_file(file: str) -> typing.List[BundleDict]:
    """Loads a YAML import bundle file.

    :param file: Path to a YAML file, possibly holding several documents.
    :return: The loaded documents, each validated and with 'version' set.
    :raises SystemExit: On an unreadable file or a YAML parsing error.
    """

    # Load YAML
    logger.info('Load YAML data import file "%s".', file)
    docs: typing.List[BundleDict] = []

    try:
        with open(file, "r", encoding="utf-8") as f:
            for doc in yaml.safe_load_all(f):
                docs.append(doc)
    except (IsADirectoryError, FileNotFoundError) as e:
        logger.error(e)
        sys.exit(1)
    except ScannerError as e:
        logger.error("YAML file lexical analysis failed : %s", e)
        sys.exit(1)
    except ParserError as e:
        logger.error("YAML file syntactic analysis failed : %s", e)
        sys.exit(1)

    # Guess/set version. Bug fix: 'docs' is built as a list above and can
    # never be None, so the old 'docs is None' branch was dead code and an
    # empty YAML file returned [] instead of one default current-version doc.
    if not docs:
        docs = [{'version': BUNDLE_CURRENT_VERSION}]
    else:
        for i, x in enumerate(docs):
            if x is None:
                # An empty YAML document loads as None.
                docs[i] = {'version': BUNDLE_CURRENT_VERSION}
            else:
                set_version(x)

    # Find schema and validate document
    for x in docs:
        validate_doc(x)

    return docs
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
def upgrade_bundle_version(x: "BundleDict", to_version: int) -> "BundleDict":
    """Upgrade a loaded import bundle dictionary.

    :param x: The bundle dictionary to upgrade; not modified.
    :param to_version: Target schema version, strictly greater than x's.
    :return: A new dictionary upgraded step by step to ``to_version``.
    :raises ValueError: If x has no integer 'version', or its version is
        not strictly lower than ``to_version``.
    """

    # Check version
    if 'version' not in x:
        raise ValueError("No version in input bundle dictionary.")
    if not isinstance(x['version'], int):
        raise ValueError("Version of input bundle is not an integer.")
    if x['version'] >= to_version:
        # Bug fix: the second half of this message was a plain string, so
        # the literal text "{to_version}" was emitted instead of the value.
        raise ValueError(f"Original version ({x['version']}) is greater or"
                         f" equal to target version ({to_version}).")

    # Run each single-step upgrade function (_upgrade_from_vN_to_vN+1)
    # in sequence until the target version is reached.
    y = x.copy()
    for v in range(x['version'], to_version):
        upgrade_fct = globals().get(f"_upgrade_from_v{v}_to_v{v + 1}")
        y = upgrade_fct(y)  # type: ignore[misc]

    return y
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
def _upgrade_from_v1_to_v2(x: BundleDict) -> BundleDict:
    """Convert a v1 bundle dictionary into its v2 equivalent."""
    vcf_files = x.get('vcf_files')
    upgraded = {'version': 2, 'vcf': {'files': []}}
    if vcf_files is not None:
        upgraded['vcf']['files'] = vcf_files  # type: ignore[index]

    # Make sure the converted document is a valid v2 bundle.
    _SCHEMA_V2.validate(upgraded)

    return upgraded
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
def _upgrade_from_v2_to_v3(x: BundleDict) -> BundleDict:
    """Convert a v2 bundle dictionary into its v3 equivalent."""
    upgraded: BundleDict = {'version': 3, 'analyses': []}

    if 'vcf' in x:
        vcf = x['vcf']
        # The v2 'vcf' section becomes a single v3 analysis entry.
        entry = {}
        if 'files' in vcf:
            entry['files'] = vcf['files']
        if 'filename_pattern' in vcf:
            entry['file_prefix'] = vcf['filename_pattern']
        upgraded['analyses'].append(entry)

    # Make sure the converted document is a valid v3 bundle.
    _SCHEMA_V3.validate(upgraded)

    return upgraded
|