PyPI - genelastic - Versions diffs - 0.6.1__tar.gz → 0.7.0__tar.gz - Mend

genelastic 0.6.1tar.gz → 0.7.0tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

genelastic-0.7.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,105 @@
+Metadata-Version: 2.2
+Name: genelastic
+Version: 0.7.0
+Summary: Generate and store genetic data into an Elasticsearch database.
+Author: CNRGH
+Author-email: Pierrick ROGER <pierrick.roger@cnrgh.fr>, Maxime BLANCHON <maxime.blanchon@cnrgh.fr>
+License: CeCILL
+Keywords: CNRGH,genelastic,generation,storage,elasticsearch,database
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: CEA CNRS Inria Logiciel Libre License, version 2.1 (CeCILL-2.1)
+Classifier: Programming Language :: Python :: 3.11
+Requires-Python: >=3.11
+Description-Content-Type: text/markdown
+Requires-Dist: elasticsearch
+Requires-Dist: PyVCF3
+Requires-Dist: schema
+Requires-Dist: PyYAML
+Requires-Dist: biophony>=1.2.1
+Requires-Dist: colorlog
+Provides-Extra: tests
+Requires-Dist: pytest; extra == "tests"
+Requires-Dist: mypy; extra == "tests"
+Requires-Dist: coverage; extra == "tests"
+Requires-Dist: yamllint; extra == "tests"
+Requires-Dist: types-PyYAML; extra == "tests"
+Requires-Dist: ruff<0.9,>=0.8.1; extra == "tests"
+Requires-Dist: pre-commit<4.1,>=4.0.1; extra == "tests"
+Requires-Dist: types-requests; extra == "tests"
+Requires-Dist: ansible<10.5,>=10.4.0; extra == "tests"
+Requires-Dist: ansible-lint<25,>=24.12.2; extra == "tests"
+Provides-Extra: docs
+Requires-Dist: sphinx; extra == "docs"
+Requires-Dist: sphinx-autoapi; extra == "docs"
+Requires-Dist: furo; extra == "docs"
+Provides-Extra: api
+Requires-Dist: flask; extra == "api"
+Requires-Dist: elasticsearch; extra == "api"
+Requires-Dist: environs; extra == "api"
+Requires-Dist: connexion[flask,swagger-ui,uvicorn]; extra == "api"
+Provides-Extra: ui
+Requires-Dist: flask; extra == "ui"
+Requires-Dist: requests; extra == "ui"
+Requires-Dist: environs; extra == "ui"
+# genelastic
+Stores genetic data in an Elasticsearch database.
+## Prerequisites
+- `python` >= 3.11
+- `make`
+## Installation
+To install dependencies, run the following command:
+```bash
+python -m venv .venv
+source .venv/bin/activate
+make install.deps
+```
+## Configuration
+To start the **API server**, the following environment variables should be defined:
+- `GENAPI_ES_URL`: URL of the Elasticsearch server,
+- `GENAPI_ES_ENCODED_API_KEY`: Encoded API key,
+- `GENAPI_ES_INDEX_PREFIX`: Prefix to identify indices of interest,
+- `GENAPI_ES_CERT_FP`: Certificate fingerprint of the Elasticsearch server.
+Then, run the following command:
+```bash
+make start-api
+```
+To start the **UI server**, the following environment variables should be defined:
+- `GENUI_API_URL`: URL of the API server.
+Then, run the following command:
+```bash
+make start-ui
+```
+## Developers
+This project uses [pre-commit](https://pre-commit.com/) to manage Git hook scripts. To install the project hooks, run:
+```bash
+pre-commit install
+```
+After that, each commit will succeed only if all hooks (defined in `.pre-commit-config.yaml`) pass.
+If necessary (though not recommended),
+you can skip these hooks by using the `--no-verify` / `-n` option when committing:
+```bash
+git commit -m "My commit message" --no-verify # This commit will not run installed hooks.
+```

genelastic-0.7.0/README.md ADDED Viewed

@@ -0,0 +1,60 @@
+# genelastic
+Stores genetic data in an Elasticsearch database.
+## Prerequisites
+- `python` >= 3.11
+- `make`
+## Installation
+To install dependencies, run the following command:
+```bash
+python -m venv .venv
+source .venv/bin/activate
+make install.deps
+```
+## Configuration
+To start the **API server**, the following environment variables should be defined:
+- `GENAPI_ES_URL`: URL of the Elasticsearch server,
+- `GENAPI_ES_ENCODED_API_KEY`: Encoded API key,
+- `GENAPI_ES_INDEX_PREFIX`: Prefix to identify indices of interest,
+- `GENAPI_ES_CERT_FP`: Certificate fingerprint of the Elasticsearch server.
+Then, run the following command:
+```bash
+make start-api
+```
+To start the **UI server**, the following environment variables should be defined:
+- `GENUI_API_URL`: URL of the API server.
+Then, run the following command:
+```bash
+make start-ui
+```
+## Developers
+This project uses [pre-commit](https://pre-commit.com/) to manage Git hook scripts. To install the project hooks, run:
+```bash
+pre-commit install
+```
+After that, each commit will succeed only if all hooks (defined in `.pre-commit-config.yaml`) pass.
+If necessary (though not recommended),
+you can skip these hooks by using the `--no-verify` / `-n` option when committing:
+```bash
+git commit -m "My commit message" --no-verify # This commit will not run installed hooks.
+```

genelastic-0.7.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,114 @@
+[build-system]
+requires = ["setuptools >= 70.0"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "genelastic"
+version = "0.7.0"
+dependencies = [
+    "elasticsearch",
+    "PyVCF3",
+    "schema",
+    "PyYAML",
+    "biophony>=1.2.1",
+    "colorlog",
+]
+requires-python = ">= 3.11"
+authors = [
+    { name = "CNRGH" },
+    { name = "Pierrick ROGER", email = "pierrick.roger@cnrgh.fr" },
+    { name = "Maxime BLANCHON", email = "maxime.blanchon@cnrgh.fr" }]
+description = "Generate and store genetic data into an Elasticsearch database."
+readme = "README.md"
+license = { text = "CeCILL" }
+keywords = ["CNRGH", "genelastic", "generation", "storage", "elasticsearch", "database"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: CEA CNRS Inria Logiciel Libre License, version 2.1 (CeCILL-2.1)",
+    "Programming Language :: Python :: 3.11"
+]
+[project.optional-dependencies]
+tests = [
+    "pytest",
+    "mypy",
+    "coverage",
+    "yamllint",
+    "types-PyYAML",
+    "ruff >= 0.8.1, < 0.9",
+    "pre-commit >= 4.0.1, < 4.1",
+    "types-requests",
+    "ansible >= 10.4.0, < 10.5",
+    "ansible-lint >= 24.12.2, < 25"
+]
+docs = [
+    "sphinx",
+    "sphinx-autoapi",
+    "furo"
+]
+api = [
+    "flask",
+    "elasticsearch",
+    "environs",
+    "connexion[flask,swagger-ui,uvicorn]"
+]
+ui = [
+    "flask",
+    "requests",
+    "environs"
+]
+[project.scripts]
+gnl-data = "genelastic.import_data.cli_gen_data:main"
+gnl-import = "genelastic.import_data.cli_import:main"
+gnl-info = "genelastic.import_data.cli_info:main"
+gnl-integrity = "genelastic.import_data.cli_integrity:main"
+gnl-validate = "genelastic.import_data.cli_validate:main"
+[tool.ruff]
+line-length = 80
+target-version = "py311"
+[tool.ruff.lint]
+ignore = [
+    "E501",
+    "D100",
+    "D103",
+    "D104",
+    "D105",
+    "D107",
+    "D205",
+    "D415",
+    "TRY400",
+    "S311", # Disable rule 'Standard pseudo-random generators are not suitable for cryptographic purposes'.
+    # The rules below are disabled because they conflict with 'ruff format'.
+    # https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
+    "W191",
+    "E111",
+    "E114",
+    "E117",
+    "D206",
+    "D300",
+    "Q000",
+    "Q001",
+    "Q002",
+    "Q003",
+    "COM812",
+    "COM819",
+    "ISC001",
+    "ISC002",
+]
+select = ["ALL"]
+[tool.ruff.lint.per-file-ignores]
+"tests/*" = [
+    "S101", # Disable "Use of assert detected" rule.
+    "PLR2004", # Disable "Magic value used in comparison" rule.
+    "SLF001"  # Disable "Private member accessed" rule.
+]
+[tool.ruff.lint.pydocstyle]
+convention = "google"

genelastic-0.7.0/src/genelastic/api/extends/example.py ADDED Viewed

@@ -0,0 +1,6 @@
+from flask import Response, jsonify
+def ping_2() -> Response:
+    """Liveness probe: always answer with the fixed JSON body `{"message": "pong_2"}`."""
+    payload = {"message": "pong_2"}
+    return jsonify(payload)

genelastic-0.7.0/src/genelastic/api/routes.py ADDED Viewed

@@ -0,0 +1,221 @@
+from importlib.metadata import version
+from pathlib import Path
+from typing import Any
+from flask import Response, current_app, jsonify
+def ping() -> Response:
+    """Liveness probe: always answer with the fixed JSON body `{"message": "pong"}`."""
+    payload = {"message": "pong"}
+    return jsonify(payload)
+def list_indices() -> Response:
+    """Return whatever the application's Elasticsearch query connection reports as its indices."""
+    es_conn = current_app.elastic_query_conn  # type: ignore[attr-defined]
+    return es_conn.get_indices()  # type: ignore[no-any-return]
+def retrieve_document(index_id: str, document_id: str) -> Response:
+    """Fetch one document by its ID and return it as JSON.
+    Args:
+        index_id: Index in which the document is looked up.
+        document_id: ID of the document to fetch.
+    """
+    es_conn = current_app.elastic_query_conn  # type: ignore[attr-defined]
+    return jsonify(es_conn.get_document_by_id(index_id, document_id))
+def list_wet_processes() -> Response:
+    """Return, as a JSON list, the `proc_id` values read from the wet processes index.
+    The index name is the configured `GENAPI_ES_INDEX_PREFIX` followed by
+    `-wet_processes`.
+    """
+    prefix = current_app.config["GENAPI_ES_INDEX_PREFIX"]
+    es_conn = current_app.elastic_query_conn  # type: ignore[attr-defined]
+    proc_ids = es_conn.get_field_values(f"{prefix}-wet_processes", "proc_id")
+    return jsonify(list(proc_ids))
+def list_bi_processes() -> Response:
+    """Return, as a JSON list, the `proc_id` values read from the bi processes index.
+    The index name is the configured `GENAPI_ES_INDEX_PREFIX` followed by
+    `-bi_processes`.
+    """
+    prefix = current_app.config["GENAPI_ES_INDEX_PREFIX"]
+    es_conn = current_app.elastic_query_conn  # type: ignore[attr-defined]
+    proc_ids = es_conn.get_field_values(f"{prefix}-bi_processes", "proc_id")
+    return jsonify(list(proc_ids))
+def list_analyses() -> Response:
+    """Return, as a JSON list, the file name of every `path` value in the analyses index.
+    Only the final component of each stored path is kept.
+    """
+    prefix = current_app.config["GENAPI_ES_INDEX_PREFIX"]
+    es_conn = current_app.elastic_query_conn  # type: ignore[attr-defined]
+    stored_paths = es_conn.get_field_values(f"{prefix}-analyses", "path")
+    return jsonify([Path(stored).name for stored in stored_paths])
+def list_analyses_wet_processes(proc_id: str) -> Response:
+    """List the paths of the analyses attached to one specific wet process.
+    Args:
+        proc_id: Value matched exactly against `metadata.wet_process.keyword`.
+    """
+    prefix = current_app.config["GENAPI_ES_INDEX_PREFIX"]
+    term_filter = {"metadata.wet_process.keyword": proc_id}
+    es_client = current_app.elastic_query_conn.client  # type: ignore[attr-defined]
+    found = es_client.search(
+        index=f"{prefix}-analyses", body={"query": {"term": term_filter}}
+    )
+    return jsonify([hit["_source"]["path"] for hit in found["hits"]["hits"]])
+def list_analyses_bi_processes(proc_id: str) -> Response:
+    """List the paths of the analyses attached to one specific bi process.
+    Args:
+        proc_id: Value matched exactly against `metadata.bi_process.keyword`.
+    """
+    prefix = current_app.config["GENAPI_ES_INDEX_PREFIX"]
+    term_filter = {"metadata.bi_process.keyword": proc_id}
+    es_client = current_app.elastic_query_conn.client  # type: ignore[attr-defined]
+    found = es_client.search(
+        index=f"{prefix}-analyses", body={"query": {"term": term_filter}}
+    )
+    return jsonify([hit["_source"]["path"] for hit in found["hits"]["hits"]])
+def list_snv_documents() -> Response:
+    """Collect the raw search hits of documents whose `alt.keyword` is "SNV".
+    A composite aggregation first enumerates the `alt.keyword` values among the
+    matching documents. One search (at most 1000 hits) is then run per bucket,
+    and the hits of all searches are returned together as one JSON list.
+    """
+    index_pattern = "genelastic-file-*"
+    es_conn = current_app.elastic_query_conn  # type: ignore[attr-defined]
+    alt_source = {"alt_value": {"terms": {"field": "alt.keyword"}}}
+    aggregation_query = {
+        "aggs": {
+            "snv_docs": {"composite": {"sources": [alt_source], "size": 1000}}
+        },
+        "query": {"term": {"alt.keyword": "SNV"}},
+        # Only the aggregation buckets are needed from this first request.
+        "size": 0,
+    }
+    hits = []
+    alt_buckets = es_conn.run_composite_aggregation(
+        index_pattern, aggregation_query
+    )
+    for alt_bucket in alt_buckets:
+        per_value_query = {
+            "query": {"term": {"alt.keyword": alt_bucket["key"]["alt_value"]}},
+            "size": 1000,
+        }
+        found = es_conn.client.search(index=index_pattern, body=per_value_query)
+        hits.extend(found["hits"]["hits"])
+    return jsonify(hits)
+def build_snv_search_query(
+    target_alt: str, target_svtype: str
+) -> dict[str, Any]:
+    """Build a search body matching documents on both `alt` and `info.SVTYPE`.
+    Args:
+        target_alt: Exact value required for `alt.keyword`.
+        target_svtype: Exact value required for `info.SVTYPE.keyword`.
+    Returns:
+        A search body whose bool query requires both terms, with `size` 1000.
+    """
+    alt_clause = {"term": {"alt.keyword": target_alt}}
+    svtype_clause = {"term": {"info.SVTYPE.keyword": target_svtype}}
+    return {
+        "query": {"bool": {"must": [alt_clause, svtype_clause]}},
+        "size": 1000,
+    }
+def build_snv_mutation_search_query(
+    target_svtypes: list[str],
+) -> dict[str, Any]:
+    """Build a search body for "SNV" documents restricted to some `info.SVTYPE` values.
+    Args:
+        target_svtypes: Accepted values for `info.SVTYPE.keyword`.
+    Returns:
+        A search body whose bool query requires `alt.keyword` to be "SNV" and
+        `info.SVTYPE.keyword` to be one of `target_svtypes`, with `size` 1000.
+    """
+    alt_clause = {"term": {"alt.keyword": "SNV"}}
+    svtype_clause = {"terms": {"info.SVTYPE.keyword": target_svtypes}}
+    return {
+        "query": {"bool": {"must": [alt_clause, svtype_clause]}},
+        "size": 1000,
+    }
+def list_snv_insertion_documents() -> Response:
+    """Return the `_source` of documents whose `alt` is "SNV" and `SVTYPE` is "INS".
+    The search covers the `genelastic-file-*` indices and is capped by the size
+    set in `build_snv_search_query`.
+    """
+    query_body = build_snv_search_query(target_alt="SNV", target_svtype="INS")
+    es_client = current_app.elastic_query_conn.client  # type: ignore[attr-defined]
+    found = es_client.search(index="genelastic-file-*", body=query_body)
+    return jsonify([hit["_source"] for hit in found["hits"]["hits"]])
+def list_snv_deletion_documents() -> Response:
+    """Return the `_source` of documents whose `alt` is "SNV" and `SVTYPE` is "DEL".
+    The search covers the `genelastic-file-*` indices and is capped by the size
+    set in `build_snv_search_query`.
+    """
+    query_body = build_snv_search_query(target_alt="SNV", target_svtype="DEL")
+    es_client = current_app.elastic_query_conn.client  # type: ignore[attr-defined]
+    found = es_client.search(index="genelastic-file-*", body=query_body)
+    return jsonify([hit["_source"] for hit in found["hits"]["hits"]])
+def list_snv_mutation_documents() -> Response:
+    """Return the `_source` of documents whose `alt` is "SNV" and `SVTYPE` is "INS" or "DEL".
+    The search covers the `genelastic-file-*` indices and is capped by the size
+    set in `build_snv_mutation_search_query`.
+    """
+    query_body = build_snv_mutation_search_query(target_svtypes=["INS", "DEL"])
+    es_client = current_app.elastic_query_conn.client  # type: ignore[attr-defined]
+    found = es_client.search(index="genelastic-file-*", body=query_body)
+    return jsonify([hit["_source"] for hit in found["hits"]["hits"]])
+def get_genelastic_version() -> Response:
+    """Return the installed version of this module's top-level package as JSON."""
+    # Keep only the first component of the dotted package name.
+    root_package, _, _ = __package__.partition(".")
+    return jsonify({"version": version(root_package)})

genelastic-0.7.0/src/genelastic/api/server.py ADDED Viewed

@@ -0,0 +1,80 @@
+from pathlib import Path
+from typing import Any
+import connexion
+import yaml
+from genelastic.common import ElasticQueryConn
+def load_yaml(file_path: Path) -> Any:  # noqa: ANN401
+    """Parse a UTF-8 YAML file with the safe loader and return the result.
+    Args:
+        file_path: YAML file to read.
+    Raises:
+        SystemExit: If the file content is not valid YAML.
+    """
+    with file_path.open(encoding="utf-8") as stream:
+        try:
+            return yaml.safe_load(stream)
+        except yaml.YAMLError as err:
+            raise SystemExit(err) from err
+def aggregate_openapi_specs(
+    main_spec_file: Path, additional_spec_path: Path
+) -> Any:  # noqa: ANN401
+    """Aggregate OpenAPI specifications from a main file and a directory
+    of additional specifications.
+    Args:
+        main_spec_file: YAML file holding the base specification.
+        additional_spec_path: Directory scanned (non-recursively) for `.yml` /
+            `.yaml` files whose `paths` entries are merged into the base.
+    Returns:
+        The main specification with the additional `paths` merged in. When the
+        same path key appears several times, the file sorting last wins.
+    Raises:
+        SystemExit: If the directory cannot be listed or a YAML file is invalid.
+    """
+    main_spec = load_yaml(main_spec_file)
+    try:
+        # Materialise the listing inside the `try`: `iterdir()` can be a lazy
+        # generator (it is on Python 3.11/3.12), in which case the OSError is
+        # only raised once iteration starts. Sorting also makes the merge
+        # order deterministic.
+        entries = sorted(additional_spec_path.iterdir())
+    except OSError as exc:
+        raise SystemExit(exc) from exc
+    # `paths` must be a mapping: a list default would break `.update()` below.
+    if main_spec.get("paths") is None:
+        main_spec["paths"] = {}
+    for entry in entries:
+        if not entry.is_file():
+            continue
+        if entry.suffix not in [".yml", ".yaml"]:
+            continue
+        content = load_yaml(entry)
+        # An empty YAML file loads as None and `paths:` may be empty; skip both.
+        if isinstance(content, dict) and content.get("paths"):
+            main_spec["paths"].update(content["paths"])
+    return main_spec
+# Initialize the Connexion application.
+connexion_app = connexion.FlaskApp(__name__)
+# NOTE(review): the dotted path starts with "src." — presumably it only resolves
+# when the process runs from the repository root; confirm for installed packages.
+connexion_app.app.config.from_object("src.genelastic.api.settings.Config")
+# Initialize the Elasticsearch client from the loaded configuration.
+es_url = connexion_app.app.config["GENAPI_ES_URL"]
+es_cert_fp = connexion_app.app.config["GENAPI_ES_CERT_FP"]
+es_api_key = connexion_app.app.config["GENAPI_ES_ENCODED_API_KEY"]
+connexion_app.app.elastic_query_conn = ElasticQueryConn(
+    es_url, es_cert_fp, api_key=es_api_key
+)
+# The `client.info()` argument is evaluated at import time, whatever the log level.
+connexion_app.app.logger.debug(
+    "Successfully connected to Elasticsearch server: %s",
+    connexion_app.app.elastic_query_conn.client.info(),
+)
+# Paths of the YAML files: main specification and directory of extensions.
+main_yaml_file = Path(__file__).parents[0] / "specification.yml"
+additional_yaml_dir = Path(__file__).parents[0] / "extends"
+# Load and merge the YAML files.
+yaml_spec = aggregate_openapi_specs(main_yaml_file, additional_yaml_dir)
+# Register the merged OpenAPI specification with the application.
+connexion_app.add_api(yaml_spec)
+if __name__ == "__main__":
+    connexion_app.run(debug=True)

{genelastic-0.6.1 → genelastic-0.7.0}/src/genelastic/api/settings.py RENAMED Viewed

@@ -1,11 +1,12 @@
-# pylint: disable=missing-module-docstring
 from environs import Env
 env = Env()
 env.read_env()
-# pylint: disable=missing-class-docstring,too-few-public-methods
 class Config:
+    """Flask config class."""
     # Charger toutes les variables d'environnement nécessaires
     GENAPI_ES_URL = env.url("GENAPI_ES_URL").geturl()
     GENAPI_ES_ENCODED_API_KEY = env.str("GENAPI_ES_ENCODED_API_KEY")

genelastic-0.7.0/src/genelastic/common/__init__.py ADDED Viewed

@@ -0,0 +1,39 @@
+"""Genelastic package for common code between API and import scripts."""
+from .cli import add_es_connection_args, add_verbose_control_args
+from .elastic import ElasticImportConn, ElasticQueryConn
+from .exceptions import DBIntegrityError
+from .types import (
+    AnalysisDocument,
+    AnalysisMetaData,
+    BioInfoProcessData,
+    Bucket,
+    BulkItems,
+    BundleDict,
+    MetadataDocument,
+    ProcessDocument,
+    RandomAnalysisData,
+    RandomBiProcessData,
+    RandomWetProcessData,
+    WetProcessesData,
+)
+__all__ = [
+    "AnalysisDocument",
+    "AnalysisMetaData",
+    "BioInfoProcessData",
+    "Bucket",
+    "BulkItems",
+    "BundleDict",
+    "DBIntegrityError",
+    "ElasticImportConn",
+    "ElasticQueryConn",
+    "MetadataDocument",
+    "ProcessDocument",
+    "RandomAnalysisData",
+    "RandomBiProcessData",
+    "RandomWetProcessData",
+    "WetProcessesData",
+    "add_es_connection_args",
+    "add_verbose_control_args",
+]

genelastic 0.6.1__tar.gz → 0.7.0__tar.gz

genelastic 0.6.1tar.gz → 0.7.0tar.gz