genelastic 0.6.1__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. genelastic/api/cli_start_api.py +18 -0
  2. genelastic/api/extends/example.py +2 -3
  3. genelastic/api/extends/example.yml +20 -0
  4. genelastic/api/routes.py +160 -23
  5. genelastic/api/server.py +42 -31
  6. genelastic/api/settings.py +5 -8
  7. genelastic/api/specification.yml +350 -0
  8. genelastic/common/__init__.py +41 -9
  9. genelastic/common/cli.py +103 -23
  10. genelastic/common/elastic.py +80 -49
  11. genelastic/common/exceptions.py +0 -2
  12. genelastic/common/server.py +51 -0
  13. genelastic/common/types.py +20 -15
  14. genelastic/import_data/__init__.py +23 -5
  15. genelastic/import_data/analyses.py +17 -20
  16. genelastic/import_data/analysis.py +69 -65
  17. genelastic/import_data/bi_process.py +7 -5
  18. genelastic/import_data/bi_processes.py +8 -8
  19. genelastic/import_data/cli_gen_data.py +143 -0
  20. genelastic/import_data/cli_import.py +379 -0
  21. genelastic/import_data/{info.py → cli_info.py} +104 -75
  22. genelastic/import_data/cli_integrity.py +384 -0
  23. genelastic/import_data/cli_validate.py +54 -0
  24. genelastic/import_data/constants.py +11 -32
  25. genelastic/import_data/data_file.py +23 -20
  26. genelastic/import_data/filename_pattern.py +26 -32
  27. genelastic/import_data/import_bundle.py +56 -47
  28. genelastic/import_data/import_bundle_factory.py +166 -158
  29. genelastic/import_data/logger.py +22 -18
  30. genelastic/import_data/random_bundle.py +425 -0
  31. genelastic/import_data/tags.py +46 -26
  32. genelastic/import_data/wet_process.py +8 -4
  33. genelastic/import_data/wet_processes.py +13 -8
  34. genelastic/ui/__init__.py +0 -0
  35. genelastic/ui/cli_start_ui.py +18 -0
  36. genelastic/ui/routes.py +86 -0
  37. genelastic/ui/server.py +14 -0
  38. genelastic/ui/settings.py +7 -0
  39. genelastic/ui/templates/analyses.html +11 -0
  40. genelastic/ui/templates/bi_processes.html +11 -0
  41. genelastic/ui/templates/home.html +4 -0
  42. genelastic/ui/templates/layout.html +34 -0
  43. genelastic/ui/templates/version.html +9 -0
  44. genelastic/ui/templates/wet_processes.html +11 -0
  45. genelastic-0.8.0.dist-info/METADATA +109 -0
  46. genelastic-0.8.0.dist-info/RECORD +52 -0
  47. {genelastic-0.6.1.dist-info → genelastic-0.8.0.dist-info}/WHEEL +1 -1
  48. genelastic-0.8.0.dist-info/entry_points.txt +8 -0
  49. genelastic/import_data/gen_data.py +0 -194
  50. genelastic/import_data/import_data.py +0 -292
  51. genelastic/import_data/integrity.py +0 -290
  52. genelastic/import_data/validate_data.py +0 -43
  53. genelastic-0.6.1.dist-info/METADATA +0 -41
  54. genelastic-0.6.1.dist-info/RECORD +0 -36
  55. genelastic-0.6.1.dist-info/entry_points.txt +0 -6
  56. {genelastic-0.6.1.dist-info → genelastic-0.8.0.dist-info}/top_level.txt +0 -0
genelastic/common/elastic.py

@@ -1,4 +1,3 @@
-# pylint: disable=missing-module-docstring
 import datetime
 import logging
 import time
@@ -13,14 +12,15 @@ from elasticsearch import Elasticsearch
 from .exceptions import DBIntegrityError
 from .types import Bucket, BulkItems
 
-logger = logging.getLogger('genelastic')
+logger = logging.getLogger("genelastic")
 
 
-class ElasticConn(ABC):  # pylint: disable=too-few-public-methods
+class ElasticConn(ABC):
     """Abstract class representing a connector for an Elasticsearch server."""
+
     client: Elasticsearch
 
-    def __init__(self, url: str, fingerprint: str, **kwargs: Any):
+    def __init__(self, url: str, fingerprint: str, **kwargs: Any) -> None:  # noqa: ANN401
         """Initialize an elasticsearch client instance.
 
         :url: URL of the Elasticsearch host.
@@ -34,41 +34,49 @@ class ElasticConn(ABC): # pylint: disable=too-few-public-methods
                 ssl_assert_fingerprint=fingerprint,
                 # Verify cert only when the fingerprint is not None.
                 verify_certs=bool(fingerprint),
-                **kwargs
+                **kwargs,
             )
             self.client.info()
-        except (elastic_transport.TransportError, elasticsearch.AuthenticationException) as e:
+        except (
+            elastic_transport.TransportError,
+            elasticsearch.AuthenticationException,
+        ) as e:
             raise SystemExit(e) from e
 
 
-class ElasticImportConn(ElasticConn):  # pylint: disable=too-few-public-methods
+class ElasticImportConn(ElasticConn):
     """Connector to import data into an Elasticsearch database."""
-    def import_items(self, bulk_items: BulkItems,
-                     start_time: float,
-                     total_items: int) -> None:
+
+    def import_items(
+        self, bulk_items: BulkItems, start_time: float, total_items: int
+    ) -> None:
         """Import items to the Elasticsearch database."""
         if len(bulk_items) > 0:
             elasticsearch.helpers.bulk(self.client, bulk_items)
             elapsed = time.perf_counter() - start_time
-            logger.info("Imported %d items in %s (%f items/s).", total_items,
-                        datetime.timedelta(seconds=elapsed), total_items / elapsed)
+            logger.info(
+                "Imported %d items in %s (%f items/s).",
+                total_items,
+                datetime.timedelta(seconds=elapsed),
+                total_items / elapsed,
+            )
 
 
 class ElasticQueryConn(ElasticConn):
     """Connector to query data from an Elasticsearch database."""
 
-    def get_indices(self) -> Any | str:
+    def get_indices(self) -> Any | str:  # noqa: ANN401
         """Return all indices."""
         return self.client.cat.indices(format="json").body
 
-    def get_document_by_id(self, index: str, document_id: str) -> Any | str:
+    def get_document_by_id(self, index: str, document_id: str) -> Any | str:  # noqa: ANN401
         """Return a document by its ID."""
         return self.client.get(index=index, id=document_id).body
 
-    def run_composite_aggregation(self, index: str, query: dict[str, typing.Any]) \
-            -> list[Bucket]:
-        """
-        Executes a composite aggregation on an Elasticsearch index and
+    def run_composite_aggregation(
+        self, index: str, query: dict[str, typing.Any]
+    ) -> list[Bucket]:
+        """Executes a composite aggregation on an Elasticsearch index and
         returns all paginated results.
 
         :param index: Name of the index to query.
@@ -77,29 +85,39 @@ class ElasticQueryConn(ElasticConn):
         """
         # Extract the aggregation name from the query dict.
         agg_name = next(iter(query["aggs"]))
-        all_buckets: typing.List[Bucket] = []
+        all_buckets: list[Bucket] = []
 
         try:
-            logger.debug("Running composite aggregation query %s on index '%s'.", query, index)
+            logger.debug(
+                "Running composite aggregation query %s on index '%s'.",
+                query,
+                index,
+            )
             response = self.client.search(index=index, body=query)
         except elasticsearch.NotFoundError as e:
-            raise SystemExit(f"Error: {e.message} for index '{index}'.") from e
+            msg = f"Error: {e.message} for index '{index}'."
+            raise SystemExit(msg) from e
 
         while True:
             # Extract buckets from the response.
-            buckets: typing.List[Bucket] = response['aggregations'][agg_name]['buckets']
+            buckets: list[Bucket] = response["aggregations"][agg_name][
+                "buckets"
+            ]
            all_buckets.extend(buckets)
 
             # Check if there are more results to fetch.
-            if 'after_key' in response['aggregations'][agg_name]:
-                after_key = response['aggregations'][agg_name]['after_key']
-                query['aggs'][agg_name]['composite']['after'] = after_key
+            if "after_key" in response["aggregations"][agg_name]:
+                after_key = response["aggregations"][agg_name]["after_key"]
+                query["aggs"][agg_name]["composite"]["after"] = after_key
                 try:
-                    logger.debug("Running query %s on index '%s'.", query, index)
+                    logger.debug(
+                        "Running query %s on index '%s'.", query, index
+                    )
                     # Fetch the next page of results.
                     response = self.client.search(index=index, body=query)
                 except elasticsearch.NotFoundError as e:
-                    raise SystemExit(f"Error: {e.message} for index '{index}'.") from e
+                    msg = f"Error: {e.message} for index '{index}'."
+                    raise SystemExit(msg) from e
             else:
                 break
 
@@ -114,25 +132,34 @@ class ElasticQueryConn(ElasticConn):
             "aggs": {
                 "get_field_values": {
                     "composite": {
-                        "sources": {"values": {"terms": {"field": f"{field_name}.keyword"}}},
+                        "sources": {
+                            "values": {
+                                "terms": {"field": f"{field_name}.keyword"}
+                            }
+                        },
                         "size": 1000,
                     }
                 }
-            }
+            },
         }
 
-        buckets: typing.List[Bucket] = self.run_composite_aggregation(index, query)
+        buckets: list[Bucket] = self.run_composite_aggregation(index, query)
 
         for bucket in buckets:
-            values.add(bucket['key']['values'])
+            values.add(bucket["key"]["values"])
 
         return values
 
-    def search_by_field_value(self, index: str, field: str, value: str) -> (
-            typing.Dict[str, typing.Any] | None):
+    def search_by_field_value(
+        self, index: str, field: str, value: str
+    ) -> dict[str, typing.Any] | None:
         """Search a document by a value for a certain field."""
-        logger.info("Searching for field '%s' with value '%s' inside index '%s'.",
-                    field, value, index)
+        logger.info(
+            "Searching for field '%s' with value '%s' inside index '%s'.",
+            field,
+            value,
+            index,
+        )
         search_query = {
             "query": {
                 "term": {
@@ -144,22 +171,23 @@ class ElasticQueryConn(ElasticConn):
         response = self.client.search(index=index, body=search_query)
 
         try:
-            return response['hits']['hits'][0]['_source']  # type: ignore
+            return response["hits"]["hits"][0]["_source"]  # type: ignore[no-any-return]
         except KeyError:
             return None
 
     def ensure_unique(self, index: str, field: str) -> None:
-        """
-        Ensure that all values of a field in an index are all unique.
+        """Ensure that all values of a field in an index are all unique.
 
         :param index: Name of the index.
         :param field: Field name to check for value uniqueness.
         :raises genelastic.common.DBIntegrityError:
            Some values of the given field are duplicated in the index.
        """
-
-        logger.info("Ensuring that the field '%s' in the index '%s' only contains unique values...",
-                    field, index)
+        logger.info(
+            "Ensuring that the field '%s' in the index '%s' only contains unique values...",
+            field,
+            index,
+        )
         query = {
             "size": 0,
             "aggs": {
@@ -167,17 +195,20 @@ class ElasticQueryConn(ElasticConn):
                     "terms": {
                         "field": f"{field}.keyword",
                         "size": 10000,
-                        "min_doc_count": 2
+                        "min_doc_count": 2,
                     }
                 }
-            }
+            },
+        }
+        buckets: list[Bucket] = self.run_composite_aggregation(index, query)
+        duplicated_processes: set[str] = {
+            str(bucket["key"]) for bucket in buckets
         }
-        buckets: typing.List[Bucket] = self.run_composite_aggregation(index, query)
-        duplicated_processes: typing.Set[str] = set(map(lambda bucket: str(bucket["key"]), buckets))
 
         if len(duplicated_processes) > 0:
-            raise DBIntegrityError(f"Found non-unique value for field {field} in index '{index}': "
-                                   f"{', '.join(duplicated_processes)}.")
+            msg = f"Found non-unique value for field {field} in index '{index}': {', '.join(duplicated_processes)}."
+            raise DBIntegrityError(msg)
 
-        logger.info("All values of field '%s' in index '%s' are unique.",
-                    field, index)
+        logger.info(
+            "All values of field '%s' in index '%s' are unique.", field, index
+        )
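Note on the pagination pattern above: a composite aggregation returns at most `size` buckets per call and includes an `after_key` cursor while more pages remain, which is the loop `run_composite_aggregation` implements. A minimal standalone sketch of the same loop; the client URL, index name, and aggregated field are placeholders, not values from this package:

    import typing

    from elasticsearch import Elasticsearch

    Bucket: typing.TypeAlias = dict[str, dict[typing.Any, typing.Any]]

    client = Elasticsearch("https://localhost:9200")  # placeholder URL
    query: dict[str, typing.Any] = {
        "size": 0,
        "aggs": {
            "get_field_values": {
                "composite": {
                    # Placeholder field; the package aggregates on f"{field_name}.keyword".
                    "sources": {"values": {"terms": {"field": "sample.keyword"}}},
                    "size": 1000,
                }
            }
        },
    }

    all_buckets: list[Bucket] = []
    response = client.search(index="my-index", body=query)
    while True:
        agg = response["aggregations"]["get_field_values"]
        all_buckets.extend(agg["buckets"])
        if "after_key" not in agg:  # no cursor means this was the last page
            break
        # Resume the aggregation right after the last bucket of this page.
        query["aggs"]["get_field_values"]["composite"]["after"] = agg["after_key"]
        response = client.search(index="my-index", body=query)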

genelastic/common/exceptions.py

@@ -1,5 +1,3 @@
-# pylint: disable=missing-module-docstring
-
 class DBIntegrityError(Exception):
     """Represents an integrity error,
     raised when the database content does not match the expected data schema.

genelastic/common/server.py

@@ -0,0 +1,51 @@
+import argparse
+import subprocess
+import sys
+
+import uvicorn
+
+
+def start_dev_server(app_module: str, args: argparse.Namespace) -> None:
+    """Start the development server using Uvicorn.
+    :args app_module: The module containing the Flask server to start.
+    :args argparse.Namespace: The parsed arguments.
+    """
+    uvicorn.run(
+        app_module,
+        host=args.host,
+        port=args.port,
+        log_level=args.log_level,
+        reload=True,
+    )
+
+
+def start_prod_server(app_module: str, args: argparse.Namespace) -> None:
+    """Start the production server using Gunicorn.
+    It will spawn one primary process and workers
+    :args app_module: The module containing the Flask server to start.
+    :args argparse.Namespace: The parsed arguments.
+    :raises subprocess.CalledProcessError: If gunicorn exits with a non-zero status code.
+    """
+    cmd = [
+        sys.executable,
+        "-m",
+        "gunicorn",
+        "-k",
+        "uvicorn.workers.UvicornWorker",
+        "--workers",
+        str(args.workers),
+        "--log-level",
+        args.log_level,
+        "-b",
+        f"{args.host}:{args.port}",
+        "--capture-output",
+        app_module,
+    ]
+
+    if args.log_file:
+        cmd.extend(["--log-file", args.log_file])
+
+    if args.access_logfile:
+        cmd.extend(["--access-logfile", args.access_logfile])
+
+    subprocess.run(cmd, check=True)  # noqa: S603
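These two helpers only read attributes off the parsed namespace, so any CLI that defines matching options can drive them. A hypothetical wiring is sketched below; the option names mirror the attributes the helpers read (host, port, log_level, workers, log_file, access_logfile), and the "genelastic.api.server:app" module path follows the usual uvicorn/gunicorn convention rather than anything confirmed by this diff:

    import argparse

    from genelastic.common.server import start_dev_server, start_prod_server

    parser = argparse.ArgumentParser()
    parser.add_argument("--host", default="127.0.0.1")
    parser.add_argument("--port", type=int, default=8000)
    parser.add_argument("--log-level", default="info")
    parser.add_argument("--workers", type=int, default=4)
    parser.add_argument("--log-file", default=None)
    parser.add_argument("--access-logfile", default=None)
    parser.add_argument("--dev", action="store_true")
    args = parser.parse_args()

    if args.dev:
        # Uvicorn with auto-reload, for development.
        start_dev_server("genelastic.api.server:app", args)
    else:
        # Gunicorn primary process plus UvicornWorker workers.
        start_prod_server("genelastic.api.server:app", args)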

genelastic/common/types.py

@@ -1,20 +1,25 @@
-# pylint: disable=missing-module-docstring
-
 import typing
 
+# Types related to Elasticsearch data import.
 Bucket: typing.TypeAlias = dict[str, dict[typing.Any, typing.Any]]
+BundleDict: typing.TypeAlias = dict[str, typing.Any]
+
+AnalysisMetaData: typing.TypeAlias = dict[str, str | int]
+WetProcessesData: typing.TypeAlias = dict[str, str | int | float]
+BioInfoProcessData: typing.TypeAlias = dict[str, str | list[str]]
 
-AnalysisMetaData: typing.TypeAlias = typing.Dict[str, str | int]
-WetProcessesData: typing.TypeAlias = typing.Dict[str, str | int | float]
-BioInfoProcessData: typing.TypeAlias = typing.Dict[str, str | typing.List[str]]
-BundleDict: typing.TypeAlias = typing.Dict[str, typing.Any]
+AnalysisDocument: typing.TypeAlias = dict[str, str | None | AnalysisMetaData]
+MetadataDocument: typing.TypeAlias = dict[
+    str, int | str | list[typing.Any | None]
+]
+ProcessDocument: typing.TypeAlias = (
+    dict[str, str] | WetProcessesData | BioInfoProcessData
+)
+BulkItems: typing.TypeAlias = list[
+    dict[str, str | MetadataDocument | AnalysisDocument | ProcessDocument]
+]
 
-AnalysisDocument: typing.TypeAlias = typing.Dict[str, str | None | AnalysisMetaData]
-MetadataDocument: typing.TypeAlias = typing.Dict[str, int | str | typing.List[typing.Any | None]]
-ProcessDocument: typing.TypeAlias = (typing.Dict[str, str] |
-                                     WetProcessesData |
-                                     BioInfoProcessData)
-BulkItems: typing.TypeAlias = typing.List[typing.Dict[str, str |
-                                                      MetadataDocument |
-                                                      AnalysisDocument |
-                                                      ProcessDocument]]
+# Types related to random bundle generation.
+RandomBiProcessData: typing.TypeAlias = dict[str, str | list[dict[str, str]]]
+RandomWetProcessData: typing.TypeAlias = dict[str, str | float]
+RandomAnalysisData: typing.TypeAlias = dict[str, str | list[int | str]]
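For illustration, BulkItems matches the action format accepted by elasticsearch.helpers.bulk, which ElasticImportConn.import_items forwards to. The index and field names below are invented for the example, not taken from this package:

    from genelastic.common.types import AnalysisDocument, BulkItems

    # One bulk action per document; "_index" routes it, "_source" is the payload.
    doc: AnalysisDocument = {"path": "/data/sample1.vcf.gz", "md5": None}
    bulk_items: BulkItems = [
        {"_index": "analyses", "_source": doc},
    ]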

genelastic/import_data/__init__.py

@@ -1,9 +1,27 @@
 """Genelastic package for importing Genomic data into Elasticsearch."""
+
 from .analysis import Analysis
-from .import_bundle_factory import (make_import_bundle_from_files,
-                                    load_import_bundle_file)
-from .tags import Tags
 from .import_bundle import ImportBundle
+from .import_bundle_factory import (
+    load_import_bundle_file,
+    make_import_bundle_from_files,
+)
+from .random_bundle import (
+    RandomAnalysis,
+    RandomBiProcess,
+    RandomBundle,
+    RandomWetProcess,
+)
+from .tags import Tags
 
-__all__ = ['Analysis', 'Tags', 'ImportBundle', 'make_import_bundle_from_files',
-           'load_import_bundle_file']
+__all__ = [
+    "Analysis",
+    "ImportBundle",
+    "RandomAnalysis",
+    "RandomBiProcess",
+    "RandomBundle",
+    "RandomWetProcess",
+    "Tags",
+    "load_import_bundle_file",
+    "make_import_bundle_from_files",
+]

genelastic/import_data/analyses.py

@@ -1,23 +1,22 @@
-# pylint: disable=missing-module-docstring
 import typing
 
-from genelastic.common import BundleDict
+from genelastic.common import BundleDict
 
 from .analysis import Analysis
 from .data_file import DataFile
 
+
 class Analyses:
-    """Class Analyses is a container of Analysis objects.
-    """
+    """Class Analyses is a container of Analysis objects."""
 
     def __init__(self) -> None:
-        self._arr: typing.List[Analysis] = []
+        self._arr: list[Analysis] = []
         self._iter_index: int = 0
 
     def __len__(self) -> int:
         return len(self._arr)
 
-    def __iter__(self) -> typing.Generator[Analysis, typing.Any, None]:
+    def __iter__(self) -> typing.Iterator[Analysis]:
         yield from self._arr
 
     @typing.overload
@@ -25,13 +24,13 @@ class Analyses:
         pass
 
     @typing.overload
-    def __getitem__(self, k: slice) -> typing.List[Analysis]:
+    def __getitem__(self, k: slice) -> list[Analysis]:
         pass
 
-    def __getitem__(self, k):  # type: ignore
+    def __getitem__(self, k):  # type: ignore[no-untyped-def]
         if isinstance(k, int):
             return self._arr[k]
-        return self._arr[k.start:k.stop]
+        return self._arr[k.start : k.stop]
 
     def add(self, a: Analysis) -> None:
         """Add one Analysis object."""
@@ -39,20 +38,18 @@ class Analyses:
 
     def get_nb_files(self, cat: str | None = None) -> int:
         """Get the total number of files as paths."""
-        return len(self.get_data_files(cat = cat))
-
-    def get_data_files(self, cat: str | None = None) -> typing.List[DataFile]:
-        """Get the total number of files as DataFile objects.
-        """
+        return len(self.get_data_files(cat=cat))
 
-        data_files: typing.List[DataFile] = []
+    def get_data_files(self, cat: str | None = None) -> list[DataFile]:
+        """Get the total number of files as DataFile objects."""
+        data_files: list[DataFile] = []
 
         for a in self._arr:
-            data_files.extend(a.get_data_files(cat = cat))
+            data_files.extend(a.get_data_files(cat=cat))
 
         return data_files
 
-    def get_all_categories(self) -> typing.Set[str]:
+    def get_all_categories(self) -> set[str]:
         """Return all the categories of the analyses."""
         categories = set()
         for a in self._arr:
@@ -60,10 +57,10 @@ class Analyses:
         return categories
 
     @classmethod
-    def from_array_of_dicts(cls, arr: typing.Sequence[BundleDict]
-                            ) -> typing.Self:
+    def from_array_of_dicts(
+        cls, arr: typing.Sequence[BundleDict]
+    ) -> typing.Self:
         """Build an Analyses instance."""
-
         analyses = cls()
 
         for d in arr:
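The `@typing.overload` pairing kept in this file is worth restating outside the diff: two decorated stubs give the type checker precise int-in/element-out and slice-in/list-out signatures, while the single untyped implementation does the runtime dispatch. A generic sketch of the same pattern, with a placeholder class and element type rather than genelastic code:

    import typing


    class IntContainer:
        """Placeholder container demonstrating the int/slice overload split."""

        def __init__(self) -> None:
            self._arr: list[int] = []

        @typing.overload
        def __getitem__(self, k: int) -> int: ...

        @typing.overload
        def __getitem__(self, k: slice) -> list[int]: ...

        def __getitem__(self, k):  # type: ignore[no-untyped-def]
            # Runtime dispatch; the overloads above exist only for the checker.
            if isinstance(k, int):
                return self._arr[k]
            return self._arr[k.start : k.stop]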