PyPI - genelastic - Versions diffs - 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl - Mend

genelastic 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

genelastic/__init__.py +0 -13
genelastic/api/__init__.py +0 -0
genelastic/api/extends/__init__.py +0 -0
genelastic/api/extends/example.py +7 -0
genelastic/api/routes.py +84 -0
genelastic/api/server.py +72 -0
genelastic/api/settings.py +13 -0
genelastic/common/__init__.py +12 -0
genelastic/common/cli.py +35 -0
genelastic/common/elastic.py +183 -0
genelastic/common/exceptions.py +6 -0
genelastic/common/types.py +20 -0
genelastic/import_data/__init__.py +9 -0
genelastic/{analyses.py → import_data/analyses.py} +3 -1
genelastic/{analysis.py → import_data/analysis.py} +3 -2
genelastic/{bi_process.py → import_data/bi_process.py} +1 -1
genelastic/{bi_processes.py → import_data/bi_processes.py} +2 -1
genelastic/{data_file.py → import_data/data_file.py} +3 -1
genelastic/{filename_pattern.py → import_data/filename_pattern.py} +2 -1
genelastic/{gen_data.py → import_data/gen_data.py} +3 -2
genelastic/{import_bundle.py → import_data/import_bundle.py} +2 -1
genelastic/{import_bundle_factory.py → import_data/import_bundle_factory.py} +3 -1
genelastic/{import_data.py → import_data/import_data.py} +49 -51
genelastic/{info.py → import_data/info.py} +29 -50
genelastic/{integrity.py → import_data/integrity.py} +53 -87
genelastic/{tags.py → import_data/tags.py} +2 -1
genelastic/{validate_data.py → import_data/validate_data.py} +6 -4
genelastic/{wet_processes.py → import_data/wet_processes.py} +2 -1
{genelastic-0.6.0.dist-info → genelastic-0.6.1.dist-info}/METADATA +7 -2
genelastic-0.6.1.dist-info/RECORD +36 -0
{genelastic-0.6.0.dist-info → genelastic-0.6.1.dist-info}/WHEEL +1 -1
genelastic-0.6.1.dist-info/entry_points.txt +6 -0
genelastic/common.py +0 -151
genelastic-0.6.0.dist-info/RECORD +0 -25
genelastic-0.6.0.dist-info/entry_points.txt +0 -6
/genelastic/{constants.py → import_data/constants.py} +0 -0
/genelastic/{logger.py → import_data/logger.py} +0 -0
/genelastic/{wet_process.py → import_data/wet_process.py} +0 -0
{genelastic-0.6.0.dist-info → genelastic-0.6.1.dist-info}/top_level.txt +0 -0

genelastic/{integrity.py → import_data/integrity.py} RENAMED Viewed

@@ -2,24 +2,16 @@
 import argparse
 import logging
 import typing
-import elasticsearch
-import urllib3
 from elasticsearch import NotFoundError
-from .common import (add_verbose_control_args, add_es_connection_args,
-                     connect_to_es, get_process_ids, Bucket, run_composite_aggregation)
-from .logger import configure_logging
+from genelastic.common import (ElasticQueryConn, DBIntegrityError, Bucket,
+                               add_verbose_control_args, add_es_connection_args)
+from .logger import configure_logging
 logger = logging.getLogger('genelastic')
 logging.getLogger('elastic_transport').setLevel(logging.WARNING)  # Disable excessive logging
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-class DBIntegrityError(Exception):
-    """Represents an integrity error,
-    raised when the database content does not match the expected data schema.
-    """
 def read_args() -> argparse.Namespace:
     """Read arguments from command line."""
@@ -32,49 +24,14 @@ def read_args() -> argparse.Namespace:
     return parser.parse_args()
-def ensure_unique(es: elasticsearch.Elasticsearch, index: str, field: str) -> None:
-    """
-    Ensure that all values of a field in an index are all unique.
-    :param es: Elasticsearch database instance.
-    :param index: Name of the index.
-    :param field: Field name to check for value uniqueness.
-    :raises DBIntegrityError: Some values of the given field are duplicated in the index.
-    """
-    logger.info("Ensuring that the field '%s' in the index '%s' only contains unique values...",
-                field, index)
-    query = {
-        "size": 0,
-        "aggs": {
-            "duplicate_proc_ids": {
-                "terms": {
-                    "field": f"{field}.keyword",
-                    "size": 10000,
-                    "min_doc_count": 2
-                }
-            }
-        }
-    }
-    buckets: typing.List[Bucket] = run_composite_aggregation(es, index, query)
-    duplicated_processes: typing.Set[str] = set(map(lambda bucket: str(bucket["key"]), buckets))
-    if len(duplicated_processes) > 0:
-        raise DBIntegrityError(f"Found non-unique value for field {field} in index '{index}': "
-                               f"{", ".join(duplicated_processes)}.")
-    logger.info("All values of field '%s' in index '%s' are unique.",
-                field, index)
-def check_for_undefined_file_indices(es: elasticsearch.Elasticsearch, analyses_index: str) -> None:
+def check_for_undefined_file_indices(es_query_conn: ElasticQueryConn, analyses_index: str) -> None:
     """
     Check for potentially undefined files indices in the analyses index.
-    :param es: Elasticsearch database instance.
+    :param es_query_conn: Elasticsearch database instance.
     :param analyses_index: Name of the index where analyses are stored.
-    :raises DBIntegrityError: Some files indices are used in the analyses index but
-        are undefined.
+    :raises genelastic.common.DBIntegrityError:
+        Some files indices are used in the analyses index but are undefined.
     """
     logger.info("Checking for references to undefined file indices in the index '%s'...",
                 analyses_index)
@@ -93,13 +50,13 @@ def check_for_undefined_file_indices(es: elasticsearch.Elasticsearch, analyses_i
         }
     }
-    buckets: typing.List[Bucket] = run_composite_aggregation(es, analyses_index, query)
+    buckets: typing.List[Bucket] = es_query_conn.run_composite_aggregation(analyses_index, query)
     for bucket in buckets:
         file_index = bucket['key']['file_index']
         try:
-            es.indices.get(index=file_index)
+            es_query_conn.client.indices.get(index=file_index)
             logger.debug("File index %s used in index '%s' is defined.",
                          file_index, analyses_index)
         except NotFoundError:
@@ -115,12 +72,12 @@ def check_for_undefined_file_indices(es: elasticsearch.Elasticsearch, analyses_i
     logger.info("All defined file indices are referenced.")
-def get_undefined_processes(es: elasticsearch.Elasticsearch, analyses_index: str,
+def get_undefined_processes(es_query_conn: ElasticQueryConn, analyses_index: str,
                             process_index: str, field: str) -> typing.Set[str]:
     """
     Return a set of undefined processes IDs in an index.
-    :param es: Elasticsearch database instance.
+    :param es_query_conn: Elasticsearch database instance.
     :param analyses_index: Name of the index where analyses are stored.
     :param process_index: Name of the index to check for undefined processes.
     :param field: Field name used to retrieve the process ID.
@@ -131,37 +88,39 @@ def get_undefined_processes(es: elasticsearch.Elasticsearch, analyses_index: str
         "aggs": {
             "get_analyses_processes": {
                 "composite": {
-                    "sources": { "process": {"terms": {"field": f"{field}.keyword"}}},
+                    "sources": {"process": {"terms": {"field": f"{field}.keyword"}}},
                     "size": 1000,
                 }
             }
         }
     }
-    buckets: typing.List[Bucket] = run_composite_aggregation(es, analyses_index, query)
+    buckets: typing.List[Bucket] = es_query_conn.run_composite_aggregation(analyses_index, query)
     used_processes = set(map(lambda bucket: bucket["key"]["process"], buckets))
     logger.debug("Used values for field '%s' in index '%s': %s",
                  field, analyses_index, used_processes)
-    defined_processes = get_process_ids(es, process_index, "proc_id")
+    defined_processes = es_query_conn.get_field_values(process_index, "proc_id")
     logger.debug("Defined values in index '%s': %s", process_index, defined_processes)
     return used_processes.difference(defined_processes)
-def check_for_undefined_wet_processes(es: elasticsearch.Elasticsearch,
+def check_for_undefined_wet_processes(es_query_conn: ElasticQueryConn,
                                       analyses_index: str, wet_process_index: str) -> None:
     """
     Check that each wet process used in the analyses index is defined.
-    :param es: Elasticsearch database instance.
+    :param es_query_conn: Elasticsearch database instance.
     :param analyses_index: Name of the index where analyses are stored.
     :param wet_process_index: Name of the index where wet processes are stored.
-    :raises DBIntegrityError: Some wet processes used in the analyses index are undefined.
+    :raises genelastic.common.DBIntegrityError:
+        Some wet processes used in the analyses index are undefined.
     """
     logger.info("Checking for undefined wet processes used in index '%s'...", analyses_index)
-    undefined_wet_processes = get_undefined_processes(es, analyses_index, wet_process_index,
+    undefined_wet_processes = get_undefined_processes(es_query_conn,
+                                                      analyses_index, wet_process_index,
                                                       "metadata.wet_process")
     if len(undefined_wet_processes) > 0:
@@ -171,18 +130,21 @@ def check_for_undefined_wet_processes(es: elasticsearch.Elasticsearch,
     logger.info("All wet processes used in index '%s' are defined.", wet_process_index)
-def check_for_undefined_bi_processes(es: elasticsearch.Elasticsearch,
+def check_for_undefined_bi_processes(es_query_conn: ElasticQueryConn,
                                      analyses_index: str, bi_process_index: str) -> None:
     """
     Check that each bio info process used in the analyses index is defined.
-    :param es: Elasticsearch database instance.
+    :param es_query_conn: Elasticsearch database instance.
     :param analyses_index: Name of the index where analyses are stored.
     :param bi_process_index: Name of the index where bio info processes are stored.
-    :raises DBIntegrityError: Some bio info processes used in the analyses index are undefined.
+    :raises genelastic.common.DBIntegrityError:
+        Some bio info processes used in the analyses index are undefined.
     """
-    logger.info("Checking for undefined bio info processes used in index '%s'...", analyses_index)
-    undefined_bi_processes = get_undefined_processes(es, analyses_index, bi_process_index,
+    logger.info("Checking for undefined bio info processes used in index '%s'...",
+                analyses_index)
+    undefined_bi_processes = get_undefined_processes(es_query_conn, analyses_index,
+                                                     bi_process_index,
                                                      "metadata.bi_process")
     if len(undefined_bi_processes) > 0:
@@ -193,18 +155,19 @@ def check_for_undefined_bi_processes(es: elasticsearch.Elasticsearch,
     logger.info("All bio info processes used in index '%s' are defined.", bi_process_index)
-def check_for_unused_file_indices(es: elasticsearch.Elasticsearch,
+def check_for_unused_file_indices(es_query_conn: ElasticQueryConn,
                                   analyses_index: str, index_prefix: str) -> int:
     """
     Check that each of the file indices are used in at least one analysis.
-    :param es: Elasticsearch database instance.
+    :param es_query_conn: Elasticsearch database instance.
     :param analyses_index: Name of the index where analyses are stored.
     :param index_prefix: Prefix given to all the indices of the ElasticSearch database.
     :returns: 1 if some file indices exists but are unused in the analyses index,
         and 0 otherwise.
     """
-    json_indices = es.cat.indices(index=f"{index_prefix}-file-*", format="json").body
+    json_indices = (es_query_conn.client.cat.
+                    indices(index=f"{index_prefix}-file-*", format="json").body)
     found_file_indices = set(map(lambda x: x["index"], json_indices))
     query = {
@@ -219,7 +182,7 @@ def check_for_unused_file_indices(es: elasticsearch.Elasticsearch,
         }
     }
-    buckets: typing.List[Bucket] = run_composite_aggregation(es, analyses_index, query)
+    buckets: typing.List[Bucket] = es_query_conn.run_composite_aggregation(analyses_index, query)
     used_files_indices = set(map(lambda bucket: bucket['key']['file_index'], buckets))
     unused_files_indices = found_file_indices.difference(used_files_indices)
@@ -233,12 +196,12 @@ def check_for_unused_file_indices(es: elasticsearch.Elasticsearch,
     return 0
-def check_for_unused_wet_processes(es: elasticsearch.Elasticsearch, analyses_index: str,
+def check_for_unused_wet_processes(es_query_conn: ElasticQueryConn, analyses_index: str,
                                    wet_proc_index: str) -> int:
     """
     Check for defined wet processes that are not used in the analyses index.
-    :param es: Elasticsearch database instance.
+    :param es_query_conn: Elasticsearch database instance.
     :param analyses_index: Name of the index where analyses are stored.
     :param wet_proc_index: Name of the index where wet processes are stored.
     :returns: 1 if some wet process are defined but unused in the analyses index,
@@ -246,10 +209,10 @@ def check_for_unused_wet_processes(es: elasticsearch.Elasticsearch, analyses_ind
     """
     logger.info("Checking for unused wet processes in the index '%s'...", wet_proc_index)
-    defined_wet_procs = get_process_ids(es, wet_proc_index, "proc_id")
+    defined_wet_procs = es_query_conn.get_field_values(wet_proc_index, "proc_id")
     logger.debug("Found the following defined wet processes: %s", defined_wet_procs)
-    used_wet_procs = get_process_ids(es, analyses_index, "metadata.wet_process")
+    used_wet_procs = es_query_conn.get_field_values(analyses_index, "metadata.wet_process")
     logger.debug("Following processes are used in the index '%s': %s",
                  analyses_index, used_wet_procs)
@@ -262,12 +225,12 @@ def check_for_unused_wet_processes(es: elasticsearch.Elasticsearch, analyses_ind
     return 0
-def check_for_unused_bi_processes(es: elasticsearch.Elasticsearch, analyses_index: str,
+def check_for_unused_bi_processes(es_query_conn: ElasticQueryConn, analyses_index: str,
                                   bi_proc_index: str) -> int:
     """
     Check for defined bio info processes that are not used in the analyses index.
-    :param es: Elasticsearch database instance.
+    :param es_query_conn: Elasticsearch database instance.
     :param analyses_index: Name of the index where analyses are stored.
     :param bi_proc_index: Name of the index where bio info processes are stored.
     :returns: 1 if some wet process are defined but unused in the analyses index,
@@ -275,10 +238,10 @@ def check_for_unused_bi_processes(es: elasticsearch.Elasticsearch, analyses_inde
     """
     logger.info("Checking for unused bio info processes in the index '%s'...", bi_proc_index)
-    defined_bi_procs = get_process_ids(es, bi_proc_index, "proc_id")
+    defined_bi_procs = es_query_conn.get_field_values(bi_proc_index, "proc_id")
     logger.debug("Found the following defined bio info processes: %s", defined_bi_procs)
-    used_bi_procs = get_process_ids(es, analyses_index, "metadata.bi_process")
+    used_bi_procs = es_query_conn.get_field_values(analyses_index, "metadata.bi_process")
     logger.debug("Following processes are used in the index '%s': %s",
                  analyses_index, used_bi_procs)
@@ -302,22 +265,25 @@ def main() -> None:
     wet_processes_index = f"{args.es_index_prefix}-wet_processes"
     bi_processes_index = f"{args.es_index_prefix}-bi_processes"
-    es = connect_to_es(host=args.es_host, port=args.es_port, usr=args.es_usr, pwd=args.es_pwd)
+    addr = f"https://{args.es_host}:{args.es_port}"
+    logger.info("Trying to connect to Elasticsearch at %s...", addr)
+    es_query_conn = ElasticQueryConn(addr, args.es_cert_fp,
+                                     basic_auth=(args.es_usr, args.es_pwd))
     # Fatal errors
     try:
-        ensure_unique(es, wet_processes_index, "proc_id")
-        ensure_unique(es, bi_processes_index, "proc_id")
-        check_for_undefined_file_indices(es, analyses_index)
-        check_for_undefined_wet_processes(es, analyses_index, wet_processes_index)
-        check_for_undefined_bi_processes(es, analyses_index, bi_processes_index)
+        es_query_conn.ensure_unique(wet_processes_index, "proc_id")
+        es_query_conn.ensure_unique(bi_processes_index, "proc_id")
+        check_for_undefined_file_indices(es_query_conn, analyses_index)
+        check_for_undefined_wet_processes(es_query_conn, analyses_index, wet_processes_index)
+        check_for_undefined_bi_processes(es_query_conn, analyses_index, bi_processes_index)
     except DBIntegrityError as e:
         raise SystemExit(e) from e
     # Warnings
-    check_for_unused_wet_processes(es, analyses_index, wet_processes_index)
-    check_for_unused_bi_processes(es, analyses_index, bi_processes_index)
-    check_for_unused_file_indices(es, analyses_index, args.es_index_prefix)
+    check_for_unused_wet_processes(es_query_conn, analyses_index, wet_processes_index)
+    check_for_unused_bi_processes(es_query_conn, analyses_index, bi_processes_index)
+    check_for_unused_file_indices(es_query_conn, analyses_index, args.es_index_prefix)
 if __name__ == '__main__':

genelastic/{tags.py → import_data/tags.py} RENAMED Viewed

@@ -3,7 +3,8 @@ import logging
 import re
 import typing
-from .common import BundleDict
+from genelastic.common import BundleDict
 from .constants import DEFAULT_TAG2FIELD, DEFAULT_TAG_SUFFIX, DEFAULT_TAG_PREFIX
 logger = logging.getLogger('genelastic')

genelastic/{validate_data.py → import_data/validate_data.py} RENAMED Viewed

@@ -2,11 +2,12 @@
 import argparse
 import logging
-from schema import SchemaError  # type: ignore[import-untyped]
+from schema import SchemaError  # type: ignore
+from genelastic.common import add_verbose_control_args
-from . import make_import_bundle_from_files
-from .common import add_verbose_control_args
 from .logger import configure_logging
+from .import_bundle_factory import make_import_bundle_from_files
 logger = logging.getLogger('genelastic')
@@ -15,7 +16,8 @@ def read_args() -> argparse.Namespace:
     """Read arguments from command line."""
     parser = argparse.ArgumentParser(description="Ensure that YAML files "
                                                  "follow the genelastic YAML bundle schema.",
-                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+                                     allow_abbrev=False)
     add_verbose_control_args(parser)
     parser.add_argument('files', type=str, nargs="+", default=None,
                         help="YAML files to validate.")

genelastic/{wet_processes.py → import_data/wet_processes.py} RENAMED Viewed

@@ -2,7 +2,8 @@
 import logging
 import typing
-from .common import BundleDict
+from genelastic.common import BundleDict
 from .wet_process import WetProcess
 logger = logging.getLogger('genelastic')

{genelastic-0.6.0.dist-info → genelastic-0.6.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: genelastic
-Version: 0.6.0
+Version: 0.6.1
 Summary: Generate and store genetic data into an Elasticsearch database.
 Author: CNRGH
 Author-email: Pierrick ROGER <pierrick.roger@cnrgh.fr>, Maxime BLANCHON <maxime.blanchon@cnrgh.fr>
@@ -18,6 +18,11 @@ Requires-Dist: schema
 Requires-Dist: PyYAML
 Requires-Dist: biophony >=1.0.1
 Requires-Dist: colorlog
+Provides-Extra: api
+Requires-Dist: flask ; extra == 'api'
+Requires-Dist: elasticsearch ; extra == 'api'
+Requires-Dist: environs ; extra == 'api'
+Requires-Dist: connexion[flask,swagger-ui,uvicorn] ; extra == 'api'
 Provides-Extra: docs
 Requires-Dist: sphinx ; extra == 'docs'
 Requires-Dist: sphinx-autoapi ; extra == 'docs'
@@ -25,7 +30,7 @@ Requires-Dist: furo ; extra == 'docs'
 Provides-Extra: tests
 Requires-Dist: pytest ; extra == 'tests'
 Requires-Dist: mypy ; extra == 'tests'
-Requires-Dist: pylint <3.3,>=3.2 ; extra == 'tests'
+Requires-Dist: pylint ; extra == 'tests'
 Requires-Dist: bandit ; extra == 'tests'
 Requires-Dist: coverage ; extra == 'tests'
 Requires-Dist: yamllint ; extra == 'tests'

genelastic-0.6.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,36 @@
+genelastic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+genelastic/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+genelastic/api/routes.py,sha256=FicBE_HStV6u8-Q9k6ABNLJNBwRFPsSTjAoTc0JnocU,2882
+genelastic/api/server.py,sha256=oJREb8LfPM9O3vd8grTnZhQptYcIYXY-qFlFH1Z7G-8,2271
+genelastic/api/settings.py,sha256=A6idvtaaT5Q-v78S8EKiE1LjYdLOvaXyxx7KrREq_9c,479
+genelastic/api/extends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+genelastic/api/extends/example.py,sha256=mVOG6HzkxZ2uzAHIlG6OqCJstED6Ie421O6SIBFmU2w,207
+genelastic/common/__init__.py,sha256=cK_dmQbI4pf7GQqTwdqbUqUMQcJuF9tBGpE0JR1EUp0,747
+genelastic/common/cli.py,sha256=t2Lk8I1ZyF5LlLnZu03JT4Z58_Yl5l2UMdKIyDuhqg8,1738
+genelastic/common/elastic.py,sha256=uDnc03jqvflBeUiAkevJq_oZnsKDxXOul6x0pF4d_wg,6956
+genelastic/common/exceptions.py,sha256=YSXqF2f29x9rKZYRT-5wko0ySGgggvNBnUV-8n2hoc4,203
+genelastic/common/types.py,sha256=RBHZwW3wNYIM4KG9APWUuvXp1oztjlMFpuFhzoi26UI,1061
+genelastic/import_data/__init__.py,sha256=uczwevd0ikG6GsA9Lkjei19TPCk0hny6iacKFje1w7w,413
+genelastic/import_data/analyses.py,sha256=jS9dRJveWiE06eRQT0tcra_UWwTVfjK1lDliWnv9nNA,1974
+genelastic/import_data/analysis.py,sha256=qHPi7iAiMxhy9Ljjv1qndmzUX9G3yFfcH4Mu8LX_ujQ,7918
+genelastic/import_data/bi_process.py,sha256=WZ4cqLnD0wyzCQdTpb6Zj11BN9B7ytaX7MBF2CFqlhg,708
+genelastic/import_data/bi_processes.py,sha256=scZgak_Ihp5UYgYBQcdPxVt7bnh7cESt-PJ4xx_pnWw,1416
+genelastic/import_data/constants.py,sha256=Y-3i8VlMFTcS158tsbBjvjsCvnWXVXE-Y3A2QOgcoOE,935
+genelastic/import_data/data_file.py,sha256=e7iEqltECVDTMxBc7JcUHOfv434_thFQlcLlYIEiD_I,2721
+genelastic/import_data/filename_pattern.py,sha256=w4sX9lCcTLcA2zgXE6lMQOHQSMEi5FgW_nVUzlmjpvE,1991
+genelastic/import_data/gen_data.py,sha256=tVms8CsKvxtxXlS1o6jEKpy1AJi1waI5MerZgQQitrc,6979
+genelastic/import_data/import_bundle.py,sha256=FMfw-ZwywWEXkRwaRdsj_E1VmoXiEbPrx5Wf8MUpx1Y,4876
+genelastic/import_data/import_bundle_factory.py,sha256=otaWF8NqimfAf9-1fenDAeU63e_6oR6-Ugdj0JsBt8w,9092
+genelastic/import_data/import_data.py,sha256=86YDW06XcqGCfKuizolDHwGnOjeN_i6x7NnfU6lAENQ,11788
+genelastic/import_data/info.py,sha256=naUAqMqIwo6L36KsLVGcE-d1G35PDvkaV-qrIUcSBQ0,7328
+genelastic/import_data/integrity.py,sha256=7mN-py67k2wVWOxnYCv0orabeL6TZ3O8wn3yk1Rw3vA,12207
+genelastic/import_data/logger.py,sha256=eV_LACPjkIg3G_D5g0oTcIRZL86E_iQ2UM_t0CwEkUI,1835
+genelastic/import_data/tags.py,sha256=815hsW-cpqX09vG3a4W9uWhRCMNMtpedJMrHQxJw6zg,3924
+genelastic/import_data/validate_data.py,sha256=u8-FNcofP0crx_jKdM8NRjfm8WK7_WwkBX_y0pM1TBc,1604
+genelastic/import_data/wet_process.py,sha256=uhsZrpDHUiP6-Y6f6_3xcsvqDl0ew_-9aY8vFr3kB3A,693
+genelastic/import_data/wet_processes.py,sha256=rWHX3RY4_mQd5JXHrzPCno6-uKVx8MmYxAQl_n9xftM,1366
+genelastic-0.6.1.dist-info/METADATA,sha256=uocOr4DpI4aJvimcMdBTt5DBZpBP4o14J9S_vHWMVZw,1537
+genelastic-0.6.1.dist-info/WHEEL,sha256=a7TGlA-5DaHMRrarXjVbQagU3Man_dCnGIWMJr5kRWo,91
+genelastic-0.6.1.dist-info/entry_points.txt,sha256=tPM55ca4ft8XNNFqRFJFtoQ0gTYmFi4Yww4R4qiVbjw,264
+genelastic-0.6.1.dist-info/top_level.txt,sha256=ra4gCsuKH1d0sXygcnwD_u597ir6bYYxWTS7dkA6vdM,11
+genelastic-0.6.1.dist-info/RECORD,,

{genelastic-0.6.0.dist-info → genelastic-0.6.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.1.0)
+Generator: setuptools (75.4.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

genelastic-0.6.1.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,6 @@
+[console_scripts]
+db_info = genelastic.import_data.info:main
+db_integrity = genelastic.import_data.integrity:main
+gen-data = genelastic.import_data.gen_data:main
+import = genelastic.import_data.import_data:main
+validate = genelastic.import_data.validate_data:main

genelastic/common.py DELETED Viewed

@@ -1,151 +0,0 @@
-"""
-Module: common
-This module contains custom types and functions shared by multiple genelastic scripts.
-"""
-import argparse
-import sys
-import typing
-import logging
-import elastic_transport
-import elasticsearch
-logger = logging.getLogger('genelastic')
-AnalysisMetaData: typing.TypeAlias = typing.Dict[str, str | int]
-WetProcessesData: typing.TypeAlias = typing.Dict[str, str | int | float]
-BioInfoProcessData: typing.TypeAlias = typing.Dict[str, str | typing.List[str]]
-BundleDict: typing.TypeAlias = typing.Dict[str, typing.Any]
-AnalysisDocument: typing.TypeAlias = typing.Dict[str, str | None | AnalysisMetaData]
-MetadataDocument: typing.TypeAlias = typing.Dict[str, int | str | typing.List[typing.Any | None]]
-ProcessDocument: typing.TypeAlias = (typing.Dict[str, str] |
-                                     WetProcessesData |
-                                     BioInfoProcessData)
-BulkItems: typing.TypeAlias = typing.List[typing.Dict[str, str |
-                                                           MetadataDocument |
-                                                           AnalysisDocument |
-                                                           ProcessDocument]]
-Bucket: typing.TypeAlias = typing.Dict[str, typing.Dict[typing.Any, typing.Any]]
-def connect_to_es(host: str, port: int, usr: str, pwd: str) -> elasticsearch.Elasticsearch:
-    """Connect to a remote Elasticsearch database."""
-    addr = f"https://{host}:{port}"
-    logger.info("Trying to connect to Elasticsearch at %s.", addr)
-    try:
-        es = elasticsearch.Elasticsearch(
-            addr,
-            # ssl_assert_fingerprint=args.es_cert_fp,
-            # ca_certs=args.es_cert,
-            verify_certs=False,
-            basic_auth=(usr, pwd)
-        )
-        logger.info(es.info())
-    except elastic_transport.TransportError as e:
-        logger.error(e.message)
-        sys.exit(1)
-    return es
-def run_composite_aggregation(es: elasticsearch.Elasticsearch,
-                              index: str, query: typing.Dict[str, typing.Any]) \
-        -> typing.List[Bucket]:
-    """
-    Executes a composite aggregation on an Elasticsearch index and returns all paginated results.
-    :param es: Elasticsearch client instance.
-    :param index: Name of the index to query.
-    :param query: Aggregation query to run.
-    :return: List of aggregation results.
-    """
-    # Extract the aggregation name from the query dict.
-    agg_name = next(iter(query["aggs"]))
-    all_buckets: typing.List[Bucket] = []
-    try:
-        logger.debug("Running composite aggregation query %s on index '%s'.", query, index)
-        response = es.search(index=index, body=query)
-    except elasticsearch.NotFoundError as e:
-        raise SystemExit(f"Error: {e.message} for index '{index}'.") from e
-    while True:
-        # Extract buckets from the response.
-        buckets: typing.List[Bucket] = response['aggregations'][agg_name]['buckets']
-        all_buckets.extend(buckets)
-        # Check if there are more results to fetch.
-        if 'after_key' in response['aggregations'][agg_name]:
-            after_key = response['aggregations'][agg_name]['after_key']
-            query['aggs'][agg_name]['composite']['after'] = after_key
-            try:
-                logger.debug("Running query %s on index '%s'.", query, index)
-                response = es.search(index=index, body=query)  # Fetch the next page of results.
-            except elasticsearch.NotFoundError as e:
-                raise SystemExit(f"Error: {e.message} for index '{index}'.") from e
-        else:
-            break
-    return all_buckets
-def get_process_ids(es: elasticsearch.Elasticsearch, index: str, proc_field_name: str) \
-        -> typing.Set[str]:
-    """Return a set of process IDs."""
-    process_ids = set()
-    query = {
-        "size": 0,
-        "aggs": {
-            "get_proc_ids": {
-                "composite": {
-                    "sources": {"proc_id": {"terms": {"field": f"{proc_field_name}.keyword"}}},
-                    "size": 1000,
-                }
-            }
-        }
-    }
-    buckets: typing.List[Bucket] = run_composite_aggregation(es, index, query)
-    for bucket in buckets:
-        process_ids.add(bucket['key']['proc_id'])
-    return process_ids
-def add_verbose_control_args(parser: argparse.ArgumentParser) -> None:
-    """
-    Add verbose control arguments to the parser.
-    Arguments are added to the parser by using its reference.
-    """
-    parser.add_argument('-q', '--quiet', dest='verbose', action='store_const',
-                        const=0, default=1,
-                        help='Set verbosity to 0 (quiet mode).')
-    parser.add_argument('-v', '--verbose', dest='verbose', action='count',
-                        default=1,
-                        help=('Verbose level. -v for information, -vv for debug,' +
-                              ' -vvv for trace.'))
-def add_es_connection_args(parser: argparse.ArgumentParser) -> None:
-    """
-    Add arguments to the parser needed to gather ElasticSearch server connection parameters.
-    Arguments are added to the parser by using its reference.
-    """
-    parser.add_argument('--es-host', dest='es_host', default='localhost',
-                        help='Address of Elasticsearch host.')
-    parser.add_argument('--es-port', type=int, default=9200, dest='es_port',
-                        help='Elasticsearch port.')
-    parser.add_argument('--es-usr', dest='es_usr', default='elastic',
-                        help='Elasticsearch user.')
-    parser.add_argument('--es-pwd', dest='es_pwd', required=True,
-                        help='Elasticsearch password.')
-    parser.add_argument('--es-cert', dest='es_cert',
-                        help='Elasticsearch certificate file.')
-    parser.add_argument('--es-cert-fp', dest='es_cert_fp',
-                        help='Elasticsearch certificate fingerprint.')
-    parser.add_argument('--es-index-prefix', dest='es_index_prefix',
-                        help='Add the given prefix to each index created during import.')

genelastic-0.6.0.dist-info/RECORD DELETED Viewed

@@ -1,25 +0,0 @@
-genelastic/__init__.py,sha256=lMTq5VsAuRjNlf3eAEqGE-Yvht63IJ0nIf9z_1hwC00,486
-genelastic/analyses.py,sha256=UTvNIhZpK2zF77zg02ftyAdUNpWhTwQJeqb2scU2b_Y,1961
-genelastic/analysis.py,sha256=N8oo8uXoFbdLb7C1_67rTFEzV962G_CIqlaEE9IPjiM,7876
-genelastic/bi_process.py,sha256=CT4AFFv-pyJceKnYCHKS7SKGhVuSxOJUw5CXSbED15s,698
-genelastic/bi_processes.py,sha256=ciGQyoR4Tuxhoq4FPK6b0O92AzGLgijVGqS19QeMg6I,1405
-genelastic/common.py,sha256=22SDJJmED2bQygO7GjXFfnB-KH0UujoH867bpz2OAQ4,6065
-genelastic/constants.py,sha256=Y-3i8VlMFTcS158tsbBjvjsCvnWXVXE-Y3A2QOgcoOE,935
-genelastic/data_file.py,sha256=QzOOThuCRlWg_iiH3-6FnYZaVgDVfJI0YxZ0Eoz30kc,2709
-genelastic/filename_pattern.py,sha256=IDQ9ffXxISJ6VMineu-qxnxZgjyejhVVesWIyUhbriE,1980
-genelastic/gen_data.py,sha256=s8-wTh7O7tyuszcIQC4dP1_kVyWLFMhtQMhQLL2JlD8,6922
-genelastic/import_bundle.py,sha256=ZqiKi5BYBo4by2FWBsS5qGyDRn7xxLtSb3ks1SqySNc,4865
-genelastic/import_bundle_factory.py,sha256=nK-VlJATgCNnJSTQotOva89j9H5pdJqU58u1QtlqJkA,9080
-genelastic/import_data.py,sha256=SENK1_Khw88Jgs8EXvDwk9jhQidiETxmAVhn9ag6jNs,11489
-genelastic/info.py,sha256=3fk1fPrpfK8oRo1WnABNDSGdEpq1G6wvCW_D8meyHss,7789
-genelastic/integrity.py,sha256=ypXl9kAdnsxa7LgZ9nDgsklBqVlG9I61A5hqfGeGYgs,13090
-genelastic/logger.py,sha256=eV_LACPjkIg3G_D5g0oTcIRZL86E_iQ2UM_t0CwEkUI,1835
-genelastic/tags.py,sha256=xHCLWgnXcLUUKN3zthQXoJ7yjEhPoQi7JLvdMtB6T5c,3913
-genelastic/validate_data.py,sha256=V0f7fFTs5FkVU8NoBfDI7mQDwITzW_QXt3bj5OgsdzQ,1531
-genelastic/wet_process.py,sha256=uhsZrpDHUiP6-Y6f6_3xcsvqDl0ew_-9aY8vFr3kB3A,693
-genelastic/wet_processes.py,sha256=PtV0HFs6rGan_-3-BiXeab-VBX1JQGucktoXE4GuaAk,1355
-genelastic-0.6.0.dist-info/METADATA,sha256=Ad8wOo_mTY3l7RVy9WNdMAzVnWhTxEb2uacXue1CdUU,1335
-genelastic-0.6.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-genelastic-0.6.0.dist-info/entry_points.txt,sha256=ZYi1_Rmjl-9XRywzPdV-U7TxA7Z6yyLVt-W13fZtxsQ,204
-genelastic-0.6.0.dist-info/top_level.txt,sha256=ra4gCsuKH1d0sXygcnwD_u597ir6bYYxWTS7dkA6vdM,11
-genelastic-0.6.0.dist-info/RECORD,,

genelastic-0.6.0.dist-info/entry_points.txt DELETED Viewed

@@ -1,6 +0,0 @@
-[console_scripts]
-db_info = genelastic.info:main
-db_integrity = genelastic.integrity:main
-gen-data = genelastic.gen_data:main
-import = genelastic.import_data:main
-validate = genelastic.validate_data:main

/genelastic/{constants.py → import_data/constants.py} RENAMED Viewed

File without changes

/genelastic/{logger.py → import_data/logger.py} RENAMED Viewed

File without changes

/genelastic/{wet_process.py → import_data/wet_process.py} RENAMED Viewed

File without changes

{genelastic-0.6.0.dist-info → genelastic-0.6.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

genelastic 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

genelastic 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl