PyPI - gammasimtools - Versions diffs - 0.12.0__py3-none-any.whl → 0.13.0__py3-none-any.whl - Mend

gammasimtools 0.12.0__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81) hide show

simtools/applications/production_extract_mc_event_data.py ADDED Viewed

@@ -0,0 +1,125 @@
+#!/usr/bin/python3
+"""
+Reduces and compiles event data from multiple input files into a structured dataset with event info.
+Command line arguments
+----------------------
+prefix (str, required)
+    Path prefix for the input files.
+output_file (str, required)
+    Path to save the output file.
+max_files (int, optional, default=100)
+    Maximum number of files to process.
+print_dataset_information (flag)
+    Print information about the datasets in the generated reduced event dataset.
+Example
+-------
+Generate a reduced dataset from input files and save the result.
+.. code-block:: console
+    simtools-production-extract-mc-event-data \
+        --prefix path/to/input_files/ \
+        --wildcard 'gamma_*dark*.simtel.zst' \
+        --output_file output_file.hdf5 \
+        --max_files 50 \
+        --print_dataset_information
+"""
+import logging
+from pathlib import Path
+import simtools.utils.general as gen
+from simtools.configuration import configurator
+from simtools.io_operations import io_handler
+from simtools.production_configuration.extract_mc_event_data import MCEventExtractor
+def _parse(label, description):
+    """
+    Parse command line arguments.
+    Returns
+    -------
+    dict
+        Parsed command-line arguments.
+    """
+    config = configurator.Configurator(label=label, description=description)
+    config.parser.add_argument(
+        "--prefix", type=str, required=True, help="Prefix path for input files."
+    )
+    config.parser.add_argument(
+        "--wildcard",
+        type=str,
+        required=True,
+        help="Wildcard for querying the files in the directory (e.g., 'gamma_*dark*.simtel.zst')",
+    )
+    config.parser.add_argument("--output_file", type=str, required=True, help="Output filename.")
+    config.parser.add_argument(
+        "--max_files", type=int, default=100, help="Maximum number of files to process."
+    )
+    config.parser.add_argument(
+        "--print_dataset_information",
+        action="store_true",
+        help="Print information about the datasets in the generated reduced event dataset.",
+    )
+    return config.initialize(db_config=False)
+def main():
+    """
+    Process event data files and store data in reduced dataset.
+    The reduced dataset contains the following information:
+        - simulated: List of simulated events.
+        - shower_id_triggered: List of triggered shower IDs
+            (as in the telescope definition file used for simulations).
+        - triggered_energies: List of energies for triggered events.
+        - num_triggered_telescopes: Number of triggered telescopes for each event.
+        - core_x: X-coordinate of the shower core (ground coordinates).
+        - core_y: Y-coordinate of the shower core (ground coordinates).
+        - trigger_telescope_list_list: List of lists containing triggered telescope IDs.
+        - file_names: List of input file names.
+        - shower_sim_azimuth: Simulated azimuth angle of the shower.
+        - shower_sim_altitude: Simulated altitude angle of the shower.
+        - array_altitudes: List of altitudes for the array.
+        - array_azimuths: List of azimuths for the array.
+    """
+    label = Path(__file__).stem
+    args_dict, _ = _parse(
+        label=label,
+        description=(
+            "Process files and store reduced dataset with event information, "
+            "array information and triggered telescopes."
+        ),
+    )
+    _logger = logging.getLogger()
+    _logger.setLevel(gen.get_log_level_from_user(args_dict["log_level"]))
+    _logger.info(f"Loading input files with prefix: {args_dict['prefix']}")
+    input_path = Path(args_dict["prefix"])
+    files = list(input_path.glob(args_dict["wildcard"]))
+    if not files:
+        _logger.warning("No matching input files found.")
+        return
+    output_path = io_handler.IOHandler().get_output_directory(label)
+    output_filepath = Path(output_path).joinpath(f"{args_dict['output_file']}")
+    output_filepath.parent.mkdir(parents=True, exist_ok=True)
+    generator = MCEventExtractor(files, output_filepath, args_dict["max_files"])
+    generator.process_files()
+    _logger.info(f"reduced dataset saved to: {output_filepath}")
+    if args_dict["print_dataset_information"]:
+        generator.print_dataset_information()
+if __name__ == "__main__":
+    main()

simtools/applications/run_application.py CHANGED Viewed

@@ -49,7 +49,7 @@ def _parse(label, description, usage):
         required=True,
         default=None,
     )
-    return config.initialize(db_config=False)
+    return config.initialize(db_config=True)
 def run_application(application, configuration, logger):
@@ -111,13 +111,12 @@ def read_application_configuration(configuration_file, logger):
     application_config = gen.collect_data_from_file(configuration_file).get("CTA_SIMPIPE")
     place_holder = "__SETTING_WORKFLOW__"
     workflow_dir, setting_workflow = get_subdirectory_name(configuration_file)
-    output_path = str(workflow_dir).replace("input", "output") + setting_workflow
-    logger.info(f"Setting workflow output path to {output_path}")
-    log_file = (
-        Path(application_config.get("LOG_PATH", "./").replace(place_holder, setting_workflow))
-        / "simtools.log"
+    output_path = (
+        workflow_dir.with_name(workflow_dir.name.replace("input", "output")) / setting_workflow
     )
-    log_file.parent.mkdir(parents=True, exist_ok=True)
+    output_path.mkdir(parents=True, exist_ok=True)
+    logger.info(f"Setting workflow output path to {output_path}")
+    log_file = output_path / "simtools.log"
     configurations = application_config.get("APPLICATIONS")
     for config in configurations:
         for key, value in config.get("CONFIGURATION", {}).items():
@@ -128,13 +127,13 @@ def read_application_configuration(configuration_file, logger):
                     item.replace(place_holder, setting_workflow) for item in value
                 ]
         config["CONFIGURATION"]["USE_PLAIN_OUTPUT_PATH"] = True
-        config["OUTPUT_PATH"] = output_path
+        config["CONFIGURATION"]["OUTPUT_PATH"] = str(output_path)
     return configurations, log_file
 def main():  # noqa: D103
-    args_dict, _ = _parse(
+    args_dict, db_config = _parse(
         Path(__file__).stem,
         description="Run simtools applications from configuration file.",
         usage="simtools-run-application --config_file config_file_name",
@@ -148,7 +147,7 @@ def main():  # noqa: D103
     with log_file.open("w", encoding="utf-8") as file:
         file.write("Running simtools applications\n")
-        file.write(dependencies.get_version_string())
+        file.write(dependencies.get_version_string(db_config))
         for config in configurations:
             logger.info(f"Running application: {config.get('APPLICATION')}")
             config = gen.change_dict_keys_case(config, False)

simtools/applications/submit_data_from_external.py CHANGED Viewed

@@ -112,7 +112,7 @@ def main():  # noqa: D103
     writer.ModelDataWriter.dump(
         args_dict=args_dict,
-        metadata=_metadata.get_top_level_metadata() if _metadata else None,
+        metadata=_metadata,
         product_data=data_validator.validate_and_transform(),
     )

simtools/applications/submit_model_parameter_from_external.py CHANGED Viewed

@@ -85,8 +85,9 @@ def _parse(label, description):
     )
     config.parser.add_argument(
         "--input_meta",
-        help="meta data file associated to input data",
+        help="meta data file(s) associated to input data (wildcards or list of files allowed)",
         type=str,
+        nargs="+",
         required=False,
     )
     config.parser.add_argument(

simtools/camera/single_photon_electron_spectrum.py CHANGED Viewed

@@ -10,7 +10,7 @@ from pathlib import Path
 from astropy.table import Table
 import simtools.data_model.model_data_writer as writer
-from simtools.constants import SCHEMA_PATH
+from simtools.constants import MODEL_PARAMETER_SCHEMA_URL, SCHEMA_PATH
 from simtools.data_model import validate_data
 from simtools.data_model.metadata_collector import MetadataCollector
 from simtools.io_operations import io_handler
@@ -44,6 +44,10 @@ class SinglePhotonElectronSpectrum:
         )
         self.io_handler = io_handler.IOHandler()
         self.data = ""  # Single photon electron spectrum data (as string)
+        self.args_dict["metadata_product_data_name"] = "single_pe_spectrum"
+        self.args_dict["metadata_product_data_url"] = (
+            MODEL_PARAMETER_SCHEMA_URL + "/pm_photoelectron_spectrum.schema.yml"
+        )
         self.metadata = MetadataCollector(args_dict=self.args_dict)
     def derive_single_pe_spectrum(self):
@@ -83,7 +87,7 @@ class SinglePhotonElectronSpectrum:
         writer.ModelDataWriter.dump(
             args_dict=self.args_dict,
-            metadata=self.metadata.top_level_meta,
+            metadata=self.metadata,
             product_data=table,
             validate_schema_file=None,
         )

simtools/constants.py CHANGED Viewed

@@ -6,6 +6,8 @@ from importlib.resources import files
 SCHEMA_PATH = files("simtools") / "schemas"
 # Path to metadata jsonschema
 METADATA_JSON_SCHEMA = SCHEMA_PATH / "metadata.metaschema.yml"
+# Path to plotting configuration json schema
+PLOT_CONFIG_SCHEMA = SCHEMA_PATH / "plot_configuration.metaschema.yml"
 # Path to model parameter metaschema
 MODEL_PARAMETER_METASCHEMA = SCHEMA_PATH / "model_parameter.metaschema.yml"
 # Path to model parameter description metaschema
@@ -14,3 +16,8 @@ MODEL_PARAMETER_DESCRIPTION_METASCHEMA = (
 )
 # Path to model parameter schema files
 MODEL_PARAMETER_SCHEMA_PATH = SCHEMA_PATH / "model_parameters"
+# URL to model parameter schema files
+MODEL_PARAMETER_SCHEMA_URL = (
+    "https://raw.githubusercontent.com/gammasim/simtools/main/src/simtools/schemas/"
+    "/model_parameters"
+)

simtools/data_model/metadata_collector.py CHANGED Viewed

@@ -11,6 +11,8 @@ import logging
 import uuid
 from pathlib import Path
+import yaml
 import simtools.constants
 import simtools.utils.general as gen
 import simtools.version
@@ -68,9 +70,7 @@ class MetadataCollector:
         self.top_level_meta = gen.change_dict_keys_case(
             data_dict=metadata_model.get_default_metadata_dict(), lower_case=True
         )
-        self.input_metadata = self._read_input_metadata_from_file(
-            metadata_file_name=metadata_file_name
-        )
+        self.input_metadata = self._read_input_metadata_from_file(metadata_file_name)
         self.collect_meta_data()
         if clean_meta:
             self.top_level_meta = self.clean_meta_data(self.top_level_meta)
@@ -103,6 +103,74 @@ class MetadataCollector:
             pass
         return self.top_level_meta
+    @staticmethod
+    def dump(args_dict, output_file, add_activity_name=False):
+        """
+        Write metadata to file (static method).
+        Parameters
+        ----------
+        args_dict: dict
+            Command line parameters
+        output_file: str or Path
+            Name of output file.
+        add_activity_name: bool
+            Add activity name to file name.
+        """
+        collector = MetadataCollector(args_dict)
+        collector.write(output_file, add_activity_name=add_activity_name)
+    def write(self, yml_file=None, keys_lower_case=False, add_activity_name=False):
+        """
+        Write toplevel metadata to file (yaml file format).
+        Parameters
+        ----------
+        metadata: dict
+            Metadata to be stored
+        yml_file: str
+            Name of output file.
+        keys_lower_case: bool
+            Write yaml keys in lower case.
+        add_activity_name: bool
+            Add activity name to file name.
+        Returns
+        -------
+        str
+            Name of output file
+        Raises
+        ------
+        FileNotFoundError
+            If yml_file not found.
+        TypeError
+            If yml_file is not defined.
+        """
+        metadata = self.get_top_level_metadata()
+        activity_name = metadata.get("cta", {}).get("activity", {}).get("name", "").rstrip(".")
+        suffix = f".{activity_name}.meta.yml" if add_activity_name else ".meta.yml"
+        if yml_file is None:
+            raise TypeError("No output file for metadata defined")
+        try:
+            yml_file = names.file_name_with_version(yml_file, suffix)
+            with open(yml_file, "w", encoding="UTF-8") as file:
+                yaml.safe_dump(
+                    gen.change_dict_keys_case(
+                        gen.remove_substring_recursively_from_dict(metadata, substring="\n"),
+                        keys_lower_case,
+                    ),
+                    file,
+                    sort_keys=False,
+                    explicit_start=True,
+                )
+            self._logger.info(f"Writing metadata to {yml_file}")
+            return yml_file
+        except FileNotFoundError as exc:
+            raise FileNotFoundError(f"Error writing metadata to {yml_file}") from exc
     def get_data_model_schema_file_name(self):
         """
         Return data model schema file name.
@@ -136,12 +204,12 @@ class MetadataCollector:
             self._logger.debug(f"Schema file from data model name: {self.data_model_name}")
             return str(schema.get_model_parameter_schema_file(self.data_model_name))
-        # from input metadata
+        # from first entry in input metadata (least preferred)
         try:
-            url = self.input_metadata[self.observatory]["product"]["data"]["model"]["url"]
+            url = self.input_metadata[0][self.observatory]["product"]["data"]["model"]["url"]
             self._logger.debug(f"Schema file from input metadata: {url}")
             return url
-        except KeyError:
+        except (KeyError, TypeError):
             pass
         self._logger.warning("No schema file found.")
@@ -170,7 +238,7 @@ class MetadataCollector:
         Parameters
         ----------
         from_input_meta: bool
-            Get site from input metadata (default: False)
+            Get site from first entry of input metadata (default: False)
         Returns
         -------
@@ -182,11 +250,11 @@ class MetadataCollector:
             _site = (
                 self.top_level_meta[self.observatory]["instrument"]["site"]
                 if not from_input_meta
-                else self.input_metadata[self.observatory]["instrument"]["site"]
+                else self.input_metadata[0][self.observatory]["instrument"]["site"]
             )
             if _site is not None:
                 return names.validate_site_name(_site)
-        except KeyError:
+        except (KeyError, TypeError):
             pass
         return None
@@ -202,7 +270,13 @@ class MetadataCollector:
         contact_dict["name"] = contact_dict.get("name") or self.args_dict.get("user_name")
         if contact_dict["name"] is None:
             self._logger.warning("No user name provided, take user info from system level.")
-            contact_dict["name"] = getpass.getuser()
+            try:
+                contact_dict["name"] = getpass.getuser()
+            except Exception as exc:  # pylint: disable=broad-except
+                contact_dict["name"] = "UNKNOWN_USER"
+                self._logger.warning(
+                    f"Failed to get user name: {exc}, setting it to {contact_dict['name']} "
+                )
         meta_dict = {
             "email": "user_mail",
             "orcid": "user_orcid",
@@ -221,17 +295,28 @@ class MetadataCollector:
             Dictionary for context metadata fields.
         """
-        try:  # wide try..except as for some cases we expect that there is no product metadata
-            reduced_product_meta = {
-                key: value
-                for key, value in self.input_metadata[self.observatory]["product"].items()
-                if key in {"description", "id", "creation_time", "valid", "format", "filename"}
-            }
-            self._fill_context_sim_list(context_dict["associated_data"], reduced_product_meta)
-        except (KeyError, TypeError):
-            self._logger.debug("No input product metadata appended to associated data.")
+        input_metadata = (
+            self.input_metadata if isinstance(self.input_metadata, list) else [self.input_metadata]
+        )
-    def _read_input_metadata_from_file(self, metadata_file_name=None):
+        for metadata in input_metadata:
+            try:  # wide try..except as for some cases we expect that there is no product metadata
+                reduced_product_meta = {
+                    key: value
+                    for key, value in metadata[self.observatory]["product"].items()
+                    if key in {"description", "id", "creation_time", "valid", "format", "filename"}
+                }
+                if metadata[self.observatory].get("activity", {}).get("name"):
+                    reduced_product_meta["activity_name"] = metadata[self.observatory][
+                        "activity"
+                    ].get("name")
+                context_dict["associated_data"] = self._fill_context_sim_list(
+                    context_dict["associated_data"], reduced_product_meta
+                )
+            except (KeyError, TypeError):
+                self._logger.debug("No input product metadata appended to associated data.")
+    def _read_input_metadata_from_file(self, metadata_file_name_expression=None):
         """
         Read and validate input metadata from file.
@@ -240,8 +325,8 @@ class MetadataCollector:
         Parameter
         ---------
-        metadata_file_name: str or Path
-            Name of metadata file.
+        metadata_file_name_expression: str or Path
+            Name of metadata file (regular expressions allowed).
         Returns
         -------
@@ -256,31 +341,32 @@ class MetadataCollector:
             if metadata does not exist
         """
-        metadata_file_name = (
-            self.args_dict.get("input_meta", None) or self.args_dict.get("input", None)
-            if metadata_file_name is None
-            else metadata_file_name
+        metadata_file_names = (
+            metadata_file_name_expression
+            or self.args_dict.get("input_meta")
+            or self.args_dict.get("input")
         )
-        if metadata_file_name is None:
+        try:
+            metadata_files = gen.resolve_file_patterns(metadata_file_names)
+        except ValueError:
             self._logger.debug("No input metadata file defined.")
-            return {}
-        self._logger.debug("Reading meta data from %s", metadata_file_name)
-        if Path(metadata_file_name).suffix in (".yaml", ".yml", ".json"):
-            _input_metadata = self._read_input_metadata_from_yml_or_json(metadata_file_name)
-        elif Path(metadata_file_name).suffix == ".ecsv":
-            _input_metadata = self._read_input_metadata_from_ecsv(metadata_file_name)
-        else:
-            self._logger.error("Unknown metadata file format: %s", metadata_file_name)
-            raise gen.InvalidConfigDataError
+            return None
+        metadata = []
+        for metadata_file in metadata_files:
+            self._logger.debug(f"Reading meta data from {metadata_file}")
+            if Path(metadata_file).suffix in (".yaml", ".yml", ".json"):
+                _input_metadata = self._read_input_metadata_from_yml_or_json(metadata_file)
+            elif Path(metadata_file).suffix == ".ecsv":
+                _input_metadata = self._read_input_metadata_from_ecsv(metadata_file)
+            else:
+                raise gen.InvalidConfigDataError(f"Unknown metadata file format: {metadata_file}")
-        schema.validate_dict_using_schema(_input_metadata, schema_file=METADATA_JSON_SCHEMA)
+            schema.validate_dict_using_schema(_input_metadata, schema_file=METADATA_JSON_SCHEMA)
+            metadata.append(gen.change_dict_keys_case(_input_metadata, lower_case=True))
-        return gen.change_dict_keys_case(
-            self._process_metadata_from_file(_input_metadata),
-            lower_case=True,
-        )
+        return metadata
     def _read_input_metadata_from_ecsv(self, metadata_file_name):
         """Read input metadata from ecsv file."""
@@ -348,13 +434,23 @@ class MetadataCollector:
             pass
         # DATA:MODEL
-        helper_dict = {"name": "name", "version": "version", "type": "meta_schema"}
-        for key, value in helper_dict.items():
-            product_dict["data"]["model"][key] = self.schema_dict.get(value, None)
-        product_dict["data"]["model"]["url"] = self.schema_file
+        product_dict["data"]["model"]["name"] = (
+            self.schema_dict.get("name")
+            or self.args_dict.get("metadata_product_data_name")
+            or "undefined_model_name"
+        )
+        product_dict["data"]["model"]["version"] = self.schema_dict.get("version", "0.0.0")
+        product_dict["data"]["model"]["type"] = self.schema_dict.get("meta_schema", None)
+        product_dict["data"]["model"]["url"] = self.schema_file or self.args_dict.get(
+            "metadata_product_data_url"
+        )
-        product_dict["format"] = self.args_dict.get("output_file_format", None)
-        product_dict["filename"] = str(self.args_dict.get("output_file", None))
+        product_dict["filename"] = str(self.args_dict.get("output_file", ""))
+        product_dict["format"] = (
+            self.args_dict.get("output_file_format")
+            or Path(product_dict["filename"]).suffix.lstrip(".")
+            or None
+        )
     def _fill_instrument_meta(self, instrument_dict):
         """
@@ -369,14 +465,17 @@ class MetadataCollector:
             Dictionary for instrument metadata fields.
         """
-        instrument_dict["site"] = self.args_dict.get("site", None)
-        instrument_dict["ID"] = self.args_dict.get("instrument") or self.args_dict.get(
-            "telescope", None
-        )
+        instrument_dict["site"] = self.args_dict.get("site")
+        instrument_dict["ID"] = self.args_dict.get("instrument") or self.args_dict.get("telescope")
         if instrument_dict["ID"]:
             instrument_dict["class"] = names.get_collection_name_from_array_element_name(
                 instrument_dict["ID"], False
             )
+            instrument_dict["type"] = (
+                names.get_array_element_type_from_name(instrument_dict["ID"])
+                if not instrument_dict.get("type")
+                else instrument_dict["type"]
+            )
     def _fill_process_meta(self, process_dict):
         """
@@ -460,15 +559,14 @@ class MetadataCollector:
             Updated meta list.
         """
-        if len(new_entry_dict) == 0:
+        if not new_entry_dict:
             return []
-        try:
-            if self._all_values_none(meta_list[0]):
-                meta_list[0] = new_entry_dict
-            else:
-                meta_list.append(new_entry_dict)
-        except (TypeError, IndexError):
-            meta_list = [new_entry_dict]
+        if meta_list is None or not meta_list:
+            return [new_entry_dict]
+        if self._all_values_none(meta_list[0]):
+            meta_list[0] = new_entry_dict
+        else:
+            meta_list.append(new_entry_dict)
         return meta_list
     def _process_metadata_from_file(self, meta_dict):
@@ -533,7 +631,7 @@ class MetadataCollector:
         """
         try:
             for document in _input_metadata["context"][key]:
-                self._fill_context_sim_list(context_dict[key], document)
+                context_dict[key] = self._fill_context_sim_list(context_dict[key], document)
         except KeyError:
             pass

gammasimtools 0.12.0__py3-none-any.whl → 0.13.0__py3-none-any.whl

gammasimtools 0.12.0__py3-none-any.whl → 0.13.0__py3-none-any.whl