reboost-0.3.0-py3-none-any.whl → reboost-0.4.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reboost/__init__.py CHANGED
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
-from reboost import build_hit, core, iterator, math, shape
-from reboost._version import version as __version__
+from . import build_hit, core, iterator, math, shape
+from ._version import version as __version__
 
 __all__ = [
     "__version__",
reboost/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.3.0'
-__version_tuple__ = version_tuple = (0, 3, 0)
+__version__ = version = '0.4.2'
+__version_tuple__ = version_tuple = (0, 4, 2)
reboost/build_glm.py CHANGED
@@ -9,7 +9,7 @@ from lgdo import Array, Table, lh5
 from lgdo.lh5 import LH5Iterator, LH5Store
 from numpy.typing import ArrayLike
 
-from reboost import utils
+from . import utils
 
 log = logging.getLogger(__name__)
 
@@ -35,6 +35,7 @@ def get_glm_rows(stp_evtids: ArrayLike, vert: ArrayLike, *, start_row: int = 0)
         output = ak.Array({"evtid": vert})
         output["n_rows"] = np.array([0] * len(vert), dtype=float)
         output["start_row"] = np.array([np.nan] * len(vert), dtype=float)
+
         return output
 
     if not isinstance(stp_evtids, np.ndarray):
@@ -182,6 +183,7 @@ def get_stp_evtids(
 def build_glm(
     stp_files: str | list[str],
     glm_files: str | list[str] | None,
+    lh5_groups: list | None = None,
     *,
     out_table_name: str = "glm",
     id_name: str = "g4_evtid",
@@ -225,7 +227,11 @@
         log.info(msg)
 
         # loop over the lh5_tables
-        lh5_table_list = list(lh5.ls(stp_file, "stp/"))
+        lh5_table_list = [
+            det
+            for det in lh5.ls(stp_file, "stp/")
+            if lh5_groups is None or det.split("/")[1] in lh5_groups
+        ]
 
         # get rows in the table
         if files.glm[file_idx] is None:
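
Note: build_glm() gains an lh5_groups argument, used above to filter the tables returned by lh5.ls(stp_file, "stp/"). A minimal sketch of the new call; the file and detector names are hypothetical:

    from reboost.build_glm import build_glm

    # only the stp/det001 table is scanned; with the default
    # lh5_groups=None every table under stp/ is processed as before
    build_glm("sim_stp.lh5", "sim_glm.lh5", lh5_groups=["det001"])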
reboost/build_hit.py CHANGED
@@ -20,7 +20,7 @@ A :func:`build_hit` to parse the following configuration file:
 
 # this is a list of included detectors (part of the processing group)
 detector_mapping:
-  - output: OBJECTS.lmeta.channglmap(on=ARGS.timestamp)
+  - output: OBJECTS.lmeta.channelmap(on=ARGS.timestamp)
       .group('system').geds
       .group('analysis.status').on
       .map('name').keys()
@@ -153,6 +153,12 @@ A :func:`build_hit` to parse the following configuration file:
       )
 
     pe_times: ak.concatenate([HITS.pe_times_lar, HITS.pe_times_pen], axis=-1)
+
+# can list here some lh5 objects that should just be forwarded to the
+# output file, without any processing
+forward:
+  - /vtx
+  - /some/dataset
 """
 
 from __future__ import annotations
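
Note: the new top-level "forward" key shown in the docstring above lists LH5 objects that build_hit copies verbatim from the stp file into the hit file. Per the implementation further down, a single path is also accepted, and forwarding an object that reboost has already processed raises a RuntimeError. A hypothetical snippet for a dict-based config; the paths are taken from the docstring example:

    config["forward"] = "/vtx"                     # a single object ...
    config["forward"] = ["/vtx", "/some/dataset"]  # ... or a list of them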
@@ -166,12 +172,11 @@ import awkward as ak
 import dbetto
 from dbetto import AttrsDict
 from lgdo import lh5
-from lgdo.types import Struct
-
-from reboost.iterator import GLMIterator
-from reboost.profile import ProfileDict
+from lgdo.lh5.exceptions import LH5EncodeError
 
 from . import core, utils
+from .iterator import GLMIterator
+from .profile import ProfileDict
 
 log = logging.getLogger(__name__)
 
@@ -225,19 +230,19 @@ def build_hit(
     # get the arguments
     if not isinstance(args, AttrsDict):
         args = AttrsDict(args)
+
     time_dict = ProfileDict()
 
     # get the global objects
-    global_objects = AttrsDict(
-        core.get_global_objects(
-            expressions=config.get("objects", {}), local_dict={"ARGS": args}, time_dict=time_dict
-        )
+    global_objects = core.get_global_objects(
+        expressions=config.get("objects", {}), local_dict={"ARGS": args}, time_dict=time_dict
     )
 
     # get the input files
     files = utils.get_file_dict(stp_files=stp_files, glm_files=glm_files, hit_files=hit_files)
 
     output_tables = {}
+
     # iterate over files
     for file_idx, (stp_file, glm_file) in enumerate(zip(files.stp, files.glm)):
         msg = (
@@ -257,21 +262,14 @@ def build_hit(
         time_dict[proc_name] = ProfileDict()
 
         # extract the output detectors and the mapping to input detectors
-        detectors_mapping = utils.merge_dicts(
-            [
-                core.get_detectors_mapping(
-                    mapping["output"],
-                    input_detector_name=mapping.get("input", None),
-                    objects=global_objects,
-                )
-                for mapping in proc_group.get("detector_mapping")
-            ]
+        detectors_mapping = core.get_detector_mapping(
+            proc_group.get("detector_mapping"), global_objects
         )
 
         # loop over detectors
         for in_det_idx, (in_detector, out_detectors) in enumerate(detectors_mapping.items()):
             msg = f"... processing {in_detector} (to {out_detectors})"
-            log.info(msg)
+            log.debug(msg)
 
             # get detector objects
             det_objects = core.get_detector_objects(
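
Note: merging several detector mappings moved from build_hit into core.get_detector_mapping, which feeds each entry through get_one_detector_mapping and merges the results. A sketch of a direct call; the detector and object names are invented for illustration:

    mapping = core.get_detector_mapping(
        [
            # fan one input table out to two output tables
            {"input": "det001", "output": ["det001_hit", "det001_aux"]},
            # or derive output names from a global object expression
            {"output": "OBJECTS.chmap.map('name').keys()"},
        ],
        global_objects,
    )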
@@ -283,18 +281,19 @@
             )
 
             # begin iterating over the glm
-            glm_it = GLMIterator(
+            iterator = GLMIterator(
                 glm_file,
                 stp_file,
                 lh5_group=in_detector,
                 start_row=start_evtid,
                 stp_field=in_field,
                 n_rows=n_evtid,
-                read_vertices=False,
                 buffer=buffer,
                 time_dict=time_dict[proc_name],
+                reshaped_files="hit_table_layout" not in proc_group,
             )
-            for stps, _, chunk_idx, _ in glm_it:
+
+            for stps, chunk_idx, _ in iterator:
                 # converting to awkward
                 if stps is None:
                     continue
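
Note: GLMIterator now yields three-tuples (the read_vertices flag is gone) and takes a reshaped_files switch, which build_hit derives from the absence of a hit_table_layout step. A minimal sketch of the new loop, mirroring the call above; file and table names are hypothetical and defaults are assumed for the remaining keywords:

    from reboost.iterator import GLMIterator

    it = GLMIterator(
        "sim_glm.lh5",
        "sim_stp.lh5",
        lh5_group="det001",
        start_row=0,
        buffer=10000,
        reshaped_files=True,  # no hit_table_layout step in this group
    )

    for stps, chunk_idx, _ in it:
        if stps is None:  # no steps in this chunk
            continue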
@@ -342,7 +341,7 @@ def build_hit(
                         time_dict=time_dict[proc_name],
                         name=field,
                     )
-                    hit_table.add_field(field, col)
+                    core.add_field_with_nesting(hit_table, field, col)
 
                 # remove unwanted fields
                 if "outputs" in proc_group:
@@ -353,46 +352,56 @@ def build_hit(
                 # assign units in the output table
                 hit_table = utils.assign_units(hit_table, attrs)
 
-                new_hit_file = (file_idx == 0) or (
-                    files.hit[file_idx] != files.hit[file_idx - 1]
-                )
-
-                wo_mode = utils.get_wo_mode(
-                    group=group_idx,
-                    out_det=out_det_idx,
-                    in_det=in_det_idx,
-                    chunk=chunk_idx,
-                    new_hit_file=new_hit_file,
-                    overwrite=overwrite,
-                )
-
                 # now write
                 if files.hit[file_idx] is not None:
-                    if time_dict is not None:
-                        start_time = time.time()
-
-                    if wo_mode != "a":
-                        lh5.write(
-                            Struct({out_detector: hit_table}),
-                            out_field,
-                            files.hit[file_idx],
-                            wo_mode=wo_mode,
-                        )
-                    else:
-                        lh5.write(
-                            hit_table,
-                            f"{out_field}/{out_detector}",
-                            files.hit[file_idx],
-                            wo_mode=wo_mode,
-                        )
-                    if time_dict is not None:
-                        time_dict[proc_name].update_field("write", start_time)
+                    # get modes to write with
+                    new_hit_file = (file_idx == 0) or (
+                        files.hit[file_idx] != files.hit[file_idx - 1]
+                    )
+
+                    wo_mode = utils.get_wo_mode(
+                        group=group_idx,
+                        out_det=out_det_idx,
+                        in_det=in_det_idx,
+                        chunk=chunk_idx,
+                        new_hit_file=new_hit_file,
+                        overwrite=overwrite,
+                    )
+                    # write the file
+                    utils.write_lh5(
+                        hit_table,
+                        files.hit[file_idx],
+                        time_dict[proc_name],
+                        out_field=out_field,
+                        out_detector=out_detector,
+                        wo_mode=wo_mode,
+                    )
 
                 else:
                     output_tables[out_detector] = core.merge(
                         hit_table, output_tables[out_detector]
                     )
 
+        # forward some data, if requested
+        # possible improvement: iterate over data if it's a lot
+        if "forward" in config and files.hit[file_idx] is not None:
+            obj_list = config["forward"]
+
+            if not isinstance(obj_list, list):
+                obj_list = [obj_list]
+
+            for obj in obj_list:
+                try:
+                    lh5.write(
+                        lh5.read(obj, stp_file),
+                        obj,
+                        files.hit[file_idx],
+                        wo_mode="write_safe",
+                    )
+                except LH5EncodeError as e:
+                    msg = f"cannot forward object {obj} as it has been already processed by reboost"
+                    raise RuntimeError(msg) from e
+
     # return output table or nothing
     log.info(time_dict)
 
reboost/build_tcm.py CHANGED
@@ -6,7 +6,7 @@ import re
 import awkward as ak
 from lgdo import Table, lh5
 
-from reboost.shape import group
+from .shape import group
 
 log = logging.getLogger(__name__)
 
reboost/cli.py CHANGED
@@ -5,11 +5,10 @@ import logging
 
 import dbetto
 
-from reboost.build_glm import build_glm
-from reboost.build_hit import build_hit
-from reboost.utils import _check_input_file, _check_output_file, get_file_list
-
+from .build_glm import build_glm
+from .build_hit import build_hit
 from .log_utils import setup_log
+from .utils import _check_input_file, _check_output_file, get_file_list
 
 log = logging.getLogger(__name__)
 
@@ -88,7 +87,8 @@ def cli(args=None) -> None:
     hit_parser.add_argument(
         "--glm-file",
         type=str,
-        required=True,
+        required=False,
+        default=None,
         help="glm file to process, if multithreaded this will be appended with _t$idx",
     )
     hit_parser.add_argument(
@@ -159,7 +159,9 @@ def cli(args=None) -> None:
        hit_files = get_file_list(args.hit_file, threads=args.threads)
 
        _check_input_file(parser, stp_files)
-       _check_input_file(parser, glm_files)
+
+       if args.glm_file is not None:
+           _check_input_file(parser, glm_files)
 
        if args.overwrite is False:
            _check_output_file(parser, hit_files)
@@ -174,8 +176,8 @@ def cli(args=None) -> None:
        msg += f" n_evtid: {args.n_evtid}\n"
        msg += f" in_field: {args.in_field}\n"
        msg += f" out_field: {args.out_field}\n"
-       msg += f" buffer: {args.buffer}"
-       msg += f" overwrite: {args.overwrite}"
+       msg += f" buffer: {args.buffer} \n"
+       msg += f" overwrite: {args.overwrite} \n"
 
        log.info(msg)
 
reboost/core.py CHANGED
@@ -8,9 +8,8 @@ import awkward as ak
 from dbetto import AttrsDict
 from lgdo.types import LGDO, Table
 
-from reboost.profile import ProfileDict
-
 from . import utils
+from .profile import ProfileDict
 
 log = logging.getLogger(__name__)
 
@@ -119,7 +118,7 @@
 
 def get_global_objects(
     expressions: dict[str, str], *, local_dict: dict, time_dict: dict | None = None
-) -> dict:
+) -> AttrsDict:
     """Extract global objects used in the processing.
 
     Parameters
@@ -141,19 +140,42 @@
 
     msg = f"Getting global objects with {expressions.keys()} and {local_dict}"
     log.info(msg)
+    res = {}
+
+    for obj_name, expression in expressions.items():
+        res[obj_name] = evaluate_object(
+            expression, local_dict=local_dict | {"OBJECTS": AttrsDict(res)}
+        )
 
-    res = AttrsDict(
-        {
-            obj_name: evaluate_object(expression, local_dict=local_dict)
-            for obj_name, expression in expressions.items()
-        }
-    )
     if time_dict is not None:
         time_dict.update_field(name="global_objects", time_start=time_start)
-    return res
 
+    return AttrsDict(res)
+
+
+def get_detector_mapping(detector_mapping: dict, global_objects: AttrsDict) -> dict:
+    """Get all the detector mapping using :func:`get_one_detector_mapping`.
 
-def get_detectors_mapping(
+    Parameters
+    ----------
+    detector_mapping
+        dictionary of detector mapping
+    global_objects
+        dictionary of global objects to use in evaluating the mapping.
+    """
+    return utils.merge_dicts(
+        [
+            get_one_detector_mapping(
+                mapping["output"],
+                input_detector_name=mapping.get("input", None),
+                objects=global_objects,
+            )
+            for mapping in detector_mapping
+        ]
+    )
+
+
+def get_one_detector_mapping(
     output_detector_expression: str | list,
     objects: AttrsDict | None = None,
     input_detector_name: str | None = None,
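
Note: get_global_objects now evaluates the expressions one at a time and exposes the partially built result as OBJECTS, so a later object can be constructed from an earlier one (relying on dict insertion order). A hypothetical call; the object names and expressions are illustrative:

    objs = core.get_global_objects(
        expressions={
            # evaluated first
            "lmeta": "LegendMetadata(ARGS.metadata_path)",
            # evaluated second, may already reference OBJECTS.lmeta
            "chmap": "OBJECTS.lmeta.channelmap(on=ARGS.timestamp)",
        },
        local_dict={"ARGS": args},  # args: AttrsDict of user arguments
    )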
@@ -342,6 +364,55 @@ def evaluate_hit_table_layout(
     return res
 
 
+def add_field_with_nesting(tab: Table, col: str, field: LGDO) -> Table:
+    """Add a field handling the nesting."""
+    subfields = col.strip("/").split("___")
+    tab_next = tab
+
+    for level in subfields:
+        # if we are at the end, just add the field
+        if level == subfields[-1]:
+            tab_next.add_field(level, field)
+            break
+
+        if not level:
+            msg = f"invalid field name '{field}'"
+            raise RuntimeError(msg)
+
+        # otherwise, increase nesting
+        if level not in tab:
+            tab_next.add_field(level, Table(size=len(tab)))
+            tab_next = tab[level]
+        else:
+            tab_next = tab[level]
+
+    return tab
+
+
+def _get_table_keys(tab: Table):
+    """Get keys in a table."""
+    existing_cols = list(tab.keys())
+    output_cols = []
+    for col in existing_cols:
+        if isinstance(tab[col], Table):
+            output_cols.extend(
+                [f"{col}___{col_second}" for col_second in _get_table_keys(tab[col])]
+            )
+        else:
+            output_cols.append(col)
+
+    return output_cols
+
+
+def _remove_col(field: str, tab: Table):
+    """Remove column accounting for nesting."""
+    if "___" in field:
+        base_name, sub_field = field.split("___", 1)[0], field.split("___", 1)[1]
+        _remove_col(sub_field, tab[base_name])
+    else:
+        tab.remove_column(field, delete=True)
+
+
 def remove_columns(tab: Table, outputs: list) -> Table:
     """Remove columns from the table not found in the outputs.
 
@@ -356,11 +427,10 @@ def remove_columns(tab: Table, outputs: list) -> Table:
     -------
         the table with columns removed.
     """
-    existing_cols = list(tab.keys())
-    for col in existing_cols:
-        if col not in outputs:
-            tab.remove_column(col, delete=True)
-
+    cols = _get_table_keys(tab)
+    for col_unrename in cols:
+        if col_unrename not in outputs:
+            _remove_col(col_unrename, tab)
     return tab
 
 
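
Note: a triple underscore in a column name now acts as a nesting separator throughout core.py: add_field_with_nesting stores "trigger___time" as a "time" field inside a "trigger" sub-table, _get_table_keys flattens nested tables back to such names, and remove_columns matches against them. A minimal sketch; the column names are invented for illustration:

    import numpy as np
    from lgdo.types import Array, Table

    tab = Table(size=3)
    add_field_with_nesting(tab, "trigger___time", Array(np.array([1.0, 2.0, 3.0])))
    add_field_with_nesting(tab, "trigger___id", Array(np.array([7, 8, 9])))

    # keeps trigger/time and drops trigger/id via _remove_col
    remove_columns(tab, outputs=["trigger___time"])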