PyPI - Simple-Track - Versions diffs - 2.0.0__py3-none-any.whl - Mend

Simple-Track 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

simple_track-2.0.0.dist-info/METADATA +218 -0
simple_track-2.0.0.dist-info/RECORD +17 -0
simple_track-2.0.0.dist-info/WHEEL +5 -0
simple_track-2.0.0.dist-info/entry_points.txt +2 -0
simple_track-2.0.0.dist-info/licenses/LICENSE +373 -0
simple_track-2.0.0.dist-info/top_level.txt +1 -0
simpletrack/__init__.py +1 -0
simpletrack/exceptions.py +51 -0
simpletrack/feature.py +322 -0
simpletrack/flow_solver.py +589 -0
simpletrack/frame.py +521 -0
simpletrack/frame_output.py +295 -0
simpletrack/frame_tracker.py +962 -0
simpletrack/load.py +170 -0
simpletrack/run_simple_track.py +12 -0
simpletrack/track.py +281 -0
simpletrack/utils.py +145 -0

simpletrack/load.py ADDED Viewed

@@ -0,0 +1,170 @@
+import datetime as dt
+from typing import Union
+from numpy.typing import NDArray
+from simpletrack.utils import check_arrays
+class ConfigError(Exception):
+    """
+    Error thrown when one or more config input parameters are not valid
+    """
+def get_loader(loader_key: str):
+    available_loaders = {
+        "MWELoader": MWELoader,
+        "ChilboltonLoader": ChilboltonLoader,
+    }
+    try:
+        loader = available_loaders[loader_key]
+    except KeyError as err:
+        raise KeyError(f"Unknown loader: {loader_key}") from err
+    if not issubclass(loader, BaseLoader):
+        raise TypeError(f"Requested loader ({loader}) is not type BaseLoader")
+    return loader
+class BaseLoader:
+    """
+    Base class for building custom loaders for use with Simple-Track. To use, inherit
+    from this class and implement the `user_definable_load` method, which will take a
+    single input (filename) and should return a list of [datetime, array].
+    The loader should be initialised with a list of filenames, which will be
+    iterated through when the loader is used in Simple-Track.
+    Loaded data is checked for consistency and type before being passed to Simple-Track,
+    so the user only needs to worry about loading the data in the correct format.
+    Loaders should be
+    """
+    def __init__(self, input_data: Union[list[str] | dict]) -> None:
+        self.domain_shape = None
+        self.input_data = input_data
+        # Set the iterating list
+        if not isinstance(input_data, (list, tuple)):
+            raise TypeError(f"Expected input_data type list, got {type(input_data)}")
+    def __iter__(self):
+        self.iter_idx = 0
+        return self
+    def __next__(self) -> list[dt.datetime, NDArray]:
+        if self.iter_idx >= len(self.input_data):
+            raise StopIteration
+        next_fnm = self.input_data[self.iter_idx]
+        self.iter_idx += 1
+        time, data = self.user_definable_load(next_fnm)
+        self._check_loaded_data(time, data)
+        return time, data
+    # TODO: rename this to something better?
+    def user_definable_load(self, filename: str) -> list[dt.datetime, NDArray]:
+        raise NotImplementedError
+    def _check_loaded_data(
+        self,
+        output_time: dt.datetime,
+        output_arr: NDArray,
+    ) -> None:
+        # Check consistency of data shape
+        if self.domain_shape is None:
+            self.domain_shape = output_arr.shape
+        output_arr = check_arrays(output_arr, shape=self.domain_shape, ndim=2)
+        # Check output time is a sensible type
+        if not isinstance(output_time, dt.datetime):
+            raise TypeError(
+                f"Expected 'output_time' to be datetime object, got {type(output_time)}"
+            )
+class DictIterator(BaseLoader):
+    """
+    An alternative loading solution for users wish to load and/or pre-process their data
+    elsewhere and pass it directly to Simple-Track. The input should be a dictionary
+    with datetime keys and 2D array values. This will then iteratre through
+    the dictionary in datetime order.
+    """
+    def __init__(self, input_dict: dict) -> None:
+        self.domain_shape = None
+        self.input_data = input_dict
+        # Set the iterating list
+        if not isinstance(input_dict, dict):
+            raise TypeError(f"Expected input_data type dict, got {type(input_dict)}")
+        self.iterator = sorted(input_dict.keys())
+        if not all([isinstance(key, dt.datetime) for key in self.iterator]):
+            raise TypeError("Expected all input keys to be of type dt.datetime")
+    def __next__(self) -> list[NDArray, dt.datetime]:
+        if self.iter_idx >= len(self.iterator):
+            raise StopIteration
+        time = self.iterator[self.iter_idx]
+        data = self.input_data[time]
+        self.iter_idx += 1
+        self._check_loaded_data(time, data)
+        return time, data
+class MWELoader(BaseLoader):
+    def __init__(self, filenames: list):
+        super().__init__(filenames)
+    def user_definable_load(self, filename):
+        import numpy as np
+        base_time = dt.datetime(2024, 1, 1, 0, 0, 0)
+        data = np.loadtxt(filename)
+        self.file_id = str(filename)
+        mwe_idx = str(filename)[-7]
+        time = base_time + dt.timedelta(minutes=5 * int(mwe_idx))
+        return time, data
+class ChilboltonLoader(BaseLoader):
+    def __init__(self, filenames: list):
+        super().__init__(filenames)
+    def user_definable_load(self, filename):
+        import numpy as np
+        from netCDF4 import Dataset as ncfile
+        nc = ncfile(filename)
+        data = nc.variables["var"][200:600, 250:550] / 32
+        data = np.flipud(np.transpose(data))
+        date_id = str(filename)[-18:-11]
+        time_id = str(filename)[-9:-5]
+        time = dt.datetime(
+            year=int(date_id[0:4]),
+            month=int(date_id[4:6]),
+            day=int(date_id[6:]),
+            hour=int(time_id[0:2]),
+            minute=int(time_id[2:4]),
+        )
+        return time, data
+class LoadingBar:
+    """
+    Class for displaying a loading bar in the terminal. Initialised with the total
+    number of items to load and the length of the loading bar, The
+    "update_progress" method is then called to update the current progress
+    """
+    def __init__(self, total, bar_length=20):
+        self.total = total
+        self.bar_length = bar_length
+        init_padding = int(self.bar_length) * " "
+        print(f"Simple-Track Progress: [{init_padding}] 0/{self.total} (0%)", end="\r")
+    def update_progress(self, current):
+        fraction = current / self.total
+        arrow = int(fraction * self.bar_length - 1) * "-" + ">"
+        padding = int(self.bar_length - len(arrow)) * " "
+        ending = "\n" if current == self.total else "\r"
+        print(
+            f"Simple-Track Progress: [{arrow}{padding}] {current}/{self.total} ({int(fraction * 100)}%) ",
+            end=ending,
+        )

simpletrack/run_simple_track.py ADDED Viewed

@@ -0,0 +1,12 @@
+import sys
+from track import Tracker
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        raise Exception("Running SimpleTrack requires path to at least one config")
+    config_paths = sys.argv[1:]
+    for config_path in config_paths:
+        # With None passed into run method, uses input path in config
+        Tracker(config_path).run()

simpletrack/track.py ADDED Viewed

@@ -0,0 +1,281 @@
+"""
+Run the SimpleTrack algorithm to track objects through a sequence of images
+"""
+from pathlib import Path
+from typing import Union
+from yaml import safe_load
+from simpletrack.flow_solver import FlowSolver
+from simpletrack.frame import Frame, Timeline
+from simpletrack.frame_output import FrameOutputManager
+from simpletrack.frame_tracker import FrameTracker
+from simpletrack.load import ConfigError, DictIterator, LoadingBar, get_loader
+class Tracker:
+    """
+    Simple-Track manager controlling inputs, processing, outputs
+    """
+    def __init__(self, config_input: Union[str | dict]) -> None:
+        """
+        Initialize SimpleTrack with configuration file
+        Args:
+            config_iput (str|dict):
+                If str, provides Path to the configuration file
+                If dict, containts pre-loaded config parameters
+        """
+        if isinstance(config_input, str):
+            config_path = config_input
+            self.config = self._read_config(config_input)
+        elif isinstance(config_input, dict):
+            config_path = None
+            self._check_config(config_input)
+            self.config = config_input
+        else:
+            raise TypeError(
+                f"Expected config_input type str or dict, got {type(config_input)}"
+            )
+        self.start_time = None  # Will be set during run()
+        self.timeline = Timeline()
+        if "INPUT" in self.config:
+            self.file_type = self.config["INPUT"].get("file_type", None)
+        if "FLOW_SOLVER" in self.config:
+            self.flow_solver = FlowSolver(**self.config["FLOW_SOLVER"])
+        else:
+            self.flow_solver = FlowSolver()
+        if "TRACKING" in self.config:
+            self.frame_tracker = FrameTracker(**self.config["TRACKING"])
+            self.skip_tracking = self.config["TRACKING"].get("skip_tracking", False)
+        else:
+            self.frame_tracker = FrameTracker()
+            self.skip_tracking = False
+        if "OUTPUT" in self.config:
+            output_path = self.config["OUTPUT"].get("path", "./output")
+            expt_name = self.config["OUTPUT"].get(
+                "experiment_name", "Simple-Track Experiment"
+            )
+        # Output only if flagged in config
+        self.frame_output = None
+        if "OUTPUT" in self.config:
+            if self.config["OUTPUT"]["save_data"]:
+                self.frame_output = FrameOutputManager(
+                    output_path,
+                    expt_name,
+                    self.start_time,
+                    config_path,
+                )
+    def run(self, input_data: Union[list[str] | dict] = None) -> Timeline:
+        """
+        Runs SimpleTrack using the designated config options.
+        Input data can either be read in from filenames (list(str)) or provided
+        as input using dictionary
+        If input_data is None, SimpleTrack finds all valid files in ["PATH"]["data]
+        config input using "SimpleTrack.get_filenames_from_input_path"
+        If data is being read in using filenames, there must also be an associated
+        Loader class argument in config["PATH"]["loader"] that defines how the data
+        should be pre-processed and how the validity time should be determined.
+        Filenames should be ordered by time. Loaded data will be checked for consistent
+        array shapes. See docs or src.load.py for more.
+        If data is being provided as input using dict, it should be passed
+        with the respective datetime object as the key, and the numpy array to run
+        tracking on as the value. This will not use a predetermined Loader class to
+        load the data, although the same checks on consistent array shapes
+        will be applied.
+        Returns Timeline object containing Frames of data and tracked Features.
+        """
+        # Get input files to load if inputs not provided
+        if input_data is None:
+            input_data = self.get_filenames_from_input_path(file_type=self.file_type)
+        # Check type of input data and set up loader accordingly
+        if isinstance(input_data, list):
+            valid_types = (str, Path)
+            if not all([isinstance(fnm, valid_types) for fnm in input_data]):
+                types = [type(fnm) for fnm in input_data]
+                raise TypeError(
+                    f"If input_data is list it must only contain str, got {types}"
+                )
+            self.loading_bar = LoadingBar(total=len(input_data))
+            self.loader = get_loader(self.config["INPUT"]["loader"])(input_data)
+        elif isinstance(input_data, dict):
+            self.loading_bar = LoadingBar(total=len(input_data.values()))
+            self.loader = DictIterator(input_data)
+        else:
+            raise TypeError(
+                f"Expected input_data type list(str) or dict, got {type(input_data)}"
+            )
+        # print(f"Hello from proc {mp.current_process().name} with arg {filenames}\n")
+        # Iterate through sorted input data, perform tracking, output results if flagged
+        for fnm_idx, time_and_data in enumerate(self.loader):
+            if self.start_time is None:
+                self.start_time = time_and_data[0]
+            # Import data to Frame and add to Timeline
+            frame = Frame()
+            frame.import_time_and_data(*time_and_data)
+            frame.identify_features(**self.config["FEATURE"])
+            self.timeline.add_to_timelime(frame)
+            # If this is the first frame or tracking is disabled, skip tracking
+            if len(self.timeline.timeline) == 1 or self.skip_tracking:
+                self.loading_bar.update_progress(fnm_idx + 1)
+                # Output frame data to text file or npy file if flagged
+                if self.frame_output is not None:
+                    self.frame_output.features_to_txt(frame)
+                    self.frame_output.features_to_csv(frame)
+                    self.frame_output.fields_to_npy(frame)
+                continue
+            # Now run flow solver between previous and current frame
+            prev_frame = self.timeline.get_previous_frame(frame.time)
+            # Set max id for assigning to new features
+            frame.max_id = prev_frame.max_id
+            # Get the flow field that translates features between the two frames
+            y_flow, x_flow = self.flow_solver.analyse_flow(prev_frame, frame)
+            # Update the current Frame with these displacements
+            if y_flow is not None or x_flow is not None:
+                frame.assign_displacements(y_flow, x_flow)
+            # Match Features between Frames
+            self.frame_tracker.run(prev_frame, frame)
+            # Output frame data to text file and field to npy if flagged
+            if self.frame_output is not None:
+                self.frame_output.features_to_txt(frame)
+                self.frame_output.features_to_csv(frame)
+                self.frame_output.fields_to_npy(frame)
+            self.loading_bar.update_progress(fnm_idx + 1)
+        # Output additional fields if flagged
+        if self.frame_output is not None:
+            self.frame_output.output_density_field(
+                self.timeline, "init", centroid_only=False
+            )
+            self.frame_output.output_density_field(
+                self.timeline, "dissipation", centroid_only=False
+            )
+        return self.timeline
+    # def run_parallel(self, processes=4):
+    #     # Split filenames into chunks for each process
+    #     chunk_size = len(self.filenames) // processes
+    #     filename_chunks = [
+    #         self.filenames[i : i + chunk_size]
+    #         for i in range(0, len(self.filenames), chunk_size)
+    #     ]
+    #     with mp.Pool(processes=processes) as pool:
+    #         # TODO: figure out how to do this with the new version of run above, where
+    #         # not having filename inputs means it tries to get it from config...
+    #         pool.map(self.run, filename_chunks)
+    #     # TODO: then need a way to make the results consistent between
+    #     # different chunks.
+    #     # I.e., if the last event of chunk 1 contains a storm that is
+    #     # also present in the first event of chunk 2, then the chunk 2
+    #     # storm needs to have a consistent ID, needs to have updated lifetimes
+    #     # etc.
+    #     # This is apparently already solved in Will Keats/Callum Scullion MO
+    #     # code so don't need to reinvent the wheel here.
+    def get_filenames_from_input_path(
+        self, input_path: str = None, file_type: str = None
+    ) -> list:
+        """
+        Get a list of filenames from a given input path matching a given
+        file type
+        Args:
+            input_path (str, optional):
+                Input path to search for filenames
+                Defaults to self.config["INPUT"]["path"]
+            file_type (str, optional):
+                File type to search input_path for
+                Defaults to .nc
+        """
+        if input_path is None:
+            input_path = self.config["INPUT"]["path"]
+        supported_filetypes = [".nc"]
+        if file_type is not None:
+            if isinstance(file_type, str):
+                supported_filetypes.append(file_type)
+            elif isinstance(file_type, list):
+                if not all([isinstance(val, str) for val in file_type]):
+                    types = [type(val) for val in file_type]
+                    raise TypeError(f"Expected list to contain only str, got {types}")
+                for ftype in file_type:
+                    supported_filetypes.append(ftype)
+            else:
+                raise TypeError(f"Expected list or str, got {type(file_type)}")
+        filenames = sorted(
+            [
+                p
+                for p in Path(input_path).iterdir()
+                if p.is_file() and p.suffix in supported_filetypes
+            ]
+        )
+        if len(filenames) == 0:
+            raise FileNotFoundError(f"No files found in directory: {input_path}")
+        return filenames
+    def _read_config(self, config_path: str) -> dict:
+        """
+        Read config, check for necessary arguments (threshold, data paths, loader),
+        return dict of parameters.
+        Args:
+            config_path (str):
+                Path to config
+        Returns:
+            dict:
+                Simple-Track parameters
+        """
+        with open(config_path) as input:
+            config = safe_load(input)
+        self._check_config(config)
+        return config
+    def _check_config(self, config: dict) -> None:
+        # Check required top-level sections are present
+        required_sections = ["FEATURE"]
+        input_section = config.keys()
+        section_check = [section in input_section for section in required_sections]
+        if not all(section_check):
+            raise ConfigError(
+                f"config missing one or more required sections: {required_sections}"
+            )
+        # # Check required parameters are present
+        # required_params = ["data"]
+        # input_keys = config["PATH"].keys()
+        # required_input_check = [key in input_keys for key in required_params]
+        # if not all(required_input_check):
+        #     raise ConfigError(
+        #         f"config missing one or more required inputs: {required_params}"
+        #     )
+        if "threshold" not in config["FEATURE"]:
+            raise ConfigError("config missing required threshold input")

simpletrack/utils.py ADDED Viewed

@@ -0,0 +1,145 @@
+import numpy as np
+from simpletrack.exceptions import (
+    ArrayShapeError,
+    ArrayTypeError,
+    FloatIDError,
+    IDError,
+    NegativeIDError,
+    ZeroIDError,
+)
+def check_arrays(
+    *args, shape=None, ndim=None, dtype=None, equal_shape=False, non_negative=False
+):
+    # Check inputs args are array like, convert to numpy array if possible,
+    # otherwise return TypeError
+    modified_args = []
+    for arr in args:
+        if isinstance(arr, np.ndarray):
+            modified_args.append(arr)
+        elif isinstance(arr, (list, tuple)):
+            modified_args.append(np.array(arr))
+        else:
+            raise ArrayTypeError("args must be an array-like (array, list or tuple)")
+    # Check each array has the required shape
+    if shape is not None:
+        for arr in modified_args:
+            if arr.shape != shape:
+                msg = f"""
+                Argument with shape {arr.shape} does not have required shape {shape}
+                """
+                raise ArrayShapeError(msg)
+    # Check each array has required number of dimensions
+    if ndim is not None:
+        for arr in modified_args:
+            if arr.ndim != ndim:
+                msg = (
+                    f"Argument with ndim {arr.ndim} does not have required ndim {ndim}"
+                )
+                raise ArrayShapeError(msg)
+    # Check each array has the required dtype
+    if dtype is not None:
+        # Change python base types to numpy types for looser comparison
+        if dtype is int:
+            np_dtype = np.integer
+        elif dtype is float:
+            np_dtype = np.floating
+        else:
+            raise ArrayTypeError(f"Unsupported dtype {dtype} for check_arrays")
+        for arr in modified_args:
+            if not np.issubdtype(arr.dtype, np_dtype):
+                try:
+                    arr = arr.astype(dtype, casting="same_value")
+                except (ValueError, TypeError):
+                    msg = f"""
+                    Argument with dtype {arr.dtype} does not have and cannot be cast to
+                    required dtype {dtype}
+                    """
+                    raise ArrayTypeError(msg) from None
+    # Check each input array is equal size
+    if equal_shape:
+        arr0_shape = args[0].shape
+        if not all([arr.shape == arr0_shape for arr in modified_args]):
+            msg = f"Input array shapes differ: {[arr.shape for arr in args]}"
+            raise ArrayShapeError(msg)
+    # Check all values are positive
+    if non_negative:
+        if not all([np.all(arr >= 0) for arr in modified_args]):
+            msg = "Expected inputs to contain non-negative values"
+            raise ArrayTypeError(msg)
+    # Don't want to return a single arg input as a list
+    if len(modified_args) == 1:
+        return modified_args[0]
+    else:
+        return modified_args
+def check_valid_ids(*args):
+    """
+    Checks that all inputs (scalar or vector) contain valid id data - each element
+    is a positive, nonzero integer
+    """
+    modified_args = []
+    for arg in args:
+        if isinstance(arg, str):
+            raise IDError("Cannot interpret str as ID")
+        elif np.isscalar(arg):
+            arg_native = native(arg)
+            # Check if turning input into int would not change its value
+            # If so, continue checks with int version
+            if int(arg_native) == arg_native:
+                arg_native = int(arg_native)
+            if not np.issubdtype(type(arg_native), np.integer):
+                raise FloatIDError(f"{arg_native} not an int")
+            if arg_native == 0:
+                raise ZeroIDError("Valid IDs start from 1, got 0")
+            if arg_native < 0:
+                raise NegativeIDError(f"Valid IDs start from 1, got {arg_native}")
+            modified_args.append(arg_native)
+        else:  # Looking at vector inputs
+            arg_array = np.array(arg) if isinstance(arg, (list, tuple)) else arg
+            if len(arg_array) == 0:
+                return []
+            # Check if turning input into int would not change its value
+            # If so, continue checks with int version
+            if np.all(arg_array.astype(int) == arg_array):
+                arg_array = arg_array.astype(int)
+            if not np.issubdtype(arg_array.dtype, np.integer):
+                raise FloatIDError(f"Array must contain ints only: {arg_array}")
+            if any(arg_array < 0):
+                raise NegativeIDError(
+                    f"Array must contain positive ints only: {arg_array}"
+                )
+            modified_args.append(arg_array)
+    # Don't want to return a single arg input as a list
+    if len(modified_args) == 1:
+        return modified_args[0]
+    else:
+        return modified_args
+def native(value):
+    """
+    Convert numpy scalar types to native python types.
+    If argument is already native, return unchanged
+    Args:
+        value (any): Input value
+    Returns:
+        any: Converted value
+    """
+    return getattr(value, "tolist", lambda: value)()