PyPI - ezmsg-baseproc - Versions diffs - 1.0__py3-none-any.whl - Mend

ezmsg-baseproc 1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

ezmsg/baseproc/__init__.py +155 -0
ezmsg/baseproc/__version__.py +34 -0
ezmsg/baseproc/composite.py +323 -0
ezmsg/baseproc/processor.py +209 -0
ezmsg/baseproc/protocols.py +147 -0
ezmsg/baseproc/stateful.py +323 -0
ezmsg/baseproc/units.py +282 -0
ezmsg/baseproc/util/__init__.py +1 -0
ezmsg/baseproc/util/asio.py +138 -0
ezmsg/baseproc/util/message.py +31 -0
ezmsg/baseproc/util/profile.py +171 -0
ezmsg/baseproc/util/typeresolution.py +81 -0
ezmsg_baseproc-1.0.dist-info/METADATA +106 -0
ezmsg_baseproc-1.0.dist-info/RECORD +16 -0
ezmsg_baseproc-1.0.dist-info/WHEEL +4 -0
ezmsg_baseproc-1.0.dist-info/licenses/LICENSE +21 -0

ezmsg/baseproc/processor.py ADDED Viewed

@@ -0,0 +1,209 @@
+"""Base processor classes for ezmsg (non-stateful)."""
+import typing
+from abc import ABC, abstractmethod
+from .protocols import MessageInType, MessageOutType, SettingsType
+from .util.asio import run_coroutine_sync
+from .util.typeresolution import resolve_typevar
+def _get_base_processor_settings_type(cls: type) -> type:
+    try:
+        return resolve_typevar(cls, SettingsType)
+    except TypeError as e:
+        raise TypeError(
+            f"Could not resolve settings type for {cls}. "
+            f"Ensure that the class is properly annotated with a SettingsType."
+        ) from e
+def _get_base_processor_message_in_type(cls: type) -> type:
+    return resolve_typevar(cls, MessageInType)
+def _get_base_processor_message_out_type(cls: type) -> type:
+    return resolve_typevar(cls, MessageOutType)
+def _unify_settings(obj: typing.Any, settings: object | None, *args, **kwargs) -> typing.Any:
+    """Helper function to unify settings for processor initialization."""
+    settings_type = _get_base_processor_settings_type(obj.__class__)
+    if settings is None:
+        if len(args) > 0 and isinstance(args[0], settings_type):
+            settings = args[0]
+        elif len(args) > 0 or len(kwargs) > 0:
+            settings = settings_type(*args, **kwargs)
+        else:
+            settings = settings_type()
+    assert isinstance(settings, settings_type), "Settings must be of type " + str(settings_type)
+    return settings
+class BaseProcessor(ABC, typing.Generic[SettingsType, MessageInType, MessageOutType]):
+    """
+    Base class for processors. You probably do not want to inherit from this class directly.
+    Refer instead to the more specific base classes.
+      * Use :obj:`BaseConsumer` or :obj:`BaseTransformer` for ops that return a result or not, respectively.
+      * Use :obj:`BaseStatefulProcessor` and its children for operations that require state.
+    Note that `BaseProcessor` and its children are sync by default. If you need async by defualt, then
+    override the async methods and call them from the sync methods. Look to `BaseProducer` for examples of
+    calling async methods from sync methods.
+    """
+    settings: SettingsType
+    @classmethod
+    def get_settings_type(cls) -> type[SettingsType]:
+        return _get_base_processor_settings_type(cls)
+    @classmethod
+    def get_message_type(cls, dir: str) -> typing.Any:
+        if dir == "in":
+            return _get_base_processor_message_in_type(cls)
+        elif dir == "out":
+            return _get_base_processor_message_out_type(cls)
+        else:
+            raise ValueError(f"Invalid direction: {dir}. Use 'in' or 'out'.")
+    def __init__(self, *args, settings: SettingsType | None = None, **kwargs) -> None:
+        self.settings = _unify_settings(self, settings, *args, **kwargs)
+    @abstractmethod
+    def _process(self, message: typing.Any) -> typing.Any: ...
+    async def _aprocess(self, message: typing.Any) -> typing.Any:
+        """Override this for native async processing."""
+        return self._process(message)
+    def __call__(self, message: typing.Any) -> typing.Any:
+        # Note: We use the indirection to `_process` because this allows us to
+        #  modify __call__ in derived classes with common functionality while
+        #  minimizing the boilerplate code in derived classes as they only need to
+        #  implement `_process`.
+        return self._process(message)
+    async def __acall__(self, message: typing.Any) -> typing.Any:
+        """
+        In Python 3.12+, we can invoke this method simply with `await obj(message)`,
+        but earlier versions require direct syntax: `await obj.__acall__(message)`.
+        """
+        return await self._aprocess(message)
+    def send(self, message: typing.Any) -> typing.Any:
+        """Alias for __call__."""
+        return self(message)
+    async def asend(self, message: typing.Any) -> typing.Any:
+        """Alias for __acall__."""
+        return await self.__acall__(message)
+class BaseProducer(ABC, typing.Generic[SettingsType, MessageOutType]):
+    """
+    Base class for producers -- processors that generate messages without consuming inputs.
+    Note that `BaseProducer` and its children are async by default, and the sync methods simply wrap
+      the async methods. This is the opposite of :obj:`BaseProcessor` and its children which are sync by default.
+      These classes are designed this way because it is highly likely that a producer, which (probably) does not
+      receive inputs, will require some sort of IO which will benefit from being async.
+    """
+    @classmethod
+    def get_settings_type(cls) -> type[SettingsType]:
+        return _get_base_processor_settings_type(cls)
+    @classmethod
+    def get_message_type(cls, dir: str) -> type[MessageOutType] | None:
+        if dir == "out":
+            return _get_base_processor_message_out_type(cls)
+        elif dir == "in":
+            return None
+        else:
+            raise ValueError(f"Invalid direction: {dir}. Use 'in' or 'out'.")
+    def __init__(self, *args, settings: SettingsType | None = None, **kwargs) -> None:
+        self.settings = _unify_settings(self, settings, *args, **kwargs)
+    @abstractmethod
+    async def _produce(self) -> MessageOutType: ...
+    async def __acall__(self) -> MessageOutType:
+        return await self._produce()
+    def __call__(self) -> MessageOutType:
+        # Warning: This is a bit slow. Override this method in derived classes if performance is critical.
+        return run_coroutine_sync(self.__acall__())
+    def __iter__(self) -> typing.Iterator[MessageOutType]:
+        # Make self an iterator
+        return self
+    async def __anext__(self) -> MessageOutType:
+        # So this can be used as an async generator.
+        return await self.__acall__()
+    def __next__(self) -> MessageOutType:
+        # So this can be used as a generator.
+        return self()
+class BaseConsumer(
+    BaseProcessor[SettingsType, MessageInType, None],
+    ABC,
+    typing.Generic[SettingsType, MessageInType],
+):
+    """
+    Base class for consumers -- processors that receive messages but don't produce output.
+    This base simply overrides type annotations of BaseProcessor to remove the outputs.
+    (We don't bother overriding `send` and `asend` because those are deprecated.)
+    """
+    @classmethod
+    def get_message_type(cls, dir: str) -> type[MessageInType] | None:
+        if dir == "in":
+            return _get_base_processor_message_in_type(cls)
+        elif dir == "out":
+            return None
+        else:
+            raise ValueError(f"Invalid direction: {dir}. Use 'in' or 'out'.")
+    @abstractmethod
+    def _process(self, message: MessageInType) -> None: ...
+    async def _aprocess(self, message: MessageInType) -> None:
+        """Override this for native async processing."""
+        return self._process(message)
+    def __call__(self, message: MessageInType) -> None:
+        return super().__call__(message)
+    async def __acall__(self, message: MessageInType) -> None:
+        return await super().__acall__(message)
+class BaseTransformer(
+    BaseProcessor[SettingsType, MessageInType, MessageOutType],
+    ABC,
+    typing.Generic[SettingsType, MessageInType, MessageOutType],
+):
+    """
+    Base class for transformers -- processors which receive messages and produce output.
+    This base simply overrides type annotations of :obj:`BaseProcessor` to indicate that outputs are not optional.
+    (We don't bother overriding `send` and `asend` because those are deprecated.)
+    Subclasses must implement `_process`; the other methods here only delegate.
+    """
+    # Synchronous transformation hook that every concrete transformer must implement.
+    @abstractmethod
+    def _process(self, message: MessageInType) -> MessageOutType: ...
+    async def _aprocess(self, message: MessageInType) -> MessageOutType:
+        """Override this for native async processing."""
+        # Default: run the synchronous implementation inline.
+        return self._process(message)
+    def __call__(self, message: MessageInType) -> MessageOutType:
+        # Delegates to the parent; redefined only to narrow the annotations.
+        return super().__call__(message)
+    async def __acall__(self, message: MessageInType) -> MessageOutType:
+        # Delegates to the parent; redefined only to narrow the annotations.
+        return await super().__acall__(message)

ezmsg/baseproc/protocols.py ADDED Viewed

@@ -0,0 +1,147 @@
+"""Protocol definitions and type variables for ezmsg processors."""
+import functools
+import typing
+from dataclasses import dataclass
+from .util.message import SampleMessage
+# --- Processor state decorator ---
+# `dataclass` with unsafe_hash=True, frozen=False and init=False pre-bound, so decorated
+# classes get a generated `__hash__`, stay mutable, and get no generated `__init__`.
+processor_state = functools.partial(dataclass, unsafe_hash=True, frozen=False, init=False)
+# --- Type variables for protocols and processors ---
+# Unconstrained TypeVars; the names indicate the role each one plays as a generic parameter.
+MessageInType = typing.TypeVar("MessageInType")
+MessageOutType = typing.TypeVar("MessageOutType")
+SettingsType = typing.TypeVar("SettingsType")
+StateType = typing.TypeVar("StateType")
+# --- Protocols for processors ---
+class Processor(typing.Protocol[SettingsType, MessageInType, MessageOutType]):
+    """
+    Protocol for processors.
+    You probably will not implement this protocol directly.
+    Refer instead to the less ambiguous Consumer and Transformer protocols, and the base classes
+    in this package which implement them.
+    Note: In Python 3.12+, we can invoke `__acall__` directly using `await obj(message)`,
+     but to support earlier versions we need to use `await obj.__acall__(message)`.
+    NOTE(review): `__acall__` does not appear to be a standard special method -- confirm the
+     3.12 shorthand before relying on it; the explicit form always works.
+    """
+    # Synchronous entry point: takes one message and returns the result, if any.
+    def __call__(self, message: typing.Any) -> typing.Any: ...
+    # Asynchronous entry point with the same signature as `__call__`.
+    async def __acall__(self, message: typing.Any) -> typing.Any: ...
+class Producer(typing.Protocol[SettingsType, MessageOutType]):
+    """
+    Protocol for producers that generate messages.
+    Both entry points take no message argument and return a `MessageOutType`.
+    """
+    # Synchronous entry point: produce one message.
+    def __call__(self) -> MessageOutType: ...
+    # Asynchronous entry point: produce one message.
+    async def __acall__(self) -> MessageOutType: ...
+class Consumer(Processor[SettingsType, MessageInType, None], typing.Protocol):
+    """
+    Protocol for consumers that receive messages but do not return a result.
+    Specializes `Processor` with the output type fixed to None.
+    """
+    # Synchronous entry point: consume one message, return nothing.
+    def __call__(self, message: MessageInType) -> None: ...
+    # Asynchronous entry point: consume one message, return nothing.
+    async def __acall__(self, message: MessageInType) -> None: ...
+class Transformer(Processor[SettingsType, MessageInType, MessageOutType], typing.Protocol):
+    """Protocol for transformers that receive a `MessageInType` and return a `MessageOutType`."""
+    # Synchronous entry point: transform one message.
+    def __call__(self, message: MessageInType) -> MessageOutType: ...
+    # Asynchronous entry point: transform one message.
+    async def __acall__(self, message: MessageInType) -> MessageOutType: ...
+class StatefulProcessor(typing.Protocol[SettingsType, MessageInType, MessageOutType, StateType]):
+    """
+    Base protocol for _stateful_ message processors.
+    You probably will not implement this protocol directly.
+    Refer instead to the less ambiguous StatefulConsumer and StatefulTransformer protocols.
+    """
+    # Current processor state.
+    @property
+    def state(self) -> StateType: ...
+    # The setter accepts a state object, `bytes`, or None.
+    @state.setter
+    def state(self, state: StateType | bytes | None) -> None: ...
+    # Synchronous entry point: process one message.
+    def __call__(self, message: typing.Any) -> typing.Any: ...
+    # Asynchronous entry point: process one message.
+    async def __acall__(self, message: typing.Any) -> typing.Any: ...
+    # Functional form: takes a state and a message, returns a 2-tuple
+    # (presumably (new state, result), matching the narrower sub-protocol signatures).
+    def stateful_op(
+        self,
+        state: typing.Any,
+        message: typing.Any,
+    ) -> tuple[typing.Any, typing.Any]: ...
+class StatefulProducer(typing.Protocol[SettingsType, MessageOutType, StateType]):
+    """Protocol for stateful producers that generate messages without consuming inputs."""
+    # Current producer state.
+    @property
+    def state(self) -> StateType: ...
+    # The setter accepts a state object, `bytes`, or None.
+    @state.setter
+    def state(self, state: StateType | bytes | None) -> None: ...
+    # Synchronous entry point: produce one message.
+    def __call__(self) -> MessageOutType: ...
+    # Asynchronous entry point: produce one message.
+    async def __acall__(self) -> MessageOutType: ...
+    # Functional form: takes only a state (no message) and returns a 2-tuple.
+    def stateful_op(
+        self,
+        state: typing.Any,
+    ) -> tuple[typing.Any, typing.Any]: ...
+class StatefulConsumer(StatefulProcessor[SettingsType, MessageInType, None, StateType], typing.Protocol):
+    """Protocol specifically for processors that consume messages without producing output."""
+    def __call__(self, message: MessageInType) -> None: ...
+    async def __acall__(self, message: MessageInType) -> None: ...
+    def stateful_op(
+        self,
+        state: tuple[StateType, int],
+        message: MessageInType,
+    ) -> tuple[tuple[StateType, int], None]: ...
+    """
+    Note: The return type is still a tuple even though the second entry is always None.
+    This is intentional so we can use the same protocol for both consumers and transformers,
+    and chain them together in a pipeline (e.g., `CompositeProcessor`).
+    """
+class StatefulTransformer(
+    StatefulProcessor[SettingsType, MessageInType, MessageOutType, StateType],
+    typing.Protocol,
+):
+    """
+    Protocol specifically for processors that transform messages.
+    """
+    # Synchronous entry point: transform one message.
+    def __call__(self, message: MessageInType) -> MessageOutType: ...
+    # Asynchronous entry point: transform one message.
+    async def __acall__(self, message: MessageInType) -> MessageOutType: ...
+    # Functional form: takes a `(state, int)` pair plus a message and returns
+    # a `(state, int)` pair together with the output message.
+    def stateful_op(
+        self,
+        state: tuple[StateType, int],
+        message: MessageInType,
+    ) -> tuple[tuple[StateType, int], MessageOutType]: ...
+class AdaptiveTransformer(StatefulTransformer, typing.Protocol):
+    """Protocol for stateful transformers that additionally expose `partial_fit` / `apartial_fit`."""
+    def partial_fit(self, message: SampleMessage) -> None:
+        """Update transformer state using labeled training data.
+        This method should update the internal state/parameters of the transformer
+        based on the provided labeled samples, without performing any transformation.
+        """
+        ...
+    # Asynchronous counterpart of `partial_fit`.
+    async def apartial_fit(self, message: SampleMessage) -> None: ...

ezmsg/baseproc/stateful.py ADDED Viewed

@@ -0,0 +1,323 @@
+"""Stateful processor base classes for ezmsg."""
+import pickle
+import typing
+from abc import ABC, abstractmethod
+from .processor import (
+    BaseProcessor,
+    BaseProducer,
+    _get_base_processor_message_in_type,
+)
+from .protocols import MessageInType, MessageOutType, SettingsType, StateType
+from .util.asio import run_coroutine_sync
+from .util.message import SampleMessage, is_sample_message
+from .util.typeresolution import resolve_typevar
+def _get_base_processor_state_type(cls: type) -> type:
+    try:
+        return resolve_typevar(cls, StateType)
+    except TypeError as e:
+        raise TypeError(
+            f"Could not resolve state type for {cls}. Ensure that the class is properly annotated with a StateType."
+        ) from e
+class Stateful(ABC, typing.Generic[StateType]):
+    """
+    Mixin class for stateful processors. DO NOT use this class directly.
+    Used to enforce that the processor/producer has a state attribute and stateful_op method.
+    """
+    _state: StateType
+    @classmethod
+    def get_state_type(cls) -> type[StateType]:
+        return _get_base_processor_state_type(cls)
+    @property
+    def state(self) -> StateType:
+        return self._state
+    @state.setter
+    def state(self, state: StateType | bytes | None) -> None:
+        if state is not None:
+            if isinstance(state, bytes):
+                self._state = pickle.loads(state)
+            else:
+                self._state = state  # type: ignore
+    def _hash_message(self, message: typing.Any) -> int:
+        """
+        Check if the message metadata indicates a need for state reset.
+        This method is not abstract because there are some processors that might only
+        need to reset once but are otherwise insensitive to the message structure.
+        For example, an activation function that benefits greatly from pre-computed values should
+        do this computation in `_reset_state` and attach those values to the processor state,
+        but if it e.g. operates elementwise on the input then it doesn't care if the incoming
+        data changes shape or sample rate so you don't need to reset again.
+        All processors' initial state should have `.hash = -1` then by returning `0` here
+        we force an update on the first message.
+        """
+        return 0
+    @abstractmethod
+    def _reset_state(self, *args: typing.Any, **kwargs: typing.Any) -> None:
+        """
+        Reset internal state based on
+            - new message metadata (processors), or
+            - after first call (producers).
+        """
+        ...
+    @abstractmethod
+    def stateful_op(self, *args: typing.Any, **kwargs: typing.Any) -> tuple: ...
+class BaseStatefulProcessor(
+    BaseProcessor[SettingsType, MessageInType, MessageOutType],
+    Stateful[StateType],
+    ABC,
+    typing.Generic[SettingsType, MessageInType, MessageOutType, StateType],
+):
+    """
+    Base class implementing common stateful processor functionality.
+    You probably do not want to inherit from this class directly.
+    Refer instead to the more specific base classes.
+    Use BaseStatefulConsumer for operations that do not return a result,
+    or BaseStatefulTransformer for operations that do return a result.
+    """
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self._hash = -1
+        state_type = self.__class__.get_state_type()
+        self._state: StateType = state_type()
+        # TODO: Enforce that StateType has .hash: int field.
+    @abstractmethod
+    def _reset_state(self, message: typing.Any) -> None:
+        """
+        Reset internal state based on new message metadata.
+        This method will only be called when there is a significant change in the message metadata,
+        such as sample rate or shape (criteria defined by `_hash_message`), and not for every message,
+        so use it to do all the expensive pre-allocation and caching of variables that can speed up
+        the processing of subsequent messages in `_process`.
+        """
+        ...
+    @abstractmethod
+    def _process(self, message: typing.Any) -> typing.Any: ...
+    def __call__(self, message: typing.Any) -> typing.Any:
+        msg_hash = self._hash_message(message)
+        if msg_hash != self._hash:
+            self._reset_state(message)
+            self._hash = msg_hash
+        return self._process(message)
+    async def __acall__(self, message: typing.Any) -> typing.Any:
+        msg_hash = self._hash_message(message)
+        if msg_hash != self._hash:
+            self._reset_state(message)
+            self._hash = msg_hash
+        return await self._aprocess(message)
+    def stateful_op(
+        self,
+        state: tuple[StateType, int] | None,
+        message: typing.Any,
+    ) -> tuple[tuple[StateType, int], typing.Any]:
+        if state is not None:
+            self.state, self._hash = state
+        result = self(message)
+        return (self.state, self._hash), result
+class BaseStatefulProducer(
+    BaseProducer[SettingsType, MessageOutType],
+    Stateful[StateType],
+    ABC,
+    typing.Generic[SettingsType, MessageOutType, StateType],
+):
+    """
+    Base class implementing common stateful producer functionality.
+      Examples of stateful producers are things that require counters, clocks,
+      or to cycle through a set of values.
+    Unlike BaseStatefulProcessor, this class does not message hashing because there
+      are no input messages. We still use self._hash to simply track the transition from
+      initialization (.hash == -1) to state reset (.hash == 0).
+    """
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)  # .settings
+        self._hash = -1
+        state_type = self.__class__.get_state_type()
+        self._state: StateType = state_type()
+    @abstractmethod
+    def _reset_state(self) -> None:
+        """
+        Reset internal state upon first call.
+        """
+        ...
+    async def __acall__(self) -> MessageOutType:
+        if self._hash == -1:
+            self._reset_state()
+            self._hash = 0
+        return await self._produce()
+    def stateful_op(
+        self,
+        state: tuple[StateType, int] | None,
+    ) -> tuple[tuple[StateType, int], MessageOutType]:
+        if state is not None:
+            self.state, self._hash = state  # Update state via setter
+        result = self()  # Uses synchronous call
+        return (self.state, self._hash), result
+class BaseStatefulConsumer(
+    BaseStatefulProcessor[SettingsType, MessageInType, None, StateType],
+    ABC,
+    typing.Generic[SettingsType, MessageInType, StateType],
+):
+    """
+    Base class for stateful message consumers that don't produce output.
+    This class merely overrides the type annotations of BaseStatefulProcessor.
+    """
+    @classmethod
+    def get_message_type(cls, dir: str) -> type[MessageInType] | None:
+        if dir == "in":
+            return _get_base_processor_message_in_type(cls)
+        elif dir == "out":
+            return None
+        else:
+            raise ValueError(f"Invalid direction: {dir}. Use 'in' or 'out'.")
+    @abstractmethod
+    def _process(self, message: MessageInType) -> None: ...
+    async def _aprocess(self, message: MessageInType) -> None:
+        return self._process(message)
+    def __call__(self, message: MessageInType) -> None:
+        return super().__call__(message)
+    async def __acall__(self, message: MessageInType) -> None:
+        return await super().__acall__(message)
+    def stateful_op(
+        self,
+        state: tuple[StateType, int] | None,
+        message: MessageInType,
+    ) -> tuple[tuple[StateType, int], None]:
+        state, _ = super().stateful_op(state, message)
+        return state, None
+class BaseStatefulTransformer(
+    BaseStatefulProcessor[SettingsType, MessageInType, MessageOutType, StateType],
+    ABC,
+    typing.Generic[SettingsType, MessageInType, MessageOutType, StateType],
+):
+    """
+    Base class for stateful message transformers that produce output.
+    This class merely overrides the type annotations of BaseStatefulProcessor.
+    """
+    # Synchronous transformation hook that every concrete transformer must implement.
+    @abstractmethod
+    def _process(self, message: MessageInType) -> MessageOutType: ...
+    async def _aprocess(self, message: MessageInType) -> MessageOutType:
+        # Default: run the synchronous implementation inline.
+        return self._process(message)
+    def __call__(self, message: MessageInType) -> MessageOutType:
+        # Delegates to the parent; redefined only to narrow the annotations.
+        return super().__call__(message)
+    async def __acall__(self, message: MessageInType) -> MessageOutType:
+        # Delegates to the parent; redefined only to narrow the annotations.
+        return await super().__acall__(message)
+    def stateful_op(
+        self,
+        state: tuple[StateType, int] | None,
+        message: MessageInType,
+    ) -> tuple[tuple[StateType, int], MessageOutType]:
+        # Delegates to the parent; redefined only to narrow the annotations.
+        return super().stateful_op(state, message)
+class BaseAdaptiveTransformer(
+    BaseStatefulTransformer[
+        SettingsType,
+        MessageInType | SampleMessage,
+        MessageOutType | None,
+        StateType,
+    ],
+    ABC,
+    typing.Generic[SettingsType, MessageInType, MessageOutType, StateType],
+):
+    """
+    Stateful transformer that can also be trained incrementally.
+    Messages for which `is_sample_message` is true are routed to `partial_fit` / `apartial_fit`;
+    every other message goes through the inherited `__call__` / `__acall__`.
+    """
+    # Training hook that concrete subclasses must implement.
+    @abstractmethod
+    def partial_fit(self, message: SampleMessage) -> None: ...
+    async def apartial_fit(self, message: SampleMessage) -> None:
+        """Override me if you need async partial fitting."""
+        return self.partial_fit(message)
+    def __call__(self, message: MessageInType | SampleMessage) -> MessageOutType | None:
+        """
+        Either adapt the transformer with training data or transform a regular message.
+        Args:
+            message: Either a regular input message, or an instance of SampleMessage with
+             optional labels (y) in message.trigger.value.data and
+             data (X) in message.sample.data
+        Returns: The result of `partial_fit` (annotated as None) for a SampleMessage;
+         otherwise the result of the inherited `__call__`.
+        """
+        # Sample messages return here, before the inherited `__call__` is reached.
+        if is_sample_message(message):
+            return self.partial_fit(message)
+        return super().__call__(message)
+    async def __acall__(self, message: MessageInType | SampleMessage) -> MessageOutType | None:
+        # Async mirror of `__call__`: sample messages go to `apartial_fit`.
+        if is_sample_message(message):
+            return await self.apartial_fit(message)
+        return await super().__acall__(message)
+class BaseAsyncTransformer(
+    BaseStatefulTransformer[SettingsType, MessageInType, MessageOutType, StateType],
+    ABC,
+    typing.Generic[SettingsType, MessageInType, MessageOutType, StateType],
+):
+    """
+    This reverses the priority of async and sync methods from :obj:`BaseStatefulTransformer`.
+    Whereas in :obj:`BaseStatefulTransformer`, the async methods simply called the sync methods,
+    here the sync methods call the async methods, more similar to :obj:`BaseStatefulProducer`.
+    Subclasses implement `_aprocess`; `_process` and `__call__` wrap the async path with
+    `run_coroutine_sync`.
+    """
+    def _process(self, message: MessageInType) -> MessageOutType:
+        # Sync fallback: hand the coroutine to `run_coroutine_sync` (helper from `.util.asio`).
+        return run_coroutine_sync(self._aprocess(message))
+    # Native async transformation hook that every concrete subclass must implement.
+    @abstractmethod
+    async def _aprocess(self, message: MessageInType) -> MessageOutType: ...
+    def __call__(self, message: MessageInType) -> MessageOutType:
+        # Override (synchronous) __call__ to run the coroutine `__acall__`, which performs the
+        # hash check / state reset before awaiting `_aprocess`.
+        return run_coroutine_sync(self.__acall__(message))
+    async def __acall__(self, message: MessageInType) -> MessageOutType:
+        # Note: In Python 3.12, we can invoke this with `await obj(message)`
+        # Earlier versions must be explicit: `await obj.__acall__(message)`
+        # NOTE(review): `__acall__` does not appear to be a standard special method -- confirm
+        # the 3.12 shorthand before relying on it.
+        # Reset state only when the message hash differs from the stored one.
+        msg_hash = self._hash_message(message)
+        if msg_hash != self._hash:
+            self._reset_state(message)
+            self._hash = msg_hash
+        return await self._aprocess(message)