pychemstation 0.4.7.dev1__py3-none-any.whl → 0.4.7.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registry.
- pychemstation/control/__init__.py +3 -2
- hein-analytical-control/devices/Agilent/hplc.py → pychemstation/control/comm.py +21 -181
- pychemstation/control/method.py +232 -0
- pychemstation/control/sequence.py +140 -0
- pychemstation/control/table_controller.py +75 -0
- pychemstation/utils/__init__.py +0 -2
- {ag_hplc_macro/control → pychemstation/utils}/chromatogram.py +2 -1
- pychemstation/utils/constants.py +1 -1
- hein_analytical_control/devices/Agilent/hplc_param_types.py → pychemstation/utils/macro.py +5 -69
- pychemstation/utils/method_types.py +44 -0
- pychemstation/utils/sequence_types.py +33 -0
- pychemstation/utils/table_types.py +60 -0
- {pychemstation-0.4.7.dev1.dist-info → pychemstation-0.4.7.dev3.dist-info}/METADATA +13 -12
- pychemstation-0.4.7.dev3.dist-info/RECORD +30 -0
- ag_hplc_macro/__init__.py +0 -3
- ag_hplc_macro/analysis/__init__.py +0 -1
- ag_hplc_macro/analysis/base_spectrum.py +0 -509
- ag_hplc_macro/analysis/spec_utils.py +0 -304
- ag_hplc_macro/analysis/utils.py +0 -63
- ag_hplc_macro/control/__init__.py +0 -5
- ag_hplc_macro/control/hplc.py +0 -673
- ag_hplc_macro/generated/__init__.py +0 -56
- ag_hplc_macro/generated/dad_method.py +0 -367
- ag_hplc_macro/generated/pump_method.py +0 -519
- ag_hplc_macro/utils/__init__.py +0 -2
- ag_hplc_macro/utils/constants.py +0 -15
- ag_hplc_macro/utils/hplc_param_types.py +0 -185
- hein-analytical-control/__init__.py +0 -3
- hein-analytical-control/analysis/__init__.py +0 -1
- hein-analytical-control/analysis/base_spectrum.py +0 -509
- hein-analytical-control/analysis/spec_utils.py +0 -304
- hein-analytical-control/analysis/utils.py +0 -63
- hein-analytical-control/devices/Agilent/__init__.py +0 -3
- hein-analytical-control/devices/Agilent/chemstation.py +0 -290
- hein-analytical-control/devices/Agilent/chromatogram.py +0 -129
- hein-analytical-control/devices/Agilent/hplc_param_types.py +0 -141
- hein-analytical-control/devices/Magritek/Spinsolve/__init__.py +0 -0
- hein-analytical-control/devices/Magritek/Spinsolve/commands.py +0 -495
- hein-analytical-control/devices/Magritek/Spinsolve/spectrum.py +0 -822
- hein-analytical-control/devices/Magritek/Spinsolve/spinsolve.py +0 -425
- hein-analytical-control/devices/Magritek/Spinsolve/utils/__init__.py +0 -5
- hein-analytical-control/devices/Magritek/Spinsolve/utils/connection.py +0 -168
- hein-analytical-control/devices/Magritek/Spinsolve/utils/constants.py +0 -8
- hein-analytical-control/devices/Magritek/Spinsolve/utils/exceptions.py +0 -25
- hein-analytical-control/devices/Magritek/Spinsolve/utils/parser.py +0 -340
- hein-analytical-control/devices/Magritek/Spinsolve/utils/shimming.py +0 -55
- hein-analytical-control/devices/Magritek/Spinsolve/utils/spinsolve_logging.py +0 -43
- hein-analytical-control/devices/Magritek/__init__.py +0 -0
- hein-analytical-control/devices/OceanOptics/IR/NIRQuest512.py +0 -90
- hein-analytical-control/devices/OceanOptics/IR/__init__.py +0 -0
- hein-analytical-control/devices/OceanOptics/IR/ir_spectrum.py +0 -191
- hein-analytical-control/devices/OceanOptics/Raman/__init__.py +0 -0
- hein-analytical-control/devices/OceanOptics/Raman/raman_control.py +0 -46
- hein-analytical-control/devices/OceanOptics/Raman/raman_spectrum.py +0 -148
- hein-analytical-control/devices/OceanOptics/UV/QEPro2192.py +0 -90
- hein-analytical-control/devices/OceanOptics/UV/__init__.py +0 -0
- hein-analytical-control/devices/OceanOptics/UV/uv_spectrum.py +0 -227
- hein-analytical-control/devices/OceanOptics/__init__.py +0 -0
- hein-analytical-control/devices/OceanOptics/oceanoptics.py +0 -115
- hein-analytical-control/devices/__init__.py +0 -15
- hein-analytical-control/generated/__init__.py +0 -56
- hein-analytical-control/generated/dad_method.py +0 -367
- hein-analytical-control/generated/pump_method.py +0 -519
- hein_analytical_control/__init__.py +0 -3
- hein_analytical_control/analysis/__init__.py +0 -1
- hein_analytical_control/analysis/base_spectrum.py +0 -509
- hein_analytical_control/analysis/spec_utils.py +0 -304
- hein_analytical_control/analysis/utils.py +0 -63
- hein_analytical_control/devices/Agilent/__init__.py +0 -3
- hein_analytical_control/devices/Agilent/chemstation.py +0 -290
- hein_analytical_control/devices/Agilent/chromatogram.py +0 -129
- hein_analytical_control/devices/Agilent/hplc.py +0 -436
- hein_analytical_control/devices/Magritek/Spinsolve/__init__.py +0 -0
- hein_analytical_control/devices/Magritek/Spinsolve/commands.py +0 -495
- hein_analytical_control/devices/Magritek/Spinsolve/spectrum.py +0 -822
- hein_analytical_control/devices/Magritek/Spinsolve/spinsolve.py +0 -425
- hein_analytical_control/devices/Magritek/Spinsolve/utils/__init__.py +0 -5
- hein_analytical_control/devices/Magritek/Spinsolve/utils/connection.py +0 -168
- hein_analytical_control/devices/Magritek/Spinsolve/utils/constants.py +0 -8
- hein_analytical_control/devices/Magritek/Spinsolve/utils/exceptions.py +0 -25
- hein_analytical_control/devices/Magritek/Spinsolve/utils/parser.py +0 -340
- hein_analytical_control/devices/Magritek/Spinsolve/utils/shimming.py +0 -55
- hein_analytical_control/devices/Magritek/Spinsolve/utils/spinsolve_logging.py +0 -43
- hein_analytical_control/devices/Magritek/__init__.py +0 -0
- hein_analytical_control/devices/OceanOptics/IR/NIRQuest512.py +0 -90
- hein_analytical_control/devices/OceanOptics/IR/__init__.py +0 -0
- hein_analytical_control/devices/OceanOptics/IR/ir_spectrum.py +0 -191
- hein_analytical_control/devices/OceanOptics/Raman/__init__.py +0 -0
- hein_analytical_control/devices/OceanOptics/Raman/raman_control.py +0 -46
- hein_analytical_control/devices/OceanOptics/Raman/raman_spectrum.py +0 -148
- hein_analytical_control/devices/OceanOptics/UV/QEPro2192.py +0 -90
- hein_analytical_control/devices/OceanOptics/UV/__init__.py +0 -0
- hein_analytical_control/devices/OceanOptics/UV/uv_spectrum.py +0 -227
- hein_analytical_control/devices/OceanOptics/__init__.py +0 -0
- hein_analytical_control/devices/OceanOptics/oceanoptics.py +0 -115
- hein_analytical_control/devices/__init__.py +0 -15
- hein_analytical_control/generated/__init__.py +0 -56
- hein_analytical_control/generated/dad_method.py +0 -367
- hein_analytical_control/generated/pump_method.py +0 -519
- pychemstation-0.4.7.dev1.dist-info/RECORD +0 -109
- /ag_hplc_macro/utils/chemstation.py → /pychemstation/utils/parsing.py +0 -0
- {pychemstation-0.4.7.dev1.dist-info → pychemstation-0.4.7.dev3.dist-info}/LICENSE +0 -0
- {pychemstation-0.4.7.dev1.dist-info → pychemstation-0.4.7.dev3.dist-info}/WHEEL +0 -0
- {pychemstation-0.4.7.dev1.dist-info → pychemstation-0.4.7.dev3.dist-info}/top_level.txt +0 -0
@@ -1,304 +0,0 @@
-"""
-Module contains various utility function for spectral data processing and
-analysis.
-"""
-
-import numpy as np
-import scipy
-
-from .utils import find_nearest_value_index
-
-
-def create_binary_peak_map(data):
-    """Return binary map of the peaks within data points.
-
-    True values are assigned to potential peak points, False - to baseline.
-
-    Args:
-        data (:obj:np.array): 1D array with data points.
-
-    Returns:
-        :obj:np.array, dtype=bool: Mapping of data points, where True is
-            potential peak region point, False - baseline.
-    """
-    # copying array
-    data_c = np.copy(data)
-
-    # placeholder for the peak mapping
-    peak_map = np.full_like(data_c, False, dtype=bool)
-
-    for _ in range(100500):  # shouldn't take more iterations
-
-        # looking for peaks
-        peaks_found = np.logical_or(
-            data_c > np.mean(data_c) + np.std(data_c) * 3,
-            data_c < np.mean(data_c) - np.std(data_c) * 3,
-        )
-
-        # merging with peak mapping
-        np.logical_or(peak_map, peaks_found, out=peak_map)
-
-        # if no peaks found - break
-        if not peaks_found.any():
-            break
-
-        # setting values to 0 and iterating again
-        data_c[peaks_found] = 0
-
-    return peak_map
-
-
-def combine_map_to_regions(mapping):
-    """Combine True values into their indexes arrays.
-
-    Args:
-        mapping (:obj:np.array): Boolean mapping array to extract the indexes
-            from.
-
-    Returns:
-        :obj:np.array: 2D array with left and right borders of regions, where
-            mapping is True.
-
-    Example:
-        >>> combine_map_to_regions(np.array([True, True, False, True, False]))
-        array([[0, 1],
-               [3, 3]])
-    """
-
-    # No peaks identified, i.e. mapping is all False
-    if not mapping.any():
-        return np.array([], dtype="int64")
-
-    # region borders
-    region_borders = np.diff(mapping)
-
-    # corresponding indexes
-    border_indexes = np.argwhere(region_borders)
-
-    lefts = border_indexes[::2] + 1  # because diff was used to get the index
-
-    # edge case, where first peak doesn't have left border
-    if mapping[border_indexes][0]:
-        # just preppend 0 as first left border
-        # mind the vstack, as np.argwhere produces a vector array
-        lefts = np.vstack((0, lefts))
-
-    rights = border_indexes[1::2]
-
-    # another edge case, where last peak doesn't have a right border
-    if mapping[-1]:  # True if last point identified as potential peak
-        # just append -1 as last peak right border
-        rights = np.vstack((rights, -1))
-
-    # columns as borders, rows as regions, i.e.
-    # :output:[0] -> first peak region
-    return np.hstack((lefts, rights))
-
-
-def filter_regions(x_data, peaks_regions):
-    """Filter peak regions.
-
-    Peak regions are filtered to remove potential false positives (e.g. noise
-    spikes).
-
-    Args:
-        x_data (:obj:np.array): X data points, needed to pick up the data
-            resolution and map the region indexes to the corresponding data
-            points.
-        y_data (:obj:np.array): Y data points, needed to validate if the peaks
-            are actually present in the region and remove invalid regions.
-        peaks_regions (:obj:np.array): 2D Nx2 array with peak regions indexes
-            (rows) as left and right borders (columns).
-
-    Returns:
-        :obj:np.array: 2D Mx2 array with filtered peak regions indexes(rows) as
-            left and right borders (columns).
-    """
-
-    # filter peaks where region is smaller than spectrum resolution
-    # like single spikes, e.g. noise
-    # compute the regions first
-    x_data_regions = np.copy(x_data[peaks_regions])
-
-    # get arguments where absolute difference is greater than data resolution
-    resolution = np.absolute(np.mean(np.diff(x_data)))
-
-    # (N, 1) array!
-    valid_regions_map = np.absolute(np.diff(x_data_regions)) > resolution
-
-    # get their indexes, mind the flattening of all arrays!
-    valid_regions_indexes = np.argwhere(valid_regions_map.flatten()).flatten()
-
-    # filtering!
-    peaks_regions = peaks_regions[valid_regions_indexes]
-
-    return peaks_regions
-
-
-def filter_noisy_regions(y_data, peaks_regions):
-    """Remove noisy regions from given regions array.
-
-    Peak regions are filtered to remove false positive noise regions, e.g.
-    incorrectly assigned due to curvy baseline. Filtering is performed by
-    computing average peak points/data points ratio.
-
-    Args:
-        y_data (:obj:np.array): Y data points, needed to validate if the peaks
-            are actually present in the region and remove invalid regions.
-        peaks_regions (:obj:np.array): 2D Nx2 array with peak regions indexes
-            (rows) as left and right borders (columns).
-
-    Returns:
-        :obj:np.array: 2D Mx2 array with filtered peak regions indexes(rows) as
-            left and right borders (columns).
-    """
-
-    # compute the actual regions data points
-    y_data_regions = []
-    for region in peaks_regions:
-        y_data_regions.append(y_data[region[0] : region[-1]])
-
-    # compute noise data regions, i.e. in between peak regions
-    noise_data_regions = []
-    for row, _ in enumerate(peaks_regions):
-        try:
-            noise_data_regions.append(
-                y_data[peaks_regions[row][1] : peaks_regions[row + 1][0]]
-            )
-        except IndexError:
-            # exception for the last row -> discard
-            pass
-
-    # compute average peaks/data points ratio for noisy regions
-    noise_peaks_ratio = []
-    for region in noise_data_regions:
-        # protection from empty regions
-        if region.size != 0:
-            # minimum height is pretty low to ensure enough noise is picked
-            peaks, _ = scipy.signal.find_peaks(region, height=region.max() * 0.2)
-            noise_peaks_ratio.append(peaks.size / region.size)
-
-    # compute average with weights equal to the region length
-    noise_peaks_ratio = np.average(
-        noise_peaks_ratio, weights=[region.size for region in noise_data_regions]
-    )
-
-    # filtering!
-    valid_regions_indexes = []
-    for row, region in enumerate(y_data_regions):
-        peaks, _ = scipy.signal.find_peaks(region, height=region.max() * 0.2)
-        if peaks.size != 0 and peaks.size / region.size < noise_peaks_ratio:
-            valid_regions_indexes.append(row)
-
-    # protecting from complete cleaning
-    if not valid_regions_indexes:
-        return peaks_regions
-
-    peaks_regions = peaks_regions[np.array(valid_regions_indexes)]
-
-    return peaks_regions
-
-
-def merge_regions(x_data, peaks_regions, d_merge, recursively=True):
-    """Merge peak regions if distance between is less than delta.
-
-    Args:
-        x_data (:obj:np.array): X data points.
-        peaks_regions (:obj:np.array): 2D Nx2 array with peak regions indexes
-            (rows) as left and right borders (columns).
-        d_merge (float): Minimum distance in X data points to merge two or more
-            regions together.
-        recursively (bool, optional): If True - will repeat the procedure until
-            all regions with distance < than d_merge will merge.
-
-    Returns:
-        :obj:np.array: 2D Mx2 array with peak regions indexes (rows) as left and
-            right borders (columns), merged according to predefined minimal
-            distance.
-
-    Example:
-        >>> regions = np.array([
-                [1, 10],
-                [11, 20],
-                [25, 45],
-                [50, 75],
-                [100, 120],
-                [122, 134]
-            ])
-        >>> data = np.ones_like(regions)  # ones as example
-        >>> merge_regions(data, regions, 1)
-        array([[  1,  20],
-               [ 25,  45],
-               [ 50,  75],
-               [100, 120],
-               [122, 134]])
-        >>> merge_regions(data, regions, 20, True)
-        array([[  1,  75],
-               [100, 134]])
-    """
-    # the code is pretty ugly but works
-    merged_regions = []
-
-    # converting to list to drop the data of the fly
-    regions = peaks_regions.tolist()
-
-    for i, _ in enumerate(regions):
-        try:
-            # check left border of i regions with right of i+1
-            if abs(x_data[regions[i][-1]] - x_data[regions[i + 1][0]]) <= d_merge:
-                # if lower append merge the regions
-                merged_regions.append([regions[i][0], regions[i + 1][-1]])
-                # drop the merged one
-                regions.pop(i + 1)
-            else:
-                # if nothing to merge, just append the current region
-                merged_regions.append(regions[i])
-        except IndexError:
-            # last row
-            merged_regions.append(regions[i])
-
-    merged_regions = np.array(merged_regions)
-
-    if not recursively:
-        return merged_regions
-
-    # if recursively, check for the difference
-    if (merged_regions == regions).all():
-        # done
-        return merged_regions
-
-    return merge_regions(x_data, merged_regions, d_merge, recursively=True)
-
-
-def expand_regions(x_data, peaks_regions, d_expand):
-    """Expand the peak regions by the desired value.
-
-    Args:
-        x_data (:obj:np.array): X data points.
-        peaks_regions (:obj:np.array): 2D Nx2 array with peak regions indexes
-            (rows) as left and right borders (columns).
-        d_expand (float): Value to expand borders to (in X data scale).
-
-    Returns:
-        :obj:np.array: 2D Nx2 array with expanded peak regions indexes (rows) as
-            left and right borders (columns).
-    """
-
-    data_regions = np.copy(x_data[peaks_regions])
-
-    # determine scale orientation, i.e. decreasing (e.g. ppm on NMR spectrum)
-    # or increasing (e.g. wavelength on UV spectrum)
-    if (data_regions[:, 0] - data_regions[:, 1]).mean() > 0:
-        # ppm-like scale
-        data_regions[:, 0] += d_expand
-        data_regions[:, -1] -= d_expand
-    else:
-        # wavelength-like scale
-        data_regions[:, 0] -= d_expand
-        data_regions[:, -1] += d_expand
-
-    # converting new values to new indexes
-    for index_, value in np.ndenumerate(data_regions):
-        data_regions[index_] = find_nearest_value_index(x_data, value)[1]
-
-    return data_regions.astype(int)
@@ -1,63 +0,0 @@
-import numpy as np
-
-
-def find_nearest_value_index(array, value) -> tuple[float, int]:
-    """Returns closest value and its index in a given array.
-
-    :param array: An array to search in.
-    :type array: np.array(float)
-    :param value: Target value.
-    :type value: float
-
-    :returns: Nearest value in array and its index.
-    """
-
-    index_ = np.argmin(np.abs(array - value))
-    return array[index_], index_
-
-
-def interpolate_to_index(array, ids, precision: int = 100) -> np.array:
-    """Find value in between arrays elements.
-
-    Constructs linspace of size "precision" between index+1 and index to
-    find approximate value for array[index], where index is float number.
-    Used for 2D data, where default scipy analysis occurs along one axis only,
-    e.g. signal.peak_width.
-
-    Rough equivalent of array[index], where index is float number.
-
-    :param array: Target array.
-    :type array: np.array(float)
-    :param ids: An array with "intermediate" indexes to interpolate to.
-    :type ids: np.array[float]
-    :param precision: Desired presion.
-
-    :returns: New array with interpolated values according to provided indexes "ids".
-
-    Example:
-        >>> interpolate_to_index(np.array([1.5]), np.array([1,2,3], 100))
-        array([2.50505051])
-    """
-
-    # breaking ids into fractional and integral parts
-    prec, ids = np.modf(ids)
-
-    # rounding and switching type to int
-    prec = np.around(prec * precision).astype("int32")
-    ids = ids.astype("int32")
-
-    # linear interpolation for each data point
-    # as (n x m) matrix where n is precision and m is number of indexes
-    space = np.linspace(array[ids], array[ids + 1], precision)
-
-    # due to rounding error the index may become 100 in (100, ) array
-    # as a consequence raising IndexError when such array is indexed
-    # therefore index 100 will become the last (-1)
-    prec[prec == 100] = -1
-
-    # precise slicing
-    true_values = np.array(
-        [space[:, index[0]][value] for index, value in np.ndenumerate(prec)]
-    )
-
-    return true_values
@@ -1,290 +0,0 @@
-#!/usr/bin/python
-# coding: utf-8
-
-"""
-File parser for Chemstation files (*.ch)
-Basically a port of the matlab script at:
-https://github.com/chemplexity/chromatography/blob/master/Development/File%20Conversion/ImportAgilentFID.m
-
-This file is a standalone file to parse the binary files created by Chemstation
-
-I use it for file with version 130, genereted by an Agilent LC.
-"""
-
-import struct
-from struct import unpack
-import numpy as np
-
-# Constants used for binary file parsing
-ENDIAN = ">"
-STRING = ENDIAN + "{}s"
-UINT8 = ENDIAN + "B"
-UINT16 = ENDIAN + "H"
-INT16 = ENDIAN + "h"
-INT32 = ENDIAN + "i"
-UINT32 = ENDIAN + "I"
-
-
-def fread(fid, nelements, dtype):
-
-    """Equivalent to Matlab fread function"""
-
-    if dtype is str:
-        dt = np.uint8  # WARNING: assuming 8-bit ASCII for np.str!
-    else:
-        dt = dtype
-
-    data_array = np.fromfile(fid, dt, nelements)
-    data_array.shape = (nelements, 1)
-
-    return data_array
-
-
-def parse_utf16_string(file_, encoding="UTF16"):
-
-    """Parse a pascal type UTF16 encoded string from a binary file object"""
-
-    # First read the expected number of CHARACTERS
-    string_length = unpack(UINT8, file_.read(1))[0]
-    # Then read and decode
-    parsed = unpack(STRING.format(2 * string_length), file_.read(2 * string_length))
-    return parsed[0].decode(encoding)
-
-
-class cached_property(object):
-
-    """A property that is only computed once per instance and then replaces
-    itself with an ordinary attribute. Deleting the attribute resets the
-    property.
-
-    https://github.com/bottlepy/bottle/commit/fa7733e075da0d790d809aa3d2f53071897e6f76
-    """
-
-    def __init__(self, func):
-        self.__doc__ = getattr(func, "__doc__")
-        self.func = func
-
-    def __get__(self, obj, cls):
-        if obj is None:
-            return self
-        value = obj.__dict__[self.func.__name__] = self.func(obj)
-        return value
-
-
-class CHFile(object):
-
-    """Class that implementats the Agilent .ch file format version
-    130. Warning: Not all aspects of the file header is understood,
-    so there may and probably is information that is not parsed. See
-    _parse_header_status for an overview of which parts of the header
-    is understood.
-
-    Attributes:
-        values (numpy.array): The internsity values (y-value) or the
-            spectrum. The unit for the values is given in `metadata['units']`
-
-        metadata (dict): The extracted metadata
-
-        filepath (str): The filepath this object was loaded from
-    """
-
-    # Fields is a table of name, offset and type. Types 'x-time' and 'utf16'
-    # are specially handled, the rest are format arguments for struct unpack
-    fields = (
-        ("sequence_line_or_injection", 252, UINT16),
-        ("injection_or_sequence_line", 256, UINT16),
-        ("data_offset", 264, UINT32),
-        ("start_time", 282, "x-time"),
-        ("end_time", 286, "x-time"),
-        ("version_string", 326, "utf16"),
-        ("description", 347, "utf16"),
-        ("sample", 858, "utf16"),
-        ("operator", 1880, "utf16"),
-        ("date", 2391, "utf16"),
-        ("inlet", 2492, "utf16"),
-        ("instrument", 2533, "utf16"),
-        ("method", 2574, "utf16"),
-        ("software version", 3601, "utf16"),
-        ("software name", 3089, "utf16"),
-        ("software revision", 3802, "utf16"),
-        ("zero", 4110, INT32),
-        ("units", 4172, "utf16"),
-        ("detector", 4213, "utf16"),
-        ("yscaling", 4732, ENDIAN + "d"),
-    )
-
-    # The start position of the data
-    # Get it from metadata['data_offset'] * 512
-    data_start = 6144
-
-    # The versions of the file format supported by this implementation
-    supported_versions = {130}
-
-    def __init__(self, filepath):
-
-        self.filepath = filepath
-        self.metadata = {}
-        with open(self.filepath, "rb") as file_:
-            self._parse_header(file_)
-            self.values = self._parse_data(file_)
-
-    def _parse_header(self, file_):
-
-        """Parse the header"""
-
-        # Parse and check version
-        length = unpack(UINT8, file_.read(1))[0]
-        parsed = unpack(STRING.format(length), file_.read(length))
-        version = int(parsed[0])
-        if version not in self.supported_versions:
-            raise ValueError("Unsupported file version {}".format(version))
-        self.metadata["magic_number_version"] = version
-
-        # Parse all metadata fields
-        for name, offset, type_ in self.fields:
-            file_.seek(offset)
-            if type_ == "utf16":
-                self.metadata[name] = parse_utf16_string(file_)
-            elif type_ == "x-time":
-                self.metadata[name] = unpack(UINT32, file_.read(4))[0] / 60000
-            else:
-                self.metadata[name] = unpack(type_, file_.read(struct.calcsize(type_)))[
-                    0
-                ]
-
-    def _parse_header_status(self):
-
-        """Print known and unknown parts of the header"""
-
-        file_ = open(self.filepath, "rb")
-
-        print("Header parsing status")
-        # Map positions to fields for all the known fields
-        knowns = {item[1]: item for item in self.fields}
-        # A couple of places has a \x01 byte before a string, these we simply
-        # skip
-        skips = {325, 3600}
-        # Jump to after the magic number version
-        file_.seek(4)
-
-        # Initialize variables for unknown bytes
-        unknown_start = None
-        unknown_bytes = b""
-        # While we have not yet reached the data
-        while file_.tell() < self.data_start:
-            current_position = file_.tell()
-            # Just continue on skip bytes
-            if current_position in skips:
-                file_.read(1)
-                continue
-
-            # If we know about a data field that starts at this point
-            if current_position in knowns:
-                # If we have collected unknown bytes, print them out and reset
-                if unknown_bytes != b"":
-                    print(
-                        "Unknown at", unknown_start, repr(unknown_bytes.rstrip(b"\x00"))
-                    )
-                    unknown_bytes = b""
-                    unknown_start = None
-
-                # Print out the position, type, name and value of the known
-                # value
-                print("Known field at {: >4},".format(current_position), end=" ")
-                name, _, type_ = knowns[current_position]
-                if type_ == "x-time":
-                    print(
-                        'x-time, "{: <19}'.format(name + '"'),
-                        unpack(ENDIAN + "f", file_.read(4))[0] / 60000,
-                    )
-                elif type_ == "utf16":
-                    print(
-                        ' utf16, "{: <19}'.format(name + '"'), parse_utf16_string(file_)
-                    )
-                else:
-                    size = struct.calcsize(type_)
-                    print(
-                        '{: >6}, "{: <19}'.format(type_, name + '"'),
-                        unpack(type_, file_.read(size))[0],
-                    )
-
-            # We do not know about a data field at this position If we have
-            # already collected 4 zero bytes, assume that we are done with
-            # this unkonw field, print and reset
-            else:
-                if unknown_bytes[-4:] == b"\x00\x00\x00\x00":
-                    print(
-                        "Unknown at", unknown_start, repr(unknown_bytes.rstrip(b"\x00"))
-                    )
-                    unknown_bytes = b""
-                    unknown_start = None
-
-                # Read one byte and save it
-                one_byte = file_.read(1)
-                if unknown_bytes == b"":
-                    # Only start a new collection of unknown bytes, if this
-                    # byte is not a zero byte
-                    if one_byte != b"\x00":
-                        unknown_bytes = one_byte
-                        unknown_start = file_.tell() - 1
-                else:
-                    unknown_bytes += one_byte
-
-        file_.close()
-
-    def _parse_data(self, file_):
-
-        """Parse the data. Decompress the delta-encoded data, and scale them
-        with y-scaling"""
-
-        scaling = self.metadata["yscaling"]
-
-        # Go to the end of the file
-        file_.seek(0, 2)
-        stop = file_.tell()
-
-        # Go to the start point of the data
-        file_.seek(self.data_start)
-
-        signal = []
-
-        buff = [0, 0, 0, 0]
-
-        while file_.tell() < stop:
-
-            buff[0] = fread(file_, 1, INT16)[0][0]
-            buff[1] = buff[3]
-
-            if buff[0] << 12 == 0:
-                break
-
-            for i in range(buff[0] & 4095):
-
-                buff[2] = fread(file_, 1, INT16)[0][0]
-
-                if buff[2] != -32768:
-                    buff[1] = buff[1] + buff[2]
-                else:
-                    buff[1] = fread(file_, 1, INT32)[0][0]
-
-                signal.append(buff[1])
-
-                buff[3] = buff[1]
-
-        signal = np.array(signal)
-        signal = signal * scaling
-
-        return signal
-
-    @cached_property
-    def times(self):
-
-        """The time values (x-value) for the data set in minutes"""
-
-        return np.linspace(
-            self.metadata["start_time"], self.metadata["end_time"], len(self.values)
-        )
-
-
-if __name__ == "__main__":
-    CHFile("lcdiag.reg")