PyPI - S2Generator - Versions diffs - 0.0.1__tar.gz - Mend

S2Generator 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

S2Generator-0.0.1/LICENSE +21 -0
S2Generator-0.0.1/PKG-INFO +21 -0
S2Generator-0.0.1/README.md +1 -0
S2Generator-0.0.1/S2Generator/__init__.py +17 -0
S2Generator-0.0.1/S2Generator/base.py +326 -0
S2Generator-0.0.1/S2Generator/encoders.py +246 -0
S2Generator-0.0.1/S2Generator/generators.py +659 -0
S2Generator-0.0.1/S2Generator/params.py +126 -0
S2Generator-0.0.1/S2Generator/visualization.py +56 -0
S2Generator-0.0.1/S2Generator.egg-info/PKG-INFO +21 -0
S2Generator-0.0.1/S2Generator.egg-info/SOURCES.txt +14 -0
S2Generator-0.0.1/S2Generator.egg-info/dependency_links.txt +1 -0
S2Generator-0.0.1/S2Generator.egg-info/requires.txt +3 -0
S2Generator-0.0.1/S2Generator.egg-info/top_level.txt +1 -0
S2Generator-0.0.1/setup.cfg +4 -0
S2Generator-0.0.1/setup.py +33 -0

S2Generator-0.0.1/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2019 Cuixiaolong
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

S2Generator-0.0.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,21 @@
+Metadata-Version: 2.1
+Name: S2Generator
+Version: 0.0.1
+Summary: A series-symbol (S2) dual-modality data generation mechanism, enabling the unrestricted creation of high-quality time series data paired with corresponding symbolic representations.
+Home-page: https://github.com/wwhenxuan/S2Generator
+Author: whenxuan
+Author-email: wwhenxuan@gmail.com
+Keywords: Time Series,Data Generation,Complex System Modeling
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Science/Research
+Classifier: Topic :: Scientific/Engineering :: Mathematics
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+#  S2Generator

S2Generator-0.0.1/README.md ADDED Viewed

@@ -0,0 +1 @@
+# S2Generator

S2Generator-0.0.1/S2Generator/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025/01/23 17:37:24
+@author: Whenxuan Wang
+@email: wwhenxuan@gmail.com
+"""
+# The basic data structure of symbolic expressions
+from .base import Node, NodeList
+# Parameter control of S2 data generation
+from .params import Params
+# S2 Data Generator
+from .generators import Generator
+# Visualize the generated S2 object
+from .visualization import s2plot

S2Generator-0.0.1/S2Generator/base.py ADDED Viewed

@@ -0,0 +1,326 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025/01/23 18:25:07
+@author: Whenxuan Wang
+@email: wwhenxuan@gmail.com
+"""
+import numpy as np
+from numpy import ndarray
+import scipy.special
+from typing import Optional, Union, List
+from S2Generator.params import Params
+# Core operator table: operator name -> number of operands it takes
+operators_real = {
+    "add": 2,
+    "sub": 2,
+    "mul": 2,
+    "div": 2,
+    "abs": 1,
+    "inv": 1,
+    "sqrt": 1,
+    "log": 1,
+    "exp": 1,
+    "sin": 1,
+    "arcsin": 1,
+    "cos": 1,
+    "arccos": 1,
+    "tan": 1,
+    "arctan": 1,
+    "pow2": 1,
+    "pow3": 1,
+}
+# Additional operator table in the same name -> operand-count format
+operators_extra = {"pow": 2}
+# Leaf names that Node.val looks up as attributes of the numpy module
+# NOTE(review): numpy has no attribute named "CONSTANT", so evaluating such a leaf would fail — confirm it is always replaced before evaluation
+math_constants = ["e", "pi", "euler_gamma", "CONSTANT"]
+# Merge of both tables; on a key clash the entry from operators_extra wins
+all_operators = {**operators_real, **operators_extra}
+# Vocabulary entries that are neither operators nor constants
+# (presumably delimiters, padding and out-of-distribution placeholders — verify against the tokenizer that consumes them)
+SPECIAL_WORDS = [
+    "<EOS>",
+    "<X>",
+    "</X>",
+    "<Y>",
+    "</Y>",
+    "</POINTS>",
+    "<INPUT_PAD>",
+    "<OUTPUT_PAD>",
+    "<PAD>",
+    "(",
+    ")",
+    "SPECIAL",
+    "OOD_unary_op",
+    "OOD_binary_op",
+    "OOD_constant",
+]
+class Node(object):
+    """A node of the tree that represents one symbolic expression.
+    A leaf stores a variable name (``x_<i>``), the token ``rand``, the name of a
+    mathematical constant or a numeric literal. An internal node stores an
+    operator name and keeps its operands in ``children``.
+    """
+    def __init__(self, value: Union[str, int], params: "Params", children: list = None) -> None:
+        # The operator name, variable name or literal stored in the current node
+        self.value = value
+        # The operands of the current node; a falsy argument yields a fresh empty list
+        self.children = children if children else []
+        # Generation parameters; this class only reads ``use_abs`` from them
+        self.params = params
+    def push_child(self, child: "Node") -> None:
+        """Append ``child`` as the last operand of the current node"""
+        self.children.append(child)
+    def prefix(self) -> str:
+        """Return the comma-separated pre-order (prefix) traversal of the tree rooted here"""
+        return ",".join([str(self.value)] + [c.prefix() for c in self.children])
+    def qtree_prefix(self) -> str:
+        """Return the pre-order traversal as a bracketed ``[.$value$ ...]`` string"""
+        return "[.$" + str(self.value) + "$ " + "".join(c.qtree_prefix() for c in self.children) + "]"
+    def infix(self) -> str:
+        """Return the expression rooted here in infix notation.
+        Leaves print their value, unary operators print as ``f(arg)`` (``pow2`` and
+        ``pow3`` print as ``(arg)**2`` / ``(arg)**3``) and operators with several
+        operands print as ``(a op b)``.
+        """
+        name = str(self.value)
+        nb_children = len(self.children)
+        if nb_children == 0:
+            # str() also covers integer values, which have no ``lstrip`` method
+            return name
+        if nb_children == 1:
+            operand = self.children[0].infix()
+            if name == "pow2":
+                return "(" + operand + ")**2"
+            if name == "pow3":
+                return "(" + operand + ")**3"
+            # Output in the form of f(x), where f is a function such as sin or cos
+            return name + "(" + operand + ")"
+        # Several operands: place the operator between consecutive operands
+        return "(" + (" " + name + " ").join(c.infix() for c in self.children) + ")"
+    def val(self, x: ndarray, deterministic: Optional[bool] = True) -> ndarray:
+        """Evaluate the expression rooted at this node on the sample matrix ``x``.
+        :param x: 2-D array; the variable ``x_i`` reads the column ``x[:, i]``. It is never modified.
+        :param deterministic: when true, ``rand`` leaves evaluate to zeros, otherwise they draw
+            standard normal noise. The flag is forwarded to every sub-expression.
+        :return: one value per row of ``x``; invalid operations produce NaN entries.
+            A bare variable leaf returns a view of ``x``.
+        :raises AssertionError: if the operator is unknown here, in numpy and in scipy.special.
+        """
+        name = str(self.value)
+        def operand(index: int) -> ndarray:
+            # Forward the flag so that nested ``rand`` leaves honour it
+            return self.children[index].val(x, deterministic=deterministic)
+        def nans(length: int) -> ndarray:
+            return np.full((length,), np.nan)
+        if len(self.children) == 0:
+            # A leaf is a variable, a random constant, a named constant or a literal
+            if name.startswith("x_"):
+                _, dim = name.split("_")
+                return x[:, int(dim)]
+            if name == "rand":
+                if deterministic:
+                    return np.zeros((x.shape[0],))
+                return np.random.randn(x.shape[0])
+            if name in math_constants:
+                return getattr(np, name) * np.ones((x.shape[0],))
+            return float(self.value) * np.ones((x.shape[0],))
+        # Binary operators
+        if name == "add":
+            return operand(0) + operand(1)  # Addition
+        if name == "sub":
+            return operand(0) - operand(1)  # Subtraction
+        if name == "mul":
+            m1, m2 = operand(0), operand(1)  # Multiplication
+            try:
+                return m1 * m2
+            except Exception:
+                return nans(m1.shape[0])
+        if name == "pow":
+            m1, m2 = operand(0), operand(1)  # Exponentiation
+            try:
+                return np.power(m1, m2)
+            except Exception:
+                return nans(m1.shape[0])
+        if name == "max":
+            return np.maximum(operand(0), operand(1))  # Maximum
+        if name == "min":
+            return np.minimum(operand(0), operand(1))  # Minimum
+        if name == "div":
+            # The denominator is evaluated first, as before, so random draws keep their order.
+            # np.where builds a new array: the operand may be a view of ``x`` and must not be edited in place.
+            denominator = operand(1)
+            denominator = np.where(denominator == 0.0, np.nan, denominator)
+            try:
+                return operand(0) / denominator  # Division
+            except Exception:
+                return nans(denominator.shape[0])
+        # Unary operators
+        if name == "inv":
+            denominator = operand(0)
+            denominator = np.where(denominator == 0.0, np.nan, denominator)
+            try:
+                return 1 / denominator  # Reciprocal
+            except Exception:
+                return nans(denominator.shape[0])
+        if name == "log":
+            numerator = operand(0)
+            if self.params.use_abs:
+                # Use log(abs(.)) if specified
+                numerator = np.abs(numerator)
+            else:
+                # Non-positive inputs have no real logarithm
+                numerator = np.where(numerator <= 0.0, np.nan, numerator)
+            try:
+                return np.log(numerator)  # Logarithm
+            except Exception:
+                return nans(numerator.shape[0])
+        if name == "sqrt":
+            numerator = operand(0)
+            if self.params.use_abs:
+                # Use sqrt(abs(.)) if specified
+                numerator = np.abs(numerator)
+            else:
+                # Negative inputs have no real square root
+                numerator = np.where(numerator < 0.0, np.nan, numerator)
+            try:
+                return np.sqrt(numerator)  # Square root
+            except Exception:
+                return nans(numerator.shape[0])
+        if name == "pow2":
+            numerator = operand(0)
+            try:
+                return numerator ** 2  # Square
+            except Exception:
+                return nans(numerator.shape[0])
+        if name == "pow3":
+            numerator = operand(0)
+            try:
+                return numerator ** 3  # Cube
+            except Exception:
+                return nans(numerator.shape[0])
+        if name == "abs":
+            return np.abs(operand(0))  # Absolute value
+        if name == "sign":
+            return (operand(0) >= 0) * 2.0 - 1.0  # Sign function
+        if name == "step":
+            # Element-wise: keep positive entries, replace the rest by 0
+            # (a plain ``if`` on an array with several entries raises ValueError)
+            values = operand(0)
+            return np.where(values > 0, values, 0)
+        if name == "id":
+            return operand(0)  # Identity mapping
+        if name == "fresnel":
+            return scipy.special.fresnel(operand(0))[0]
+        if name.startswith("eval"):
+            # NOTE(review): the trailing character is passed on as a string and only element 0
+            # of the result is returned — confirm that this is the intended behaviour
+            n = name[-1]
+            return getattr(scipy.special, name[:-1])(n, operand(0))[0]
+        # Fall back to a numpy function, then to a scipy.special function, of the same name
+        fn = getattr(np, name, None)
+        if fn is not None:
+            try:
+                return fn(operand(0))
+            except Exception:
+                return nans(x.shape[0])
+        fn = getattr(scipy.special, name, None)
+        if fn is not None:
+            return fn(operand(0))
+        # Raised explicitly so that the check survives ``python -O``
+        raise AssertionError("Could not find function")
+    def get_recurrence_degree(self) -> int:
+        """Return the largest index ``i`` of any ``x_i`` leaf below this node (0 if there is none)"""
+        if len(self.children) == 0:
+            if str(self.value).startswith("x_"):
+                _, offset = str(self.value).split("_")
+                return max(0, int(offset))
+            return 0
+        return max(child.get_recurrence_degree() for child in self.children)
+    def replace_node_value(self, old_value: str, new_value: str) -> None:
+        """Replace every occurrence of ``old_value`` in the tree rooted here by ``new_value``"""
+        if self.value == old_value:
+            self.value = new_value
+        for child in self.children:
+            child.replace_node_value(old_value, new_value)
+    def __len__(self) -> int:
+        """Return the number of nodes in the tree rooted at the current node"""
+        return 1 + sum(len(c) for c in self.children)
+    def __str__(self) -> str:
+        # The infix form is the default printed representation
+        return self.infix()
+    def __repr__(self) -> str:
+        # Same text as str()
+        return str(self)
+class NodeList(object):
+    """Holds the root nodes of a multivariate symbolic expression, one tree per output column"""
+    def __init__(self, nodes: List[Node]) -> None:
+        # Keep the roots in a list of our own rather than the caller's container
+        self.nodes = list(nodes)
+        # The parameters are taken from the first tree
+        self.params = nodes[0].params
+    def infix(self) -> str:
+        """Return the infix form of every tree, joined by `` | ``"""
+        return " | ".join(tree.infix() for tree in self.nodes)
+    def prefix(self) -> str:
+        """Return the prefix form of every tree, joined by ``,|,``"""
+        return ",|,".join(tree.prefix() for tree in self.nodes)
+    def val(self, xs: ndarray, deterministic: Optional[bool] = True) -> ndarray:
+        """Evaluate every tree on ``xs`` and stack the results along a new last axis"""
+        columns = []
+        for tree in self.nodes:
+            # Each tree is evaluated on its own copy of the inputs
+            samples = tree.val(np.copy(xs), deterministic=deterministic)
+            columns.append(np.expand_dims(samples, -1))
+        return np.concatenate(columns, -1)
+    def replace_node_value(self, old_value: str, new_value: str) -> None:
+        """Replace ``old_value`` by ``new_value`` in every tree of the list"""
+        for tree in self.nodes:
+            tree.replace_node_value(old_value, new_value)
+    def __len__(self) -> int:
+        # Total number of nodes over all trees
+        return sum(map(len, self.nodes))
+    def __str__(self) -> str:
+        """Return the infix form of the whole multivariate expression"""
+        return self.infix()
+    def __repr__(self) -> str:
+        # Same text as str()
+        return str(self)

S2Generator-0.0.1/S2Generator/encoders.py ADDED Viewed

@@ -0,0 +1,246 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2025/01/23 17:37:24
+@author: Whenxuan Wang
+@email: wwhenxuan@gmail.com
+"""
+import numpy as np
+from numpy import ndarray
+from S2Generator.base import Node, NodeList
+from S2Generator.params import Params
+from typing import Union, List, Dict, Tuple
+class GeneralEncoder(object):
+    """Bundles the numeric encoder and the symbolic-expression encoder used for S2 data"""
+    def __init__(self, params: Params, symbols: List[str], all_operators: Dict[str, int]) -> None:
+        # ``all_operators`` maps an operator name to its arity (Equation._decode iterates ``range(arity)``)
+        # Numerical encoder: float arrays <-> sign / mantissa / exponent tokens
+        self.float_encoder = FloatSequences(params)
+        # Symbolic encoder; it reuses the numerical encoder for float constants
+        self.equation_encoder = Equation(params, symbols, self.float_encoder, all_operators)
+class FloatSequences(object):
+    """Converts floating-point numbers to and from sign / mantissa / exponent tokens"""
+    def __init__(self, params: Params) -> None:
+        # Number of digits after the decimal point in scientific notation
+        self.float_precision = params.float_precision
+        # Divisor used to derive the digit count of one mantissa token
+        self.mantissa_len = params.mantissa_len
+        # Largest absolute exponent that has a token of its own
+        self.max_exponent = params.max_exponent
+        # Number of digits carried by one mantissa token
+        self.base = (self.float_precision + 1) // self.mantissa_len
+        # Number of distinct mantissa tokens
+        self.max_token = 10 ** self.base
+        # Vocabulary: the two signs, zero-padded mantissa tokens, then exponent tokens
+        mantissa_tokens = ["N" + f"%0{self.base}d" % i for i in range(self.max_token)]
+        exponent_tokens = ["E" + str(i) for i in range(-self.max_exponent, self.max_exponent + 1)]
+        self.symbols = ["+", "-", *mantissa_tokens, *exponent_tokens]
+    def encode(self, values: ndarray) -> List:
+        """Encode an array of floats as tokens.
+        A 1-D array gives one flat token list; any other array gives a list with
+        one encoding per slice along the first axis.
+        """
+        if len(values.shape) != 1:
+            # Encode every slice along the first axis on its own
+            encoded = [self.encode(values[0])]
+            for row in range(1, values.shape[0]):
+                encoded.append(self.encode(values[row]))
+            return encoded
+        digits_after_point = self.float_precision
+        tokens_out = []
+        for number in values:
+            assert number not in [-np.inf, np.inf]  # Infinite values cannot be encoded
+            sign_token = "+" if number >= 0 else "-"
+            # Scientific notation, split into mantissa and exponent text
+            mantissa, exponent = (f"%.{digits_after_point}e" % number).split("e")
+            whole, fraction = mantissa.lstrip("-").split(".")
+            digit_tokens = chunks(whole + fraction, self.base)
+            # The mantissa is written as an integer, so the exponent shifts by the precision
+            shifted = int(exponent) - digits_after_point
+            if shifted < -self.max_exponent:
+                # Too small to represent: emit an all-zero mantissa with exponent 0
+                digit_tokens = ["0" * self.base] * self.mantissa_len
+                shifted = int(0)
+            tokens_out.append(sign_token)
+            tokens_out.extend("N" + token for token in digit_tokens)
+            tokens_out.append("E" + str(shifted))
+        return tokens_out
+    def decode(self, lst: List):
+        """Decode a flat token list into a list of floats.
+        Returns ``None`` for an empty list and ``np.nan`` as soon as a token has an
+        unexpected first character; a group that cannot be parsed decodes to NaN.
+        """
+        if len(lst) == 0:
+            # Nothing to decode
+            return None
+        decoded = []
+        for group in chunks(lst, 2 + self.mantissa_len):
+            # One group is expected to hold a sign, the mantissa tokens and an exponent
+            if any(token[0] not in ["-", "+", "E", "N"] for token in group):
+                return np.nan
+            try:
+                sign = 1 if group[0] == "+" else -1
+                # Drop the one-letter prefix of every mantissa token and glue the digits together
+                mantissa = int("".join(token[1:] for token in group[1:-1]))
+                exponent = int(group[-1][1:])
+                number = float(sign * mantissa * (10 ** exponent))
+            except Exception:
+                number = np.nan
+            decoded.append(number)
+        return decoded
+class Equation(object):
+    """Encoder that converts symbolic expression trees to token lists and back"""
+    def __init__(self, params: "Params", symbols: List[str], float_encoder: "FloatSequences",
+                 all_operators: Dict[str, int]) -> None:
+        self.params = params
+        # Base used by write_int / parse_int for the digits of an integer
+        self.max_int = self.params.max_int
+        # Tokens that decode to a leaf node as they are
+        self.symbols = symbols
+        # Optional comma-separated lists of additional operator names
+        if params.extra_unary_operators != "":
+            self.extra_unary_operators = self.params.extra_unary_operators.split(",")
+        else:
+            self.extra_unary_operators = []
+        if params.extra_binary_operators != "":
+            self.extra_binary_operators = self.params.extra_binary_operators.split(",")
+        else:
+            self.extra_binary_operators = []
+        # Encoder used for float constants
+        self.float_encoder = float_encoder
+        # Operator name -> arity (number of operands)
+        self.all_operators = all_operators
+    def encode(self, tree: "NodeList") -> List[str]:
+        """Encode the prefix form of ``tree`` as a list of tokens.
+        Integer literals go through write_int, other numbers through the float
+        encoder, and every other element is kept as it is.
+        """
+        res = []
+        for elem in tree.prefix().split(","):
+            # The whole conversion stays inside the try block on purpose: a ValueError
+            # raised while encoding a number also falls back to the raw element
+            try:
+                val = float(elem)
+                if elem.lstrip("-").isdigit():
+                    # Add the encoding of the integer
+                    res.extend(self.write_int(int(elem)))
+                else:
+                    # Encode using the numerical encoder
+                    res.extend(self.float_encoder.encode(np.array([val])))
+            except ValueError:
+                res.append(elem)
+        return res
+    def decode(self, lst):
+        """Decode a token list into a NodeList; ``|`` tokens separate the trees.
+        Returns ``None`` as soon as one tree cannot be decoded.
+        """
+        trees = []
+        for tokens in self.split_at_value(lst, "|"):
+            tree = self._decode(tokens)[0]
+            if tree is None:
+                return None
+            trees.append(tree)
+        return NodeList(trees)
+    def _decode(self, lst: List) -> Tuple[Union["Node", None], int]:
+        """Decode the expression at the start of ``lst``.
+        :return: ``(node, number of tokens consumed)``; the node is ``None`` on failure.
+        """
+        if len(lst) == 0:
+            return None, 0
+        elif "OOD" in lst[0]:
+            # Out-of-distribution placeholders cannot be decoded
+            return None, 0
+        elif lst[0] in self.all_operators.keys():
+            res = Node(lst[0], self.params)
+            arity = self.all_operators[lst[0]]
+            pos = 1
+            for _ in range(arity):
+                # Every operand starts where the previous one ended
+                child, length = self._decode(lst[pos:])
+                if child is None:
+                    return None, pos
+                res.push_child(child)
+                pos += length
+            return res, pos
+        elif lst[0].startswith("INT"):
+            val, length = self.parse_int(lst)
+            return Node(str(val), self.params), length
+        elif lst[0] == "+" or lst[0] == "-":
+            # A float spans one sign token, ``mantissa_len`` mantissa tokens and one
+            # exponent token, which is the group size FloatSequences.decode works with
+            length = 2 + self.float_encoder.mantissa_len
+            try:
+                val = self.float_encoder.decode(lst[:length])[0]
+            except Exception:
+                return None, 0
+            return Node(str(val), self.params), length
+        elif (
+                lst[0].startswith("CONSTANT") or lst[0] == "y"
+        ):  # Added this manually, be careful!!
+            return Node(lst[0], self.params), 1
+        elif lst[0] in self.symbols:
+            return Node(lst[0], self.params), 1
+        else:
+            try:
+                float(lst[0])  # If number, return leaf
+            except ValueError:
+                # Neither a known token nor a number
+                return None, 0
+            return Node(lst[0], self.params), 1
+    @staticmethod
+    def split_at_value(lst: List, value: str) -> List:
+        """Split ``lst`` at every element equal to ``value``; the separators are dropped"""
+        res = []
+        start = 0
+        for i, x in enumerate(lst):
+            if x == value:
+                res.append(lst[start:i])
+                start = i + 1
+        # The remainder after the last separator (the whole list if there is none)
+        res.append(lst[start:])
+        return res
+    def parse_int(self, lst: List) -> Tuple[int, int]:
+        """
+        Parse a list that starts with an integer.
+        Return the integer value, and the position it ends in the list.
+        """
+        base = self.max_int
+        val = 0
+        i = 0
+        # lst[0] is the sign token; the digit tokens follow it
+        for x in lst[1:]:
+            if not (x.rstrip("-").isdigit()):
+                break
+            val = val * base + int(x)
+            i += 1
+        if base > 0 and lst[0] == "INT-":
+            val = -val
+        return val, i + 1
+    def write_int(self, val: int) -> List[str]:
+        """Convert a decimal integer to a representation in the given base.
+        Returns ``[str(val)]`` when ``params.use_sympy`` is false, otherwise a sign
+        token (``INT+`` / ``INT-``) followed by the digits, most significant first.
+        """
+        if not self.params.use_sympy:
+            return [str(val)]
+        base = self.max_int
+        res = []
+        max_digit = abs(base)
+        neg = val < 0
+        val = -val if neg else val
+        while True:
+            rem = val % base
+            val = val // base
+            if rem < 0 or rem > max_digit:
+                rem -= base
+                val += 1
+            res.append(str(rem))
+            if val == 0:
+                break
+        res.append("INT-" if neg else "INT+")
+        # Digits were collected least significant first
+        return res[::-1]
+def chunks(lst: List, n: int) -> List:
+    """Lazily yield consecutive slices of ``lst`` holding ``n`` items each; the last slice may be shorter."""
+    # Works on anything that supports len() and slicing, e.g. lists and strings
+    yield from (lst[start: start + n] for start in range(0, len(lst), n))