quickseries 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quickseries-0.2.1/LICENSE +28 -0
- quickseries-0.2.1/PKG-INFO +16 -0
- quickseries-0.2.1/README.md +234 -0
- quickseries-0.2.1/quickseries/__init__.py +4 -0
- quickseries-0.2.1/quickseries/approximate.py +319 -0
- quickseries-0.2.1/quickseries/benchmark.py +108 -0
- quickseries-0.2.1/quickseries/expansions.py +126 -0
- quickseries-0.2.1/quickseries/simplefit.py +65 -0
- quickseries-0.2.1/quickseries/sourceutils.py +133 -0
- quickseries-0.2.1/quickseries/sputils.py +26 -0
- quickseries-0.2.1/quickseries.egg-info/PKG-INFO +16 -0
- quickseries-0.2.1/quickseries.egg-info/SOURCES.txt +15 -0
- quickseries-0.2.1/quickseries.egg-info/dependency_links.txt +1 -0
- quickseries-0.2.1/quickseries.egg-info/requires.txt +10 -0
- quickseries-0.2.1/quickseries.egg-info/top_level.txt +1 -0
- quickseries-0.2.1/setup.cfg +4 -0
- quickseries-0.2.1/setup.py +13 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023, Million Concepts
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
|
13
|
+
and/or other materials provided with the distribution.
|
|
14
|
+
|
|
15
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
16
|
+
contributors may be used to endorse or promote products derived from
|
|
17
|
+
this software without specific prior written permission.
|
|
18
|
+
|
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
20
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
21
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
23
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: quickseries
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Home-page: https://github.com/millionconcepts/quickseries.git
|
|
5
|
+
Author: Michael St. Clair
|
|
6
|
+
Author-email: mstclair@millionconcepts.com
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: dustgoggles
|
|
10
|
+
Requires-Dist: numpy
|
|
11
|
+
Requires-Dist: scipy
|
|
12
|
+
Requires-Dist: sympy
|
|
13
|
+
Provides-Extra: jit
|
|
14
|
+
Requires-Dist: numba; extra == "jit"
|
|
15
|
+
Provides-Extra: tests
|
|
16
|
+
Requires-Dist: pytest; extra == "tests"
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
# quickseries
|
|
2
|
+
|
|
3
|
+
`quickseries` generates Python functions that perform fast vectorized power
|
|
4
|
+
series approximations of mathematical functions. It can provide performance
|
|
5
|
+
improvements ranging from ~3x (simple functions, no fiddling around with
|
|
6
|
+
parameters) to ~100x (complicated functions, some parameter tuning).
|
|
7
|
+
|
|
8
|
+
`quickseries` is in beta; bug reports are appreciated.
|
|
9
|
+
|
|
10
|
+
Install from source using `pip install .`. Dependencies are also described
|
|
11
|
+
in a Conda `environment.yml` file.
|
|
12
|
+
|
|
13
|
+
The minimum supported version of Python is *3.11*.
|
|
14
|
+
|
|
15
|
+
## example of use
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
>>> import numpy as np
|
|
19
|
+
>>> from quickseries import quickseries
|
|
20
|
+
|
|
21
|
+
>>> bounds = (-np.pi, np.pi)
|
|
22
|
+
>>> approx = quickseries("sin(x)*cos(x)", point=0, order=12, bounds=bounds)
|
|
23
|
+
>>> x = np.linspace(*bounds, 100000)
|
|
24
|
+
>>> print(f"max error: {max(abs(np.sin(x) * np.cos(x) - approx(x)))}")
|
|
25
|
+
>>> print("original runtime:")
|
|
26
|
+
>>> %timeit np.sin(x) * np.cos(x)
|
|
27
|
+
>>> print("approx runtime:")
|
|
28
|
+
>>> %timeit approx(x)
|
|
29
|
+
|
|
30
|
+
max error: 0.0003270875375037813
|
|
31
|
+
original runtime:
|
|
32
|
+
968 µs ± 2.17 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
|
|
33
|
+
approx runtime:
|
|
34
|
+
325 µs ± 3.89 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
## usage notes
|
|
39
|
+
|
|
40
|
+
### features
|
|
41
|
+
|
|
42
|
+
* The most important keyword arguments to `quickseries` are `bounds`,
|
|
43
|
+
`nterms`, and `point`. `bounds` specifies the range (or ranges, for
|
|
44
|
+
multivariate functions) of values across which to approximate the function.
|
|
45
|
+
`nterms` specifies how many terms to use in the series expansion. `point`
|
|
46
|
+
specifies the value (or values, for multivariate functions) about which
|
|
47
|
+
to generate the series expansion. See "limitations" and "tips" below for
|
|
48
|
+
examples and discussion.
|
|
49
|
+
* `quickseries()` is capable of auto-jitting the functions it generates
|
|
50
|
+
with `numba`. Pass the `jit=True` argument. `numba` is an optional dependency;
|
|
51
|
+
install it with your preferred package manager.
|
|
52
|
+
* `quickseries.benchmark()` offers an easy way to test the accuracy and
|
|
53
|
+
efficiency of `quickseries.quickseries()`-generated functions.
|
|
54
|
+
* By default, `quickseries()` caches the code it generates. If you wish to
|
|
55
|
+
turn this behavior off, pass `cache=False`.
|
|
56
|
+
* If you call `quickseries()` with the same arguments from separate modules,
|
|
57
|
+
it will write separate caches for each module.
|
|
58
|
+
* ipython/Jupyter shells/kernels all share one cache within the same user
|
|
59
|
+
account.
|
|
60
|
+
* `quickseries()` treats stdin or similar 'anonymous' invocation contexts
|
|
61
|
+
like modules named "__quickseries_anonymous_caller_cache__" in the current
|
|
62
|
+
working directory.
|
|
63
|
+
* In this mode, `quickseries()` also caches any results of `numba` JIT
|
|
64
|
+
compilation.
|
|
65
|
+
* Caching is turned _off_ by default for `benchmark()`.
|
|
66
|
+
* If you pass the `precision` argument to `quickseries()`, it will attempt to
|
|
67
|
+
guarantee that the function it returns will not cast input values to bit widths
|
|
68
|
+
greater than the value of `precision`. Legal values of `precision` are 16, 32,
|
|
69
|
+
and 64. The returned function will not, however, attempt to reduce the precision
|
|
70
|
+
of its arguments. For instance, `quickseries("sin(x) + exp(x)", precision=32)`
|
|
71
|
+
will return a Python `float` if passed an `float`, and a `np.float64` `ndarray`
|
|
72
|
+
if passed a `np.float64` `ndarray`. However, it will return a `np.float32`
|
|
73
|
+
`ndarray` if passed a `np.float32` `ndarray`, which is not guaranteed without
|
|
74
|
+
the `precision=32` argument.
|
|
75
|
+
|
|
76
|
+
### argument naming
|
|
77
|
+
|
|
78
|
+
* Multivariate `quickseries()`-generated functions always map positional arguments
|
|
79
|
+
to variables in the string representation of the input function in alphanumeric
|
|
80
|
+
order. This is in order to maintain consistency between slightly different
|
|
81
|
+
forms of the same expression.
|
|
82
|
+
* Examples:
|
|
83
|
+
* `quickseries("cos(x) * sin(y)")(1, 2)` approximates `sin(1) * cos(2)`
|
|
84
|
+
* `quickseries("sin(y) * cos(x)")(1, 2)` approximates `cos(1) * sin(2)`
|
|
85
|
+
* `quickseries("sin(x) * cos(y)")(1, 2)` approximates `sin(1) * cos(2)`
|
|
86
|
+
* Note that you can always determine the argument order of a `quickseries()`-
|
|
87
|
+
generated function by using the `help()` builtin, `inspect.getfullargspec()`,
|
|
88
|
+
examining the function's docstring, etc.
|
|
89
|
+
* Most legal Python variable names are allowable names for free variables.
|
|
90
|
+
Named mathematical functions and constants are the major exceptions.
|
|
91
|
+
* Examples:
|
|
92
|
+
* `"ln(_)"`, `"ln(One_kitty)"`, `"ln(x0)"`, and `"ln(ă)"` will all work fine.
|
|
93
|
+
* `"ln(if)"` and `"ln(🔥)"` will both fail, because `if` and `🔥` are not
|
|
94
|
+
legal Python variable names.
|
|
95
|
+
* `"ln(gamma)"` will fail, because `quickseries()` will interpret "gamma"
|
|
96
|
+
as the gamma function.
|
|
97
|
+
* `"cos(x) * cos(pi * 2)"` will succeed, but `quickseries()` will interpret
|
|
98
|
+
it as "the cosine of a variable named 'x' times the cosine of two times
|
|
99
|
+
the mathematical constant pi" -- in other words, as `"cos(x)"`.
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
### limitations
|
|
103
|
+
|
|
104
|
+
* `quickseries` only works for functions ℝ<sup>_n_</sup>🡒ℝ for finite _n_. In
|
|
105
|
+
programming terms, this means it will only produce functions that accept a
|
|
106
|
+
fixed number of floating-point or integer arguments (which may be 'arraylike'
|
|
107
|
+
objects such as pandas `Series` or numpy `ndarrays`) and return a single
|
|
108
|
+
floating-point value (or a 1-D floating-point array if passed arraylike
|
|
109
|
+
arguments).
|
|
110
|
+
* `quickseries` only works consistently on functions that are continuous and
|
|
111
|
+
infinitely differentiable within the domain of interest. Specifically, they
|
|
112
|
+
should not have singularities, discontinuities, or infinite / undefined
|
|
113
|
+
values at `point` or within `bounds`. Failure cases differ:
|
|
114
|
+
* `quickseries` will always fail on functions that are infinite/undefined
|
|
115
|
+
at `point`, like `quickseries("ln(x)", point=-1)`.
|
|
116
|
+
* It will almost always fail on functions with a largeish interval of
|
|
117
|
+
infinite/undefined values within `bounds`, such as
|
|
118
|
+
`quickseries("gamma(x + y)", bounds=((-1.1, 0), (0, 1)), point=(-0.5, 0))`.
|
|
119
|
+
* It will usually succeed but produce bad results on functions with
|
|
120
|
+
singularities or point discontinuities within `bounds` or
|
|
121
|
+
near `point` but not at `point`, such as `quickseries("tan(x)", bounds=(1, 2))`.
|
|
122
|
+
* It will often succeed, but usually produce bad results, on univariate
|
|
123
|
+
functions that are continuous but not differentiable at `point`, such as
|
|
124
|
+
`quickseries("abs(sin(x))", point=0)`. It will always fail on multivariate
|
|
125
|
+
functions of this kind.
|
|
126
|
+
* Functions given to `quickseries` must be expressed in strict closed form
|
|
127
|
+
and include only finite terms. They cannot contain limits, integrals,
|
|
128
|
+
derivatives, summations, continued fractions, etc.
|
|
129
|
+
* `quickseries` is not guaranteed to work for all such functions.
|
|
130
|
+
|
|
131
|
+
### tips
|
|
132
|
+
|
|
133
|
+
* Narrowing `bounds` will tend to make the approximation more accurate within
|
|
134
|
+
those bounds. In the example at the top of this README, setting `bounds` to
|
|
135
|
+
`(-1, 1)` provides ~20x greater accuracy within the (-1, 1) interval (with
|
|
136
|
+
the downside that the resulting approximation will get pretty bad past about
|
|
137
|
+
+/-pi/2).
|
|
138
|
+
* Like many optimizers, `quickseries()` tends to be much more effective
|
|
139
|
+
closer to 0 and when its input arguments have similar orders of
|
|
140
|
+
magnitude. If it is practical to shift/squeeze your data towards 0, you
|
|
141
|
+
may be able to get more use out of `quickseries`. One of the biggest reasons
|
|
142
|
+
for this is that high-order polynomials are more numerically stable with
|
|
143
|
+
smaller input values.
|
|
144
|
+
* Functions with a pole at 0 can of course present an exception to this
|
|
145
|
+
rule. It will still generally be better to keep their input values small.
|
|
146
|
+
* Increasing `nterms` will tend to make the approximation slower but more
|
|
147
|
+
accurate. In the example above, increasing `nterms` to 14 provides ~20x
|
|
148
|
+
greater accuracy but makes the approximation ~20% slower.
|
|
149
|
+
* This tends to have diminishing returns. In the example above, increasing
|
|
150
|
+
`nterms` to 30 provides no meaningful increase in accuracy over `order=14`,
|
|
151
|
+
but makes the approximation *slower* than `np.sin(x) * np.cos(x)`.
|
|
152
|
+
* Setting `nterms` too high can also cause the approximation algorithm to
|
|
153
|
+
fail entirely.
|
|
154
|
+
* For most functions, placing `point` in the middle of `bounds` will produce the
|
|
155
|
+
best results, and if you don't pass `point` at all, `quickseries` defaults to
|
|
156
|
+
placing it in the middle of `bounds`.
|
|
157
|
+
* The location of accuracy/performance "sweet spots" in the parameter space
|
|
158
|
+
depends on the function and the approximation bounds. If you want to
|
|
159
|
+
seriously optimize a particular function in a particular interval, you will
|
|
160
|
+
need to play around with these parameters.
|
|
161
|
+
* The speedup (or lack thereof) that a `quickseries()`-generated approximation
|
|
162
|
+
provides can vary greatly in different operating environments and on different
|
|
163
|
+
processors.
|
|
164
|
+
* It can also vary depending on the length of the input arguments. It generally
|
|
165
|
+
provides most benefit on arrays with tens or hundreds of thousands of elements,
|
|
166
|
+
although this again varies depending on operating environment, the particular
|
|
167
|
+
approximated function, etc.
|
|
168
|
+
* In general, `quickseries` provides more performance benefits for more 'complicated'
|
|
169
|
+
input functions. This is due to the implicit 'simplification' offered by the
|
|
170
|
+
power series expansion.
|
|
171
|
+
* It is often difficult to generate a polynomial approximation that
|
|
172
|
+
remains good across a wide range of input values. In some cases, it may be
|
|
173
|
+
useful to generate different functions for different parts of your code, or
|
|
174
|
+
even to perform piecewise operations with multiple functions (although this
|
|
175
|
+
of course adds complexity and overhead).
|
|
176
|
+
* By default, if you pass a simple polynomial expression to `quickseries()`
|
|
177
|
+
(e.g. `"x**4 + 2 * x**3"`), it does not actually generate an approximation,
|
|
178
|
+
but instead simply attempts to rewrite it in a more efficient form.
|
|
179
|
+
* `nterms`, `bounds`, and `point` are ignored in this "rewrite" mode.
|
|
180
|
+
* This type of `quickseries()`-generated function should produce the same
|
|
181
|
+
results as any other Python function that straightforwardly implements a
|
|
182
|
+
form of the input polynomial (down to floating-point error).
|
|
183
|
+
* This can produce surprising speedups even in simple cases -- for example,
|
|
184
|
+
`quickseries("x**4")` is ~20x faster than `lambda x: x ** 4` on some
|
|
185
|
+
`numpy` arrays.
|
|
186
|
+
* If you want `quickseries()` to actually create an approximation of a
|
|
187
|
+
simple polynomial, pass `approx_poly=True`.
|
|
188
|
+
* When approximating a polynomial, there is generally no good reason to
|
|
189
|
+
set `nterms` > that polynomial's order. If you do, the function
|
|
190
|
+
`quickseries()` generates will typically be very similar to a simple
|
|
191
|
+
rewrite of the input polynomial, but with slightly worse performance and
|
|
192
|
+
accuracy.
|
|
193
|
+
* `point=0` often produces boring results for polynomial approximation.
|
|
194
|
+
* In many, but not all, cases, `jit=True` will provide a significant performance
|
|
195
|
+
improvement, sometimes by an order of magnitude. It also permits calling
|
|
196
|
+
`quickseries`-generated functions from within other `numba`-compiled
|
|
197
|
+
functions.
|
|
198
|
+
* Note that some functions may not be compatible with `numba`.
|
|
199
|
+
* `quickseries` tends to be most effective on univariate functions, mostly
|
|
200
|
+
because the number of terms in a function's power expansion increases
|
|
201
|
+
geometrically with its number of free parameters.
|
|
202
|
+
* Functions generated by `quickseries()` may in some cases be less
|
|
203
|
+
space/memory-efficient even if they are more time/compute-efficient.
|
|
204
|
+
* By default, `quickseries` takes the analytic series expansion of the input
|
|
205
|
+
function as a strong suggestion rather than the last word on the topic, and
|
|
206
|
+
performs a numerical optimization step to improve its goodness of fit across
|
|
207
|
+
`bounds`. There are good reasons you might not want it to do this, though --
|
|
208
|
+
for instance, if your input arguments are always going to be quite close to
|
|
209
|
+
`point`, messing with the analytic series expansion may be wasteful or even
|
|
210
|
+
counterproductive. If you don't want it to do this, pass `fit_series_expansion=False`.
|
|
211
|
+
In this case, `quickseries` ignores the `bounds` argument, except to infer
|
|
212
|
+
a value for `point` if you do not specify one.
|
|
213
|
+
* In some cases, this optimization step can become numerically unstable. In
|
|
214
|
+
these cases, you may wish to experiment with constraining it rather than
|
|
215
|
+
turning it off completely. You can do this by passing `bound_series_fit=True`.
|
|
216
|
+
* By default, the functions that `quickseries` generates precompute all repeated
|
|
217
|
+
exponents in the generated polynomial. This is a space-for-time trade, and
|
|
218
|
+
may not always be desirable (or even effective). You can turn this off by
|
|
219
|
+
passing `prefactor=False`.
|
|
220
|
+
* If `jit=True`, `quickseries` does _not_ do this by default. The `numba`
|
|
221
|
+
compiler implicitly performs a similar optimization, and computing these
|
|
222
|
+
terms explicitly tends to be counterproductive. If you want `quickseries`
|
|
223
|
+
to do it anyway, you can pass `prefactor=True`.
|
|
224
|
+
* Specifying `precision` can lead to significant speedups and memory usage
|
|
225
|
+
improvements.
|
|
226
|
+
* Many libraries and formats do not support the "half-float" values generated
|
|
227
|
+
by `quickseries` when passed `precision=16`.
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
## tests
|
|
231
|
+
|
|
232
|
+
`quickseries` has a few simple tests. You can run them by executing `pytest`
|
|
233
|
+
in the repository's root directory. More comprehensive test coverage is
|
|
234
|
+
planned.
|
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from inspect import getfullargspec, signature
|
|
3
|
+
from itertools import chain
|
|
4
|
+
from typing import Literal, Optional, Sequence, Union, Collection
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import sympy as sp
|
|
8
|
+
from cytoolz import groupby
|
|
9
|
+
from dustgoggles.func import gmap
|
|
10
|
+
|
|
11
|
+
from quickseries.expansions import multivariate_taylor, series_lambda
|
|
12
|
+
from quickseries.simplefit import fit
|
|
13
|
+
from quickseries.sourceutils import (
|
|
14
|
+
_cacheget, _cacheid, _finalize_quickseries, lastline
|
|
15
|
+
)
|
|
16
|
+
from quickseries.sputils import LmSig, lambdify
|
|
17
|
+
|
|
18
|
+
"""signature of sympy-lambdified numpy/scipy functions"""
|
|
19
|
+
|
|
20
|
+
EXP_PATTERN = re.compile(r"\w+ ?\*\* ?(\d+)")
|
|
21
|
+
"""what exponentials in sympy-lambdified functions look like"""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def is_simple_poly(expr: sp.Expr) -> bool:
|
|
25
|
+
gens = sp.poly_from_expr(expr)[1]["gens"]
|
|
26
|
+
return all(isinstance(g, sp.Symbol) for g in gens)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def regexponents(text: str) -> tuple[int]:
|
|
30
|
+
# noinspection PyTypeChecker
|
|
31
|
+
return tuple(map(int, re.findall(EXP_PATTERN, text)))
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _decompose(
|
|
35
|
+
remaining: tuple[str],
|
|
36
|
+
reduced: set[str],
|
|
37
|
+
replacements: list[tuple[int, list[int]]]
|
|
38
|
+
) -> bool:
|
|
39
|
+
if len(remaining) == 1: # trivial case
|
|
40
|
+
replacements[0][1][:] = [1 for _ in range(replacements[0][0])]
|
|
41
|
+
return True
|
|
42
|
+
counts = {
|
|
43
|
+
k: len(v)
|
|
44
|
+
for k, v in groupby(lambda x: x, remaining).items()
|
|
45
|
+
if k not in reduced
|
|
46
|
+
}
|
|
47
|
+
if len(counts) < 2: # nothing useful left to do
|
|
48
|
+
return True
|
|
49
|
+
elif counts[max(counts)] > 1:
|
|
50
|
+
# don't decompose the biggest factor; because it appears more than
|
|
51
|
+
# once, we'd like to precompute it
|
|
52
|
+
reduced.add(max(counts))
|
|
53
|
+
return False
|
|
54
|
+
# otherwise, do a decomposition pass with the smallest factor
|
|
55
|
+
factor = sorted(counts.keys())[0]
|
|
56
|
+
for k, v in replacements:
|
|
57
|
+
factorization = []
|
|
58
|
+
# "divide out" `factor` from elements of existing decomposition
|
|
59
|
+
for f in v:
|
|
60
|
+
# don't decompose factors we've already evaluated, and don't
|
|
61
|
+
# try to divide `factor` out of smaller factors (nonsensical)
|
|
62
|
+
if f in reduced or f <= factor:
|
|
63
|
+
factorization.append(f)
|
|
64
|
+
continue
|
|
65
|
+
factorization.append(factor)
|
|
66
|
+
difference = f - factor
|
|
67
|
+
while difference >= max([e for e in remaining if e != f]):
|
|
68
|
+
factorization.append(factor)
|
|
69
|
+
difference = difference - factor
|
|
70
|
+
if difference > 0:
|
|
71
|
+
factorization.append(difference)
|
|
72
|
+
v[:] = factorization
|
|
73
|
+
reduced.add(factor)
|
|
74
|
+
return False
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def optimize_exponents(
|
|
78
|
+
exps: Sequence[int],
|
|
79
|
+
) -> tuple[dict[int, list[int]], dict[int, list[int]]]:
|
|
80
|
+
# list of tuples like: (power, [powers to use in decomposition])
|
|
81
|
+
replacements = [(e, [e]) for e in exps]
|
|
82
|
+
# which powers have we already assessed?
|
|
83
|
+
reduced = set()
|
|
84
|
+
# which powers haven't we?
|
|
85
|
+
remaining = tuple(chain(*[r[1] for r in replacements]))
|
|
86
|
+
# NOTE: _decompose() modifies remaning and reduced inplace
|
|
87
|
+
while _decompose(remaining, reduced, replacements) is False:
|
|
88
|
+
remaining = tuple(chain(*[r[1] for r in replacements]))
|
|
89
|
+
# this is analogous to casting to set: we no longer care about number of
|
|
90
|
+
# occurrences
|
|
91
|
+
replacements = {k: v for k, v in replacements}
|
|
92
|
+
# figure out which factors we'd like to predefine as variables, and what
|
|
93
|
+
# the "building blocks" of those variables are. 1 is a placeholder: we
|
|
94
|
+
# will never define it, but it's useful in this loop.
|
|
95
|
+
variables = {1: [1]}
|
|
96
|
+
for e in sorted(set(remaining)):
|
|
97
|
+
if e == 1:
|
|
98
|
+
continue
|
|
99
|
+
if exps.count(e) == 1:
|
|
100
|
+
if not any(k > e for k, v in replacements.items()):
|
|
101
|
+
continue
|
|
102
|
+
vfactor, remainder = [], e
|
|
103
|
+
while remainder > 0:
|
|
104
|
+
pick = max([v for v in variables.keys() if v <= remainder])
|
|
105
|
+
vfactor.append(pick)
|
|
106
|
+
remainder -= pick
|
|
107
|
+
variables[e] = vfactor
|
|
108
|
+
# remove the placeholder first-order variable
|
|
109
|
+
variables.pop(1)
|
|
110
|
+
return replacements, variables
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def force_line_precision(line: str, precision: Literal[16, 32, 64]) -> str:
|
|
114
|
+
constructor_rep = f"float{precision}"
|
|
115
|
+
constructor = getattr(np, f"float{precision}")
|
|
116
|
+
last, out = 0, ""
|
|
117
|
+
for match in re.finditer(
|
|
118
|
+
r"([+* (-]+|^)([\d.]+)(e[+\-]?\d+)?.*?([+* )]|$)", line
|
|
119
|
+
):
|
|
120
|
+
out += line[last : match.span()[0]]
|
|
121
|
+
# don't replace exponents
|
|
122
|
+
if match.group(1) == "**":
|
|
123
|
+
out += line[slice(*match.span())]
|
|
124
|
+
else:
|
|
125
|
+
# NOTE: casting number to string within the f-string statement
|
|
126
|
+
# appears to upcast it before generating the representation.
|
|
127
|
+
number = str(constructor(float(match.group(2))))
|
|
128
|
+
out += f"{match.group(1)}{constructor_rep}({number}"
|
|
129
|
+
if match.group(3) is not None: # scientific notation
|
|
130
|
+
out += match.group(3)
|
|
131
|
+
out += f"){match.group(4)}"
|
|
132
|
+
last = match.span()[1]
|
|
133
|
+
return out + line[last:]
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def rewrite(
|
|
137
|
+
poly_lambda: LmSig,
|
|
138
|
+
precompute: bool = True,
|
|
139
|
+
precision: Optional[Literal[16, 32, 64]] = None,
|
|
140
|
+
) -> str:
|
|
141
|
+
# sympy will always place this on a single line. it includes
|
|
142
|
+
# the Python expression form of the hornerized polynomial
|
|
143
|
+
# and a return statement. lastline() grabs polynomial and strips return.
|
|
144
|
+
polyexpr = lastline(poly_lambda)
|
|
145
|
+
# remove pointless '1.0' terms
|
|
146
|
+
polyexpr = re.sub(r"(?:\*+)?1\.0\*+", "", polyexpr)
|
|
147
|
+
# names of arguments to the lambdified function
|
|
148
|
+
free = getfullargspec(poly_lambda).args
|
|
149
|
+
lines = []
|
|
150
|
+
if precompute is True:
|
|
151
|
+
polyexpr, factorlines = _rewrite_precomputed(polyexpr, free)
|
|
152
|
+
lines += factorlines
|
|
153
|
+
if precision is not None:
|
|
154
|
+
polyexpr = force_line_precision(polyexpr, precision)
|
|
155
|
+
lines.append(f"return {polyexpr}")
|
|
156
|
+
_, key = _cacheid()
|
|
157
|
+
lines.insert(0, f"def {key}({', '.join(free)}):")
|
|
158
|
+
return "\n ".join(lines)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _rewrite_precomputed(
|
|
162
|
+
polyexpr: str, free: Collection[str]
|
|
163
|
+
) -> tuple[str, list[str]]:
|
|
164
|
+
# replacements: what factors we will decompose each exponent into
|
|
165
|
+
# free: which factors we will define as variables, and their
|
|
166
|
+
# "building blocks"
|
|
167
|
+
factorlines = []
|
|
168
|
+
for f in free:
|
|
169
|
+
expat = re.compile(rf"{f}+ ?\*\* ?(\d+)")
|
|
170
|
+
replacements, variables = optimize_exponents(
|
|
171
|
+
gmap(int, expat.findall(polyexpr))
|
|
172
|
+
)
|
|
173
|
+
for k, v in variables.items():
|
|
174
|
+
multiplicands = []
|
|
175
|
+
for power in v:
|
|
176
|
+
if power == 1:
|
|
177
|
+
multiplicands.append(f)
|
|
178
|
+
else:
|
|
179
|
+
multiplicands.append(f"{f}{power}")
|
|
180
|
+
factorlines.append(f"{f}{k} = {'*'.join(multiplicands)}")
|
|
181
|
+
for k, v in replacements.items():
|
|
182
|
+
substitution = "*".join([f"{f}{r}" if r != 1 else f for r in v])
|
|
183
|
+
polyexpr = polyexpr.replace(f"{f}**{k}", substitution)
|
|
184
|
+
return polyexpr, factorlines
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _pvec(
|
|
188
|
+
bounds: Sequence[tuple[float, float]], offset_resolution: int
|
|
189
|
+
) -> list[np.ndarray]:
|
|
190
|
+
axes = [np.linspace(*b, offset_resolution) for b in bounds]
|
|
191
|
+
indices = map(np.ravel, np.indices([offset_resolution for _ in bounds]))
|
|
192
|
+
return [j[i] for j, i in zip(axes, indices)]
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _perform_series_fit(
|
|
196
|
+
func: str | sp.Expr,
|
|
197
|
+
bounds: tuple[float, float] | Sequence[tuple[float, float]],
|
|
198
|
+
nterms: int,
|
|
199
|
+
fitres: int,
|
|
200
|
+
point: float | Sequence[float],
|
|
201
|
+
apply_bounds: bool,
|
|
202
|
+
is_poly: bool
|
|
203
|
+
) -> tuple[sp.Expr, np.ndarray]:
|
|
204
|
+
if (len(bounds) == 1) and (is_poly is False):
|
|
205
|
+
approx, expr = series_lambda(func, point[0], nterms, True)
|
|
206
|
+
else:
|
|
207
|
+
approx, expr = multivariate_taylor(func, point, nterms, True)
|
|
208
|
+
lamb, vecs = lambdify(func), _pvec(bounds, fitres)
|
|
209
|
+
try:
|
|
210
|
+
dep = lamb(*vecs)
|
|
211
|
+
except TypeError as err:
|
|
212
|
+
# this is a potentially slow but unavoidable case
|
|
213
|
+
if "converted to Python scalars" not in str(err):
|
|
214
|
+
raise
|
|
215
|
+
dep = np.array([lamb(v) for v in vecs])
|
|
216
|
+
guess = [1 for _ in range(len(signature(approx).parameters) - len(vecs))]
|
|
217
|
+
params, _ = fit(
|
|
218
|
+
func=approx,
|
|
219
|
+
vecs=vecs,
|
|
220
|
+
dependent_variable=dep,
|
|
221
|
+
guess=guess,
|
|
222
|
+
bounds=(-5, 5) if apply_bounds is True else None,
|
|
223
|
+
)
|
|
224
|
+
# insert coefficients into polynomial
|
|
225
|
+
expr = expr.subs({f"a_{i}": coef for i, coef in enumerate(params)})
|
|
226
|
+
return expr, params
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _makebounds(
|
|
230
|
+
bounds: Optional[Sequence[tuple[float, float]] | tuple[float, float]],
|
|
231
|
+
n_free: int,
|
|
232
|
+
point: Optional[Sequence[float] | float]
|
|
233
|
+
) -> tuple[list[tuple[float, float]], list[float]]:
|
|
234
|
+
bounds = (-1, 1) if bounds is None else bounds
|
|
235
|
+
if not isinstance(bounds[0], (list, tuple)):
|
|
236
|
+
bounds = [bounds for _ in range(n_free)]
|
|
237
|
+
if point is None:
|
|
238
|
+
point = [np.mean(b) for b in bounds]
|
|
239
|
+
elif not isinstance(point, (list, tuple)):
|
|
240
|
+
point = [point for _ in bounds]
|
|
241
|
+
return bounds, point
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _make_quickseries(
|
|
245
|
+
approx_poly: bool,
|
|
246
|
+
bound_series_fit: bool,
|
|
247
|
+
bounds: Optional[Sequence[tuple[float, float]] | tuple[float, float]],
|
|
248
|
+
expr: sp.Expr,
|
|
249
|
+
fit_series_expansion: bool,
|
|
250
|
+
fitres: int,
|
|
251
|
+
nterms: int,
|
|
252
|
+
point: Optional[Sequence[float] | float],
|
|
253
|
+
precision: Optional[Literal[16, 32, 64]],
|
|
254
|
+
prefactor: bool,
|
|
255
|
+
) -> dict[str, sp.Expr | np.ndarray | str]:
|
|
256
|
+
if len(expr.free_symbols) == 0:
|
|
257
|
+
raise ValueError("func must have at least one free variable.")
|
|
258
|
+
free = sorted(expr.free_symbols, key=lambda s: str(s))
|
|
259
|
+
bounds, point = _makebounds(bounds, len(free), point)
|
|
260
|
+
output, is_poly = {}, is_simple_poly(expr)
|
|
261
|
+
if (approx_poly is True) or (is_poly is False):
|
|
262
|
+
if fit_series_expansion is True:
|
|
263
|
+
expr, output["params"] = _perform_series_fit(
|
|
264
|
+
expr, bounds, nterms, fitres, point, bound_series_fit, is_poly
|
|
265
|
+
)
|
|
266
|
+
elif (len(free) > 1) or (is_poly is True):
|
|
267
|
+
_, expr = multivariate_taylor(expr, point, nterms, False)
|
|
268
|
+
else:
|
|
269
|
+
_, expr = series_lambda(expr, point[0], nterms, False)
|
|
270
|
+
# rewrite polynomial in horner form for fast evaluation
|
|
271
|
+
output["expr"] = sp.horner(expr)
|
|
272
|
+
polyfunc = sp.lambdify(free, output["expr"], ("scipy", "numpy"))
|
|
273
|
+
# polish it and optionally rewrite it to precompute repeated powers or
|
|
274
|
+
# force precision
|
|
275
|
+
return output | {"source": rewrite(polyfunc, prefactor, precision)}
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def quickseries(
    func: Union[str, sp.Expr],
    *,
    bounds: tuple[float, float] = (-1, 1),
    nterms: int = 9,
    point: Optional[float] = None,
    fitres: int = 100,
    prefactor: Optional[bool] = None,
    approx_poly: bool = False,
    jit: bool = False,
    precision: Optional[Literal[16, 32, 64]] = None,
    fit_series_expansion: bool = True,
    bound_series_fit: bool = False,
    extended_output: bool = False,
    cache: bool = True,
) -> Union[LmSig, tuple[LmSig, dict]]:
    """
    Build a fast polynomial approximation of `func` and return it as a
    compiled Python function.

    Args:
        func: function to approximate, as a string or sympy Expr.
        bounds: interval over which the approximation is constructed.
        nterms: order of the underlying series expansion.
        point: expansion point (None presumably selects a default
            downstream -- confirm in _make_quickseries).
        fitres: resolution of the fit -- assumed to be samples per
            dimension; TODO confirm against _perform_series_fit.
        prefactor: whether to "polish"/precompute repeated powers in the
            generated source; defaults to the opposite of `jit`.
        approx_poly: if True, approximate even exact polynomials.
        jit: if True, numba-JIT the generated function.
        precision: optional float precision to force in generated source.
        fit_series_expansion: if True, tune series coefficients by fitting.
        bound_series_fit: if True, constrain the coefficient fit.
        extended_output: if True, also return a dict of metadata
            (generated source, cache status, fit params, etc.).
        cache: if True, read/write the on-disk source cache.

    NOTE: the caching machinery in sourceutils locates this function's call
    frame by the name "quickseries" and reads its argument values
    (CACHE_ARGS) to build the cache key. Do not rename this function or its
    parameters, and do not rebind parameter locals before the cache helpers
    (_cacheget / _finalize_quickseries) run.
    """
    if not isinstance(func, (str, sp.Expr)):
        raise TypeError(f"Unsupported type for func {type(func)}.")
    # ext records provenance of the returned function for extended output
    polyfunc, ext = None, {"cache": "off"}
    if cache is True:
        # try to reuse previously-generated source for these arguments
        polyfunc, source = _cacheget(jit)
        if polyfunc is not None:
            ext |= {"source": source, "cache": "hit"}
        else:
            ext["cache"] = "miss"
    if polyfunc is None:
        # cache off or missed: construct the approximation from scratch.
        # NOTE: `func` is deliberately passed through an inline expression
        # rather than rebound -- _cachekey() hashes the original argument.
        ext |= _make_quickseries(
            approx_poly,
            bound_series_fit,
            bounds,
            func if isinstance(func, sp.Expr) else sp.sympify(func),
            fit_series_expansion,
            fitres,
            nterms,
            point,
            precision,
            prefactor if prefactor is not None else not jit
        )
        polyfunc = _finalize_quickseries(ext["source"], jit, cache)
    if extended_output is True:
        return polyfunc, ext
    return polyfunc
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import timeit
|
|
2
|
+
from inspect import getfullargspec
|
|
3
|
+
from itertools import product
|
|
4
|
+
from time import time
|
|
5
|
+
from typing import Union, Sequence, Optional
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import sympy as sp
|
|
9
|
+
from dustgoggles.func import gmap
|
|
10
|
+
|
|
11
|
+
from quickseries import quickseries
|
|
12
|
+
from quickseries.approximate import _makebounds
|
|
13
|
+
from quickseries.sputils import lambdify, LmSig
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _offset_check_cycle(
    absdiff: float,
    frange: tuple[float, float],
    lamb: LmSig,
    quick: LmSig,
    vecs: Sequence[np.ndarray],
    worstpoint: Optional[list[float]],
) -> tuple[float, float, float, tuple[float, float], list[float]]:
    """
    Evaluate one accuracy-probe pass: call both the approximation (`quick`)
    and the reference implementation (`lamb`) on `vecs`, and fold the
    resulting error statistics into the running `absdiff` / `frange` /
    `worstpoint` accumulators.

    Returns (worst absolute error so far, median error of this pass, mean
    squared error of this pass, updated reference-value range, coordinates
    of the worst point seen so far).
    """
    candidate = quick(*vecs)
    reference = lamb(*vecs)
    # widen the running range of reference values
    frange = (min(frange[0], reference.min()), max(frange[1], reference.max()))
    error = abs(candidate - reference)
    peak_ix = np.argmax(error)
    peak = error[peak_ix]
    if peak > absdiff:
        # new worst offender: remember its magnitude and coordinates
        absdiff = peak
        worstpoint = [v[peak_ix] for v in vecs]
    return absdiff, np.median(error), np.mean(error ** 2), frange, worstpoint
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def benchmark(
    func: Union[str, sp.Expr, sp.core.function.FunctionClass],
    offset_resolution: int = 10000,
    n_offset_shuffles: int = 50,
    timeit_cycles: int = 20000,
    testbounds="equal",
    cache: bool = False,
    **quickkwargs
) -> dict[str, sp.Expr | float | np.ndarray | str | list[float]]:
    """
    Compare a quickseries approximation of `func` against the directly
    lambdified original: accuracy statistics, per-call timing, and
    construction time.

    Args:
        func: function to benchmark, as a string, sympy Expr, or sympy
            function class.
        offset_resolution: number of sample points per input dimension.
        n_offset_shuffles: number of randomized re-samplings used to probe
            the input space in the multivariate case.
        timeit_cycles: number of calls used to time each implementation.
        testbounds: per-variable sampling bounds; "equal" means "use the
            same bounds quickseries() was fit against".
        cache: forwarded to quickseries() as its `cache` option.
        quickkwargs: additional keyword arguments for quickseries().

    Returns:
        dict of accuracy/timing statistics, the approximating function, and
        quickseries()'s extended output.
    """
    lamb = lambdify(func)
    compile_start = time()
    quick, ext = quickseries(
        func, **(quickkwargs | {'extended_output': True, 'cache': cache})
    )
    gentime = time() - compile_start
    if testbounds == "equal":
        testbounds, _ = _makebounds(
            quickkwargs.get("bounds"), len(getfullargspec(lamb).args), None
        )
    vecs = [np.linspace(*b, offset_resolution) for b in testbounds]
    if (pre := quickkwargs.get("precision")) is not None:
        vecs = gmap(
            lambda arr: arr.astype(getattr(np, f"float{pre}")), vecs
        )
    if len(testbounds) > 1:
        # always check the extrema of the bounds.
        # FIX: indices (0, -1) select each vector's first and last linspace
        # sample, i.e. the true corners of the sampling hyperrectangle.
        # This previously iterated product((-1, 1), ...); index 1 is merely
        # the *second* sample, not the lower bound.
        extrema = [[] for _ in vecs]
        for p in product((0, -1), repeat=len(vecs)):
            for i, side in enumerate(p):
                extrema[i].append(vecs[i][side])
        extrema = [np.array(e) for e in extrema]
        absdiff, _, __, frange, worstpoint = _offset_check_cycle(
            0, (np.inf, -np.inf), lamb, quick, extrema, None
        )
        medians, mses = [], []
        for _ in range(n_offset_shuffles):
            # shuffling each vector independently draws random tuples from
            # the input space without materializing the full product grid
            gmap(np.random.shuffle, vecs)
            absdiff, mediff, mse, frange, worstpoint = _offset_check_cycle(
                absdiff, frange, lamb, quick, vecs, worstpoint
            )
            medians.append(mediff)
            mses.append(mse)
        mediff, mse = np.median(medians), np.median(mses)
        # no point in shuffling for 1D -- we're doing that for > 1D
        # because it becomes quickly unreasonable in terms of memory
        # to be exhaustive, but this _is_ exhaustive for 1D
    else:
        approx_y, orig_y = quick(*vecs), lamb(*vecs)
        frange = (orig_y.min(), orig_y.max())
        offset = abs(approx_y - orig_y)
        worstix = np.argmax(offset)
        absdiff = offset[worstix]
        mediff = np.median(offset)
        mse = np.mean(offset ** 2)
        worstpoint = [vecs[0][worstix]]
        # free the large probe arrays before the timing loops
        del offset, orig_y, approx_y
    # TODO: should probably permit specifying dtype for jitted
    # functions -- both here and in primary quickseries().
    approx_time = timeit.timeit(lambda: quick(*vecs), number=timeit_cycles)
    orig_time = timeit.timeit(lambda: lamb(*vecs), number=timeit_cycles)
    orig_s = orig_time / timeit_cycles
    approx_s = approx_time / timeit_cycles
    return {
        'absdiff': absdiff,
        'reldiff': absdiff / np.ptp(frange),
        'mediff': mediff,
        'mse': mse,
        'worstpoint': worstpoint,
        'range': frange,
        'orig_s': orig_s,
        'approx_s': approx_s,
        'timeratio': approx_s / orig_s,
        'gentime': gentime,
        'polyfunc': quick
    } | ext
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
from functools import reduce
|
|
2
|
+
from typing import Union, Sequence
|
|
3
|
+
|
|
4
|
+
from dustgoggles.structures import listify
|
|
5
|
+
import sympy as sp
|
|
6
|
+
|
|
7
|
+
from quickseries.sputils import LmSig
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _rectify_series(series, add_coefficients):
    """
    Normalize a sympy series expression: drop Order (limit-behavior) terms,
    numerically evaluate whatever can be evaluated, and -- when
    `add_coefficients` is True -- attach a fresh free symbol a_0, a_1, ...
    to each non-constant term so downstream code can fit them.

    Returns (summed expression, list of coefficient symbols).
    """
    if isinstance(series, sp.Order):
        raise ValueError(
            "Cannot produce a meaningful approximation with the requested "
            "parameters (most likely order is too low)."
        )
    terms, coefsyms = [], []
    for element in series.args:
        # NOTE: the Expr.evalf() calls are simply to try to evaluate
        # anything we can.
        evaluated = element.evalf() if hasattr(element, "evalf") else None
        if isinstance(evaluated, sp.Number):
            terms.append(evaluated)
        elif isinstance(element, sp.Order):
            # limit-behavior bookkeeping: discard
            continue
        elif isinstance(element, (sp.Mul, sp.Symbol, sp.Pow)):
            if add_coefficients is True:
                coef = sp.symbols(f"a_{len(coefsyms)}")
                terms.append((coef * element).evalf())
                coefsyms.append(coef)
            else:
                terms.append(element.evalf())
        else:
            raise ValueError(
                f"don't know how to handle expression element {element} of "
                f"type({type(element)})"
            )
    return sum(terms), coefsyms
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def series_lambda(
    func: Union[str, sp.Expr],
    x0: float = 0,
    nterms: int = 9,
    add_coefficients: bool = False,
    modules: Union[str, Sequence[str]] = ("scipy", "numpy")
) -> tuple[LmSig, sp.Expr]:
    """
    Build a power-series expansion of `func` about `x0` and lambdify it.
    Optionally attach free coefficient symbols to the polynomial terms so
    downstream code can optimize them.

    Args:
        func: function to expand, as a string or sympy Expr.
        x0: expansion point.
        nterms: order of the power expansion.
        add_coefficients: if True, the returned function and Expr gain
            extra arguments/symbols for the polynomial's coefficients.
        modules: modules sp.lambdify draws implementations from.

    Returns:
        approximant: Python function implementing the expansion.
        expr: the sympy Expr the approximant was built from.
    """
    if isinstance(func, str):
        func = sp.sympify(func)
    # limiting precision of x0 is necessary due to a bug in sp.series
    expansion = sp.series(func, x0=round(x0, 6), n=nterms)
    # strip Order terms; evaluate whatever is constant
    expr, coefsyms = _rectify_series(expansion, add_coefficients)
    argsyms = sorted(func.free_symbols, key=str)
    # noinspection PyTypeChecker
    return sp.lambdify(argsyms + coefsyms, expr, modules), expr
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def additive_combinations(n_terms, number):
    """
    Return all tuples of `n_terms` nonnegative integers whose sum does not
    exceed `number`, in lexicographic order of the leading elements.
    """
    if n_terms == 1:
        return [(n,) for n in range(number + 1)]
    # recurse: pick the leading value, then distribute the remainder over
    # the remaining positions
    return [
        (first, *rest)
        for first in range(number + 1)
        for rest in additive_combinations(n_terms - 1, number - first)
    ]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def multivariate_taylor(
    func: Union[str, sp.Expr],
    point: Sequence[float],
    nterms: int,
    add_coefficients: bool = False
) -> tuple[LmSig, sp.Expr]:
    """
    Construct a multivariate Taylor expansion of `func` about `point`
    (the multivariable counterpart of series_lambda()).

    Args:
        func: function to expand, as a string or sympy Expr.
        point: expansion point; one coordinate per free symbol of func,
            with symbols taken in name-sorted order.
        nterms: order of the expansion (derivative-order tuples with total
            order < nterms are included).
        add_coefficients: if True, attach free coefficient symbols to the
            polynomial terms for downstream fitting.

    Returns:
        approximant: Python function implementing the expansion.
        expr: the sympy Expr the approximant was built from.
    """
    func = sp.sympify(func) if isinstance(func, str) else func
    # free symbols of func, in deterministic (name-sorted) order
    pointsyms = sorted(func.free_symbols, key=lambda s: str(s))
    dimensionality = len(pointsyms)
    # placeholder symbols x0..xn for the evaluation-point arguments
    argsyms = listify(
        sp.symbols(",".join([f"x{i}" for i in range(dimensionality)]))
    )
    # per-dimension derivative-order symbols i0..in
    ixsyms = listify(
        sp.symbols(",".join(f"i{i}" for i in range(dimensionality)))
    )
    # symbolic general term: D^(i0..in) f / (i0! * ... * in!)
    # * prod((x_k - a_k) ** i_k)
    deriv = sp.Derivative(func, *[(p, i) for p, i in zip(pointsyms, ixsyms)])
    # noinspection PyTypeChecker
    fact = reduce(sp.Mul, [sp.factorial(i) for i in ixsyms])
    err = reduce(
        sp.Mul,
        [(x - a) ** i for x, a, i in zip(argsyms, pointsyms, ixsyms)]
    )
    taylor = deriv / fact * err
    # TODO, probably: there's a considerably faster way to do this in some
    # cases by precomputing partial derivatives
    # all derivative-order tuples with total order <= nterms - 1
    decomp = additive_combinations(dimensionality, nterms - 1)
    # substitute each order tuple into the general term and sum, then
    # actually perform the symbolic differentiation
    built = reduce(
        sp.Add,
        (taylor.subs({i: d for i, d in zip(ixsyms, d)}) for d in decomp)
    ).doit()
    # evaluate the derivatives at the expansion point
    evaluated = built.subs({s: p for s, p in zip(pointsyms, point)}).evalf()
    # this next line is kind of aesthetic -- we just want the argument names
    # to be consistent with the input
    evaluated = evaluated.subs({a: p for a, p in zip(argsyms, pointsyms)})
    evaluated, coefsyms = _rectify_series(evaluated, add_coefficients)
    # noinspection PyTypeChecker
    return sp.lambdify(pointsyms + coefsyms, evaluated), evaluated
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""lightweight version of `moonbow`'s polynomial fit functionality"""
|
|
2
|
+
from functools import wraps
|
|
3
|
+
from inspect import Parameter, signature
|
|
4
|
+
from typing import Callable, Optional, Sequence, Union
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
from scipy.optimize import curve_fit
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def fit_wrap(
|
|
11
|
+
func: Callable[[np.ndarray | float, ...], np.ndarray | float],
|
|
12
|
+
dimensionality: int,
|
|
13
|
+
fit_parameters: Sequence[str]
|
|
14
|
+
) -> Callable[[np.ndarray | float, ...], np.ndarray | float]:
|
|
15
|
+
@wraps(func)
|
|
16
|
+
def wrapped_fit(independent_variable, *params):
|
|
17
|
+
variable_components = [
|
|
18
|
+
independent_variable[n] for n in range(dimensionality)
|
|
19
|
+
]
|
|
20
|
+
exploded_function = func(*variable_components, *params)
|
|
21
|
+
return exploded_function
|
|
22
|
+
|
|
23
|
+
# rewrite the signature so that curve_fit will like it
|
|
24
|
+
sig = signature(wrapped_fit)
|
|
25
|
+
curve_fit_params = (
|
|
26
|
+
Parameter("independent_variable", Parameter.POSITIONAL_ONLY),
|
|
27
|
+
*fit_parameters,
|
|
28
|
+
)
|
|
29
|
+
wrapped_fit.__signature__ = sig.replace(parameters=curve_fit_params)
|
|
30
|
+
return wrapped_fit
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def fit(
    func: Callable,
    vecs: list[np.ndarray],
    dependent_variable: np.ndarray,
    guess: Optional[Sequence[float]] = None,
    bounds: Optional[
        Union[tuple[tuple[float, float]], tuple[float, float]]
    ] = None
) -> tuple[np.ndarray, np.ndarray]:
    """
    Fit the free parameters of `func` (every parameter after the first
    len(vecs) independent-variable slots) to `dependent_variable` sampled
    at `vecs`, via scipy's curve_fit. Returns curve_fit's
    (optimal parameters, covariance) pair.
    """
    sig = signature(func)
    assert len(vecs) < len(sig.parameters), (
        "The model function must have at least one 'free' "
        "parameter to be a meaningful candidate for fitting."
    )
    # everything after the independent-variable slots is a fit parameter
    fit_parameters = list(sig.parameters.values())[len(vecs):]
    # TODO: check dim of dependent
    if not all(vec.ndim == 1 for vec in vecs):
        raise ValueError("each input vector must be 1-dimensional")
    # TODO: optional goodness-of-fit evaluation
    optkwargs = {} if bounds is None else {'bounds': bounds}
    # noinspection PyTypeChecker
    return curve_fit(
        fit_wrap(func, len(vecs), fit_parameters),
        vecs,
        dependent_variable,
        maxfev=20000,
        p0=guess,
        **optkwargs
    )
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import pickle
|
|
2
|
+
from hashlib import md5
|
|
3
|
+
from inspect import currentframe, getargvalues, getsource
|
|
4
|
+
import linecache
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import re
|
|
7
|
+
from types import FunctionType
|
|
8
|
+
from typing import Callable
|
|
9
|
+
|
|
10
|
+
from dustgoggles.dynamic import define, get_codechild
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
CACHE_ARGS = (
|
|
14
|
+
"func",
|
|
15
|
+
"bounds",
|
|
16
|
+
"nterms",
|
|
17
|
+
"point",
|
|
18
|
+
"fitres",
|
|
19
|
+
"prefactor",
|
|
20
|
+
"approx_poly",
|
|
21
|
+
"precision",
|
|
22
|
+
"fit_series_expansion",
|
|
23
|
+
"bound_series_fit"
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def cache_source(source: str, fn: Path):
    """
    Register `source` in linecache under filename `fn`, so tracebacks and
    inspect.getsource() can see dynamically generated code.
    """
    key = str(fn)
    # linecache entries are (size, mtime, list-of-lines, filename);
    # mtime None marks the entry as never stale
    entry = (len(source), None, source.splitlines(True), key)
    linecache.cache[key] = entry
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# TODO: pull this little bitty change up to dustgoggles
|
|
33
|
+
def compile_source(source: str, fn: str = ""):
    """
    Compile `source` and return the first code object defined inside it.

    `fn` is the filename baked into the compiled code; it shows up in
    tracebacks and is the key used for linecache lookups by cache_source().
    """
    return get_codechild(compile(source, fn, "exec"))
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _cachedir(callfile: str) -> Path:
|
|
38
|
+
if callfile == 'ipython_shell':
|
|
39
|
+
import IPython.paths
|
|
40
|
+
|
|
41
|
+
return Path(IPython.paths.get_ipython_cache_dir()) / "qs_cache"
|
|
42
|
+
return Path(callfile).parent / "__pycache__" / "qs_cache"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _cachekey(args, callfile=None):
    """
    Derive a cache key from the quickseries() call: hash the cache-relevant
    argument values (CACHE_ARGS), the calling file, and the package version,
    so any change to them produces a distinct key.
    """
    from quickseries import __version__

    # TODO: is this actually stable?
    relevant = {name: args.locals[name] for name in sorted(CACHE_ARGS)}
    relevant |= {'f': callfile, '__version__': __version__}
    digest = md5(pickle.dumps(relevant)).hexdigest()
    # arbitrary cutoff for a reasonable tradeoff between collision safety and
    # readability
    return f"quickseries_{digest}"[:-18]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# TODO, maybe: the frame traversal is potentially wasteful when repeated,
|
|
59
|
+
# although it probably doesn't matter too much.
|
|
60
|
+
def _cacheid():
    """
    Walk up the call stack to identify the quickseries() invocation being
    cached: capture its argument values and the filename of its caller,
    then derive (cache file path, cache key) from them.

    WARNING: do not call this outside the normal quickseries workflow. It can
    be tricked, but to no good end.
    """
    frame, callfile, args = currentframe(), None, None
    while callfile is None:
        # step one frame outward per iteration
        frame = frame.f_back
        if frame is None or frame.f_code.co_filename == "<stdin>":
            # ran off the top of the stack, or caller is an interactive
            # stdin session: fall back to a shared anonymous cache bucket
            callfile = "__quickseries_anonymous_caller_cache__/anonymous"
        elif hasattr(frame.f_code, "co_name"):
            if args is None and frame.f_code.co_name == "quickseries":
                # found the quickseries() frame: grab its argument values
                # for _cachekey()
                args = getargvalues(frame)
            elif frame.f_code.co_name == "benchmark":
                # skip benchmark() wrapper frames so the cache is attributed
                # to benchmark's caller, not benchmark itself
                continue
            elif args is not None:
                # first frame above quickseries() is the caller of record
                callfile = frame.f_code.co_filename
                if re.search(r"interactiveshell.py|ipython", callfile):
                    # IPython execution machinery: treat as interactive
                    callfile = 'ipython_shell'
    if args is None:
        raise ReferenceError("Cannot use _cachefile() outside quickseries().")
    key = _cachekey(args, callfile)
    return _cachedir(callfile) / key / "func", key
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _compile_quickseries(source, jit, cache, cachefile):
    """
    Turn generated `source` into a live function: compile it with
    `cachefile` as its nominal filename, register the source with
    linecache, stash the source on __doc__, and optionally numba-JIT it.
    """
    namespace = globals()
    # if the generated source references a specific float dtype, make the
    # matching numpy type visible in the function's globals
    precmatch = re.search(r"float\d\d", source)
    if precmatch is not None:
        import numpy

        namespace[precmatch.group()] = getattr(numpy, precmatch.group())
    func = FunctionType(compile_source(source, str(cachefile)), namespace)
    cache_source(source, cachefile)
    # keep the generated source inspectable on the function itself
    func.__doc__ = source
    if jit is not True:
        return func
    import numba as nb

    return nb.njit(func, cache=cache)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _cacheget(jit=False):
    """
    Look up previously generated source for the current quickseries() call;
    return (compiled function, source) on a hit, (None, None) on a miss.
    """
    cachefile, key = _cacheid()
    if not cachefile.exists():
        return None, None
    source = cachefile.read_text()
    return _compile_quickseries(source, jit, True, cachefile), source
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _cachewrite(source, cachefile):
|
|
111
|
+
# we make the __pycache__ directory to enable numba JIT result caching,
|
|
112
|
+
# just in case it happens; if it doesn't, the presence of the directory is
|
|
113
|
+
# harmless.
|
|
114
|
+
(cachefile.parent / "__pycache__").mkdir(exist_ok=True, parents=True)
|
|
115
|
+
# TODO, maybe: use a more sensible data structure
|
|
116
|
+
with cachefile.open("w") as stream:
|
|
117
|
+
stream.write(source)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _finalize_quickseries(source, jit=False, cache=False):
    """
    Compile freshly generated `source` into a function, optionally writing
    it to the on-disk cache first.
    """
    # note that we use this as a function identifier and 'fake' target for
    # linecache even if we're not actually using the quickseries cache
    cachefile, _ = _cacheid()
    if cache is True:
        _cachewrite(source, cachefile)
    return _compile_quickseries(source, jit, cache, cachefile)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def lastline(func: Callable) -> str:
    """try to get the last line of a function, sans return statement"""
    nonempty = [line for line in getsource(func).split("\n") if line]
    return nonempty[-1].replace("return", "").strip()
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from typing import Any, Callable, Sequence, Union
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import sympy as sp
|
|
5
|
+
|
|
6
|
+
# Signature shared by lambdified callables throughout the package:
# positional array-or-scalar arguments in, array-or-scalar result out.
LmSig = Callable[[np.ndarray | float, ...], np.ndarray | float]
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def lambdify(
    func: Union[str, sp.Expr],
    modules: Union[str, Sequence[str]] = ("scipy", "numpy")
) -> LmSig:
    """
    Transform a sympy Expr or a string representation of a function into a
    callable with enforced argument order, incorporating code from specified
    modules.

    Args:
        func: function to lambdify, as a string or sympy Expr.
        modules: modules sp.lambdify draws implementations from.

    Raises:
        ValueError: if a string `func` cannot be parsed by sympy.
    """
    if isinstance(func, str):
        try:
            func = sp.sympify(func)
        except sp.SympifyError as err:
            # chain the underlying parse failure so callers can see why
            # sympification failed (the bare raise previously discarded it)
            raise ValueError(f"Unable to parse {func}.") from err
    # arguments are the free symbols in deterministic, name-sorted order
    # noinspection PyTypeChecker
    return sp.lambdify(
        sorted(func.free_symbols, key=lambda x: str(x)), func, modules
    )
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: quickseries
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Home-page: https://github.com/millionconcepts/quickseries.git
|
|
5
|
+
Author: Michael St. Clair
|
|
6
|
+
Author-email: mstclair@millionconcepts.com
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: dustgoggles
|
|
10
|
+
Requires-Dist: numpy
|
|
11
|
+
Requires-Dist: scipy
|
|
12
|
+
Requires-Dist: sympy
|
|
13
|
+
Provides-Extra: jit
|
|
14
|
+
Requires-Dist: numba; extra == "jit"
|
|
15
|
+
Provides-Extra: tests
|
|
16
|
+
Requires-Dist: pytest; extra == "tests"
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
setup.py
|
|
4
|
+
quickseries/__init__.py
|
|
5
|
+
quickseries/approximate.py
|
|
6
|
+
quickseries/benchmark.py
|
|
7
|
+
quickseries/expansions.py
|
|
8
|
+
quickseries/simplefit.py
|
|
9
|
+
quickseries/sourceutils.py
|
|
10
|
+
quickseries/sputils.py
|
|
11
|
+
quickseries.egg-info/PKG-INFO
|
|
12
|
+
quickseries.egg-info/SOURCES.txt
|
|
13
|
+
quickseries.egg-info/dependency_links.txt
|
|
14
|
+
quickseries.egg-info/requires.txt
|
|
15
|
+
quickseries.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
quickseries
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from setuptools import find_packages, setup

# Packaging configuration for quickseries. numba is optional (only needed
# for the jit=True path), so it lives in the "jit" extra rather than
# install_requires.
setup(
    name="quickseries",
    version="0.2.1",
    packages=find_packages(),
    url="https://github.com/millionconcepts/quickseries.git",
    author="Michael St. Clair",
    author_email="mstclair@millionconcepts.com",
    python_requires=">=3.11",
    install_requires=["dustgoggles", "numpy", "scipy", "sympy"],
    extras_require={"jit": "numba", "tests": "pytest"}
)
|