PyPI - max-div - Versions diffs - 0.0.3__py3-none-any.whl → 0.1.1__py3-none-any.whl - Mend

max-div 0.0.3__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

max_div/_cli.py +99 -0
max_div/benchmark/__init__.py +2 -1
max_div/benchmark/_formatting.py +218 -0
max_div/benchmark/randint.py +104 -0
max_div/benchmark/randint_constrained.py +355 -0
max_div/constraints/__init__.py +2 -0
max_div/constraints/_numba.py +110 -0
max_div/constraints/constraint.py +10 -0
max_div/constraints/constraints.py +47 -0
max_div/internal/benchmarking/_micro_benchmark.py +48 -7
max_div/internal/formatting/__init__.py +1 -0
max_div/internal/formatting/_markdown.py +43 -0
max_div/internal/math/__init__.py +1 -0
max_div/internal/math/fast_log.py +167 -0
max_div/internal/math/random.py +166 -0
max_div/internal/math/select_k_minmax.py +250 -0
max_div/sampling/__init__.py +1 -1
max_div/sampling/con.py +350 -0
max_div/sampling/uncon.py +269 -0
{max_div-0.0.3.dist-info → max_div-0.1.1.dist-info}/METADATA +13 -8
max_div-0.1.1.dist-info/RECORD +32 -0
max_div-0.1.1.dist-info/entry_points.txt +2 -0
max_div/benchmark/sample_int.py +0 -85
max_div/internal/compat/__init__.py +0 -1
max_div/internal/compat/_numba/__init__.py +0 -14
max_div/internal/compat/_numba/_dummy_numba.py +0 -94
max_div/internal/compat/_numba/_helpers.py +0 -14
max_div/sampling/discrete.py +0 -176
max_div-0.0.3.dist-info/RECORD +0 -23
max_div-0.0.3.dist-info/entry_points.txt +0 -2
{max_div-0.0.3.dist-info → max_div-0.1.1.dist-info}/WHEEL +0 -0
{max_div-0.0.3.dist-info → max_div-0.1.1.dist-info}/licenses/LICENSE +0 -0

max_div/_cli.py ADDED Viewed

@@ -0,0 +1,99 @@
+"""Command-line interface for max-div."""
+import click
+from max_div.benchmark import benchmark_randint as _benchmark_randint
+from max_div.benchmark import benchmark_randint_constrained as _benchmark_randint_constrained
+# -------------------------------------------------------------------------
+#  Main CLI Group
+# -------------------------------------------------------------------------
+@click.group()
+def cli():
+    """max-div: Flexible Solver for Maximum Diversity Problems with Fairness Constraints."""
+    # Root command group: it does no work of its own and only hosts the sub-groups
+    # and commands registered on it further down in this module.
+    # NOTE: the docstring above is presumably what click shows as --help text, so keep it user-facing.
+# -------------------------------------------------------------------------
+#  Benchmarking Commands
+# -------------------------------------------------------------------------
+@cli.group()
+@click.option(
+    "--turbo",
+    is_flag=True,
+    default=False,
+    help="Run shorter, less accurate benchmark; identical to --speed=1.0; intended for testing purposes.",
+)
+@click.option(
+    "--speed",
+    default=0.0,
+    help="Values closer to 1.0 result in shorter, less accurate benchmark; Overridden by --turbo when provided.",
+)
+@click.option(
+    "--markdown",
+    is_flag=True,
+    default=False,
+    help="Output benchmark results in Markdown table format.",
+)
+@click.pass_context
+def benchmark(ctx, turbo: bool, speed: float, markdown: bool):
+    """Benchmarking commands."""
+    # The group-level options are stashed on the click context object so that the
+    # sub-commands of this group can read them back from ctx.obj.
+    ctx.ensure_object(dict)
+    ctx.obj.update(
+        # --turbo takes precedence over --speed and is equivalent to --speed=1.0
+        speed=1.0 if turbo else speed,
+        markdown=markdown,
+    )
+@benchmark.command(name="randint")
+@click.pass_context
+def randint(ctx):
+    """Benchmarks the `randint` function from `max_div.sampling.uncon`."""
+    # Forward the settings that the `benchmark` group stored on the context.
+    shared = ctx.obj
+    _benchmark_randint(speed=shared["speed"], markdown=shared["markdown"])
+@benchmark.command(name="randint_constrained")
+@click.pass_context
+def randint_constrained(ctx):
+    """Benchmarks the `randint_constrained` function from `max_div.sampling.con`."""
+    # Forward the settings that the `benchmark` group stored on the context.
+    shared = ctx.obj
+    _benchmark_randint_constrained(speed=shared["speed"], markdown=shared["markdown"])
+# -------------------------------------------------------------------------
+#  Misc Commands
+# -------------------------------------------------------------------------
+@cli.command()
+def numba_status():
+    """Show Numba version, llvmlite version, and configuration including SVML status."""
+    # All imports are local to this command.
+    import llvmlite
+    import numba
+    from numba import config
+    separator = "-" * 50
+    # --- package versions ---
+    click.echo(f"Numba version    : {numba.__version__}")
+    click.echo(f"llvmlite version : {llvmlite.__version__}")
+    # --- key configuration settings, framed by two separator lines ---
+    click.echo("\nNumba Configuration:")
+    click.echo(separator)
+    click.echo(f"SVML enabled       : {config.USING_SVML}")
+    click.echo(f"Threading layer    : {config.THREADING_LAYER}")
+    click.echo(f"Number of threads  : {config.NUMBA_NUM_THREADS}")
+    click.echo(f"Optimization level : {config.OPT}")
+    click.echo(f"Debug mode         : {config.DEBUG}")
+    click.echo(f"Disable JIT        : {config.DISABLE_JIT}")
+    click.echo(separator)
+# -------------------------------------------------------------------------
+#  Entrypoint
+# -------------------------------------------------------------------------
+# Start the CLI only when this module is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    cli()

max_div/benchmark/__init__.py CHANGED Viewed

@@ -1 +1,2 @@
-from .sample_int import benchmark_sample_int
+from .randint import benchmark_randint
+from .randint_constrained import benchmark_randint_constrained

max_div/benchmark/_formatting.py ADDED Viewed

@@ -0,0 +1,218 @@
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Literal
+from max_div.internal.benchmarking import BenchmarkResult
+from max_div.internal.formatting import md_bold, md_colored, md_table
+# =================================================================================================
+#  Helper classes / types
+# =================================================================================================
+@dataclass
+class Percentage:
+    """A fraction that renders as a percentage string (e.g. 0.5 -> "50.0%") when passed to `str()`."""
+    # value as a fraction, intended to lie between 0.0 and 1.0 (not enforced here)
+    frac: float
+    # number of digits shown after the decimal point
+    decimals: int = 1
+    def __str__(self) -> str:
+        scaled = self.frac * 100
+        return f"{scaled:.{self.decimals}f}%"
+# A table cell is either plain text, a benchmark timing result, or a percentage.
+CellContent = str | BenchmarkResult | Percentage
+# =================================================================================================
+#  Aggregation
+# =================================================================================================
+def extend_table_with_aggregate_row(
+    data: list[list[CellContent]],
+    agg: Literal["mean", "geomean", "sum"],
+    include_benchmark_result: bool = True,
+    include_percentage: bool = True,
+) -> list[list[CellContent]]:
+    """
+    Return the table extended with one extra row that holds per-column aggregates.
+    A column is 'aggregatable' if at least one of its cells is a BenchmarkResult or a Percentage.
+     - BenchmarkResult cells of a column are combined with `BenchmarkResult.aggregate(..., method=agg)`.
+     - Percentage cells of a column are always combined as the arithmetic mean of their fractions
+       (independent of `agg`) and get one decimal more than the largest `decimals` seen in that column.
+     - If a column holds both kinds and both flags are enabled, the Percentage aggregate is the one kept.
+     - All remaining cells of the aggregate row are empty strings.
+    The column directly left of the first aggregatable column (if there is one) receives the label
+      `agg.capitalize() + ":"`, e.g. "Geomean:".
+    Edge cases:
+     - empty `data`: the column count is unknown and there is nothing to aggregate, so `[]` is returned.
+     - no aggregatable column at all: an all-empty row without label is appended.
+    :param data: table rows; every row is expected to have as many cells as the first row.
+    :param agg: aggregation method, forwarded to `BenchmarkResult.aggregate` and used for the row label.
+    :param include_benchmark_result: if False, no aggregate is computed for BenchmarkResult cells.
+    :param include_percentage: if False, no aggregate is computed for Percentage cells.
+    :return: new outer list with the original rows followed by the aggregate row; `data` itself is not modified.
+    """
+    if not data:
+        # Without a first row the number of columns cannot be determined.
+        return []
+    n_cols = len(data[0])
+    aggregatable_types = (BenchmarkResult, Percentage)
+    # Identify which columns contain at least one aggregatable cell
+    has_aggregatable = [False] * n_cols
+    for row in data:
+        for col_idx, cell in enumerate(row):
+            if isinstance(cell, aggregatable_types):
+                has_aggregatable[col_idx] = True
+    # Index of the first aggregatable column, or None if the table has none
+    first_aggregatable_col = next((idx for idx, flag in enumerate(has_aggregatable) if flag), None)
+    # Create the aggregate row
+    agg_row: list[CellContent] = [""] * n_cols
+    # Every column left of the first aggregatable one is non-aggregatable by construction,
+    # so the label simply goes into the column immediately before it (if such a column exists).
+    if first_aggregatable_col is not None and first_aggregatable_col > 0:
+        agg_row[first_aggregatable_col - 1] = agg.capitalize() + ":"
+    # Compute aggregates per column
+    for col_idx in range(n_cols):
+        if not has_aggregatable[col_idx]:
+            continue
+        column = [row[col_idx] for row in data]
+        if include_benchmark_result:
+            results = [cell for cell in column if isinstance(cell, BenchmarkResult)]
+            if results:  # Only compute if we have values
+                agg_row[col_idx] = BenchmarkResult.aggregate(results, method=agg)
+        if include_percentage:
+            percentages = [cell for cell in column if isinstance(cell, Percentage)]
+            if percentages:  # Only compute if we have values
+                avg_frac = sum(p.frac for p in percentages) / len(percentages)
+                max_decimals = max(p.decimals for p in percentages)
+                # one extra decimal, since an average is typically more fine-grained than its inputs
+                agg_row[col_idx] = Percentage(frac=avg_frac, decimals=max_decimals + 1)
+    # Return data with the aggregate row appended
+    return data + [agg_row]
+# =================================================================================================
+#  Markdown highlighters
+# =================================================================================================
+class HighLighter(ABC):
+    """Interface for objects that post-process one table row before it is rendered as Markdown."""
+    @abstractmethod
+    def process_row(self, row: list[CellContent]) -> list[CellContent]:
+        """Return the row, with the cells this highlighter targets replaced by decorated text."""
+        raise NotImplementedError
+class FastestBenchmark(HighLighter):
+    """Highlights the fastest BenchmarkResult cell(s) of a row (colored, and bold if requested)."""
+    def __init__(self, bold: bool = True, color: str = "#00aa00"):
+        self.bold = bold
+        self.color = color
+    def process_row(self, row: list[CellContent]) -> list[CellContent]:
+        timings = [cell for cell in row if isinstance(cell, BenchmarkResult)]
+        if not timings:
+            # nothing to compare -> hand back the row untouched
+            return row
+        # reference = smallest median time in this row
+        best_median = min(timing.t_sec_q_50 for timing in timings)
+        processed: list[CellContent] = []
+        for cell in row:
+            if not isinstance(cell, BenchmarkResult):
+                processed.append(cell)
+                continue
+            text = str(cell)
+            # a result is highlighted when its q25 time does not exceed the best median,
+            # so more than one cell per row can end up highlighted
+            if cell.t_sec_q_25 <= best_median:
+                if self.bold:
+                    text = md_bold(text)
+                text = md_colored(text, self.color)
+            processed.append(text)
+        return processed
+class HighestPercentage(HighLighter):
+    """Highlights the highest Percentage cell(s) of a row (colored, and bold if requested)."""
+    def __init__(self, bold: bool = True, color: str = "#00aa00"):
+        self.bold = bold
+        self.color = color
+    def process_row(self, row: list[CellContent]) -> list[CellContent]:
+        percentages = [cell for cell in row if isinstance(cell, Percentage)]
+        if not percentages:
+            # nothing to compare -> hand back the row untouched
+            return row
+        # rendered text of the cell with the largest fraction
+        top_text = str(max(percentages, key=lambda perc: perc.frac))
+        processed: list[CellContent] = []
+        for cell in row:
+            if not isinstance(cell, Percentage):
+                processed.append(cell)
+                continue
+            text = str(cell)
+            # comparison is done on the rendered text, so every cell that displays
+            # the same as the maximum gets highlighted as well
+            if text == top_text:
+                if self.bold:
+                    text = md_bold(text)
+                text = md_colored(text, self.color)
+            processed.append(text)
+        return processed
+class BoldLabels(HighLighter):
+    """Renders label cells, i.e. string cells ending in ':', in bold."""
+    def process_row(self, row: list[CellContent]) -> list[CellContent]:
+        def is_label(cell: CellContent) -> bool:
+            return isinstance(cell, str) and cell.endswith(":")
+        return [md_bold(cell) if is_label(cell) else cell for cell in row]
+# =================================================================================================
+#  Formatting
+# =================================================================================================
+def format_as_markdown(
+    headers: list[str], data: list[list[CellContent]], highlighters: list[HighLighter] | None = None
+) -> list[str]:
+    """
+    Format benchmark data as a Markdown table.
+    Each data row is first passed through all highlighters (in the given order); afterwards every
+    cell is converted with `str()`. Highlighting therefore only happens if highlighters are provided.
+    :param headers: List of column headers
+    :param data: 2D list where each row contains strings, BenchmarkResult and/or Percentage objects
+    :param highlighters: Optional list of HighLighter objects to apply to each row
+    :return: Output of `md_table` for the header row followed by the converted data rows
+    """
+    active_highlighters = highlighters or []
+    table: list[list[str]] = [headers]
+    for row in data:
+        cells = row
+        # highlight if requested
+        for highlighter in active_highlighters:
+            cells = highlighter.process_row(cells)
+        # whatever is not a string yet is stringified here
+        table.append([str(cell) for cell in cells])
+    return md_table(table)
+def format_for_console(headers: list[str], data: list[list[CellContent]]) -> list[str]:
+    """
+    Similar to `format_as_markdown`, but without highlighting, to keep the table readable without rendering.
+    BenchmarkResult cells are shown via their `t_sec_with_uncertainty_str`; all other cells via `str()`.
+    """
+    def as_text(cell: CellContent) -> str:
+        if isinstance(cell, BenchmarkResult):
+            return cell.t_sec_with_uncertainty_str
+        return str(cell)
+    table_data = [headers] + [[as_text(cell) for cell in row] for row in data]
+    return md_table(table_data)

max_div/benchmark/randint.py ADDED Viewed

@@ -0,0 +1,104 @@
+import numpy as np
+from tqdm import tqdm
+from max_div.internal.benchmarking import BenchmarkResult, benchmark
+from max_div.sampling.uncon import randint_numba, randint_numpy
+from ._formatting import (
+    BoldLabels,
+    CellContent,
+    FastestBenchmark,
+    extend_table_with_aggregate_row,
+    format_as_markdown,
+    format_for_console,
+)
+def benchmark_randint(speed: float = 0.0, markdown: bool = False) -> None:
+    """
+    Benchmarks the `randint` function from `max_div.sampling.uncon`.
+    Different scenarios are tested:
+     * with & without replacement
+     * uniform & non-uniform sampling
+     * the numpy-based (`randint_numpy`) and numba-based (`randint_numba`) implementation
+     * different sizes of (`n`, `k`):
+        * `n` is varied across [10, 100, 1000, 10000] and `k` across [1, 10, 100, 1000, 10000]
+        * all valid combinations are tested (if `replace==False` we don't test `k`>`n`)
+    For each (`n`, `k`) both implementations are timed on the same probability vector `p`.
+    Results are printed to stdout, one table per scenario, with a geomean row appended.
+    :param speed: value in [0.0, 1.0] (default=0.0); 0.0=accurate but slow; 1.0=fast but less accurate
+    :param markdown: If `True`, outputs the results as a Markdown table.
+    """
+    print("Benchmarking `randint`...")
+    print()
+    # --- settings that do not change between scenarios ---
+    implementations = [randint_numpy, randint_numba]  # same order as the result columns in the headers
+    headers = ["k", "n", "randint_numpy", "randint_numba"]
+    if markdown:
+        headers = [f"`{header}`" for header in headers]
+    # higher speed -> shorter runs, fewer warmup & benchmark repetitions
+    t_per_run = 0.05 / (1000.0**speed)
+    n_warmup = int(8 - 5 * speed)
+    n_benchmark = int(25 - 22 * speed)
+    for replace, use_p, desc in [
+        (True, False, "A. WITH replacement, UNIFORM probabilities"),
+        (False, False, "B. WITHOUT replacement, UNIFORM probabilities"),
+        (True, True, "C. WITH replacement, CUSTOM probabilities"),
+        (False, True, "D. WITHOUT replacement, CUSTOM probabilities"),
+    ]:
+        if markdown:
+            print(f"## {desc}")
+        else:
+            print(f"{desc}:")
+        # --- benchmark ------------------------------------
+        data: list[list[CellContent]] = []
+        n_k_values = [(n, k) for n in [10, 100, 1000, 10000] for k in [1, 10, 100, 1000, 10000] if replace or (k <= n)]
+        for n, k in tqdm(n_k_values, leave=False):
+            # Build p once per (n, k), so both implementations are compared on identical input.
+            if use_p:
+                p = np.random.rand(n)
+                p /= p.sum()
+            else:
+                # an empty array is what is passed for the uniform scenarios
+                p = np.zeros(0)
+            p = p.astype(np.float32)
+            data_row: list[CellContent] = [str(k), str(n)]
+            for impl in implementations:
+                # The closure is executed by `benchmark` right away, within this iteration,
+                # so it sees the current values of impl, n, k, replace and p.
+                def func_to_benchmark():
+                    impl(n=n, k=k, replace=replace, p=p)
+                data_row.append(
+                    benchmark(
+                        f=func_to_benchmark,
+                        t_per_run=t_per_run,
+                        n_warmup=n_warmup,
+                        n_benchmark=n_benchmark,
+                        silent=True,
+                    )
+                )
+            data.append(data_row)
+        # --- show results -----------------------------------------
+        data = extend_table_with_aggregate_row(data, agg="geomean")
+        if markdown:
+            display_data = format_as_markdown(headers, data, highlighters=[FastestBenchmark(), BoldLabels()])
+        else:
+            display_data = format_for_console(headers, data)
+        print()
+        for line in display_data:
+            print(line)
+        print()

max-div 0.0.3__py3-none-any.whl → 0.1.1__py3-none-any.whl

max-div 0.0.3__py3-none-any.whl → 0.1.1__py3-none-any.whl