PyPI - da4ml - Versions diffs - 0.2.0__tar.gz → 0.2.1__tar.gz - Mend

da4ml 0.2.0.tar.gz → 0.2.1.tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.

This version of da4ml might be problematic. Click here for more details.

Files changed (48) hide show

{da4ml-0.2.0/src/da4ml.egg-info → da4ml-0.2.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: da4ml
-Version: 0.2.0
+Version: 0.2.1
 Summary: Digital Arithmetic for Machine Learning
 Author-email: Chang Sun <chsun@cern.ch>
 License: GNU Lesser General Public License v3 (LGPLv3)

{da4ml-0.2.0 → da4ml-0.2.1}/src/da4ml/_version.py RENAMED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.2.0'
-__version_tuple__ = version_tuple = (0, 2, 0)
+__version__ = version = '0.2.1'
+__version_tuple__ = version_tuple = (0, 2, 1)

{da4ml-0.2.0 → da4ml-0.2.1}/src/da4ml/cmvm/api.py RENAMED Viewed

@@ -140,10 +140,6 @@ def jit_solve(
             if not method0 == method1 == 'wmc-dc' or decompose_dc >= 0:
                 decompose_dc -= 1
                 continue
-        if sum([op.cost for op in sol1.ops]) * 4 > sum([op.cost for op in sol0.ops]) and decompose_dc > 0:
-            # If the second stage is too expensive, the decomposition usually doesn't worth it
-            decompose_dc -= 1
-            continue
         break
     if max(latencies1) > latency_allowed:
         # When latency depends on the bw, may happen
@@ -158,8 +154,8 @@ def solve(
     method1: str = 'auto',
     hard_dc: int = -1,
     decompose_dc: int = -2,
-    qintervals: tuple[QInterval, ...] | None = None,
-    latencies: tuple[float, ...] | None = None,
+    qintervals: list[QInterval] | None = None,
+    latencies: list[float] | None = None,
     adder_size: int = -1,
     carry_size: int = -1,
     search_all_decompose_dc: bool = True,

{da4ml-0.2.0 → da4ml-0.2.1}/src/da4ml/cmvm/core/__init__.py RENAMED Viewed

@@ -131,7 +131,6 @@ def to_solution(
     _global_id = len(ops)
     for i_out in range(n_out):
-        heap = []
         idx, shifts = np.where(expr[:, i_out] != 0)
         sub = np.empty(len(idx), dtype=np.int64)
         for i, (i_in, shift) in enumerate(zip(idx, shifts)):

{da4ml-0.2.0 → da4ml-0.2.1}/src/da4ml/cmvm/types.py RENAMED Viewed

@@ -159,6 +159,8 @@ def _relu(v: 'T', i: int | None = None, f: int | None = None, inv: bool = False,
     from ..trace.fixed_variable import FixedVariable
     assert isinstance(v, FixedVariable), f'Unknown type {type(v)} for symbolic relu'
+    if inv:
+        v = -v
     return v.relu(i, f, round_mode=round_mode)
@@ -295,9 +297,7 @@ class Solution(NamedTuple):
         inp_qint = [op.qint for op in self.ops if op.opcode == -1]
         if quantize:  # TRN and WRAP
             k, i, f = map(np.array, zip(*map(minimal_kif, inp_qint)))
-            eps = 2.0**-f
-            _low, _high = -(2.0 ** (i + f)) * k, 2.0 ** (i + f) - 1
-            inp = eps * ((np.floor(inp / eps) - _low) % 2.0 ** (k + i + f) + _low)
+            inp = [_quantize(*x, round_mode='TRN') for x in zip(inp, k, i, f)]
         inp = inp * (2.0 ** np.array(self.inp_shift))
         for i, op in enumerate(self.ops):
@@ -561,7 +561,7 @@ class CascadedSolution(NamedTuple):
     @property
     def reg_bits(self):
         """The number of bits used for the register in the solution."""
-        bits = 0
+        bits = sum(map(sum, (_minimal_kif(qint) for qint in self.inp_qint)))
         for _sol in self.solutions:
             kifs = [_minimal_kif(qint) for qint in _sol.out_qint]
             _bits = sum(map(sum, kifs))

{da4ml-0.2.0 → da4ml-0.2.1}/src/da4ml/codegen/cpp/cpp_codegen.py RENAMED Viewed

@@ -4,13 +4,13 @@ from ...cmvm.types import Op, QInterval, Solution, _minimal_kif
 from ...trace.fixed_variable import _const_f
-def kif_to_vitis_type(k: bool | int, i: int, f: int):
+def kif_to_vitis_type(k: bool | int = 1, i: int = 0, f: int = 0):
     if k == i == f == 0:
         f = 1
     return f'ap_{"" if k else "u"}fixed<{k+i+f},{k+i}>'
-def kif_to_hlslib_type(k: bool | int, i: int, f: int):
+def kif_to_hlslib_type(k: bool | int = 1, i: int = 0, f: int = 0):
     if k == i == f == 0:
         f = 1
     return f'ac_fixed<{int(k)},{k+i+f},{k+i}>'

{da4ml-0.2.0 → da4ml-0.2.1}/src/da4ml/codegen/verilog/io_wrapper.py RENAMED Viewed

@@ -171,13 +171,13 @@ void inference(int32_t *c_inp, int32_t *c_out, size_t n_samples) {{
 }}"""
-def pipeline_binder_gen(csol: CascadedSolution, module_name: str, II: int = 1):
+def pipeline_binder_gen(csol: CascadedSolution, module_name: str, II: int = 1, latency_multiplier: int = 1):
     k_in, i_in, f_in = zip(*map(_minimal_kif, csol.inp_qint))
     k_out, i_out, f_out = zip(*map(_minimal_kif, csol.out_qint))
     max_inp_bw = max(k + i for k, i in zip(k_in, i_in)) + max(f_in)
     max_out_bw = max(k + i for k, i in zip(k_out, i_out)) + max(f_out)
-    n_stage = len(csol.solutions)
+    latency = len(csol.solutions) * latency_multiplier
     n_in, n_out = csol.shape
     return f"""#include "V{module_name}.h"
@@ -196,7 +196,7 @@ constexpr size_t N_out = {n_out};
 constexpr size_t max_inp_bw = {max_inp_bw};
 constexpr size_t max_out_bw = {max_out_bw};
 constexpr size_t II = {II};
-constexpr size_t latency = {n_stage};
+constexpr size_t latency = {latency};
 typedef V{module_name} dut_t;
 extern "C" {{

{da4ml-0.2.0 → da4ml-0.2.1}/src/da4ml/codegen/verilog/pipeline.py RENAMED Viewed

@@ -3,19 +3,37 @@ from .comb import comb_logic_gen
 def pipeline_logic_gen(
-    csol: CascadedSolution, name: str, print_latency=False, timescale: str | None = '`timescale 1 ns / 1 ps', reset_high=True
+    csol: CascadedSolution,
+    name: str,
+    print_latency=False,
+    timescale: str | None = '`timescale 1 ns / 1 ps',
+    register_layers: int = 1,
 ):
     N = len(csol.solutions)
     inp_bits = [sum(map(sum, map(_minimal_kif, sol.inp_qint))) for sol in csol.solutions]
     out_bits = inp_bits[1:] + [sum(map(sum, map(_minimal_kif, csol.out_qint)))]
     registers = [f'reg [{width}-1:0] stage{i}_inp;' for i, width in enumerate(inp_bits)]
+    for i in range(0, register_layers - 1):
+        registers += [f'reg [{width}-1:0] stage{j}_inp_copy{i};' for j, width in enumerate(inp_bits)]
     wires = [f'wire [{width}-1:0] stage{i}_out;' for i, width in enumerate(out_bits)]
     comb_logic = [f'{name}_stage{i} stage{i} (.inp(stage{i}_inp), .out(stage{i}_out));' for i in range(N)]
-    serial_logic = ['stage0_inp <= inp;']
-    serial_logic += [f'stage{i}_inp <= stage{i-1}_out;' for i in range(1, N)]
+    if register_layers == 1:
+        serial_logic = ['stage0_inp <= inp;']
+        serial_logic += [f'stage{i}_inp <= stage{i-1}_out;' for i in range(1, N)]
+    else:
+        serial_logic = ['stage0_inp_copy0 <= inp;']
+        for j in range(1, register_layers - 1):
+            serial_logic.append(f'stage0_inp_copy{j} <= stage0_inp_copy{j-1};')
+        serial_logic.append(f'stage0_inp <= stage0_inp_copy{register_layers - 2};')
+        for i in range(1, N):
+            serial_logic.append(f'stage{i}_inp_copy0 <= stage{i-1}_out;')
+            for j in range(1, register_layers - 1):
+                serial_logic.append(f'stage{i}_inp_copy{j} <= stage{i}_inp_copy{j-1};')
+            serial_logic.append(f'stage{i}_inp <= stage{i}_inp_copy{register_layers - 2};')
     serial_logic += [f'out <= stage{N-1}_out;']
     sep0 = '\n    '

{da4ml-0.2.0 → da4ml-0.2.1}/src/da4ml/codegen/verilog/source/build_prj.tcl RENAMED Viewed

@@ -26,7 +26,6 @@ file mkdir "${output_dir}/reports"
 # synth
 synth_design -top $top_module -mode out_of_context -retiming \
     -flatten_hierarchy rebuilt -resource_sharing auto \
-    -keep_equivalent_registers -shreg_min_size 8 \
     -directive AlternateRoutability
 write_checkpoint -force "${output_dir}/${project_name}_post_synth.dcp"

{da4ml-0.2.0 → da4ml-0.2.1}/src/da4ml/codegen/verilog/verilog_model.py RENAMED Viewed

@@ -34,6 +34,7 @@ class VerilogModel:
         clock_period: int = 5,
         clock_uncertainty: float = 0.1,
         io_delay_minmax: tuple[float, float] = (0.2, 0.4),
+        register_layers: int = 1,
     ):
         self._solution = solution
         self._path = Path(path)
@@ -45,6 +46,7 @@ class VerilogModel:
         self._clock_period = clock_period
         self._clock_uncertainty = clock_uncertainty
         self._io_delay_minmax = io_delay_minmax
+        self._register_layers = register_layers
         self._pipe = solution if isinstance(solution, CascadedSolution) else None
         if latency_cutoff > 0 and self._pipe is None:
@@ -62,7 +64,7 @@ class VerilogModel:
         self._path.mkdir(parents=True, exist_ok=True)
         if self._pipe is not None:  # Pipeline
             # Main logic
-            codes = pipeline_logic_gen(self._pipe, self._prj_name, self._print_latency)
+            codes = pipeline_logic_gen(self._pipe, self._prj_name, self._print_latency, register_layers=self._register_layers)
             for k, v in codes.items():
                 with open(self._path / f'{k}.v', 'w') as f:
                     f.write(v)
@@ -86,8 +88,8 @@ class VerilogModel:
             with open(self._path / f'{self._prj_name}.xdc', 'w') as f:
                 f.write(xdc)
-            # C++ binder w/
-            binder = pipeline_binder_gen(self._pipe, f'{self._prj_name}_wrapper', 1)
+            # C++ binder w/ verilog wrapper for uniform bw
+            binder = pipeline_binder_gen(self._pipe, f'{self._prj_name}_wrapper', 1, self._register_layers)
             # Verilog IO wrapper (non-uniform bw to uniform one, clk passthrough)
             io_wrapper = generate_io_wrapper(self._pipe, self._prj_name, True)
@@ -243,11 +245,12 @@ class VerilogModel:
         in_bits, out_bits = np.sum(kifs_in), np.sum(kifs_out)
         if self._pipe is not None:
             n_stage = len(self._pipe[0])
+            delay_suffix = '' if self._register_layers == 1 else f'x {self._register_layers} '
             lat_cutoff = self._latency_cutoff
             reg_bits = self._pipe.reg_bits
             spec = f"""Top Module: {self._prj_name}\n====================
 {inp_size} ({in_bits} bits) -> {out_size} ({out_bits} bits)
-{n_stage} stages @ max_delay={lat_cutoff}
+{n_stage} {delay_suffix}stages @ max_delay={lat_cutoff}
 Estimated cost: {cost} LUTs, {reg_bits} FFs"""
         else:

{da4ml-0.2.0 → da4ml-0.2.1}/src/da4ml/trace/fixed_variable.py RENAMED Viewed

@@ -266,7 +266,7 @@ class FixedVariable:
             step = Decimal(2) ** -f
             i = ceil(log2(val + step)) if not i else i
             eps = step / 2 if round_mode == 'RND' else 0
-            val = floor(val / step + eps) % Decimal(2) ** i * step
+            val = (floor(val / step + eps) * step) % (Decimal(2) ** i)
             return FixedVariable(val, val, step, hwconf=self.hwconf)
         step = max(Decimal(2) ** -f, self.step) if f is not None else self.step
@@ -324,7 +324,7 @@ class FixedVariable:
         # bit-exactness will be lost in these cases, but they should never happen (quantizers are used in a weird way)
         # Keeping this for now; change if absolutely necessary
         f = min(f, _f)
-        k = min(k, _k)
+        k = min(k, _k) if i >= _i else k
         i = min(i, _i)
         step = max(Decimal(2) ** -f, self.step)

{da4ml-0.2.0 → da4ml-0.2.1}/src/da4ml/trace/fixed_variable_array.py RENAMED Viewed

@@ -1,6 +1,8 @@
+from inspect import signature
 from typing import Any
 import numpy as np
+from numba.typed import List as NumbaList
 from numpy.typing import NDArray
 from ..cmvm import solve
@@ -14,7 +16,13 @@ class FixedVariableArray:
         solver_options: dict[str, Any] | None = None,
     ):
         self._vars = np.array(vars)
-        self.solver_options = solver_options
+        _solver_options = signature(solve).parameters
+        _solver_options = {k: v.default for k, v in _solver_options.items() if v.default is not v.empty}
+        if solver_options is not None:
+            _solver_options.update(solver_options)
+        _solver_options.pop('qintervals', None)
+        _solver_options.pop('latencies', None)
+        self.solver_options = _solver_options
     @classmethod
     def from_lhs(
@@ -75,8 +83,10 @@ class FixedVariableArray:
         r = []
         for i in range(mat0.shape[0]):
             vec = mat0[i]
-            qintervals = tuple([QInterval(float(v.low), float(v.high), float(v.step)) for v in vec._vars])
-            latencies = tuple([float(v.latency) for v in vec._vars])
+            _qintervals = [QInterval(float(v.low), float(v.high), float(v.step)) for v in vec._vars]
+            _latencies = [float(v.latency) for v in vec._vars]
+            qintervals = NumbaList(_qintervals)  # type: ignore
+            latencies = NumbaList(_latencies)  # type: ignore
             hwconf = self._vars.ravel()[0].hwconf
             kwargs.update(adder_size=hwconf.adder_size, carry_size=hwconf.carry_size)
             _mat = np.ascontiguousarray(mat1.astype(np.float32))
@@ -96,8 +106,8 @@ class FixedVariableArray:
         axes = _axes[ndim0 - 1 :] + _axes[: ndim0 - 1]
         return r.transpose(axes)
-    def __getitem__(self, *item):
-        vars = self._vars[*item]
+    def __getitem__(self, item):
+        vars = self._vars[item]
         if isinstance(vars, np.ndarray):
             return FixedVariableArray(vars, self.solver_options)
         else:

{da4ml-0.2.0 → da4ml-0.2.1}/src/da4ml/trace/tracer.py RENAMED Viewed

@@ -101,6 +101,8 @@ def comb_trace(inputs: FixedVariableArray, outputs: FixedVariableArray) -> Solut
 def comb_trace(inputs, outputs):
     inputs, outputs = list(np.ravel(inputs)), list(np.ravel(outputs))
+    # latency = max(v.latency if isinstance(v, FixedVariable) else 0 for v in outputs)
+    # outputs = [v if isinstance(v, FixedVariable) else FixedVariable(v,v,0, latency=latency, opr='const') for v in outputs]
     ops, out_index = _comb_trace(inputs, outputs)
     shape = len(inputs), len(outputs)
     inp_shift = [0] * shape[0]

{da4ml-0.2.0 → da4ml-0.2.1/src/da4ml.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: da4ml
-Version: 0.2.0
+Version: 0.2.1
 Summary: Digital Arithmetic for Machine Learning
 Author-email: Chang Sun <chsun@cern.ch>
 License: GNU Lesser General Public License v3 (LGPLv3)