da4ml 0.3.0.post1__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of da4ml might be problematic.
- da4ml/_version.py +16 -3
- da4ml/cmvm/types.py +12 -2
- da4ml/codegen/cpp/cpp_codegen.py +4 -1
- da4ml/codegen/verilog/comb.py +19 -11
- da4ml/codegen/verilog/source/binder_util.hh +8 -6
- da4ml/codegen/verilog/source/build_prj.tcl +6 -8
- da4ml/codegen/verilog/source/ioutil.hh +2 -1
- da4ml/codegen/verilog/source/multiplier.v +37 -0
- da4ml/codegen/verilog/verilog_model.py +4 -5
- da4ml/converter/__init__.py +3 -0
- da4ml/converter/hgq2/__init__.py +3 -0
- da4ml/converter/hgq2/parser.py +60 -10
- da4ml/converter/hgq2/replica.py +125 -35
- da4ml/trace/fixed_variable.py +133 -20
- da4ml/trace/fixed_variable_array.py +55 -7
- da4ml/trace/ops/__init__.py +4 -4
- da4ml/trace/ops/einsum_utils.py +5 -2
- da4ml/trace/ops/reduce_utils.py +4 -2
- da4ml/trace/pipeline.py +6 -4
- da4ml/trace/tracer.py +27 -13
- da4ml-0.3.2.dist-info/METADATA +66 -0
- {da4ml-0.3.0.post1.dist-info → da4ml-0.3.2.dist-info}/RECORD +25 -23
- da4ml-0.3.0.post1.dist-info/METADATA +0 -107
- {da4ml-0.3.0.post1.dist-info → da4ml-0.3.2.dist-info}/WHEEL +0 -0
- {da4ml-0.3.0.post1.dist-info → da4ml-0.3.2.dist-info}/licenses/LICENSE +0 -0
- {da4ml-0.3.0.post1.dist-info → da4ml-0.3.2.dist-info}/top_level.txt +0 -0
da4ml/_version.py
CHANGED
@@ -1,7 +1,14 @@
 # file generated by setuptools-scm
 # don't change, don't track in version control

-__all__ = [
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]

 TYPE_CHECKING = False
 if TYPE_CHECKING:
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
     from typing import Union

     VERSION_TUPLE = Tuple[Union[int, str], ...]
+    COMMIT_ID = Union[str, None]
 else:
     VERSION_TUPLE = object
+    COMMIT_ID = object

 version: str
 __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
+commit_id: COMMIT_ID
+__commit_id__: COMMIT_ID

-__version__ = version = '0.3.
-__version_tuple__ = version_tuple = (0, 3,
+__version__ = version = '0.3.2'
+__version_tuple__ = version_tuple = (0, 3, 2)
+
+__commit_id__ = commit_id = None
da4ml/cmvm/types.py
CHANGED
@@ -321,7 +321,7 @@ class Solution(NamedTuple):
             case 4:  # const addition
                 bias = op.data * op.qint.step
                 buf[i] = buf[op.id0] + bias
-            case 5:
+            case 5:  # const definition
                 buf[i] = op.data * op.qint.step  # const definition
             case 6 | -6:  # MSB Mux
                 id_c = op.data & 0xFFFFFFFF
@@ -340,6 +340,9 @@
                 else:
                     _k, _i, _f = _minimal_kif(qint_k)
                     buf[i] = v0 if k >= 2.0 ** (_i - 1) else v1 * 2.0**shift
+            case 7:
+                v0, v1 = buf[op.id0], buf[op.id1]
+                buf[i] = v0 * v1
             case _:
                 raise ValueError(f'Unknown opcode {op.opcode} in {op}')

@@ -370,6 +373,8 @@
             case 6 | -6:
                 _sign = '-' if op.opcode == -6 else ''
                 op_str = f'msb(buf[{op.data}]) ? buf[{op.id0}] : {_sign}buf[{op.id1}]'
+            case 7:
+                op_str = f'buf[{op.id0}] * buf[{op.id1}]'
             case _:
                 raise ValueError(f'Unknown opcode {op.opcode} in {op}')

@@ -436,7 +441,12 @@
     @property
     def inp_qint(self):
         """Quantization intervals of the input elements."""
-
+        qints = [QInterval(0.0, 0.0, 1.0) for _ in range(self.shape[0])]
+        for op in self.ops:
+            if op.opcode != -1:
+                continue
+            qints[op.id0] = op.qint
+        return qints

     def save(self, path: str | Path):
         """Save the solution to a file."""
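Note: opcode 7 is new in this release and extends the Solution interpreter with element-wise multiplication. A minimal sketch of its semantics, using plain indices rather than the package's Op records:

def eval_mul(buf: list[float], id0: int, id1: int) -> float:
    # Mirrors the new `case 7` branch above: the product of two
    # previously computed buffer entries becomes the current value.
    return buf[id0] * buf[id1]

assert eval_mul([1.5, -2.0], 0, 1) == -3.0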
da4ml/codegen/cpp/cpp_codegen.py
CHANGED
@@ -86,7 +86,10 @@ def ssa_gen(sol: Solution, print_latency: bool, typestr_fn: Callable[[bool | int
                 sign = '-' if op.opcode == -6 else ''
                 ref1 = f'v{op.id1}' if shift == 0 else f'bit_shift<{shift}>(v{op.id1})'
                 val = f'{ref_k} ? {_type}({ref0}) : {_type}({sign}{ref1})'
-
+            case 7:
+                # Multiplication
+                ref1 = f'v{op.id1}'
+                val = f'{ref0} * {ref1}'
             case _:
                 raise ValueError(f'Unsupported opcode: {op.opcode}')

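For the same opcode, the C++ emitter now builds a plain product expression. A rough sketch of the string construction (operand indices here are hypothetical; in the real code they come from op.id0 and op.id1, and ref0 is defined by the surrounding branch logic):

id0, id1 = 3, 5  # hypothetical operand indices
ref0, ref1 = f'v{id0}', f'v{id1}'
val = f'{ref0} * {ref1}'
print(val)  # -> "v3 * v5", assigned to the destination SSA variable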
da4ml/codegen/verilog/comb.py
CHANGED
@@ -9,7 +9,7 @@ def ssa_gen(sol: Solution, neg_defined: set[int], print_latency: bool = False):
     ops = sol.ops
     kifs = list(map(_minimal_kif, (op.qint for op in ops)))
     widths = list(map(sum, kifs))
-    inp_kifs = [_minimal_kif(
+    inp_kifs = [_minimal_kif(qint) for qint in sol.inp_qint]
     inp_widths = list(map(sum, inp_kifs))
     _inp_widths = np.cumsum([0] + inp_widths)
     inp_idxs = np.stack([_inp_widths[1:] - 1, _inp_widths[:-1]], axis=1)
@@ -31,6 +31,17 @@ def ssa_gen(sol: Solution, neg_defined: set[int], print_latency: bool = False):
             case -1:  # Input marker
                 i0, i1 = inp_idxs[op.id0]
                 line = f'{_def} assign {v} = inp[{i0}:{i1}];'
+
+            case 0 | 1:  # Common a+/-b<<shift oprs
+                p0, p1 = kifs[op.id0], kifs[op.id1]  # precision -> keep_neg, integers (no sign), fractional
+
+                bw0, bw1 = widths[op.id0], widths[op.id1]  # width
+                s0, f0, s1, f1 = int(p0[0]), p0[2], int(p1[0]), p1[2]
+                shift = op.data + f0 - f1
+                v0, v1 = f'v{op.id0}[{bw0 - 1}:0]', f'v{op.id1}[{bw1 - 1}:0]'
+
+                line = f'{_def} shift_adder #({bw0}, {bw1}, {s0}, {s1}, {bw}, {shift}, {op.opcode}) op_{i} ({v0}, {v1}, {v});'
+
             case 2 | -2:  # ReLU
                 lsb_bias = kifs[op.id0][2] - kifs[i][2]
                 i0, i1 = bw + lsb_bias - 1, lsb_bias
@@ -93,16 +104,6 @@ def ssa_gen(sol: Solution, neg_defined: set[int], print_latency: bool = False):
                 num = 2**bw + num
                 line = f"{_def} assign {v} = '{bin(num)[1:]};"

-            case 0 | 1:  # Common a+/-b<<shift oprs
-                p0, p1 = kifs[op.id0], kifs[op.id1]  # precision -> keep_neg, integers (no sign), fractional
-
-                bw0, bw1 = widths[op.id0], widths[op.id1]  # width
-                s0, f0, s1, f1 = int(p0[0]), p0[2], int(p1[0]), p1[2]
-                shift = op.data + f0 - f1
-                v0, v1 = f'v{op.id0}[{bw0 - 1}:0]', f'v{op.id1}[{bw1 - 1}:0]'
-
-                line = f'{_def} shift_adder #({bw0}, {bw1}, {s0}, {s1}, {bw}, {shift}, {op.opcode}) op_{i} ({v0}, {v1}, {v});'
-
             case 6 | -6:  # MSB Muxing
                 k, a, b = op.data & 0xFFFFFFFF, op.id0, op.id1
                 p0, p1 = kifs[a], kifs[b]
@@ -115,6 +116,13 @@ def ssa_gen(sol: Solution, neg_defined: set[int], print_latency: bool = False):
                 vk, v0, v1 = f'v{k}[{bwk - 1}]', f'v{a}[{bw0 - 1}:0]', f'v{b}[{bw1 - 1}:0]'

                 line = f'{_def} mux #({bw0}, {bw1}, {s0}, {s1}, {bw}, {shift}, {inv}) op_{i} ({vk}, {v0}, {v1}, {v});'
+            case 7:  # Multiplication
+                bw0, bw1 = widths[op.id0], widths[op.id1]  # width
+                s0, s1 = int(kifs[op.id0][0]), int(kifs[op.id1][0])
+                v0, v1 = f'v{op.id0}[{bw0 - 1}:0]', f'v{op.id1}[{bw1 - 1}:0]'
+
+                line = f'{_def} multiplier #({bw0}, {bw1}, {s0}, {s1}, {bw}) op_{i} ({v0}, {v1}, {v});'
+
             case _:
                 raise ValueError(f'Unknown opcode {op.opcode} for operation {i} ({op})')

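Opcode 7 is lowered to an instantiation of the new multiplier module (see multiplier.v below). A sketch of the emitted line with hypothetical widths and signedness flags; the `wire` declaration stands in for whatever the `_def` prefix expands to in the real generator:

i, bw0, bw1, s0, s1, bw = 7, 8, 6, 1, 0, 14  # hypothetical example values
v0, v1, v = f'v3[{bw0 - 1}:0]', f'v5[{bw1 - 1}:0]', f'v{i}'
line = f'wire [{bw - 1}:0] {v}; multiplier #({bw0}, {bw1}, {s0}, {s1}, {bw}) op_{i} ({v0}, {v1}, {v});'
print(line)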
da4ml/codegen/verilog/source/binder_util.hh
CHANGED
@@ -10,7 +10,7 @@ constexpr bool _openmp = false;

 template <typename CONFIG_T>
 std::enable_if_t<CONFIG_T::II != 0> _inference(int32_t *c_inp, int32_t *c_out, size_t n_samples) {
-
+    auto dut = std::make_unique<typename CONFIG_T::dut_t>();

     size_t clk_req = n_samples * CONFIG_T::II + CONFIG_T::latency + 1;

@@ -18,14 +18,18 @@ std::enable_if_t<CONFIG_T::II != 0> _inference(int32_t *c_inp, int32_t *c_out, s
     size_t t_out = t_inp - CONFIG_T::latency - 1;

     if (t_inp < n_samples * CONFIG_T::II && t_inp % CONFIG_T::II == 0) {
-        write_input<CONFIG_T::N_inp, CONFIG_T::max_inp_bw>(
+        write_input<CONFIG_T::N_inp, CONFIG_T::max_inp_bw>(
+            dut->inp, &c_inp[t_inp / CONFIG_T::II * CONFIG_T::N_inp]
+        );
     }

     dut->clk = 0;
     dut->eval();

     if (t_inp > CONFIG_T::latency && t_out % CONFIG_T::II == 0) {
-        read_output<CONFIG_T::N_out, CONFIG_T::max_out_bw>(
+        read_output<CONFIG_T::N_out, CONFIG_T::max_out_bw>(
+            dut->out, &c_out[t_out / CONFIG_T::II * CONFIG_T::N_out]
+        );
     }

     dut->clk = 1;
@@ -33,12 +37,11 @@ std::enable_if_t<CONFIG_T::II != 0> _inference(int32_t *c_inp, int32_t *c_out, s
     }

     dut->final();
-    delete dut;
 }

 template <typename CONFIG_T>
 std::enable_if_t<CONFIG_T::II == 0> _inference(int32_t *c_inp, int32_t *c_out, size_t n_samples) {
-
+    auto dut = std::make_unique<typename CONFIG_T::dut_t>();

     for (size_t i = 0; i < n_samples; ++i) {
         write_input<CONFIG_T::N_inp, CONFIG_T::max_inp_bw>(dut->inp, &c_inp[i * CONFIG_T::N_inp]);
@@ -47,7 +50,6 @@ std::enable_if_t<CONFIG_T::II == 0> _inference(int32_t *c_inp, int32_t *c_out, s
     }

     dut->final();
-    delete dut;
 }

 template <typename CONFIG_T> void batch_inference(int32_t *c_inp, int32_t *c_out, size_t n_samples) {
da4ml/codegen/verilog/source/build_prj.tcl
CHANGED
@@ -1,7 +1,7 @@
 set project_name "${PROJECT_NAME}"
 set device "${DEVICE}"

-set top_module "${project_name}
+set top_module "${project_name}"
 set output_dir "./output_${project_name}"

 create_project $project_name "${output_dir}/$project_name" -force -part $device
@@ -9,9 +9,10 @@ create_project $project_name "${output_dir}/$project_name" -force -part $device
 set_property TARGET_LANGUAGE Verilog [current_project]
 set_property DEFAULT_LIB work [current_project]

-read_verilog "${project_name}_wrapper.v"
 read_verilog "${project_name}.v"
 read_verilog "shift_adder.v"
+read_verilog "negative.v"
+read_verilog "mux.v"
 foreach file [glob -nocomplain "${project_name}_stage*.v"] {
     read_verilog $file
 }
@@ -25,8 +26,7 @@ file mkdir "${output_dir}/reports"

 # synth
 synth_design -top $top_module -mode out_of_context -retiming \
-    -flatten_hierarchy
-    -directive AlternateRoutability
+    -flatten_hierarchy full -resource_sharing auto

 write_checkpoint -force "${output_dir}/${project_name}_post_synth.dcp"

@@ -34,15 +34,13 @@ report_timing_summary -file "${output_dir}/reports/${project_name}_post_synth_ti
 report_power -file "${output_dir}/reports/${project_name}_post_synth_power.rpt"
 report_utilization -file "${output_dir}/reports/${project_name}_post_synth_util.rpt"

-#
-
-opt_design -directive ExploreSequentialArea
+# opt_design -directive ExploreSequentialArea
 opt_design -directive ExploreWithRemap

 report_design_analysis -congestion -file "${output_dir}/reports/${project_name}_post_opt_congestion.rpt"

 # place
-place_design -directive
+place_design -directive SSI_HighUtilSLRs -fanout_opt
 report_design_analysis -congestion -file "${output_dir}/reports/${project_name}_post_place_congestion_initial.rpt"

 phys_opt_design -directive AggressiveExplore
da4ml/codegen/verilog/source/ioutil.hh
CHANGED
@@ -68,7 +68,8 @@ template <size_t bw, size_t N_out> std::vector<int32_t> bitunpack(const std::vec
 }

 template <size_t bits_in, typename inp_buf_t>
-std::enable_if_t<std::is_integral_v<inp_buf_t>, void>
+std::enable_if_t<std::is_integral_v<inp_buf_t>, void>
+_write_input(inp_buf_t &inp_buf, const std::vector<int32_t> &input) {
     assert(input.size() == (bits_in + 31) / 32);
     inp_buf = input[0] & 0xFFFFFFFF;
     if (bits_in > 32) {
da4ml/codegen/verilog/source/multiplier.v
ADDED
@@ -0,0 +1,37 @@
+`timescale 1ns / 1ps
+
+
+module multiplier #(
+    parameter BW_INPUT0 = 32,
+    parameter BW_INPUT1 = 32,
+    parameter SIGNED0 = 0,
+    parameter SIGNED1 = 0,
+    parameter BW_OUT = 32
+) (
+    input [BW_INPUT0-1:0] in0,
+    input [BW_INPUT1-1:0] in1,
+    output [BW_OUT-1:0] out
+);
+
+    localparam BW_BUF = BW_INPUT0 + BW_INPUT1;
+
+    // verilator lint_off UNUSEDSIGNAL
+    wire [BW_BUF - 1:0] buffer;
+    // verilator lint_on UNUSEDSIGNAL
+
+    generate
+        if (SIGNED0 == 1 && SIGNED1 == 1) begin : signed_signed
+            assign buffer[BW_BUF-1:0] = $signed(in0) * $signed(in1);
+        end else if (SIGNED0 == 1 && SIGNED1 == 0) begin : signed_unsigned
+            assign buffer[BW_BUF-1:0] = $signed(in0) * $signed({{1'b0,in1}});
+            // assign buffer[BW_BUF-1] = in0[BW_INPUT0-1];
+        end else if (SIGNED0 == 0 && SIGNED1 == 1) begin : unsigned_signed
+            assign buffer[BW_BUF-1:0] = $signed({{1'b0,in0}}) * $signed(in1);
+            // assign buffer[BW_BUF-1] = in1[BW_INPUT1-1];
+        end else begin : unsigned_unsigned
+            assign buffer[BW_BUF-1:0] = in0 * in1;
+        end
+    endgenerate
+
+    assign out[BW_OUT-1:0] = buffer[BW_OUT-1:0];
+endmodule
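The module forms the full-width product (BW_INPUT0 + BW_INPUT1 bits, treating each operand as signed or unsigned per SIGNED0/SIGNED1) and then keeps only the BW_OUT least-significant bits. A small Python model of that assumed truncation behavior:

def multiplier_model(in0: int, in1: int, bw_out: int) -> int:
    # Full-precision product, then keep the BW_OUT LSBs, matching
    # `assign out[BW_OUT-1:0] = buffer[BW_OUT-1:0];`.
    return (in0 * in1) & ((1 << bw_out) - 1)

assert multiplier_model(-3, 5, 8) == 0xF1  # -15 in 8-bit two's complement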
da4ml/codegen/verilog/verilog_model.py
CHANGED
@@ -28,10 +28,10 @@ class VerilogModel:
         solution: Solution | CascadedSolution,
         prj_name: str,
         path: str | Path,
-        latency_cutoff:
+        latency_cutoff: float = -1,
         print_latency: bool = True,
         part_name: str = 'xcvu13p-flga2577-2-e',
-        clock_period:
+        clock_period: float = 5,
         clock_uncertainty: float = 0.1,
         io_delay_minmax: tuple[float, float] = (0.2, 0.4),
         register_layers: int = 1,
@@ -114,9 +114,8 @@ class VerilogModel:
             f.write(binder)

         # Common resource copy
-
-
-        shutil.copy(self.__src_root / 'verilog/source/negative.v', self._path)
+        for fname in self.__src_root.glob('verilog/source/*.v'):
+            shutil.copy(fname, self._path)
         shutil.copy(self.__src_root / 'verilog/source/build_binder.mk', self._path)
         shutil.copy(self.__src_root / 'verilog/source/ioutil.hh', self._path)
         shutil.copy(self.__src_root / 'verilog/source/binder_util.hh', self._path)
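A hypothetical instantiation relying only on the signature shown above (the import path and the `solution` placeholder are assumptions, not confirmed by this diff):

from da4ml.codegen.verilog.verilog_model import VerilogModel  # assumed import path

model = VerilogModel(
    solution,            # a Solution or CascadedSolution from an earlier trace
    prj_name='my_model',
    path='build/',
    latency_cutoff=-1,   # new default: no latency cutoff
    clock_period=5,      # new default: 5 ns target clock
)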
da4ml/converter/__init__.py
CHANGED
da4ml/converter/hgq2/parser.py
CHANGED
@@ -1,11 +1,12 @@
 from collections.abc import Sequence
 from dataclasses import dataclass
-from typing import Any
+from typing import Any, Literal, overload

 import keras
+import numpy as np
 from keras import KerasTensor, Operation

-from ...trace import FixedVariableArray, HWConfig
+from ...trace import FixedVariableArray, HWConfig, comb_trace
 from ...trace.fixed_variable_array import FixedVariableArrayInput
 from .replica import _registry

@@ -20,6 +21,8 @@ class OpObj:


 def parse_model(model: keras.Model):
+    if isinstance(model, keras.Sequential):
+        model = model._functional
     operators: dict[int, list[OpObj]] = {}
     for depth, nodes in model._nodes_by_depth.items():
         _oprs = []
@@ -49,9 +52,22 @@ def replace_tensors(tensor_map: dict[KerasTensor, FixedVariableArray], obj: Any)
     return obj


+def _flatten_arr(args: Any) -> FixedVariableArray:
+    if isinstance(args, FixedVariableArray):
+        return np.ravel(args)  # type: ignore
+    if not isinstance(args, Sequence):
+        return None  # type: ignore
+    args = [_flatten_arr(a) for a in args]
+    args = [a for a in args if a is not None]
+    return np.concatenate(args)  # type: ignore
+
+
 def _apply_nn(
-    model: keras.Model,
-
+    model: keras.Model,
+    inputs: FixedVariableArray | Sequence[FixedVariableArray],
+    verbose: bool = False,
+    dump: bool = False,
+) -> tuple[FixedVariableArray, ...] | dict[str, FixedVariableArray]:
     """
     Apply a keras model to a fixed variable array or a sequence of fixed variable arrays.

@@ -73,6 +89,8 @@ def _apply_nn(
     assert len(model.inputs) == len(inputs), f'Model has {len(model.inputs)} inputs, got {len(inputs)}'
     tensor_map = {keras_tensor: da_tensor for keras_tensor, da_tensor in zip(model.inputs, inputs)}

+    _inputs = _flatten_arr(inputs)
+
     for ops in parse_model(model):
         for op in ops:
             assert all(t in tensor_map for t in op.requires)
@@ -82,24 +100,56 @@ def _apply_nn(
                 continue
             mirror_op = _registry[op.operation.__class__](op.operation)
             if verbose:
-                print(f'Processing operation {op.operation.name} ({op.operation.__class__.__name__})')
+                print(f'Processing operation {op.operation.name} ({op.operation.__class__.__name__})', end='')
             outputs = mirror_op(*args, **kwargs)
             for keras_tensor, da_tensor in zip(op.produces, outputs):
                 tensor_map[keras_tensor] = da_tensor
+            if verbose:
+                cost = comb_trace(_inputs, _flatten_arr(outputs)).cost
+                print(f' cumcost: {cost}')
+
+    if not dump:
+        return tuple(tensor_map[keras_tensor] for keras_tensor in model.outputs)
+    else:
+        return {k.name: v for k, v in tensor_map.items()}
+
+
+@overload
+def trace_model(  # type: ignore
+    model: keras.Model,
+    hwconf: HWConfig = HWConfig(1, -1, -1),
+    solver_options: dict[str, Any] | None = None,
+    verbose: bool = False,
+    inputs: tuple[FixedVariableArray, ...] | FixedVariableArray | None = None,
+    dump: Literal[False] = False,
+) -> tuple[FixedVariableArray, FixedVariableArray]: ...
+

-
+@overload
+def trace_model(  # type: ignore
+    model: keras.Model,
+    hwconf: HWConfig = HWConfig(1, -1, -1),
+    solver_options: dict[str, Any] | None = None,
+    verbose: bool = False,
+    inputs: tuple[FixedVariableArray, ...] | FixedVariableArray | None = None,
+    dump: Literal[True] = False,  # type: ignore
+) -> dict[str, FixedVariableArray]: ...


-def trace_model(
+def trace_model(  # type: ignore
     model: keras.Model,
     hwconf: HWConfig = HWConfig(1, -1, -1),
     solver_options: dict[str, Any] | None = None,
     verbose: bool = False,
     inputs: tuple[FixedVariableArray, ...] | None = None,
-
+    dump=False,
+):
     if inputs is None:
         inputs = tuple(
             FixedVariableArrayInput(inp.shape[1:], hwconf=hwconf, solver_options=solver_options) for inp in model.inputs
         )
-    outputs = _apply_nn(model, inputs, verbose=verbose)
-
+    outputs = _apply_nn(model, inputs, verbose=verbose, dump=dump)
+    if not dump:
+        return _flatten_arr(inputs), _flatten_arr(outputs)
+    else:
+        return {k: _flatten_arr(v) for k, v in outputs.items()}  # type: ignore
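A hypothetical use of the two new overloads (assuming `model` is a keras/HGQ2 model the converter supports; the import path mirrors the file shown above):

from da4ml.converter.hgq2.parser import trace_model  # assumed import path

inp, out = trace_model(model)            # dump=False: flattened (inputs, outputs) pair
tensors = trace_model(model, dump=True)  # dump=True: {tensor_name: flattened array}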