PyPI - da4ml - Versions diffs - 0.4.0__py3-none-any.whl → 0.5.0b0__py3-none-any.whl - Mend

da4ml 0.4.0__py3-none-any.whl → 0.5.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of da4ml might be problematic. Click here for more details.

Files changed (40) hide show

da4ml/__init__.py +2 -16
da4ml/_version.py +2 -2
da4ml/cmvm/__init__.py +2 -2
da4ml/cmvm/api.py +15 -4
da4ml/cmvm/core/__init__.py +2 -2
da4ml/cmvm/types.py +32 -18
da4ml/cmvm/util/bit_decompose.py +2 -2
da4ml/codegen/hls/hls_codegen.py +10 -5
da4ml/codegen/hls/hls_model.py +7 -4
da4ml/codegen/rtl/common_source/build_binder.mk +6 -5
da4ml/codegen/rtl/common_source/build_quartus_prj.tcl +104 -0
da4ml/codegen/rtl/common_source/{build_prj.tcl → build_vivado_prj.tcl} +39 -18
da4ml/codegen/rtl/common_source/template.sdc +27 -0
da4ml/codegen/rtl/common_source/template.xdc +11 -13
da4ml/codegen/rtl/rtl_model.py +105 -53
da4ml/codegen/rtl/verilog/__init__.py +2 -1
da4ml/codegen/rtl/verilog/comb.py +47 -7
da4ml/codegen/rtl/verilog/io_wrapper.py +4 -4
da4ml/codegen/rtl/verilog/pipeline.py +12 -12
da4ml/codegen/rtl/verilog/source/lookup_table.v +27 -0
da4ml/codegen/rtl/vhdl/comb.py +27 -21
da4ml/codegen/rtl/vhdl/io_wrapper.py +11 -11
da4ml/codegen/rtl/vhdl/pipeline.py +12 -12
da4ml/codegen/rtl/vhdl/source/lookup_table.vhd +52 -0
da4ml/converter/__init__.py +57 -1
da4ml/converter/hgq2/parser.py +4 -25
da4ml/converter/hgq2/replica.py +210 -25
da4ml/trace/fixed_variable.py +239 -29
da4ml/trace/fixed_variable_array.py +276 -48
da4ml/trace/ops/__init__.py +31 -15
da4ml/trace/ops/reduce_utils.py +3 -3
da4ml/trace/pipeline.py +40 -18
da4ml/trace/tracer.py +33 -8
da4ml/typing/__init__.py +3 -0
{da4ml-0.4.0.dist-info → da4ml-0.5.0b0.dist-info}/METADATA +2 -1
{da4ml-0.4.0.dist-info → da4ml-0.5.0b0.dist-info}/RECORD +39 -35
da4ml/codegen/rtl/vhdl/source/template.xdc +0 -32
{da4ml-0.4.0.dist-info → da4ml-0.5.0b0.dist-info}/WHEEL +0 -0
{da4ml-0.4.0.dist-info → da4ml-0.5.0b0.dist-info}/licenses/LICENSE +0 -0
{da4ml-0.4.0.dist-info → da4ml-0.5.0b0.dist-info}/top_level.txt +0 -0

da4ml/__init__.py CHANGED Viewed

@@ -1,17 +1,3 @@
-# from .cmvm.api import cost, fn_from_kernel
-# from .cmvm.cmvm import compile_kernel
-# from .cmvm.codegen import PyCodegenBackend, VitisCodegenBackend
-# from .cmvm.graph_compile import graph_compile_states
-# from .cmvm.utils import DAState, OpCode, Score
+from . import cmvm, codegen, converter, trace, typing
-# __all__ = [
-#     'DAState',
-#     'OpCode',
-#     'Score',
-#     'cost',
-#     'compile_kernel',
-#     'fn_from_kernel',
-#     'graph_compile_states',
-#     'PyCodegenBackend',
-#     'VitisCodegenBackend',
-# ]
+__all__ = ['cmvm', 'codegen', 'converter', 'trace', 'typing']

da4ml/_version.py CHANGED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '0.4.0'
-__version_tuple__ = version_tuple = (0, 4, 0)
+__version__ = version = '0.5.0b0'
+__version_tuple__ = version_tuple = (0, 5, 0, 'b0')
 __commit_id__ = commit_id = None

da4ml/cmvm/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
 from .api import minimal_latency, solve
-from .types import Op, QInterval, Solution
+from .types import CombLogic, Op, QInterval
-__all__ = ['minimal_latency', 'solve', 'QInterval', 'Op', 'Solution']
+__all__ = ['minimal_latency', 'solve', 'QInterval', 'Op', 'CombLogic']

da4ml/cmvm/api.py CHANGED Viewed

@@ -1,10 +1,11 @@
 from math import ceil, log2
+from typing import TypedDict
 import numpy as np
 from numba import jit, prange
 from .core import _solve, create_state, to_solution
-from .types import CascadedSolution, QInterval
+from .types import Pipeline, QInterval
 from .util import kernel_decompose
@@ -56,7 +57,7 @@ def jit_solve(
     latencies: list[float] | None = None,
     adder_size: int = -1,
     carry_size: int = -1,
-) -> CascadedSolution:
+) -> Pipeline:
     """Optimized implementation of a CMVM computation with cascaded two matrices.
     Parameters
@@ -144,7 +145,7 @@ def jit_solve(
     if max(latencies1) > latency_allowed:
         # When latency depends on the bw, may happen
         print(f'Latency constraint not satisfied: {int(latency_allowed)} < {int(max(latencies1))}')
-    return CascadedSolution((sol0, sol1))
+    return Pipeline((sol0, sol1))
 @jit(cache=True, parallel=True)
@@ -159,7 +160,7 @@ def solve(
     adder_size: int = -1,
     carry_size: int = -1,
     search_all_decompose_dc: bool = True,
-) -> CascadedSolution:
+) -> Pipeline:
     """Solve the CMVM problem with cascaded two matrices.
     Parameters
@@ -251,3 +252,13 @@ def solve(
         carry_size=carry_size,
     )
     return csol
+class solver_options_t(TypedDict, total=False):
+    method0: str
+    method1: str
+    hard_dc: int
+    decompose_dc: int
+    adder_size: int
+    carry_size: int
+    search_all_decompose_dc: bool

da4ml/cmvm/core/__init__.py CHANGED Viewed

@@ -4,7 +4,7 @@ from math import log2
 import numpy as np
 from numba import jit
-from ..types import DAState, Op, QInterval, Solution
+from ..types import CombLogic, DAState, Op, QInterval
 from .indexers import (
     idx_mc,
     idx_mc_dc,
@@ -194,7 +194,7 @@ def to_solution(
         out_neg.append(sub)
         out_shift[i_out] = out_shift[i_out] + shift0
-    return Solution(
+    return CombLogic(
         shape=state.kernel.shape,  # type: ignore
         inp_shift=list(in_shift),
         out_idxs=out_idx,

da4ml/cmvm/types.py CHANGED Viewed

@@ -11,7 +11,7 @@ from numpy import float32, int8
 from numpy.typing import NDArray
 if TYPE_CHECKING:
-    from ..trace.tracer import FixedVariable
+    from ..trace.fixed_variable import FixedVariable, LookupTable
 class QInterval(NamedTuple):
@@ -228,9 +228,15 @@ def _(v: Decimal, k: int | bool, i: int, f: int, round_mode: str = 'TRN'):
     return eps * ((floor(v / eps) + bias) % Decimal(2) ** b - bias)
-class Solution(NamedTuple):
-    """Represents a series of operations that can be applied to a vector of data.
-    May represent a CMVM solution or a general neural network
+class JSONEncoder(json.JSONEncoder):
+    def default(self, o):
+        if hasattr(o, 'to_dict'):
+            return o.to_dict()
+        super().default(o)
+class CombLogic(NamedTuple):
+    """A combinational logic that describes a series of operations on input data to produce output data.
     Attributes
     ----------
@@ -247,12 +253,14 @@ class Solution(NamedTuple):
     ops: list[Op]
         Core list of operations for generating each buffer element.
     carry_size: int
-        Size of the carrier for the adder.
+        Size of the carrier for the adder, used for cost and latency estimation.
     adder_size: int
-        Elementary size of the adder.
+        Elementary size of the adder, used for cost and latency estimation.
+    lookup_tables: tuple[LookupTable, ...] | None
+        Lookup table arrays for lookup operations, if any.
-    The core part of the solution is the operations in the ops list.
+    The core part of the comb logic is the operations in the ops list.
     For the exact operations executed with Op, refer to the Op class.
     After all operations are executed, the output data is read from data[op.out_idx] and multiplied by 2**out_shift.
@@ -266,6 +274,7 @@ class Solution(NamedTuple):
     ops: list[Op]
     carry_size: int
     adder_size: int
+    lookup_tables: 'tuple[LookupTable, ...] | None' = None
     def __call__(self, inp: list | np.ndarray | tuple, quantize=False, debug=False, dump=False):
         """Executes the solution on the input data.
@@ -343,6 +352,12 @@ class Solution(NamedTuple):
                 case 7:
                     v0, v1 = buf[op.id0], buf[op.id1]
                     buf[i] = v0 * v1
+                case 8:
+                    v0 = buf[op.id0]
+                    tables = self.lookup_tables
+                    assert tables is not None, 'No lookup table provided for lookup operation'
+                    table = tables[op.data]
+                    buf[i] = table.lookup(v0, self.ops[op.id0].qint)
                 case _:
                     raise ValueError(f'Unknown opcode {op.opcode} in {op}')
@@ -375,6 +390,8 @@ class Solution(NamedTuple):
                         op_str = f'msb(buf[{op.data}]) ? buf[{op.id0}] : {_sign}buf[{op.id1}]'
                     case 7:
                         op_str = f'buf[{op.id0}] * buf[{op.id1}]'
+                    case 8:
+                        op_str = f'tables[{int(op.data)}].lookup(buf[{op.id0}])'
                     case _:
                         raise ValueError(f'Unknown opcode {op.opcode} in {op}')
@@ -451,7 +468,7 @@ class Solution(NamedTuple):
     def save(self, path: str | Path):
         """Save the solution to a file."""
         with open(path, 'w') as f:
-            json.dump(self, f)
+            json.dump(self, f, cls=JSONEncoder)
     @classmethod
     def deserialize(cls, data: dict):
@@ -534,12 +551,8 @@ class Solution(NamedTuple):
             data.tofile(f)
-class CascadedSolution(NamedTuple):
-    """A solution that implements cascaded matrix-vector multiplications through multiple CMVM stages.
-    CascadedSolution represents a sequence of Solution objects where the output of each stage
-    is fed as input to the next stage.
+class Pipeline(NamedTuple):
+    """A pipeline with II=1,with each stage represented by a CombLogic
     Attributes
     ----------
     solutions: tuple[Solution, ...]
@@ -548,12 +561,13 @@ class CascadedSolution(NamedTuple):
     Properties
     ----------
     kernel: NDArray[float32]
+        Only useful when the pipeline describes a linear operation.
         The overall kernel matrix which the cascaded solution implements: vec @ kernel = solution(vec).
         This is calculated as the matrix product of all individual solution kernels.
     cost: float
         The total cost of the cascaded solution, computed as the sum of the costs of all stages.
     latency: tuple[float, float]
-        The minimum and maximum latency of the cascaded solution.
+        The minimum and maximum latency of the pipeline, determined by the last stage.
     inp_qint: list[QInterval]
         Input quantization intervals
     inp_lat: list[float]
@@ -572,7 +586,7 @@ class CascadedSolution(NamedTuple):
         The shape of the corresponding kernel matrix.
     """
-    solutions: tuple[Solution, ...]
+    solutions: tuple[CombLogic, ...]
     def __call__(self, inp: list | np.ndarray | tuple, quantize=False, debug=False):
         out = np.asarray(inp)
@@ -634,12 +648,12 @@ class CascadedSolution(NamedTuple):
     def save(self, path: str | Path):
         """Save the solution to a file."""
         with open(path, 'w') as f:
-            json.dump(self, f)
+            json.dump(self, f, cls=JSONEncoder)
     @classmethod
     def deserialize(cls, data: dict):
         """Load the solution from a file."""
-        return cls(solutions=tuple(Solution.deserialize(sol) for sol in data[0]))
+        return cls(solutions=tuple(CombLogic.deserialize(sol) for sol in data[0]))
     @classmethod
     def load(cls, path: str):

da4ml/cmvm/util/bit_decompose.py CHANGED Viewed

@@ -15,7 +15,7 @@ def _volatile_int_arr_to_csd(x: NDArray) -> NDArray[np.int8]:
         thres = _2pn / 1.5
         bit = (x > thres).astype(np.int8)
         bit -= (x < -thres).astype(np.int8)
-        x -= _2pn * bit
+        x -= _2pn * bit.astype(x.dtype)
         buf[..., n] = bit
     return buf
@@ -50,7 +50,7 @@ def _center(arr: NDArray):
     arr = arr * (2.0**-shift1)
     shift0 = shift_centering(arr, 0)  # d_in
     arr = arr * (2.0 ** -shift0[:, None])
-    return arr, shift0, shift1
+    return arr, shift0.astype(np.int8), shift1.astype(np.int8)
 @jit

da4ml/codegen/hls/hls_codegen.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from collections.abc import Callable
-from ...cmvm.types import QInterval, Solution, _minimal_kif
+from ...cmvm.types import CombLogic, QInterval, _minimal_kif
 from ...trace.fixed_variable import _const_f
@@ -34,7 +34,7 @@ def get_typestr_fn(flavor: str):
     return typestr_fn
-def ssa_gen(sol: Solution, print_latency: bool, typestr_fn: Callable[[bool | int, int, int], str]):
+def ssa_gen(sol: CombLogic, print_latency: bool, typestr_fn: Callable[[bool | int, int, int], str]):
     ops = sol.ops
     all_kifs = list(map(_minimal_kif, (op.qint for op in ops)))
     all_types = list(map(lambda x: typestr_fn(*x), all_kifs))
@@ -92,6 +92,11 @@ def ssa_gen(sol: Solution, print_latency: bool, typestr_fn: Callable[[bool | int
                 ref_k = f'v{id_c}[{bw_k - 1}]'
                 sign = '-' if op.opcode == -6 else ''
                 ref1 = f'v{op.id1}' if shift == 0 else f'bit_shift<{shift}>(v{op.id1})'
+                bw0, bw1 = sum(all_kifs[op.id0]), sum(all_kifs[op.id1])
+                if bw0 == 0:
+                    ref0 = '0'
+                if bw1 == 0:
+                    ref1 = '0'
                 val = f'{ref_k} ? {_type}({ref0}) : {_type}({sign}{ref1})'
             case 7:
                 # Multiplication
@@ -108,7 +113,7 @@ def ssa_gen(sol: Solution, print_latency: bool, typestr_fn: Callable[[bool | int
     return lines
-def output_gen(sol: Solution, typestr_fn: Callable[[bool | int, int, int], str]):
+def output_gen(sol: CombLogic, typestr_fn: Callable[[bool | int, int, int], str]):
     lines = []
     for i, idx in enumerate(sol.out_idxs):
         if idx < 0:
@@ -124,7 +129,7 @@ def output_gen(sol: Solution, typestr_fn: Callable[[bool | int, int, int], str])
     return lines
-def get_io_types(sol: Solution, flavor: str):
+def get_io_types(sol: CombLogic, flavor: str):
     typestr_fn = get_typestr_fn(flavor)
     in_kif = map(max, zip(*map(_minimal_kif, sol.inp_qint)))
     inp_type = typestr_fn(*in_kif)
@@ -134,7 +139,7 @@ def get_io_types(sol: Solution, flavor: str):
 def hls_logic_and_bridge_gen(
-    sol: Solution,
+    sol: CombLogic,
     fn_name: str,
     flavor: str,
     pragmas: list[str] | None = None,

da4ml/codegen/hls/hls_model.py CHANGED Viewed

@@ -12,7 +12,7 @@ from uuid import uuid4
 import numpy as np
 from numpy.typing import NDArray
-from da4ml.cmvm.types import Solution
+from da4ml.cmvm.types import CombLogic
 from da4ml.codegen.hls.hls_codegen import get_io_types, hls_logic_and_bridge_gen
 from ... import codegen
@@ -24,7 +24,7 @@ T = TypeVar('T', bound=np.floating)
 class HLSModel:
     def __init__(
         self,
-        solution: Solution,
+        solution: CombLogic,
         prj_name: str,
         path: str | Path,
         flavor: str = 'vitis',
@@ -192,12 +192,12 @@ class HLSModel:
         self.write()
         self._compile(verbose, openmp, o3, clean)
-    def predict(self, data: NDArray[T]) -> NDArray[T]:
+    def predict(self, data: NDArray[T] | Sequence[NDArray[T]]) -> NDArray[T]:
         """Run the model on the input data.
         Parameters
         ----------
-        data : NDArray[np.floating]
+        data: NDArray[np.floating] | Sequence[NDArray[np.floating]]
             Input data to the model. The shape is ignored, and the number of samples is
             determined by the size of the data.
@@ -209,6 +209,9 @@ class HLSModel:
         assert self._lib is not None, 'Library not loaded, call .compile() first.'
         inp_size, out_size = self._solution.shape
+        if isinstance(data, Sequence):
+            data = np.concatenate([a.reshape(a.shape[0], -1) for a in data], axis=-1)
         dtype = data.dtype
         if dtype not in (np.float32, np.float64):
             raise TypeError(f'Unsupported input data type: {dtype}. Expected float32 or float64.')

da4ml/codegen/rtl/common_source/build_binder.mk CHANGED Viewed

@@ -1,7 +1,7 @@
 default: slow
 VERILATOR_ROOT = $(shell verilator -V | grep -a VERILATOR_ROOT | tail -1 | awk '{{print $$3}}')
-INCLUDES = -I./obj_dir -I$(VERILATOR_ROOT)/include
+INCLUDES = -I./obj_dir -I$(VERILATOR_ROOT)/include -I../src
 WARNINGS = -Wl,--no-undefined
 CFLAGS = -std=c++17 -fPIC
 LINKFLAGS = $(INCLUDES) $(WARNINGS)
@@ -9,14 +9,15 @@ LIBNAME = lib$(VM_PREFIX)_$(STAMP).so
 N_JOBS ?= $(shell nproc)
 VERILATOR_FLAGS ?=
-$(VM_PREFIX).v: $(wildcard $(VM_PREFIX).vhd)
+../src/$(VM_PREFIX).v: $(wildcard ../src/$(VM_PREFIX).vhd) $(wildcard ../src/$(VM_PREFIX)_stage*.vhd)
 # vhdl specific - convert to verilog first for verilating
 	mkdir -p obj_dir
-	ghdl -a --std=08 --workdir=obj_dir multiplier.vhd mux.vhd negative.vhd shift_adder.vhd $(wildcard $(VM_PREFIX:_wrapper=)_stage*.vhd) $(wildcard $(VM_PREFIX:_wrapper=).vhd) $(VM_PREFIX).vhd
+	cp ../src/memfiles/* ./
+	ghdl -a --std=08 --workdir=obj_dir ../src/static/multiplier.vhd ../src/static/mux.vhd ../src/static/negative.vhd ../src/static/shift_adder.vhd ../src/static/lookup_table.vhd $(wildcard ../src/$(VM_PREFIX:_wrapper=)_stage*.vhd) $(wildcard ../src/$(VM_PREFIX:_wrapper=).vhd) ../src/$(VM_PREFIX).vhd
 	ghdl synth --std=08 --workdir=obj_dir --out=verilog $(VM_PREFIX) > $(VM_PREFIX).v
-./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a: $(VM_PREFIX).v
-	verilator --cc -j $(N_JOBS) -build $(VM_PREFIX).v --prefix V$(VM_PREFIX) $(VERILATOR_FLAGS) -CFLAGS "$(CFLAGS)"
+./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a: ../src/$(VM_PREFIX).v $(wildcard ../src/$(VM_PREFIX)_stage*.v)
+	verilator --cc -j $(N_JOBS) -build $(VM_PREFIX).v --prefix V$(VM_PREFIX) $(VERILATOR_FLAGS) -CFLAGS "$(CFLAGS)" -I../src -I../src/static
 $(LIBNAME): ./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a $(VM_PREFIX)_binder.cc
 	$(CXX) $(CFLAGS) $(LINKFLAGS) $(CXXFLAGS2) -pthread -shared -o $(LIBNAME) $(VM_PREFIX)_binder.cc ./obj_dir/libV$(VM_PREFIX).a ./obj_dir/libverilated.a ./obj_dir/V$(VM_PREFIX)__ALL.a $(EXTRA_CXXFLAGS)

da4ml/codegen/rtl/common_source/build_quartus_prj.tcl ADDED Viewed

@@ -0,0 +1,104 @@
+set project_name "$::env(PROJECT_NAME)"
+set device "$::env(DEVICE)"
+set source_type "$::env(SOURCE_TYPE)"
+set top_module "${project_name}"
+set output_dir "./output_${project_name}"
+file mkdir $output_dir
+file mkdir "${output_dir}/reports"
+project_new "${project_name}" -overwrite -revision "${project_name}"
+set_global_assignment -name FAMILY [lindex [split "${device}" "-"] 0]
+set_global_assignment -name DEVICE "${device}"
+if { "${source_type}" != "vhdl" && "${source_type}" != "verilog" } {
+    puts "Error: SOURCE_TYPE must be either 'vhdl' or 'verilog'."
+    exit 1
+}
+# Add source files based on type
+if { "${source_type}" == "vhdl" } {
+    set_global_assignment -name VHDL_INPUT_VERSION VHDL_2008
+    foreach file [glob -nocomplain "src/static/*.vhd"] {
+        set_global_assignment -name VHDL_FILE "${file}"
+    }
+    set_global_assignment -name VHDL_FILE "src/${project_name}.vhd"
+    foreach file [glob -nocomplain "src/${project_name}_stage*.vhd"] {
+        set_global_assignment -name VHDL_FILE "${file}"
+    }
+} else {
+    foreach file [glob -nocomplain "src/static/*.v"] {
+        set_global_assignment -name VERILOG_FILE "${file}"
+    }
+    set_global_assignment -name VERILOG_FILE "src/${project_name}.v"
+    foreach file [glob -nocomplain "src/${project_name}_stage*.v"] {
+        set_global_assignment -name VERILOG_FILE "${file}"
+    }
+}
+set mems [glob -nocomplain "src/memfiles/*.mem"]
+# VHDL only uses relative path to working dir apparently...
+if { "${source_type}" == "vhdl" } {
+    foreach f $mems {
+        file copy -force $f [file tail $f]
+    }
+    set mems [glob -nocomplain "*.mem"]
+}
+foreach f $mems {
+    set_global_assignment -name MIF_FILE "${f}"
+}
+# Add SDC constraint file if it exists
+if { [file exists "src/${project_name}.sdc"] } {
+    set_global_assignment -name SDC_FILE "${project_name}.sdc"
+}
+# Set top-level entity
+set_global_assignment -name TOP_LEVEL_ENTITY "${top_module}"
+# OOC
+load_package flow
+proc make_all_pins_virtual {} {
+    execute_module -tool map
+    set name_ids [get_names -filter * -node_type pin]
+    foreach_in_collection name_id $name_ids {
+        set pin_name [get_name_info -info full_path $name_id]
+        post_message "Making VIRTUAL_PIN assignment to $pin_name"
+        set_instance_assignment -to $pin_name -name VIRTUAL_PIN ON
+    }
+    export_assignments
+}
+make_all_pins_virtual
+# Config
+set_global_assignment -name OPTIMIZATION_MODE "HIGH PERFORMANCE EFFORT"
+set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
+set_global_assignment -name AUTO_RESOURCE_SHARING ON
+set_global_assignment -name ALLOW_ANY_RAM_SIZE_FOR_RECOGNITION ON
+set_global_assignment -name ALLOW_ANY_ROM_SIZE_FOR_RECOGNITION ON
+set_global_assignment -name ALLOW_REGISTER_RETIMING ON
+set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
+set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON
+set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
+set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
+set_global_assignment -name SYNTHESIS_EFFORT AUTO
+set_global_assignment -name ADV_NETLIST_OPT_SYNTH_WYSIWYG_REMAP ON
+# Run!!!
+execute_flow -compile
+project_close

da4ml/codegen/rtl/common_source/{build_prj.tcl → build_vivado_prj.tcl} RENAMED Viewed

@@ -1,6 +1,6 @@
-set project_name "${PROJECT_NAME}"
-set device "${DEVICE}"
-set source_type "${SOURCE_TYPE}"
+set project_name "$::env(PROJECT_NAME)"
+set device "$::env(DEVICE)"
+set source_type "$::env(SOURCE_TYPE)"
 set top_module "${project_name}"
 set output_dir "./output_${project_name}"
@@ -17,28 +17,47 @@ if { $source_type != "vhdl" && $source_type != "verilog" } {
 if { $source_type == "vhdl" } {
     set_property TARGET_LANGUAGE VHDL [current_project]
-    read_vhdl -vhdl2008 "${project_name}.vhd"
-    read_vhdl -vhdl2008 "shift_adder.vhd"
-    read_vhdl -vhdl2008 "negative.vhd"
-    read_vhdl -vhdl2008 "mux.vhd"
-    read_vhdl -vhdl2008 "multiplier.vhd"
-    foreach file [glob -nocomplain "${project_name}_stage*.vhd"] {
+    foreach file [glob -nocomplain "src/static/*.vhd"] {
+        read_vhdl -vhdl2008 $file
+    }
+    read_vhdl -vhdl2008 "src/${project_name}.vhd"
+    foreach file [glob -nocomplain "src/${project_name}_stage*.vhd"] {
         read_vhdl -vhdl2008 $file
     }
 } else {
     set_property TARGET_LANGUAGE Verilog [current_project]
-    read_verilog "${project_name}.v"
-    read_verilog "shift_adder.v"
-    read_verilog "negative.v"
-    read_verilog "mux.v"
-    read_verilog "multiplier.v"
-    foreach file [glob -nocomplain "${project_name}_stage*.v"] {
+    foreach file [glob -nocomplain "src/static/*.v"] {
+        read_verilog $file
+    }
+    read_verilog "src/${project_name}.v"
+    foreach file [glob -nocomplain "src/${project_name}_stage*.v"] {
         read_verilog $file
     }
 }
-read_xdc "${project_name}.xdc" -mode out_of_context
+set mems [glob -nocomplain "src/memfiles/*.mem"]
+# VHDL only uses relative path to working dir apparently...
+if { $source_type == "vhdl" } {
+    foreach f $mems {
+        file copy -force $f [file tail $f]
+    }
+    set mems [glob -nocomplain "*.mem"]
+}
+foreach f $mems {
+    add_files -fileset [current_fileset] $f
+    set_property used_in_synthesis true [get_files $f]
+}
+# Add XDC constraint if it exists
+if { [file exists "src/${project_name}.xdc"] } {
+    read_xdc "src/${project_name}.xdc" -mode out_of_context
+}
 set_property top $top_module [current_fileset]
@@ -46,8 +65,8 @@ file mkdir $output_dir
 file mkdir "${output_dir}/reports"
 # synth
-synth_design -top $top_module -mode out_of_context -retiming \
-    -flatten_hierarchy full -resource_sharing auto
+synth_design -top $top_module -mode out_of_context -global_retiming on \
+    -flatten_hierarchy full -resource_sharing auto -directive PerformanceOptimized
 write_checkpoint -force "${output_dir}/${project_name}_post_synth.dcp"
@@ -66,6 +85,7 @@ report_design_analysis -congestion -file "${output_dir}/reports/${project_name}_
 phys_opt_design -directive AggressiveExplore
 write_checkpoint -force "${output_dir}/${project_name}_post_place.dcp"
+file delete -force "${output_dir}/${project_name}_post_synth.dcp"
 report_design_analysis -congestion -file "${output_dir}/reports/${project_name}_post_place_congestion_final.rpt"
@@ -75,6 +95,7 @@ report_utilization -hierarchical -file "${output_dir}/reports/${project_name}_po
 # route
 route_design -directive NoTimingRelaxation
 write_checkpoint -force "${output_dir}/${project_name}_post_route.dcp"
+file delete -force "${output_dir}/${project_name}_post_place.dcp"
 report_timing_summary -file "${output_dir}/reports/${project_name}_post_route_timing.rpt"

da4ml/codegen/rtl/common_source/template.sdc ADDED Viewed

@@ -0,0 +1,27 @@
+set clock_period $::env(CLOCK_PERIOD)
+# Clock uncertainty as percentage of clock period
+set uncertainty_setup_r $::env(UNCERTAINITY_SETUP)
+set uncertainty_hold_r $::env(UNCERTAINITY_HOLD)
+set delay_max_r $::env(DELAY_MAX)
+set delay_min_r $::env(DELAY_MIN)
+# Calculate actual uncertainty values
+set uncertainty_setup [expr {$clock_period * $uncertainty_setup_r}]
+set uncertainty_hold [expr {$clock_period * $uncertainty_hold_r}]
+set delay_max [expr {$clock_period * $delay_max_r}]
+set delay_min [expr {$clock_period * $delay_min_r}]
+# Create clock with variable period
+create_clock -period $clock_period -name sys_clk [get_ports {clk}]
+# Input/Output constraints
+set_input_delay -clock sys_clk -max $delay_max [get_ports {model_inp[*]}]
+set_input_delay -clock sys_clk -min $delay_min [get_ports {model_inp[*]}]
+set_output_delay -clock sys_clk -max $delay_max [get_ports {model_out[*]}]
+set_output_delay -clock sys_clk -min $delay_min [get_ports {model_out[*]}]
+# Apply calculated uncertainty values
+set_clock_uncertainty -setup -to [get_clocks sys_clk] $uncertainty_setup
+set_clock_uncertainty -hold -to [get_clocks sys_clk] $uncertainty_hold

da4ml/codegen/rtl/common_source/template.xdc CHANGED Viewed

@@ -1,10 +1,10 @@
-set clock_period ${CLOCK_PERIOD}
+set clock_period $::env(CLOCK_PERIOD)
 # Clock uncertainty as percentage of clock period
-set uncertainty_setup_r ${UNCERTAINITY_SETUP}
-set uncertainty_hold_r ${UNCERTAINITY_HOLD}
-set delay_max_r ${DELAY_MAX}
-set delay_min_r ${DELAY_MIN}
+set uncertainty_setup_r $::env(UNCERTAINITY_SETUP)
+set uncertainty_hold_r $::env(UNCERTAINITY_HOLD)
+set delay_max_r $::env(DELAY_MAX)
+set delay_min_r $::env(DELAY_MIN)
 # Calculate actual uncertainty values
 set uncertainty_setup [expr {$clock_period * $uncertainty_setup_r}]
@@ -16,17 +16,15 @@ set delay_min [expr {$clock_period * $delay_min_r}]
 create_clock -period $clock_period -name sys_clk [get_ports {clk}]
 # Input/Output constraints
-set_input_delay -clock sys_clk -max $delay_max [get_ports {inp[*]}]
-set_input_delay -clock sys_clk -min $delay_min [get_ports {inp[*]}]
+set_input_delay -clock sys_clk -max $delay_max [get_ports {model_inp[*]}]
+set_input_delay -clock sys_clk -min $delay_min [get_ports {model_inp[*]}]
-set_output_delay -clock sys_clk -max $delay_max [get_ports {out[*]}]
-set_output_delay -clock sys_clk -min $delay_min [get_ports {out[*]}]
+set_output_delay -clock sys_clk -max $delay_max [get_ports {model_out[*]}]
+set_output_delay -clock sys_clk -min $delay_min [get_ports {model_out[*]}]
 # Apply calculated uncertainty values
 set_clock_uncertainty -setup $uncertainty_setup [get_clocks sys_clk]
 set_clock_uncertainty -hold $uncertainty_hold [get_clocks sys_clk]
-set_property HD.CLK_SRC BUFG_X0Y0 [get_ports clk]
-set_property retiming_forward 1 [get_cells {stage[*]_inp}]
-set_property retiming_backward 1 [get_cells {stage[*]_inp}]
+# Mark lut_rom to be implemented as distributed ROM (*rom_style = "distributed" *)
+set_property rom_style "distributed" [get_cells lut_rom]

da4ml 0.4.0__py3-none-any.whl → 0.5.0b0__py3-none-any.whl

Potentially problematic release.

da4ml 0.4.0__py3-none-any.whl → 0.5.0b0__py3-none-any.whl