PyPI - accelforge - Versions diffs - 0.0.1__py3-none-any.whl - Mend

accelforge 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (258) hide show

accelforge/__init__.py +21 -0
accelforge/_accelerated_imports.py +16 -0
accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
accelforge/_deprecate/_simanneal/simanneal.py +666 -0
accelforge/_deprecate/_simanneal/tracking.py +105 -0
accelforge/_deprecate/_simanneal/wrappers.py +218 -0
accelforge/_deprecate/_simanneal2/__init__.py +7 -0
accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
accelforge/_deprecate/_simanneal2/tracking.py +116 -0
accelforge/_deprecate/compatibility_util.py +181 -0
accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
accelforge/_deprecate/tags.py +69 -0
accelforge/_deprecate/viz/__init__.py +0 -0
accelforge/_deprecate/viz/interactive.py +159 -0
accelforge/_deprecate/viz/reservationtree.py +307 -0
accelforge/_deprecate/viz/ski_slope.py +88 -0
accelforge/_version.py +15 -0
accelforge/examples.py +39 -0
accelforge/frontend/__init__.py +10 -0
accelforge/frontend/_binding.py +129 -0
accelforge/frontend/_workload_isl/__init__.py +2 -0
accelforge/frontend/_workload_isl/_isl.py +149 -0
accelforge/frontend/_workload_isl/_symbolic.py +141 -0
accelforge/frontend/arch copy.py +1544 -0
accelforge/frontend/arch.py +1642 -0
accelforge/frontend/config.py +63 -0
accelforge/frontend/mapper/__init__.py +5 -0
accelforge/frontend/mapper/ffm.py +126 -0
accelforge/frontend/mapper/mapper.py +7 -0
accelforge/frontend/mapper/metrics.py +30 -0
accelforge/frontend/mapping/__init__.py +1 -0
accelforge/frontend/mapping/mapping.py +1736 -0
accelforge/frontend/model.py +14 -0
accelforge/frontend/renames.py +150 -0
accelforge/frontend/spec copy.py +230 -0
accelforge/frontend/spec.py +301 -0
accelforge/frontend/variables.py +12 -0
accelforge/frontend/workload.py +952 -0
accelforge/mapper/FFM/__init__.py +9 -0
accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
accelforge/mapper/FFM/data.py +61 -0
accelforge/mapper/FFM/main copy.py +236 -0
accelforge/mapper/FFM/main.py +208 -0
accelforge/mapper/FFM/mappings.py +510 -0
accelforge/mapper/FFM/pmappings.py +310 -0
accelforge/mapper/__init__.py +4 -0
accelforge/mapper.py +0 -0
accelforge/model/__init__.py +1 -0
accelforge/model/_looptree/__init__.py +0 -0
accelforge/model/_looptree/accesses.py +335 -0
accelforge/model/_looptree/capacity/__init__.py +1 -0
accelforge/model/_looptree/capacity/aggregators.py +36 -0
accelforge/model/_looptree/capacity/capacity.py +47 -0
accelforge/model/_looptree/energy.py +150 -0
accelforge/model/_looptree/equivalent_ranks.py +29 -0
accelforge/model/_looptree/latency/__init__.py +1 -0
accelforge/model/_looptree/latency/latency.py +98 -0
accelforge/model/_looptree/latency/memory.py +120 -0
accelforge/model/_looptree/latency/processors.py +92 -0
accelforge/model/_looptree/mapping_utilities.py +71 -0
accelforge/model/_looptree/reuse/__init__.py +4 -0
accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
accelforge/model/_looptree/reuse/isl/des.py +59 -0
accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
accelforge/model/_looptree/run.py +122 -0
accelforge/model/_looptree/types.py +26 -0
accelforge/model/_looptree/visualization/__init__.py +0 -0
accelforge/model/_looptree/visualization/occupancy.py +11 -0
accelforge/model/main.py +222 -0
accelforge/plotting/__init__.py +2 -0
accelforge/plotting/mappings.py +219 -0
accelforge/plotting/specs.py +57 -0
accelforge/util/__init__.py +4 -0
accelforge/util/_base_analysis_types.py +24 -0
accelforge/util/_basetypes.py +1089 -0
accelforge/util/_frozenset.py +36 -0
accelforge/util/_isl.py +29 -0
accelforge/util/_itertools.py +14 -0
accelforge/util/_mathfuncs.py +57 -0
accelforge/util/_parse_expressions.py +339 -0
accelforge/util/_picklecache.py +32 -0
accelforge/util/_setexpressions.py +268 -0
accelforge/util/_sympy/__init__.py +0 -0
accelforge/util/_sympy/broadcast_max.py +18 -0
accelforge/util/_visualization.py +112 -0
accelforge/util/_yaml.py +579 -0
accelforge/util/parallel.py +193 -0
accelforge-0.0.1.dist-info/METADATA +64 -0
accelforge-0.0.1.dist-info/RECORD +258 -0
accelforge-0.0.1.dist-info/WHEEL +5 -0
accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
accelforge-0.0.1.dist-info/top_level.txt +5 -0
docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
docs/_build/html/_sources/fastfusion.rst.txt +20 -0
docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
docs/_build/html/_sources/index.rst.txt +87 -0
docs/_build/html/_sources/modules.rst.txt +7 -0
docs/_build/html/_sources/notes/citation.rst.txt +45 -0
docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
docs/_build/html/_sources/notes/spec.rst.txt +36 -0
docs/source/_ext/include_attrs.py +213 -0
docs/source/_ext/include_docstring.py +364 -0
docs/source/_ext/include_functions.py +154 -0
docs/source/_ext/include_notebook.py +131 -0
docs/source/_ext/include_yaml.py +119 -0
docs/source/_ext/inherited_attributes.py +222 -0
docs/source/_ext/paths.py +4 -0
docs/source/conf.py +79 -0
examples/arches/compute_in_memory/_include.yaml +74 -0
examples/arches/compute_in_memory/_include_functions.py +229 -0
examples/arches/compute_in_memory/_load_spec.py +57 -0
examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
examples/arches/compute_in_memory/components/misc.py +195 -0
examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
examples/arches/compute_in_memory/isaac.yaml +233 -0
examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
examples/arches/eyeriss.yaml +68 -0
examples/arches/fanout_variations/at_glb.yaml +31 -0
examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
examples/arches/fanout_variations/at_mac.yaml +31 -0
examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
examples/arches/nvdla.yaml +47 -0
examples/arches/simple.yaml +28 -0
examples/arches/tpu_v4i.yaml +67 -0
examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
examples/misc/component_annotated.yaml +33 -0
examples/workloads/gpt3_6.7B.yaml +124 -0
examples/workloads/matmuls.yaml +20 -0
examples/workloads/mobilenet_28.yaml +81 -0
examples/workloads/mobilenet_various_separate.yaml +106 -0
examples/workloads/three_matmuls_annotated.yaml +59 -0
notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
notebooks/compute_in_memory/_scripts.py +339 -0
notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
notebooks/paths.py +4 -0
notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
notebooks/tutorials/FFM.ipynb +3498 -0
notebooks/tutorials/_include.py +48 -0
notebooks/tutorials/component_energy_area.ipynb +363 -0
tests/Q_mapping.yaml +38 -0
tests/__init__.py +0 -0
tests/conv.mapping.yaml +27 -0
tests/conv.workload.yaml +13 -0
tests/conv_sym.mapping.yaml +43 -0
tests/copy.mapping.yaml +35 -0
tests/copy.workload.yaml +15 -0
tests/distribuffers/__init__.py +0 -0
tests/distribuffers/multicast/test_cases.yaml +482 -0
tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
tests/distribuffers/spec/distributed.yaml +100 -0
tests/distribuffers/spec/logical_arch.yaml +32 -0
tests/distribuffers/spec/physical_arch.yaml +69 -0
tests/distribuffers/test_binding.py +48 -0
tests/frontend/__init__.py +0 -0
tests/frontend/test_mapping_viz.py +52 -0
tests/mapper/__init__.py +0 -0
tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
tests/mapper/test_mapping_to_isl.py +90 -0
tests/mapper/test_spatial_reuse_analysis.py +67 -0
tests/mapper/test_temporal_reuse_analysis.py +56 -0
tests/mapper/util.py +58 -0
tests/matmul.mapping.yaml +29 -0
tests/matmul.workload.yaml +12 -0
tests/matmul_spatial.mapping.yaml +44 -0
tests/mha.renames.yaml +65 -0
tests/mha.workload.yaml +67 -0
tests/mha.yaml +59 -0
tests/mha_full.workload.yaml +67 -0
tests/mobilenet.workload.yaml +35 -0
tests/mobilenet_long.workload.yaml +64 -0
tests/pmappingcache.py +24 -0
tests/processing_stage.arch.yaml +40 -0
tests/snowcat.arch.yaml +36 -0
tests/test_ffm_join_pmappings.py +106 -0
tests/test_ffm_make_pmappings.py +82 -0
tests/test_ffm_make_tile_shapes.py +49 -0
tests/test_mapper.py +100 -0
tests/test_model.py +37 -0
tests/test_plotting.py +72 -0
tests/test_processing_stage.py +46 -0
tests/test_symbolic_model.py +248 -0
tests/test_workload.py +141 -0

notebooks/compute_in_memory/_scripts.py ADDED Viewed

@@ -0,0 +1,339 @@
+import sys
+import os
+from IPython.display import display, Markdown
+THIS_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(THIS_SCRIPT_DIR)
+sys.path.append(
+    os.path.join(THIS_SCRIPT_DIR, "..", "..", "examples", "arches", "compute_in_memory")
+)
+from _load_spec import get_spec as _get_spec
+import accelforge as af
+def display_markdown(markdown):
+    display(Markdown(markdown))
+def get_spec(name: str, add_dummy_main_memory: bool = False) -> af.Spec:
+    return _get_spec(name, add_dummy_main_memory=add_dummy_main_memory)
+# import difflib
+# import re
+# import svgutils
+# from IPython.display import SVG, display, Markdown
+# from .utils import *
+# DIAGRAM_DEFAULT_IGNORE = ("system", "macro_in_system", "1bit_x_1bit_mac")
+# def grab_from_yaml_file(
+#     yaml_file, startfrom=None, same_indent=True, include_lines_before=0
+# ):
+#     with open(yaml_file, "r") as f:
+#         contents = f.readlines()
+#     start, end = 0, len(contents)
+#     n_whitespace = 0
+#     if startfrom is None:
+#         return "".join(contents)
+#     for i, line in enumerate(contents):
+#         if re.findall(r"\b\s*" + startfrom + r"\b", line):
+#             start = i
+#             n_whitespace = len(re.findall(r"^\s*", line)[0])
+#             break
+#     else:
+#         raise ValueError(f"{startfrom} not found in {yaml_file}")
+#     for i, line in enumerate(contents[start + 1 :]):
+#         ws = len(re.findall(r"^\s*", line)[0])
+#         if ws < n_whitespace or (not same_indent and ws == n_whitespace):
+#             end = start + i + 1
+#             break
+#     return "".join(
+#         c[n_whitespace:] for c in contents[start - include_lines_before : end]
+#     )
+# def scale_svg(svg, scale=0.5):
+#     svg = svgutils.transform.fromstring(svg.decode("ascii"))
+#     svg = svgutils.compose.Figure(svg.width, svg.height, svg.getroot())
+#     svg = svg.scale(scale)
+#     svg.width = svg.width * scale
+#     svg.height = svg.height * scale
+#     return svg
+# def display_diagram(diagram, scale=0.5):
+#     display(SVG(scale_svg(diagram.create_svg(), scale).tostr()))
+# def display_markdown(markdown):
+#     display(Markdown(markdown))
+# def display_yaml_file(*args, **kwargs):
+#     display_yaml_str(grab_from_yaml_file(*args, **kwargs))
+# def display_yaml_str(yaml_str):
+#     display_markdown(f"```yaml\n{yaml_str}```")
+# def get_yaml_file_markdown(yaml_file, *args, **kwargs):
+#     return f"```yaml\n{grab_from_yaml_file(yaml_file, *args, **kwargs)}```"
+# def get_yaml_str_markdown(yaml_str):
+#     return f"```yaml\n{yaml_str}```"
+def display_important_variables(name: str):
+    result = []
+    result.append(f"Some of the important variables for {name}:\n")
+    def pfmat(key, value, note=""):
+        result.append(f"- *{key}*: {value} {note if note else ''}")
+    s: af.Spec = get_spec(name)
+    s.calculate_component_area_energy_latency_leak(einsum_name=s.workload.einsums[0].name)
+    def getvalue(key):
+        return s.variables.get(key, s.arch.arch_globals_dependent_on_workload.get(key, None))
+    for v in [
+        ("array_wordlines", "rows in the array"),
+        ("array_bitlines", "columns in the array"),
+        (
+            "array_parallel_inputs",
+            "input slice(s) consumed in each cycle.",
+        ),
+        (
+            "array_parallel_weights",
+            "weights slice(s) used for computation in each cycle.",
+        ),
+        ("array_parallel_outputs", "partial sums produced in each cycle."),
+        ("tech_node", "m"),
+        ("adc_resolution", "bit(s)"),
+        ("dac_resolution", "bit(s)"),
+        ("n_adc_per_bank", "ADC(s)"),
+        ("supported_input_bits", "bit(s)"),
+        ("supported_output_bits", "bit(s)"),
+        ("supported_weight_bits", "bit(s)"),
+        ("bits_per_cell", "bit(s)"),
+        (
+            "cim_unit_width_cells",
+            "adjacent cell(s) in a wordline store bit(s) in one weight slice and process one input & output slice together",
+        ),
+        (
+            "cim_unit_depth_cells",
+            "adjacent cell(s) in a bitline operate in separate cycles",
+        ),
+        "cell_config",
+        ("cycle_period", "second(s)"),
+    ]:
+        if isinstance(v, tuple):
+            pfmat(v[0], getvalue(v[0]), v[1])
+        else:
+            pfmat(v, s.variables.get(v, None))
+    display_markdown("\n".join(result))
+# def clean_old_output_files(max_files=50):
+#     out_path = os.path.join(THIS_SCRIPT_DIR, "..", "outputs")
+#     files = sorted(
+#         list(os.path.join(out_path, f) for f in os.listdir(out_path)),
+#         key=lambda x: os.path.getmtime(x),
+#     )
+#     while len(files) > max_files:
+#         shutil.rmtree(
+#             files.pop(0),
+#             ignore_errors=True,
+#         )
+# def run_test(
+#     macro_name: str,
+#     test_name: str,
+#     show_doc: bool = True,
+#     *args,
+#     **kwargs,
+# ):
+#     test_func = get_test(macro_name, test_name)
+#     if show_doc:
+#         doc = test_func.__doc__
+#         doc = "\n".join([line[1:] for line in doc.split("\n")])
+#         display_markdown(doc)
+#     t = test_func(*args, **kwargs)
+#     clean_old_output_files()
+#     return t
+# def diff_str(a, b):
+#     new_a, new_b = [], []
+#     a = re.findall(r"[\w\.]+|\s+|.", a)
+#     b = re.findall(r"[\w\.]+|\s+|.", b)
+#     # print(f'Diffing {a} and {b}')
+#     matcher = difflib.SequenceMatcher(None, a, b)
+#     for tag, i1, i2, j1, j2 in matcher.get_opcodes():
+#         if tag == "equal":
+#             new_a.extend(a[i1:i2])
+#             new_b.extend(b[j1:j2])
+#         elif tag == "replace":
+#             new_a.extend([f"\033[31m{l}\033[0m" for l in a[i1:i2]])
+#             new_b.extend([f"\033[31m{l}\033[0m" for l in b[j1:j2]])
+#         elif tag == "delete":
+#             new_a.extend([f"\033[31m{l}\033[0m" for l in a[i1:i2]])
+#         elif tag == "insert":
+#             new_b.extend([f"\033[31m{l}\033[0m" for l in b[j1:j2]])
+#     return "".join(new_a), "".join(new_b)
+# def print_side_by_side(a, b):
+#     a_lines = a.splitlines()
+#     b_lines = b.splitlines()
+#     # Use difflib to match up lines
+#     matcher = difflib.SequenceMatcher(None, a_lines, b_lines)
+#     # Insert blank lines to line up the matches
+#     a = []
+#     b = []
+#     for _, i1, i2, j1, j2 in matcher.get_opcodes():
+#         a.extend(a_lines[i1:i2])
+#         b.extend(b_lines[j1:j2])
+#         a.extend([""] * (len(b) - len(a)))
+#         b.extend([""] * (len(a) - len(b)))
+#     max_a_len = max(len(line) for line in a)
+#     a = [line.ljust(max_a_len) for line in a]
+#     for i in range(len(a)):
+#         a[i], b[i] = diff_str(a[i], b[i])
+#         if a[i] and not b[i]:
+#             a[i] = f"\033[31m{a[i]}\033[0m"
+#         elif not a[i] and b[i]:
+#             b[i] = f"\033[31m{b[i]}\033[0m"
+#     for a_line, b_line in zip(a, b):
+#         print(f"{a_line}   |   {b_line}")
+from math import isclose
+import matplotlib.pyplot as plt
+def bar_stacked(
+    data: dict[dict[str, float]],
+    xlabel: str,
+    ylabel: str,
+    title: str,
+    ax: plt.Axes,
+):
+    """Create a stacked bar chart from nested dictionary data.
+    Args:
+        data: Nested dict where outer keys are x-axis categories,
+              inner keys are stack categories, values are heights
+        xlabel: Label for x-axis
+        ylabel: Label for y-axis
+        title: Chart title
+        ax: Matplotlib axes to plot on
+    """
+    import numpy as np
+    # Get all categories
+    x_categories = list(data.keys())
+    stack_categories = list(set(k for inner_dict in data.values() for k in inner_dict.keys()))
+    # Prepare data for stacking
+    x_pos = np.arange(len(x_categories))
+    bottoms = np.zeros(len(x_categories))
+    # Plot each stack category
+    for stack_cat in stack_categories:
+        heights = [data[x_cat].get(stack_cat, 0) for x_cat in x_categories]
+        ax.bar(x_pos, heights, label=stack_cat, bottom=bottoms)
+        bottoms += heights
+    # Set labels and formatting
+    ax.set_xlabel(xlabel)
+    ax.set_ylabel(ylabel)
+    ax.set_title(title)
+    ax.set_xticks(x_pos)
+    ax.set_xticklabels(x_categories, rotation=45, ha='right')
+    ax.legend()
+    ax.grid(axis='y', alpha=0.3)
+def bar_comparison(
+    data_dict: dict[str, dict[str, float]],
+    xlabel: str,
+    ylabel: str,
+    title: str,
+    ax: plt.Axes,
+):
+    """Create grouped bar chart comparing multiple datasets.
+    Args:
+        data_dict: Dict where keys are series names (e.g., "Modeled", "Expected"),
+                   values are dicts mapping category to value
+        xlabel: Label for x-axis
+        ylabel: Label for y-axis
+        title: Chart title
+        ax: Matplotlib axes to plot on
+    """
+    import numpy as np
+    # Get categories (use first dataset's keys)
+    categories = list(next(iter(data_dict.values())).keys())
+    series_names = list(data_dict.keys())
+    # Set up bar positions
+    x = np.arange(len(categories))
+    width = 0.8 / len(series_names)  # Total width divided by number of series
+    # Plot each series
+    for i, series_name in enumerate(series_names):
+        offset = (i - len(series_names)/2 + 0.5) * width
+        values = [data_dict[series_name][cat] for cat in categories]
+        ax.bar(x + offset, values, width, label=series_name)
+    # Set labels and formatting
+    ax.set_xlabel(xlabel)
+    ax.set_ylabel(ylabel)
+    ax.set_title(title)
+    ax.set_xticks(x)
+    ax.set_xticklabels(categories, rotation=45, ha='right')
+    ax.legend()
+    ax.grid(axis='y', alpha=0.3)
+def bar(
+    data: dict[str, float],
+    xlabel: str,
+    ylabel: str,
+    title: str,
+    ax: plt.Axes,
+):
+    """Create a simple bar chart from a dictionary.
+    Args:
+        data: Dict mapping category names to values
+        xlabel: Label for x-axis
+        ylabel: Label for y-axis
+        title: Chart title
+        ax: Matplotlib axes to plot on
+    """
+    import numpy as np
+    categories = list(data.keys())
+    values = list(data.values())
+    x = np.arange(len(categories))
+    ax.bar(x, values)
+    # Set labels and formatting
+    ax.set_xlabel(xlabel)
+    ax.set_ylabel(ylabel)
+    ax.set_title(title)
+    ax.set_xticks(x)
+    ax.set_xticklabels(categories, rotation=45, ha='right')
+    ax.grid(axis='y', alpha=0.3)

notebooks/compute_in_memory/isaac.guide.ipynb ADDED Viewed

@@ -0,0 +1,270 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Model of \"ISAAC: A Convolutional Neural Network Accelerator with In-Situ Analog Arithmetic in Crossbars\", ISCA 2016\n",
+    "\n",
+    "Paper by Ali Shafiee, Anirban Nag, Naveen Muralimanohar, Rajeev Balasubramonian,\n",
+    "John Paul Strachan, Miao Hu, R. Stanley Williams, and Vivek Srikumar.\n",
+    "\n",
+    "ISAAC is a ReRAM-based analog CiM accelerator. It explores several concepts in\n",
+    "CiM acceleration, including storing different layers in different arrays and\n",
+    "pipelining inputs/outputs between."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from _scripts import (\n",
+    "    display_important_variables,\n",
+    "    get_spec,\n",
+    "    bar_comparison,\n",
+    "    bar_stacked,\n",
+    "    bar,\n",
+    ")\n",
+    "display_important_variables('isaac')\n",
+    "get_spec('isaac').arch"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "### Tile Level\n",
+    "\n",
+    "Twelve macros (called IMAs in the paper) are organized into a tile. Each tile\n",
+    "includes a 64kB eDRAM buffer storing 16b inputs/outputs and quantization\n",
+    "circuits. The original paper included sigmoid units at this level, but we\n",
+    "replaced them with quantization circuits to match the other works. ISAAC uses\n",
+    "16b fixed-point quantization for all operands.\n",
+    "\n",
+    "- *Input Path*: Inputs are stored in the eDRAM. An inter-macro network sends\n",
+    "  inputs to macros in the tile.\n",
+    "- *Weight Path*: Weights are kept static in inference and do not move through\n",
+    "  this level.\n",
+    "- *Output Path*: Outputs are gathered from macros via the inter-tile network.\n",
+    "  They are quantized before being stored in the eDRAM.\n",
+    "\n",
+    "Next, there are 12 macros in each tile. Inputs and outputs are unicast between\n",
+    "macros.\n",
+    "\n",
+    "### Macro Level\n",
+    "\n",
+    "Eight arrays are organized into a macro with an input register and output\n",
+    "register. An input network sends input vectors to arrays.\n",
+    "\n",
+    "The eight arrays can process up to 8×128 = 1024 inputs across all rows, so the\n",
+    "input register is sized 2kB (2B per input). The output register is sized 256B\n",
+    "(2B per output, 128 outputs total (8 arrays × 128 columns × 2b per column / 16b\n",
+    "per output)). While the paper does not do this, we double output buffer size to\n",
+    "account for higher-precision accumulation that is important for lower-precision\n",
+    "quantization.\n",
+    "\n",
+    "- *Input Path*: Inputs are stored in the input buffer and multicast between\n",
+    "  arrays.\n",
+    "- *Weight Path*: Weights are kept static in inference and do not move through\n",
+    "  this level.\n",
+    "- *Output Path*: Outputs are stored in the output buffer and spatially reduced\n",
+    "  between arrays. Before the output buffer, a shift+add circuit accumulates\n",
+    "  outputs and corrects for offsets caused by slicing.\n",
+    "\n",
+    "Next, there are 8 arrays in each macro. Inputs and outputs can be spatially\n",
+    "reused across arrays with a multicast/reduction network.\n",
+    "\n",
+    "### Array Level\n",
+    "\n",
+    "Arrays consist of 128 × 128 ReRAMs. Each array is programmed with weights from\n",
+    "one DNN layer, and each weight filter uses 8 array columns (16b weights, 2b per\n",
+    "column). 1-bit DACs encode inputs across 16 cycles and 8-bit ADCs convert\n",
+    "outputs from each column.\n",
+    "\n",
+    "We note that the original ISAAC paper included a contribution to decrease\n",
+    "required ADC precision. Instead of supporting between 0 and the maximum output\n",
+    "of a column, ISAAC supported only half of the range. They ensured that all\n",
+    "column outputs would be in this range at program time. If the average weight\n",
+    "slice value in a column was less than half of the maximum output, the column\n",
+    "could not saturate the ADC. If the average weight slice value was greater than\n",
+    "half of the maximum output, ISAAC would store the negated value of the weights.\n",
+    "To correct for this, ISAAC would need to record sums of the input values, record\n",
+    "which weight columns were negated, and perform arithmetic to recover the real\n",
+    "sums from the negated sums.\n",
+    "\n",
+    "When we modeled ISAAC's accuracy, we found that this technique was not helpful\n",
+    "across any tested workloads because weights tended to have about half of the\n",
+    "maximum value and input bits tended to have >50% sparsity, so on average the output\n",
+    "of a column was around 25% of the output range anyway and never exceeded 50%. We\n",
+    "can therefore just use the lower half of the ADC range to achieve the same\n",
+    "result (lower ADC precision) without any of the additional complexity introduced\n",
+    "by this strategy. For this reason, we don't model this technique in our ISAAC\n",
+    "model.\n",
+    "\n",
+    "Inputs and weights are both assumed to be 16b unsigned fixed-point numbers.\n",
+    "Signed inputs and weights are converted by adding a bias to the inputs and\n",
+    "weights.\n",
+    "\n",
+    "- *Input Path*: Inputs pass through 1-bit DACs and appear on the rows of the\n",
+    "  array.\n",
+    "- *Weight Path*: Weights are stored in the array and are not moved during\n",
+    "  inference.\n",
+    "- *Output Path*: Outputs are read from the columns of the array with 8-bit ADCs.\n",
+    "\n",
+    "Next, there are 128 columns in each array. Inputs are reused between columns\n",
+    "(*i.e.,* each input-carrying wire connects to all columns), while outputs and\n",
+    "weights are not reused.\n",
+    "\n",
+    "### Column Level\n",
+    "\n",
+    "Each column consists of 128 ReRAM devices. Columns store 2b weight slices.\n",
+    "\n",
+    "- *Input Path*: Each input is passed directly to a row in the column.\n",
+    "- *Weight Path*: Weights are not moved during inference.\n",
+    "- *Output Path*: Outputs pass through a current mirror to buffer their values\n",
+    "  before exiting the column.\n",
+    "\n",
+    "### Row Level\n",
+    "\n",
+    "Each row in a column has one ReRAM device which stores an offset-encoded 2b\n",
+    "weight slice.\n",
+    "\n",
+    "- *Input Path*: The input is used for a MAC operation.\n",
+    "- *Weight Path*: A 2b weight is stored in the ReRAM device and is used for a MAC\n",
+    "  operation.\n",
+    "- *Output Path*: The output is supplied by a MAC operation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import accelforge as af\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Energy Breakdown\n",
+    "This test explores the energy, area, and latency of the accelerator\n",
+    "computing MVM operations. We note a few differences from the original ISAAC\n",
+    "paper. Notably, we made a few changes to the quantization, and we use\n",
+    "data-value-dependent models while ISAAC used a simple fixed-power model.\n",
+    "\n",
+    "We note:\n",
+    "- Energy is dominated by the ADC and memory cells due to the high ADC precision\n",
+    "  and large number of slices.\n",
+    "- Area is dominated by ADC.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "af.set_n_parallel_jobs(1)\n",
+    "spec = get_spec('isaac', add_dummy_main_memory=True)\n",
+    "spec.mapper.afm.metrics = af.mapper.FFM.Metrics.ENERGY\n",
+    "results = af.mapper.FFM.map_workload_to_arch(spec)\n",
+    "energy = results.per_compute().energy(per_component=True)\n",
+    "\n",
+    "spec_energy_area = spec.calculate_component_area_energy_latency_leak()\n",
+    "area = spec_energy_area.arch.per_component_total_area\n",
+    "\n",
+    "fig, ax = plt.subplots(1, 1, figsize=(10, 5))\n",
+    "bar(\n",
+    "    energy,\n",
+    "    \"Component\",\n",
+    "    \"Energy/Compute (J)\",\n",
+    "    \"Energy Breakdown\",\n",
+    "    ax,\n",
+    ")\n",
+    "\n",
+    "fig, ax = plt.subplots(1, 1, figsize=(10, 5))\n",
+    "bar(\n",
+    "    area,\n",
+    "    \"Component\",\n",
+    "    \"Area (m^2)\",\n",
+    "    \"Area Breakdown\",\n",
+    "    ax,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import sys\n",
+    "# import os\n",
+    "\n",
+    "# # fmt: off\n",
+    "# THIS_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))\n",
+    "# MACRO_NAME = os.path.basename(THIS_SCRIPT_DIR)\n",
+    "# sys.path.append(os.path.abspath(os.path.join(THIS_SCRIPT_DIR, '..', '..', '..', '..')))\n",
+    "# from scripts import utils as utl\n",
+    "# import scripts\n",
+    "# # fmt: on\n",
+    "\n",
+    "# def test_full_dnn(dnn_name: str):\n",
+    "#     \"\"\"\n",
+    "#     This test explores the energy, area, and latency of the accelerator when\n",
+    "#     running full DNN workloads.\n",
+    "#     \"\"\"\n",
+    "#     dnn_dir = utl.path_from_model_dir(f\"workloads/{dnn_name}\")\n",
+    "#     layer_paths = [\n",
+    "#         os.path.join(dnn_dir, l) for l in os.listdir(dnn_dir) if l.endswith(\".yaml\")\n",
+    "#     ]\n",
+    "\n",
+    "#     layer_paths = [l for l in layer_paths if \"From einsum\" not in open(l, \"r\").read()]\n",
+    "\n",
+    "#     results = utl.parallel_test(\n",
+    "#         utl.delayed(utl.run_layer)(\n",
+    "#             macro=MACRO_NAME,\n",
+    "#             layer=l,\n",
+    "#             tile=\"isaac\",\n",
+    "#             chip=\"large_router\",\n",
+    "#         )\n",
+    "#         for l in layer_paths\n",
+    "#     )\n",
+    "#     results.clear_zero_energies()\n",
+    "#     return results\n",
+    "\n",
+    "\n",
+    "# if __name__ == \"__main__\":\n",
+    "#     test_energy_breakdown()\n",
+    "#     test_full_dnn(\"resnet18\")\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}