accelforge 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. accelforge/__init__.py +21 -0
  2. accelforge/_accelerated_imports.py +16 -0
  3. accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
  4. accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
  5. accelforge/_deprecate/_simanneal/simanneal.py +666 -0
  6. accelforge/_deprecate/_simanneal/tracking.py +105 -0
  7. accelforge/_deprecate/_simanneal/wrappers.py +218 -0
  8. accelforge/_deprecate/_simanneal2/__init__.py +7 -0
  9. accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
  10. accelforge/_deprecate/_simanneal2/tracking.py +116 -0
  11. accelforge/_deprecate/compatibility_util.py +181 -0
  12. accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
  13. accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
  14. accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
  15. accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
  16. accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
  17. accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
  18. accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
  19. accelforge/_deprecate/tags.py +69 -0
  20. accelforge/_deprecate/viz/__init__.py +0 -0
  21. accelforge/_deprecate/viz/interactive.py +159 -0
  22. accelforge/_deprecate/viz/reservationtree.py +307 -0
  23. accelforge/_deprecate/viz/ski_slope.py +88 -0
  24. accelforge/_version.py +15 -0
  25. accelforge/examples.py +39 -0
  26. accelforge/frontend/__init__.py +10 -0
  27. accelforge/frontend/_binding.py +129 -0
  28. accelforge/frontend/_workload_isl/__init__.py +2 -0
  29. accelforge/frontend/_workload_isl/_isl.py +149 -0
  30. accelforge/frontend/_workload_isl/_symbolic.py +141 -0
  31. accelforge/frontend/arch copy.py +1544 -0
  32. accelforge/frontend/arch.py +1642 -0
  33. accelforge/frontend/config.py +63 -0
  34. accelforge/frontend/mapper/__init__.py +5 -0
  35. accelforge/frontend/mapper/ffm.py +126 -0
  36. accelforge/frontend/mapper/mapper.py +7 -0
  37. accelforge/frontend/mapper/metrics.py +30 -0
  38. accelforge/frontend/mapping/__init__.py +1 -0
  39. accelforge/frontend/mapping/mapping.py +1736 -0
  40. accelforge/frontend/model.py +14 -0
  41. accelforge/frontend/renames.py +150 -0
  42. accelforge/frontend/spec copy.py +230 -0
  43. accelforge/frontend/spec.py +301 -0
  44. accelforge/frontend/variables.py +12 -0
  45. accelforge/frontend/workload.py +952 -0
  46. accelforge/mapper/FFM/__init__.py +9 -0
  47. accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
  48. accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
  49. accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
  50. accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
  51. accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
  52. accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
  53. accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
  54. accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
  55. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
  56. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
  57. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
  58. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
  59. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
  60. accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
  61. accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
  62. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
  63. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
  64. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
  65. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
  66. accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
  67. accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
  68. accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
  69. accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
  70. accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
  71. accelforge/mapper/FFM/data.py +61 -0
  72. accelforge/mapper/FFM/main copy.py +236 -0
  73. accelforge/mapper/FFM/main.py +208 -0
  74. accelforge/mapper/FFM/mappings.py +510 -0
  75. accelforge/mapper/FFM/pmappings.py +310 -0
  76. accelforge/mapper/__init__.py +4 -0
  77. accelforge/mapper.py +0 -0
  78. accelforge/model/__init__.py +1 -0
  79. accelforge/model/_looptree/__init__.py +0 -0
  80. accelforge/model/_looptree/accesses.py +335 -0
  81. accelforge/model/_looptree/capacity/__init__.py +1 -0
  82. accelforge/model/_looptree/capacity/aggregators.py +36 -0
  83. accelforge/model/_looptree/capacity/capacity.py +47 -0
  84. accelforge/model/_looptree/energy.py +150 -0
  85. accelforge/model/_looptree/equivalent_ranks.py +29 -0
  86. accelforge/model/_looptree/latency/__init__.py +1 -0
  87. accelforge/model/_looptree/latency/latency.py +98 -0
  88. accelforge/model/_looptree/latency/memory.py +120 -0
  89. accelforge/model/_looptree/latency/processors.py +92 -0
  90. accelforge/model/_looptree/mapping_utilities.py +71 -0
  91. accelforge/model/_looptree/reuse/__init__.py +4 -0
  92. accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
  93. accelforge/model/_looptree/reuse/isl/des.py +59 -0
  94. accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
  95. accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
  96. accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
  97. accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
  98. accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
  99. accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
  100. accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
  101. accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
  102. accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
  103. accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
  104. accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
  105. accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
  106. accelforge/model/_looptree/run.py +122 -0
  107. accelforge/model/_looptree/types.py +26 -0
  108. accelforge/model/_looptree/visualization/__init__.py +0 -0
  109. accelforge/model/_looptree/visualization/occupancy.py +11 -0
  110. accelforge/model/main.py +222 -0
  111. accelforge/plotting/__init__.py +2 -0
  112. accelforge/plotting/mappings.py +219 -0
  113. accelforge/plotting/specs.py +57 -0
  114. accelforge/util/__init__.py +4 -0
  115. accelforge/util/_base_analysis_types.py +24 -0
  116. accelforge/util/_basetypes.py +1089 -0
  117. accelforge/util/_frozenset.py +36 -0
  118. accelforge/util/_isl.py +29 -0
  119. accelforge/util/_itertools.py +14 -0
  120. accelforge/util/_mathfuncs.py +57 -0
  121. accelforge/util/_parse_expressions.py +339 -0
  122. accelforge/util/_picklecache.py +32 -0
  123. accelforge/util/_setexpressions.py +268 -0
  124. accelforge/util/_sympy/__init__.py +0 -0
  125. accelforge/util/_sympy/broadcast_max.py +18 -0
  126. accelforge/util/_visualization.py +112 -0
  127. accelforge/util/_yaml.py +579 -0
  128. accelforge/util/parallel.py +193 -0
  129. accelforge-0.0.1.dist-info/METADATA +64 -0
  130. accelforge-0.0.1.dist-info/RECORD +258 -0
  131. accelforge-0.0.1.dist-info/WHEEL +5 -0
  132. accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
  133. accelforge-0.0.1.dist-info/top_level.txt +5 -0
  134. docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
  135. docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
  136. docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
  137. docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
  138. docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
  139. docs/_build/html/_sources/fastfusion.rst.txt +20 -0
  140. docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
  141. docs/_build/html/_sources/index.rst.txt +87 -0
  142. docs/_build/html/_sources/modules.rst.txt +7 -0
  143. docs/_build/html/_sources/notes/citation.rst.txt +45 -0
  144. docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
  145. docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
  146. docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
  147. docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
  148. docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
  149. docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
  150. docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
  151. docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
  152. docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
  153. docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
  154. docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
  155. docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
  156. docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
  157. docs/_build/html/_sources/notes/spec.rst.txt +36 -0
  158. docs/source/_ext/include_attrs.py +213 -0
  159. docs/source/_ext/include_docstring.py +364 -0
  160. docs/source/_ext/include_functions.py +154 -0
  161. docs/source/_ext/include_notebook.py +131 -0
  162. docs/source/_ext/include_yaml.py +119 -0
  163. docs/source/_ext/inherited_attributes.py +222 -0
  164. docs/source/_ext/paths.py +4 -0
  165. docs/source/conf.py +79 -0
  166. examples/arches/compute_in_memory/_include.yaml +74 -0
  167. examples/arches/compute_in_memory/_include_functions.py +229 -0
  168. examples/arches/compute_in_memory/_load_spec.py +57 -0
  169. examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
  170. examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
  171. examples/arches/compute_in_memory/components/misc.py +195 -0
  172. examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
  173. examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
  174. examples/arches/compute_in_memory/isaac.yaml +233 -0
  175. examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
  176. examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
  177. examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
  178. examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
  179. examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
  180. examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
  181. examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
  182. examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
  183. examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
  184. examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
  185. examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
  186. examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
  187. examples/arches/eyeriss.yaml +68 -0
  188. examples/arches/fanout_variations/at_glb.yaml +31 -0
  189. examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
  190. examples/arches/fanout_variations/at_mac.yaml +31 -0
  191. examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
  192. examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
  193. examples/arches/nvdla.yaml +47 -0
  194. examples/arches/simple.yaml +28 -0
  195. examples/arches/tpu_v4i.yaml +67 -0
  196. examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
  197. examples/misc/component_annotated.yaml +33 -0
  198. examples/workloads/gpt3_6.7B.yaml +124 -0
  199. examples/workloads/matmuls.yaml +20 -0
  200. examples/workloads/mobilenet_28.yaml +81 -0
  201. examples/workloads/mobilenet_various_separate.yaml +106 -0
  202. examples/workloads/three_matmuls_annotated.yaml +59 -0
  203. notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
  204. notebooks/compute_in_memory/_scripts.py +339 -0
  205. notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
  206. notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
  207. notebooks/paths.py +4 -0
  208. notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
  209. notebooks/tutorials/FFM.ipynb +3498 -0
  210. notebooks/tutorials/_include.py +48 -0
  211. notebooks/tutorials/component_energy_area.ipynb +363 -0
  212. tests/Q_mapping.yaml +38 -0
  213. tests/__init__.py +0 -0
  214. tests/conv.mapping.yaml +27 -0
  215. tests/conv.workload.yaml +13 -0
  216. tests/conv_sym.mapping.yaml +43 -0
  217. tests/copy.mapping.yaml +35 -0
  218. tests/copy.workload.yaml +15 -0
  219. tests/distribuffers/__init__.py +0 -0
  220. tests/distribuffers/multicast/test_cases.yaml +482 -0
  221. tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
  222. tests/distribuffers/spec/distributed.yaml +100 -0
  223. tests/distribuffers/spec/logical_arch.yaml +32 -0
  224. tests/distribuffers/spec/physical_arch.yaml +69 -0
  225. tests/distribuffers/test_binding.py +48 -0
  226. tests/frontend/__init__.py +0 -0
  227. tests/frontend/test_mapping_viz.py +52 -0
  228. tests/mapper/__init__.py +0 -0
  229. tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
  230. tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
  231. tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
  232. tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
  233. tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
  234. tests/mapper/test_mapping_to_isl.py +90 -0
  235. tests/mapper/test_spatial_reuse_analysis.py +67 -0
  236. tests/mapper/test_temporal_reuse_analysis.py +56 -0
  237. tests/mapper/util.py +58 -0
  238. tests/matmul.mapping.yaml +29 -0
  239. tests/matmul.workload.yaml +12 -0
  240. tests/matmul_spatial.mapping.yaml +44 -0
  241. tests/mha.renames.yaml +65 -0
  242. tests/mha.workload.yaml +67 -0
  243. tests/mha.yaml +59 -0
  244. tests/mha_full.workload.yaml +67 -0
  245. tests/mobilenet.workload.yaml +35 -0
  246. tests/mobilenet_long.workload.yaml +64 -0
  247. tests/pmappingcache.py +24 -0
  248. tests/processing_stage.arch.yaml +40 -0
  249. tests/snowcat.arch.yaml +36 -0
  250. tests/test_ffm_join_pmappings.py +106 -0
  251. tests/test_ffm_make_pmappings.py +82 -0
  252. tests/test_ffm_make_tile_shapes.py +49 -0
  253. tests/test_mapper.py +100 -0
  254. tests/test_model.py +37 -0
  255. tests/test_plotting.py +72 -0
  256. tests/test_processing_stage.py +46 -0
  257. tests/test_symbolic_model.py +248 -0
  258. tests/test_workload.py +141 -0
docs/source/conf.py ADDED
@@ -0,0 +1,79 @@
1
# Sphinx configuration for the accelforge documentation build.
import os
import sys
sys.path.insert(0, os.path.abspath('_ext'))  # local Sphinx extensions below
sys.path.insert(0, os.path.abspath('../..')) # Make your repo importable

import locale
locale.setlocale(locale.LC_ALL, 'C.UTF-8')

# -- Project information -----------------------------------------------------
project = 'accelforge'
author = 'Tanner Andrulis, Michael Gilbert'
# NOTE(review): the distributed wheel is versioned 0.0.1 but release here says
# 0.1.0 -- confirm which is the intended published version.
release = '0.1.0'

# -- HTML output -------------------------------------------------------------
html_theme = 'sphinx_rtd_theme'
# html_theme = 'furo'
# html_theme = 'pydata_sphinx_theme'
# pip3 install sphinx-furo-theme

extensions = [
    'sphinx.ext.autodoc',  # Pull docstrings
    'sphinx.ext.napoleon',  # NumPy / Google style docstrings
    'sphinx.ext.autosummary',  # Generate autodoc summaries
    'sphinx.ext.viewcode',  # Add links to source code
    'sphinx_autodoc_typehints',  # Include type hints
    'sphinx.ext.intersphinx',  # Link to other projects' documentation
    'include_docstring',  # Include docstrings
    'include_notebook',  # Include notebooks
    'include_attrs',  # Include attributes & their docstrings
    'include_functions',  # Include functions & their docstrings
    'inherited_attributes',  # Inherit docstrings from parent classes
    'include_yaml',  # Include subsets of YAML files
    'sphinx_copybutton',  # Add copy button to code blocks
]

# Hide pydantic machinery from the generated API docs.
autodoc_default_options = {
    'members': True,
    'undoc-members': False,
    'exclude-members': 'model_config,model_fields,__pydantic_fields__,model_post_init',
}

# ---------- Autodoc settings ----------
# Show type hints inline in signatures
autodoc_typehints = "signature"
autodoc_typehints_format = "short"

# Preserve default values
autodoc_preserve_defaults = True

# Force multi-line for long constructor signatures (Sphinx 7+)
autodoc_class_signature = "separated"

# ---------- HTML CSS to wrap signatures ----------
# Create docs/source/_static/custom.css with:
# .signature {
#     white-space: pre-wrap !important;
#     word-break: break-word;
# }
# html_static_path = ["_static"]
# html_css_files = ["custom.css"]
# html_js_files = ["custom.js"]

# ---------- Optional: Napoleon settings ----------
# If using Google/NumPy style docstrings
napoleon_use_param = True
napoleon_use_rtype = True
napoleon_use_ivar = True

# Fail loudly on broken cross-references.
nitpicky = True

intersphinx_mapping = {
    'python': ('https://docs.python.org/3', None),
    'numpy': ('https://numpy.org/doc/stable/', None),
    'pandas': ('https://pandas.pydata.org/docs/', None),
    # 'matplotlib': ('https://matplotlib.org/stable/contents.html', None),
    'scipy': ('https://docs.scipy.org/doc/scipy/reference/', None),
    'scikit-learn': ('https://scikit-learn.org/stable/documentation.html', None),
    'hwcomponents': ('https://accelergy-project.github.io/hwcomponents/', None),
}
@@ -0,0 +1,74 @@
1
# Shared attribute bindings for compute-in-memory components. Values on the
# right are expression/variable names resolved elsewhere in the spec.
cim_component_attributes: &cim_component_attributes
  # These are for NeuroSim
  rows: array_wordlines
  cols: array_bitlines
  cols_active_at_once: array_parallel_outputs
  cell_config: cell_config
  average_input_value: average_input_value
  average_cell_value: average_weight_value
  voltage: voltage
  temporal_dac_bits: temporal_dac_resolution
  read_pulse_width: read_pulse_width

  # These are for the ADC plug-in. Set defaults for them
  # so that if the user does not specify, we won't get
  # an error.
  resolution: adc_resolution
  n_adcs: n_adc_per_bank
  width: encoded_output_bits

# These will be applied to the weight drivers
weight_drivers_attributes: &weight_drivers_attributes
  <<: *cim_component_attributes
  # NOTE(review): the keys from rows through temporal_dac_bits repeat values
  # already provided by the merge key above; only cycle_seconds, sequential,
  # and read_pulse_width actually differ -- confirm the repeats are intended.
  rows: array_wordlines
  cols: array_bitlines
  cols_active_at_once: array_parallel_outputs
  cell_config: cell_config
  average_input_value: average_input_value
  average_cell_value: average_weight_value
  cycle_seconds: cycle_period
  voltage: voltage
  temporal_dac_bits: temporal_dac_resolution
  sequential: True
  read_pulse_width: 0 # Irrelevant for weight programming

variables_global: &variables_global
  weight_bits: weight.bits_per_value
  input_bits: input.bits_per_value
  output_bits: output.bits_per_value
  array_parallel_inputs: get_array_fanout_reuse_output(spec)
  array_parallel_outputs: get_array_fanout_reuse_input(spec)
  array_parallel_weights: get_array_fanout_total(spec)
  array_wordlines: array_parallel_inputs * cim_unit_width_cells
  array_bitlines: array_parallel_outputs * cim_unit_depth_cells
  dac_resolution: max(voltage_dac_resolution, temporal_dac_resolution)
  cols_active_at_once: array_parallel_outputs

  # Calculate the number of slices needed to store the input and weight bits and
  # the number of bits in each slice
  in_b: encoded_input_bits # Shorthands so the following lines aren't super long
  w_b: encoded_weight_bits
  max_input_bits_per_slice: min(dac_resolution, in_b)
  max_weight_bits_per_slice: min(cim_unit_width_cells * bits_per_cell, w_b)

  average_input_bits_per_slice: encoded_input_bits / n_input_slices
  average_weight_bits_per_slice: encoded_weight_bits / n_weight_slices

  # This is for the bitwise-multiplication of the input and weight slices
  n_virtual_macs: max_input_bits_per_slice * max_weight_bits_per_slice * encoded_output_bits

  # Calculate statistics for input and weight values and bits after encoding
  ehtas: encoded_hist_to_avg_slice # Shorthands so the following lines aren't super long
  in_enc_fn: input_encoding_func
  w_enc_fn: weight_encoding_func
  average_input_value: ehtas(in_enc_fn(inputs_hist), in_b, max_input_bits_per_slice)
  average_weight_value: ehtas(w_enc_fn(weights_hist), w_b, max_weight_bits_per_slice)
  input_bit_distribution: ehtas(in_enc_fn(inputs_hist), in_b, 1, return_per_slice=True)
  weight_bit_distribution: ehtas(w_enc_fn(weights_hist), w_b, 1, return_per_slice=True)

  min_weight_slices: ceil(min_supported_weight_bits / bits_per_cell / cim_unit_width_cells)
  min_input_slices: ceil(min_supported_input_bits / max_input_bits_per_slice)

  n_input_slices: max(ceil(in_b / max_input_bits_per_slice), min_input_slices)
  n_weight_slices: max(ceil(w_b / max_weight_bits_per_slice), min_weight_slices)
  n_sliced_psums: n_input_slices * n_weight_slices
@@ -0,0 +1,229 @@
1
+ import accelforge as af
2
+
3
+
4
def get_array_fanout_reuse_input(spec: af.Spec) -> int:
    """Product of the ``array_reuse_input`` spatial fanouts over all arch leaves."""
    total = 1
    for leaf in spec.arch.get_nodes_of_type(af.arch.Leaf):
        if "array_reuse_input" not in leaf.spatial:
            continue
        fanout = leaf.spatial["array_reuse_input"]["fanout"]
        assert isinstance(fanout, (int, float)), f"fanout {leaf.name}.spatial.array_reuse_input.fanout is not a number"
        total *= fanout
    return total
12
+
13
+
14
def get_array_fanout_reuse_output(spec: af.Spec) -> int:
    """Product of the ``array_reuse_output`` spatial fanouts over all arch leaves."""
    total = 1
    for leaf in spec.arch.get_nodes_of_type(af.arch.Leaf):
        if "array_reuse_output" not in leaf.spatial:
            continue
        fanout = leaf.spatial["array_reuse_output"]["fanout"]
        assert isinstance(fanout, (int, float)), f"fanout {leaf.name}.spatial.array_reuse_output.fanout is not a number"
        total *= fanout
    return total
22
+
23
+
24
def get_array_fanout_total(spec: af.Spec) -> int:
    """Total array fanout: the input-reuse fanout times the output-reuse fanout."""
    input_reuse = get_array_fanout_reuse_input(spec)
    output_reuse = get_array_fanout_reuse_output(spec)
    return input_reuse * output_reuse
26
+
27
+
28
+ # Sign magnitude
29
+ # 1. Scale X to [-1, 1]: x = NORM_1_TO_NEG1(x, INPUTS_VALUE_DISTRIBUTION)
30
+ # 2. Convert to signed: x = abs(x) * (2 ** (INPUT_BITS - 1) - 1)
31
+ # 2. x = round(x * (2 ** INPUT_BITS - 1))
32
+
33
+ from math import log2
34
+ from typing import List, NamedTuple, Union
35
+
36
class ProbableBits(NamedTuple):
    """A bit pattern paired with the probability of it occurring.

    ``bits`` is a list of 0/1 ints, most-significant bit first (the order
    produced by ``bin()``); ``probability`` is the fraction of encoded values
    that yield this pattern.
    """
    # Tightened from bare `list` -- every producer in this module appends ints.
    bits: List[int]
    probability: float
39
+
40
+ # ==============================================================================
41
+ # Encoding functions
42
+ # ==============================================================================
43
+
44
+
45
def magnitude_encode_hist(weights) -> List[ProbableBits]:
    """
    A signed value is encoded as a positive or negative magnitude of that value.
    Signed hardware is required.
    """
    nbits = get_num_bits(weights)
    encoded = []
    halfwidth = len(weights) / 2
    for i, w in enumerate(weights):
        # Map bin index i to the signed value it represents, centered on zero.
        normed = norm(i, len(weights), -halfwidth + 0.5, halfwidth + 0.5)
        # Store only the magnitude: [1:] drops the leading bit of the
        # nbits-wide unsigned representation.
        encoded.append(ProbableBits(to_bits_unsigned(abs(normed), nbits)[1:], w))
    return norm_encoded_hist(encoded)
57
+
58
def two_part_magnitude_encode_hist(weights):
    """
    Two (devices, timesteps, components, etc.) encode each signed value: one
    encodes the magnitude of the value, the other encodes 0. Which device
    carries the magnitude depends on the sign of the value.
    """
    result = []
    for entry in magnitude_encode_hist(weights):
        half_prob = entry.probability / 2
        # Split each magnitude entry's probability evenly between the device
        # holding the magnitude and the device holding all zeros.
        result.append(ProbableBits(entry.bits, half_prob))
        result.append(ProbableBits([0] * len(entry.bits), half_prob))
    return result
71
+
72
def offset_encode_hist(weights):
    """
    A signed value is encoded as the value minus the negative minimum value.
    This maps a range of [-min, max] to [0, max - min]. The bias must be added
    back after computation.
    """
    nbits = get_num_bits(weights)
    encoded = []
    for i, w in enumerate(weights):
        # With the range [0, len) this norm() call is the identity: bin i
        # encodes the unsigned value i.
        normed = norm(i, len(weights), 0, len(weights))
        encoded.append(ProbableBits(to_bits_unsigned(normed, nbits), w))
    return norm_encoded_hist(encoded)
84
+
85
+
86
def offset_encode_if_signed_hist(weights):
    """
    Offset encode a value only if it is signed. Otherwise, don't apply any
    bias and just use the positive values.
    """
    encoder = offset_encode_hist if is_hist_signed(weights) else magnitude_encode_hist
    return encoder(weights)
94
+
95
+
96
def two_part_magnitude_encode_if_signed_hist(weights):
    """
    Two part magnitude encode a value only if it is signed. Otherwise, use only
    positive values.
    """
    if is_hist_signed(weights):
        return two_part_magnitude_encode_hist(weights)
    return magnitude_encode_hist(weights)
104
+
105
+
106
def xnor_encode_hist(weights):
    """
    XNOR encoding based on Jia JSSCC 2020.
    """
    nbits = get_num_bits(weights)
    encoded = []
    halfwidth = len(weights) / 2
    for i, w in enumerate(weights):
        # Signed value represented by bin i, centered on zero.
        normed = norm(i, len(weights), -halfwidth + 0.5, halfwidth + 0.5)
        bits = []
        # Greedy signed binary expansion: each bit contributes +2**j when 1
        # and -2**j when 0. The two extra j == -1 passes append two
        # half-weight bits; the assert below checks the residual reaches
        # exactly zero for every bin.
        for j in list(range(nbits - 1, -1, -1)) + [-1, -1]:
            bits.append(int(normed > 0))
            normed -= 2**j * (2 * bits[-1] - 1)
        assert normed == 0, f"normed={normed} is not 0"
        encoded.append(ProbableBits(bits, w))
    return norm_encoded_hist(encoded)
122
+
123
+
124
def zero_gated_xnor_encode_hist(weights):
    """
    XNOR encoding with zero gating based on Jia JSSCC 2020.
    """
    encoded = xnor_encode_hist(weights)
    middle = len(encoded) // 2
    # Gate the zero-valued (center) entry: keep its probability but replace
    # its bit pattern with all zeros.
    zeroed_bits = [0] * len(encoded[middle].bits)
    encoded[middle] = ProbableBits(zeroed_bits, encoded[middle].probability)
    return encoded
134
+
135
+ # ==============================================================================
136
+ # Helper functions
137
+ # ==============================================================================
138
+
139
def assert_hist_pow2_minus1(hist):
    """Raise AssertionError unless len(hist) is one less than a power of two."""
    size = len(hist)
    # size == 2**k - 1 exactly when size + 1 shares no set bits with size.
    assert (size + 1) & size == 0, (
        f"Histogram length {size} is not a power of 2 minus 1."
    )
146
+
147
+
148
def norm_encoded_hist(encoded_hist: List[ProbableBits]):
    """Rescale probabilities so they sum to 1, keeping bit patterns unchanged."""
    total = sum(entry.probability for entry in encoded_hist)
    return [
        ProbableBits(entry.bits, entry.probability / total)
        for entry in encoded_hist
    ]
151
+
152
+
153
def get_num_bits(hist):
    """Return n such that 2**n == len(hist) + 1, asserting that such n exists."""
    n_bins_plus_one = len(hist) + 1
    # bit_length() - 1 is floor(log2); the assert rejects non-power-of-2 sizes.
    n_bits = n_bins_plus_one.bit_length() - 1
    assert 2**n_bits == n_bins_plus_one, (
        f"Number of histogram bins + 1 must be a power of 2, got {len(hist)}."
    )
    return n_bits
161
+
162
+
163
def is_hist_signed(hist):
    """True when any probability mass sits in the lower (negative) half of hist."""
    negative_half = hist[: len(hist) // 2]
    return sum(negative_half) != 0
165
+
166
+
167
def hist_to_magnitude(hist):
    # Fold a signed-value histogram (odd length, zero bin at the center) into
    # a histogram over magnitudes by summing the +i and -i bins.
    assert_hist_pow2_minus1(hist)
    new_hist = [0] * (len(hist) // 2)
    hist_center = len(hist) // 2
    for i in range(len(new_hist)):
        # NOTE(review): at i == 0 this adds the center (zero) bin twice, and
        # because len(new_hist) == len(hist) // 2 the outermost bins hist[0]
        # and hist[-1] are never read -- confirm both are intentional.
        new_hist[i] = hist[hist_center + i] + hist[hist_center - i]
    assert_hist_pow2_minus1(new_hist)
    return new_hist
175
+
176
+
177
def to_bits_unsigned(x, nbits):
    """Round x and return its nbits-wide unsigned binary digits, MSB first."""
    value = round(x)
    assert 0 <= value < 2**nbits, f"x={value} is not in range [0, 2^{nbits})"
    # Extract bits from most to least significant.
    return [(value >> shift) & 1 for shift in range(nbits - 1, -1, -1)]
181
+
182
+
183
def norm(x, nbins, rmin, rmax):
    """Linearly map bin index x from [0, nbins) onto the range [rmin, rmax)."""
    fraction = x / nbins
    return fraction * (rmax - rmin) + rmin
185
+
186
+
187
def encoded_hist_to_avg_slice(
    encoded_hist: List[ProbableBits],
    total_bits: int,
    bits_per_slice: Union[list, int],
    partial_slices_use_full_range: bool = False,
    return_per_slice: bool = False,
):
    """
    Compute the probability-weighted average value held by each bit slice.

    Bits are partitioned into slices of `bits_per_slice` bits (an int is
    expanded into equal slices, with a smaller final slice for any remainder).
    Each bit's contribution is its positional weight within its slice, scaled
    to the slice's maximum representable value. Returns the per-slice averages
    when `return_per_slice` is True, otherwise their mean.
    """
    if isinstance(bits_per_slice, int):
        bits_per_slice = [bits_per_slice] * (total_bits // bits_per_slice)
        # Remainder bits form one final, smaller slice.
        if sum(bits_per_slice) != total_bits:
            bits_per_slice.append(total_bits - sum(bits_per_slice))

    assert total_bits == sum(bits_per_slice), (
        f"Sum of bits per slice {sum(bits_per_slice)} != total_bits " f"{total_bits}"
    )

    # bit2slice[i] = (slice index, scale) for global bit position i, MSB first
    # within each slice. The scale normalizes the bit's positional weight by
    # either the largest slice's max value or (if partial_slices_use_full_range)
    # this slice's own max value.
    bit2slice = []
    max_val = max(2 ** max(bits_per_slice) - 1, 1)
    for i, b in enumerate(bits_per_slice):
        m = max(2**b - 1, 1) if partial_slices_use_full_range else max_val
        bit2slice += [(i, max((2 ** (b - j - 1)), 1) / m) for j in range(b)]

    avg_slice_values = [0] * len(bits_per_slice)
    for e in encoded_hist:
        for i in range(total_bits):
            slice_idx, scale = bit2slice[i]
            if i >= len(e.bits):
                # Pattern is shorter than total_bits: substitute the pattern's
                # mean bit value for the missing positions.
                bit_value = sum(e.bits) / len(e.bits)
            else:
                bit_value = e.bits[i]
            avg_slice_values[slice_idx] += bit_value * e.probability * scale

    if return_per_slice:
        return avg_slice_values

    return sum(avg_slice_values) / len(avg_slice_values)
223
+
224
+
225
if __name__ == "__main__":
    # Quick manual check: encode a symmetric triangular distribution over 31
    # bins (5-bit signed values) and print each resulting ProbableBits entry.
    input_dist = [16 - abs(16 - i) for i in range(31)]
    print(f"input_dist: {input_dist}")
    for e in xnor_encode_hist(input_dist):
        print(e)
@@ -0,0 +1,57 @@
1
+ import accelforge as af
2
+ import os
3
+
4
+ THIS_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
5
+ VARIABLES_GLOBAL_PATH = os.path.join(THIS_SCRIPT_DIR, "_include.yaml")
6
+
7
+
8
+ def get_spec(
9
+ arch_name: str,
10
+ compare_with_arch_name: str | None = None,
11
+ add_dummy_main_memory: bool = False,
12
+ ) -> af.Spec:
13
+ """
14
+ Gets the spec for the given architecture. If `compare_with_arch_name` is given, the
15
+ variables_iso will be grabbed from `compare_with_arch_name` in order to match
16
+ attributes for fair comparison.
17
+
18
+ Parameters
19
+ ----------
20
+ arch_name: str
21
+ The name of the architecture to get the spec for.
22
+ compare_with_arch_name: str | None
23
+ The name of the architecture to compare with. If not given, variables will be
24
+ taken from the given `arch_name`.
25
+
26
+ Returns
27
+ -------
28
+ spec: af.Spec
29
+ The spec for the given architecture.
30
+ """
31
+ if compare_with_arch_name is None:
32
+ compare_with_name = arch_name
33
+ else:
34
+ compare_with_name = compare_with_arch_name
35
+
36
+ arch_name = os.path.join(THIS_SCRIPT_DIR, f"{arch_name}.yaml")
37
+ compare_with_name = os.path.join(THIS_SCRIPT_DIR, f"{compare_with_name}.yaml")
38
+ variables = af.Variables.from_yaml(arch_name, top_key="variables")
39
+ arch = af.Arch.from_yaml(arch_name, top_key="arch")
40
+ workload = af.Workload.from_yaml(arch_name, top_key="workload")
41
+ spec = af.Spec(arch=arch, variables=variables, workload=workload)
42
+
43
+ spec.config.expression_custom_functions.append(
44
+ os.path.join(THIS_SCRIPT_DIR, "_include_functions.py")
45
+ )
46
+ spec.config.component_models.append(
47
+ os.path.join(THIS_SCRIPT_DIR, "components/*.py")
48
+ )
49
+ if add_dummy_main_memory:
50
+ main_memory = af.arch.Memory(
51
+ name="MainMemory",
52
+ component_class="Dummy",
53
+ size=float("inf"),
54
+ tensors={"keep": "~weight"}
55
+ )
56
+ spec.arch.nodes.insert(0, main_memory)
57
+ return spec
@@ -0,0 +1,181 @@
1
+ from hwcomponents.scaling import linear
2
+ from hwcomponents_neurosim import NOTGate
3
+ from hwcomponents import ComponentModel, action
4
+ from misc import Capacitor
5
+
6
+
7
class C2CMultiplier(ComponentModel):
    """
    The C2C multiplier looks like the following:

    - For operand A as an analog voltage
    - Operand B is a binary digital value with bits B0, B1, B2... from least to most
      significant

    The circuit looks like:

        2C      2C         2C         2C         2C         2C
    G──||───┰──||──────┰──||──────┰──||──────┰──||──────┰──||──── -> OUT
          = C        = C        = C        = C        = C
            │          │          │          │          │
            ╲─── B0    ╲─── B1    ╲─── B2    ╲─── B3    ╲─── B4
            │ G        │ G        │ G        │ G        │ G
    A──────┴──────────┴──────────┴──────────┴──────────┴─────────

    Energy is consumed when: 1. A increases, and all the B capacitors are charged 2. Any
    B bit goes 0->1, and the corresponding capacitor is charged

    USAGE: In your architecture, initialize both a C2CMultiplier and a
    C2CMultiplierPortB. Have the "a" port process the analog operand and have the "b"
    port process the digital operand.

    The C2CMultiplier component has area accounted for. The C2CMultiplierPortB component
    does not have any area!

    Parameters
    ----------
    resolution: int
        The resolution of the multiplier.
    voltage: float
        The voltage of the multiplier in volts.
    unit_capacitance: float
        The unit capacitance of the multiplier in Farads.
    a_hist: list[float]
        The histogram of the analog operand's values. This is a histogram of the values,
        assumed to be spaced between 0 and voltage, inclusive.
    b_bit_distribution: list[float]
        The distribution of the binary operand's bits. Each is a probability of a given
        bit being 1.
    tech_node: str
        The tech node of the multiplier in meters.
    """

    # Model-selection priority within the component-model registry
    # (NOTE(review): exact semantics defined by hwcomponents — confirm).
    priority = 0.5

    def __init__(
        self,
        resolution: int,
        voltage: float,
        unit_capacitance: float,
        a_hist: list[float],
        b_bit_distribution: list[float],
        tech_node: str,
    ):
        self.voltage = voltage
        self.unit_capacitance = unit_capacitance
        self.a_hist = a_hist
        self.b_bit_distribution = b_bit_distribution
        self.tech_node = tech_node

        # One unit capacitor (the "C" to ground) and one double-unit capacitor
        # (the "2C" in the series ladder) model the per-bit cells.
        self.unit_cap = Capacitor(
            capacitance=unit_capacitance,
            voltage=voltage,
            tech_node=tech_node,
        )
        self.unit2_cap = Capacitor(
            capacitance=unit_capacitance * 2,
            voltage=voltage,
            tech_node=tech_node,
        )
        self.inverter = NOTGate(tech_node=self.tech_node, cycle_period=1e-9)

        # RMS of the analog operand: treat a_hist bin indices as values, take the
        # probability-weighted root-mean-square, then rescale bin index -> volts
        # (bins assumed evenly spaced over [0, voltage], per the docstring).
        a_rms = (sum(i**2 * p for i, p in enumerate(a_hist)) / sum(a_hist)) ** 0.5
        self.a_rms = a_rms * voltage / (len(a_hist) - 1)

        if not all(0 <= p <= 1 for p in b_bit_distribution):
            raise ValueError("Bit probabilities must be between 0 and 1")
        # Probability that an average bit transitions 0->1: for an independent
        # bit with P(1)=p, P(0 then 1) = p*(1-p); averaged over all bits.
        self.b_lo2hi_probability = sum(p * (1 - p) for p in b_bit_distribution) / len(
            b_bit_distribution
        )

        # Pass gates are 2 transistors, 100F^2 each
        # NOTE(review): tech_node is annotated `str` but squared here — it is
        # presumably a numeric feature size in meters; confirm the annotation.
        control_pass_gate_area = 2 * self.tech_node**2 * 100
        cap_area = self.unit_cap.area + self.unit2_cap.area
        inverter_area = self.inverter.area

        # Assume pass gates don't leak
        inverter_leak = self.inverter.leak_power
        cap_leak = self.unit_cap.leak_power + self.unit2_cap.leak_power

        super().__init__(
            area=cap_area + inverter_area + control_pass_gate_area,
            leak_power=cap_leak + inverter_leak,
        )

        # Register linear scaling of area/energy/leakage with resolution against
        # a reference resolution of 1. Note this overwrites the raw `resolution`
        # argument with the value returned by scale().
        self.resolution: float = self.scale(
            "resolution",
            resolution,
            1,
            area_scale_function=linear,
            energy_scale_function=linear,
            latency_scale_function=None,
            leak_power_scale_function=linear,
        )

    @action
    def switch_a(self):
        """
        Charge all capacitors to the values in a_hist.
        """
        # Count energy by just charging one of the capacitors and multiplying by the
        # number of bits.
        # NOTE(review): `+=` presumably operates on an EnergyLatency-like object
        # returned by switch() (a plain tuple would concatenate and break the
        # unpacking below) — confirm against the Capacitor model.
        energy_latency = self.unit_cap.switch(self.a_hist)
        energy_latency += self.unit2_cap.switch(self.a_hist)

        # The reference node sees a cap of unit_capacitance * 1.67 / resolution per bit
        # on average assuming a uniform-ish distribution of bits
        energy, latency = energy_latency
        energy *= 1.67 / self.resolution

        return energy, latency

    @action
    def switch_b(self):
        """
        Connect capacitors to A with probability b_lo2hi_probability.
        """
        # Raise both per-bit capacitors to the RMS analog voltage, then weight
        # by how often a bit actually transitions 0->1.
        energy_latency = self.unit_cap.raise_voltage_to(self.a_rms)
        energy_latency += self.unit2_cap.raise_voltage_to(self.a_rms)
        energy, latency = energy_latency
        energy *= self.b_lo2hi_probability
        return energy, latency

    @action
    def read(self):
        """
        Returns the energy and latency to send a value through the multiplier's analog
        port. If you are only using the read() action, then also initialize a
        C2CMultiplierPortB to have it process the digital operand with the read()
        action.
        """
        return self.switch_a()
152
+
153
+
154
class C2CMultiplierPortB(C2CMultiplier):
    """
    Digital-operand (B) port companion to C2CMultiplier.

    All constructor parameters are forwarded unchanged to C2CMultiplier so both
    ports share the same electrical model. The only differences are that this
    component contributes zero area (the paired C2CMultiplier accounts for all
    of it) and that read() charges through the B port rather than the A port.
    """

    def __init__(
        self,
        resolution: int,
        voltage: float,
        unit_capacitance: float,
        a_hist: list[float],
        b_bit_distribution: list[float],
        tech_node: str,
    ):
        # Forward every parameter to the A-port model unchanged.
        shared = dict(
            resolution=resolution,
            voltage=voltage,
            unit_capacitance=unit_capacitance,
            a_hist=a_hist,
            b_bit_distribution=b_bit_distribution,
            tech_node=tech_node,
        )
        super().__init__(**shared)
        # Area is reported by the paired C2CMultiplier; report none here.
        self.area_scale = 0

    @action
    def read(self):
        """
        Returns the energy and latency to send a value through the multiplier's digital
        port. If you are only using the read() action, then also initialize a
        C2CMultiplier to have it process the analog operand with the read() action.
        """
        return self.switch_b()