accelforge 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- accelforge/__init__.py +21 -0
- accelforge/_accelerated_imports.py +16 -0
- accelforge/_deprecate/_simanneal/evalmapping.py +271 -0
- accelforge/_deprecate/_simanneal/mapspaceglobals.py +298 -0
- accelforge/_deprecate/_simanneal/simanneal.py +666 -0
- accelforge/_deprecate/_simanneal/tracking.py +105 -0
- accelforge/_deprecate/_simanneal/wrappers.py +218 -0
- accelforge/_deprecate/_simanneal2/__init__.py +7 -0
- accelforge/_deprecate/_simanneal2/simanneal.py +493 -0
- accelforge/_deprecate/_simanneal2/tracking.py +116 -0
- accelforge/_deprecate/compatibility_util.py +181 -0
- accelforge/_deprecate/layerdeduplication/__init__.py +2 -0
- accelforge/_deprecate/layerdeduplication/group_similar_einsums.py +160 -0
- accelforge/_deprecate/layerdeduplication/grouped_einsums.py +84 -0
- accelforge/_deprecate/mapping_filter_tags/__init__.py +2 -0
- accelforge/_deprecate/mapping_filter_tags/ffmt.py +212 -0
- accelforge/_deprecate/mapping_filter_tags/onesplit.py +24 -0
- accelforge/_deprecate/mapping_filter_tags/util.py +24 -0
- accelforge/_deprecate/tags.py +69 -0
- accelforge/_deprecate/viz/__init__.py +0 -0
- accelforge/_deprecate/viz/interactive.py +159 -0
- accelforge/_deprecate/viz/reservationtree.py +307 -0
- accelforge/_deprecate/viz/ski_slope.py +88 -0
- accelforge/_version.py +15 -0
- accelforge/examples.py +39 -0
- accelforge/frontend/__init__.py +10 -0
- accelforge/frontend/_binding.py +129 -0
- accelforge/frontend/_workload_isl/__init__.py +2 -0
- accelforge/frontend/_workload_isl/_isl.py +149 -0
- accelforge/frontend/_workload_isl/_symbolic.py +141 -0
- accelforge/frontend/arch copy.py +1544 -0
- accelforge/frontend/arch.py +1642 -0
- accelforge/frontend/config.py +63 -0
- accelforge/frontend/mapper/__init__.py +5 -0
- accelforge/frontend/mapper/ffm.py +126 -0
- accelforge/frontend/mapper/mapper.py +7 -0
- accelforge/frontend/mapper/metrics.py +30 -0
- accelforge/frontend/mapping/__init__.py +1 -0
- accelforge/frontend/mapping/mapping.py +1736 -0
- accelforge/frontend/model.py +14 -0
- accelforge/frontend/renames.py +150 -0
- accelforge/frontend/spec copy.py +230 -0
- accelforge/frontend/spec.py +301 -0
- accelforge/frontend/variables.py +12 -0
- accelforge/frontend/workload.py +952 -0
- accelforge/mapper/FFM/__init__.py +9 -0
- accelforge/mapper/FFM/_join_pmappings/__init__.py +0 -0
- accelforge/mapper/FFM/_join_pmappings/compatibility.py +653 -0
- accelforge/mapper/FFM/_join_pmappings/compress_pmappings.py +140 -0
- accelforge/mapper/FFM/_join_pmappings/join_pmappings.py +703 -0
- accelforge/mapper/FFM/_join_pmappings/pmapping_dataframe.py +901 -0
- accelforge/mapper/FFM/_join_pmappings/pmapping_group.py +337 -0
- accelforge/mapper/FFM/_make_pmappings/contraints/__init__.py +0 -0
- accelforge/mapper/FFM/_make_pmappings/contraints/constraints.py +360 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/__init__.py +1 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_loops.py +373 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_pmapping_templates.py +463 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_reservations.py +95 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storage_order.py +382 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmapping_templates/make_storages.py +155 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings.py +411 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/__init__.py +1 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_pmappings_from_templates.py +407 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/make_tile_shapes.py +1681 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/run_model.py +170 -0
- accelforge/mapper/FFM/_make_pmappings/make_pmappings_from_templates/symbol_relations.py +174 -0
- accelforge/mapper/FFM/_make_pmappings/pmapper_job.py +282 -0
- accelforge/mapper/FFM/_pareto_df/df_convention.py +273 -0
- accelforge/mapper/FFM/_pareto_df/pareto copy.py +836 -0
- accelforge/mapper/FFM/_pareto_df/pareto.py +508 -0
- accelforge/mapper/FFM/data.py +61 -0
- accelforge/mapper/FFM/main copy.py +236 -0
- accelforge/mapper/FFM/main.py +208 -0
- accelforge/mapper/FFM/mappings.py +510 -0
- accelforge/mapper/FFM/pmappings.py +310 -0
- accelforge/mapper/__init__.py +4 -0
- accelforge/mapper.py +0 -0
- accelforge/model/__init__.py +1 -0
- accelforge/model/_looptree/__init__.py +0 -0
- accelforge/model/_looptree/accesses.py +335 -0
- accelforge/model/_looptree/capacity/__init__.py +1 -0
- accelforge/model/_looptree/capacity/aggregators.py +36 -0
- accelforge/model/_looptree/capacity/capacity.py +47 -0
- accelforge/model/_looptree/energy.py +150 -0
- accelforge/model/_looptree/equivalent_ranks.py +29 -0
- accelforge/model/_looptree/latency/__init__.py +1 -0
- accelforge/model/_looptree/latency/latency.py +98 -0
- accelforge/model/_looptree/latency/memory.py +120 -0
- accelforge/model/_looptree/latency/processors.py +92 -0
- accelforge/model/_looptree/mapping_utilities.py +71 -0
- accelforge/model/_looptree/reuse/__init__.py +4 -0
- accelforge/model/_looptree/reuse/isl/__init__.py +1 -0
- accelforge/model/_looptree/reuse/isl/des.py +59 -0
- accelforge/model/_looptree/reuse/isl/isl_functions.py +374 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/__init__.py +4 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/analyze_mapping.py +297 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/skews_from_mapping.py +236 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/tiling.py +685 -0
- accelforge/model/_looptree/reuse/isl/mapping_to_isl/types.py +188 -0
- accelforge/model/_looptree/reuse/isl/spatial.py +260 -0
- accelforge/model/_looptree/reuse/isl/temporal.py +182 -0
- accelforge/model/_looptree/reuse/symbolic/__init__.py +1 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic copy 2.py +1346 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic copy.py +1408 -0
- accelforge/model/_looptree/reuse/symbolic/symbolic.py +1396 -0
- accelforge/model/_looptree/run.py +122 -0
- accelforge/model/_looptree/types.py +26 -0
- accelforge/model/_looptree/visualization/__init__.py +0 -0
- accelforge/model/_looptree/visualization/occupancy.py +11 -0
- accelforge/model/main.py +222 -0
- accelforge/plotting/__init__.py +2 -0
- accelforge/plotting/mappings.py +219 -0
- accelforge/plotting/specs.py +57 -0
- accelforge/util/__init__.py +4 -0
- accelforge/util/_base_analysis_types.py +24 -0
- accelforge/util/_basetypes.py +1089 -0
- accelforge/util/_frozenset.py +36 -0
- accelforge/util/_isl.py +29 -0
- accelforge/util/_itertools.py +14 -0
- accelforge/util/_mathfuncs.py +57 -0
- accelforge/util/_parse_expressions.py +339 -0
- accelforge/util/_picklecache.py +32 -0
- accelforge/util/_setexpressions.py +268 -0
- accelforge/util/_sympy/__init__.py +0 -0
- accelforge/util/_sympy/broadcast_max.py +18 -0
- accelforge/util/_visualization.py +112 -0
- accelforge/util/_yaml.py +579 -0
- accelforge/util/parallel.py +193 -0
- accelforge-0.0.1.dist-info/METADATA +64 -0
- accelforge-0.0.1.dist-info/RECORD +258 -0
- accelforge-0.0.1.dist-info/WHEEL +5 -0
- accelforge-0.0.1.dist-info/licenses/LICENSE +19 -0
- accelforge-0.0.1.dist-info/top_level.txt +5 -0
- docs/_build/html/_sources/fastfusion.frontend.mapper.rst.txt +37 -0
- docs/_build/html/_sources/fastfusion.frontend.rst.txt +70 -0
- docs/_build/html/_sources/fastfusion.frontend.workload.rst.txt +21 -0
- docs/_build/html/_sources/fastfusion.mapper.FFM.rst.txt +37 -0
- docs/_build/html/_sources/fastfusion.mapper.rst.txt +18 -0
- docs/_build/html/_sources/fastfusion.rst.txt +20 -0
- docs/_build/html/_sources/fastfusion.util.rst.txt +21 -0
- docs/_build/html/_sources/index.rst.txt +87 -0
- docs/_build/html/_sources/modules.rst.txt +7 -0
- docs/_build/html/_sources/notes/citation.rst.txt +45 -0
- docs/_build/html/_sources/notes/definitions.rst.txt +43 -0
- docs/_build/html/_sources/notes/faqs.rst.txt +39 -0
- docs/_build/html/_sources/notes/modeling/accelerator_energy_latency.rst.txt +72 -0
- docs/_build/html/_sources/notes/modeling/component_energy_area.rst.txt +96 -0
- docs/_build/html/_sources/notes/modeling/mapping.rst.txt +100 -0
- docs/_build/html/_sources/notes/modeling.rst.txt +33 -0
- docs/_build/html/_sources/notes/parsing/arithmetic_parsing.rst.txt +136 -0
- docs/_build/html/_sources/notes/parsing/setexpressions.rst.txt +63 -0
- docs/_build/html/_sources/notes/parsing/yaml_parsing.rst.txt +176 -0
- docs/_build/html/_sources/notes/quickstart_and_installation.rst.txt +9 -0
- docs/_build/html/_sources/notes/spec/architecture.rst.txt +133 -0
- docs/_build/html/_sources/notes/spec/mapping.rst.txt +12 -0
- docs/_build/html/_sources/notes/spec/workload.rst.txt +83 -0
- docs/_build/html/_sources/notes/spec.rst.txt +36 -0
- docs/source/_ext/include_attrs.py +213 -0
- docs/source/_ext/include_docstring.py +364 -0
- docs/source/_ext/include_functions.py +154 -0
- docs/source/_ext/include_notebook.py +131 -0
- docs/source/_ext/include_yaml.py +119 -0
- docs/source/_ext/inherited_attributes.py +222 -0
- docs/source/_ext/paths.py +4 -0
- docs/source/conf.py +79 -0
- examples/arches/compute_in_memory/_include.yaml +74 -0
- examples/arches/compute_in_memory/_include_functions.py +229 -0
- examples/arches/compute_in_memory/_load_spec.py +57 -0
- examples/arches/compute_in_memory/components/c2c_multiplier.py +181 -0
- examples/arches/compute_in_memory/components/dac_c2c_r2r.py +605 -0
- examples/arches/compute_in_memory/components/misc.py +195 -0
- examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
- examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
- examples/arches/compute_in_memory/isaac.yaml +233 -0
- examples/arches/compute_in_memory/memory_cells/ecram_demo.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_example.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_isaac_isca_2016.yaml +64 -0
- examples/arches/compute_in_memory/memory_cells/rram_neurosim_default.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/rram_raella_isca_2023.yaml +70 -0
- examples/arches/compute_in_memory/memory_cells/rram_wan_nature_2022.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_colonnade_jssc_2021.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_example.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_jia_jssc_2020.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_sinangil_jssc_2021.yaml +63 -0
- examples/arches/compute_in_memory/memory_cells/sram_wang_vlsi_2022.yaml +63 -0
- examples/arches/compute_in_memory/wang_vlsi_2022.yaml +289 -0
- examples/arches/eyeriss.yaml +68 -0
- examples/arches/fanout_variations/at_glb.yaml +31 -0
- examples/arches/fanout_variations/at_glb_with_fanout_node.yaml +34 -0
- examples/arches/fanout_variations/at_mac.yaml +31 -0
- examples/arches/fanout_variations/at_mac_with_constraints.yaml +38 -0
- examples/arches/fanout_variations/at_mac_with_fanout_node.yaml +34 -0
- examples/arches/nvdla.yaml +47 -0
- examples/arches/simple.yaml +28 -0
- examples/arches/tpu_v4i.yaml +67 -0
- examples/mappings/unfused_matmuls_to_simple.yaml +33 -0
- examples/misc/component_annotated.yaml +33 -0
- examples/workloads/gpt3_6.7B.yaml +124 -0
- examples/workloads/matmuls.yaml +20 -0
- examples/workloads/mobilenet_28.yaml +81 -0
- examples/workloads/mobilenet_various_separate.yaml +106 -0
- examples/workloads/three_matmuls_annotated.yaml +59 -0
- notebooks/.ipynb_checkpoints/fastfusion_arch_study_michael-checkpoint.ipynb +359 -0
- notebooks/compute_in_memory/_scripts.py +339 -0
- notebooks/compute_in_memory/isaac.guide.ipynb +270 -0
- notebooks/compute_in_memory/wang_vlsi_2022.ipynb +602 -0
- notebooks/paths.py +4 -0
- notebooks/tutorials/.ipynb_checkpoints/1_FFM-checkpoint.ipynb +3110 -0
- notebooks/tutorials/FFM.ipynb +3498 -0
- notebooks/tutorials/_include.py +48 -0
- notebooks/tutorials/component_energy_area.ipynb +363 -0
- tests/Q_mapping.yaml +38 -0
- tests/__init__.py +0 -0
- tests/conv.mapping.yaml +27 -0
- tests/conv.workload.yaml +13 -0
- tests/conv_sym.mapping.yaml +43 -0
- tests/copy.mapping.yaml +35 -0
- tests/copy.workload.yaml +15 -0
- tests/distribuffers/__init__.py +0 -0
- tests/distribuffers/multicast/test_cases.yaml +482 -0
- tests/distribuffers/spec/binding/valid_bindings.yaml +97 -0
- tests/distribuffers/spec/distributed.yaml +100 -0
- tests/distribuffers/spec/logical_arch.yaml +32 -0
- tests/distribuffers/spec/physical_arch.yaml +69 -0
- tests/distribuffers/test_binding.py +48 -0
- tests/frontend/__init__.py +0 -0
- tests/frontend/test_mapping_viz.py +52 -0
- tests/mapper/__init__.py +0 -0
- tests/mapper/configs/conv1d/conv1d.mapping.yaml +31 -0
- tests/mapper/configs/conv1d/conv1d.workload.yaml +11 -0
- tests/mapper/configs/two_conv1d/two_conv1d.expected.yaml +38 -0
- tests/mapper/configs/two_conv1d/two_conv1d.mapping.yaml +54 -0
- tests/mapper/configs/two_conv1d/two_conv1d.workload.yaml +19 -0
- tests/mapper/test_mapping_to_isl.py +90 -0
- tests/mapper/test_spatial_reuse_analysis.py +67 -0
- tests/mapper/test_temporal_reuse_analysis.py +56 -0
- tests/mapper/util.py +58 -0
- tests/matmul.mapping.yaml +29 -0
- tests/matmul.workload.yaml +12 -0
- tests/matmul_spatial.mapping.yaml +44 -0
- tests/mha.renames.yaml +65 -0
- tests/mha.workload.yaml +67 -0
- tests/mha.yaml +59 -0
- tests/mha_full.workload.yaml +67 -0
- tests/mobilenet.workload.yaml +35 -0
- tests/mobilenet_long.workload.yaml +64 -0
- tests/pmappingcache.py +24 -0
- tests/processing_stage.arch.yaml +40 -0
- tests/snowcat.arch.yaml +36 -0
- tests/test_ffm_join_pmappings.py +106 -0
- tests/test_ffm_make_pmappings.py +82 -0
- tests/test_ffm_make_tile_shapes.py +49 -0
- tests/test_mapper.py +100 -0
- tests/test_model.py +37 -0
- tests/test_plotting.py +72 -0
- tests/test_processing_stage.py +46 -0
- tests/test_symbolic_model.py +248 -0
- tests/test_workload.py +141 -0
examples/arches/compute_in_memory/components/misc.py +195 -0
@@ -0,0 +1,195 @@
+from numbers import Number
+from typing import Optional, List
+from hwcomponents.scaling import tech_node_area
+from util.bit_functions import rescale_sum_to_1
+from hwcomponents import ComponentModel, action
+
+
+class Capacitor(ComponentModel):
+    """
+    A capacitor.
+
+    Parameters
+    ----------
+    capacitance: float
+        The capacitance of this capacitor in Farads.
+    tech_node: float
+        The tech node in meters.
+    voltage: float
+        The supply voltage in volts.
+    cap_per_m2: float
+        The capacitance per square meter in Farads per square meter.
+    border_area: float
+        The border area around the capacitor in square meters.
+    voltage_raise_threshold: float
+        Latency is calculated as the time it takes to raise voltage to this
+        proportion of the target voltage.
+    supply_resistance: float
+        The supply resistance in ohms. If 0, then voltage is assumed to converge
+        instantly.
+
+    Attributes
+    ----------
+    capacitance: float
+        The capacitance of this capacitor in Farads.
+    tech_node: float
+        The tech node in meters.
+    voltage: float
+        The supply voltage in volts.
+    cap_per_m2: float
+        The capacitance per square meter in Farads per square meter.
+    border_area: float
+        The border area around the capacitor in square meters.
+    voltage_raise_threshold: float
+        Latency is calculated as the time it takes to raise voltage to this
+        proportion of the target voltage.
+    supply_resistance: float
+        The supply resistance in ohms. If 0, then voltage is assumed to converge
+        instantly.
+    """
+
+    priority = 0.5
+    """
+    Priority determines which model is used when multiple models are available for a
+    given component. Higher-priority models are used first. Must be a number between
+    0 and 1.
+    """
+
+    def __init__(
+        self,
+        capacitance: Number,
+        tech_node: float,
+        voltage: Number = 0.7,
+        cap_per_m2: Optional[Number] = "1e-3 scaled by tech node",
+        border_area: Optional[Number] = "1e-12 scaled by tech node",
+    ):
+        self.capacitance = capacitance
+        self.voltage = voltage
+
+        if cap_per_m2 == "1e-3 scaled by tech node":
+            cap_per_m2 = 2.3e-3 * tech_node_area(tech_node, 22e-9)
+        if border_area == "1e-12 scaled by tech node":
+            border_area = 1e-12 * tech_node_area(tech_node, 22e-9)
+
+        self.cap_per_m2 = cap_per_m2
+        self.border_area = border_area
+
+        super().__init__(
+            area=self.capacitance / self.cap_per_m2 + self.border_area, leak_power=0
+        )
+
+    @action
+    def raise_voltage_to(
+        self,
+        target_voltage: float,
+        supply_voltage: float = None,
+    ) -> tuple:
+        """
+        Raise the voltage to the target voltage using the supply voltage as a supply.
+
+        Parameters
+        ----------
+        target_voltage: float
+            The target voltage to raise the voltage to.
+        supply_voltage: float
+            The supply voltage to use as a supply. If None, then the supply voltage
+            is assumed to be the voltage set in the attributes of this capacitor.
+
+        Returns
+        -------
+        energy, latency: tuple
+            The energy required to raise the voltage to the target voltage. Latency
+            is 0.
+        """
+        if supply_voltage is None:
+            supply_voltage = self.voltage
+        assert target_voltage <= supply_voltage, (
+            f"Cannot raise voltage to {target_voltage} when supply voltage "
+            f"is {supply_voltage}."
+        )
+        return self.capacitance * target_voltage * supply_voltage, 0
+
+    @action
+    def switch(
+        self,
+        value_probabilities: List[Number],
+        zero_between_values: bool = True,
+        supply_voltage: float = None,
+    ) -> tuple:
+        """
+        Calculates the expected energy to switch voltage to the values in
+        value_probabilities.
+
+        Parameters
+        ----------
+        value_probabilities: List[Number]
+            The probabilities of the values to switch to. This is a histogram,
+            assumed to be spaced between 0 and supply_voltage, inclusive.
+        zero_between_values: bool
+            Whether to zero the voltage between values.
+        supply_voltage: float
+            The supply voltage to use as a supply. If None, then the supply voltage
+            is assumed to be the voltage set in the attributes of this capacitor.
+
+        Returns
+        -------
+        energy, latency: tuple
+            The energy required to switch the voltage to the values in
+            value_probabilities. Latency is 0.
+        """
+        supply_voltage = self.voltage if supply_voltage is None else supply_voltage
+        expected_energy = 0
+        value_probabilities = rescale_sum_to_1(value_probabilities)
+        for v0, p0 in enumerate(value_probabilities):
+            for v1, p1 in enumerate(value_probabilities):
+                v0 = 0 if zero_between_values else v0
+                if v1 < v0:
+                    continue
+                e0 = self.raise_voltage_to(
+                    v0 / (len(value_probabilities) - 1) * self.voltage, supply_voltage
+                )[0]
+                e1 = self.raise_voltage_to(
+                    v1 / (len(value_probabilities) - 1) * self.voltage, supply_voltage
+                )[0]
+                expected_energy += (e1 - e0) * p0 * p1
+        return expected_energy, 0
+
+
+class Wire(Capacitor):
+    """
+    A wire.
+
+    Parameters
+    ----------
+    length: Number
+        The length of the wire in meters.
+    capacitance_per_m: Number
+        The capacitance per meter in Farads per meter.
+    voltage: Number
+        The supply voltage of the wire in volts.
+
+    Attributes
+    ----------
+    length: Number
+        The length of the wire in meters.
+    capacitance_per_m: Number
+        The capacitance per meter in Farads per meter.
+    voltage: Number
+        The supply voltage of the wire in volts.
+    """
+
+    def __init__(
+        self,
+        length: Number,
+        capacitance_per_m: Number = 2e-10,
+        voltage: Number = 0.7,
+        **kwargs,
+    ):
+        super().__init__(
+            capacitance=length * capacitance_per_m,
+            voltage=voltage,
+            **kwargs,  # Forward tech_node (required by Capacitor) and friends.
+        )
+        self.length = length
+        self.capacitance_per_m = capacitance_per_m
+        self.voltage = voltage
+        self.area_scale = 0
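Note: the switch() action above reduces to E = C * V_target * V_supply per voltage raise, averaged over the value histogram. A minimal standalone sketch of that arithmetic (independent of the hwcomponents base class; the 1 fF capacitance and uniform histogram are made-up illustration values):

# Standalone sketch of Capacitor.switch() with zero_between_values=True,
# where every transition starts from 0 V, so the expected energy is just
# the probability-weighted cost of raising to each level.
C = 1e-15                          # hypothetical capacitance: 1 fF
V = 0.7                            # supply voltage (the class default)

def raise_energy(v_target, v_supply=V):
    return C * v_target * v_supply  # mirrors Capacitor.raise_voltage_to

probs = [0.25, 0.25, 0.25, 0.25]   # uniform histogram over 4 voltage levels
expected = sum(
    p * raise_energy(v / (len(probs) - 1) * V) for v, p in enumerate(probs)
)
print(f"{expected:.2e} J")         # ~2.45e-16 J with these numbers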
examples/arches/compute_in_memory/components/util/bit_functions.py +51 -0
@@ -0,0 +1,51 @@
+import math
+from typing import List
+
+
+def rescale_sum_to_1(array: List[float], do_not_change_index: int = -1) -> List[float]:
+    """Rescales all list elements such that the sum is 1."""
+    sum_array = sum(a for i, a in enumerate(array) if i != do_not_change_index)
+    target_sum = 1 - array[do_not_change_index] if do_not_change_index >= 0 else 1
+    scaleby = target_sum / sum_array
+    return [a * scaleby if i != do_not_change_index else a for i, a in enumerate(array)]
+
+
+def set_element_rescale_sum_to_1(array: List[float], index: int, value: float):
+    """
+    Sets an element of a list, then rescales all list elements such that the sum is 1.
+    """
+    array[index] = value
+    return rescale_sum_to_1(array, index)
+
+
+def value2bits(value: int, resolution: int) -> List[int]:
+    """Converts a value to a list of bits (MSB first)."""
+    return [int(i) for i in bin(value)[2:].zfill(resolution)]
+
+
+def bit_distribution_2_hist(
+    bit_distribution: List[float], zero_prob: float = None
+) -> List[float]:
+    """Converts a bit distribution to a value distribution."""
+    hist = [1] * 2 ** len(bit_distribution)
+    for value in range(2 ** len(bit_distribution)):
+        bits = value2bits(value, len(bit_distribution))
+        for i, prob in enumerate(bit_distribution):
+            hist[value] *= prob if bits[i] else 1 - prob
+
+    if zero_prob is not None:
+        # Keep the rescaled list: the helper returns a new list rather than
+        # rescaling in place.
+        hist = set_element_rescale_sum_to_1(hist, 0, zero_prob)
+    return rescale_sum_to_1(hist)
+
+
+def hist_2_bit_distribution(hist: List[float]) -> List[float]:
+    """Converts a value distribution to a bit distribution."""
+    sum_hist = sum(hist)
+    hist = [i / sum_hist for i in hist]
+
+    bit_distribution = [0] * math.ceil(math.log(len(hist), 2))
+    for value in range(len(hist)):
+        for i, bit in enumerate(value2bits(value, len(bit_distribution))):
+            bit_distribution[i] += hist[value] * bit
+
+    return bit_distribution
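Note: a quick round-trip check of these helpers (a sketch; assumes the module is importable under the path used in misc.py above). value2bits is MSB-first, so bit_distribution[0] is the probability of the most significant bit:

from util.bit_functions import bit_distribution_2_hist, hist_2_bit_distribution

bits = [0.5, 0.25]                    # P(bit == 1), MSB first
hist = bit_distribution_2_hist(bits)  # [0.375, 0.125, 0.375, 0.125]
assert abs(sum(hist) - 1.0) < 1e-9
recovered = hist_2_bit_distribution(hist)
assert all(abs(a - b) < 1e-9 for a, b in zip(recovered, bits))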
examples/arches/compute_in_memory/components/zero_comparator.py +92 -0
@@ -0,0 +1,92 @@
+from math import ceil, log2
+from hwcomponents import ComponentModel, action
+from hwcomponents.scaling import linear, quadratic, reciprocal
+from hwcomponents_library.library.aladdin import AladdinComparator, AladdinCounter
+from hwcomponents_neurosim import FlipFlop
+
+
+class ZeroComparator(ComponentModel):
+    """
+    Counts the number of zeros in a list of values. Includes a flag for each zero.
+
+    Based on the zero gating logic in the paper: A Programmable Heterogeneous
+    Microprocessor Based on Bit-Scalable In-Memory Computing, by Hongyang Jia,
+    Hossein Valavi, Yinqi Tang, Jintao Zhang, and Naveen Verma, JSSC 2020,
+    10.1109/JSSC.2020.2987714.
+
+    Parameters
+    ----------
+    n_comparators: int
+        The number of comparators to include.
+    n_bits: int
+        The number of bits of each comparator.
+    tech_node: str
+        The technology node of the comparators.
+    voltage: float
+        The voltage of the comparators.
+    """
+
+    priority = 0.5
+
+    def __init__(
+        self,
+        n_comparators: int,
+        n_bits: int,
+        tech_node: str,
+        voltage: float = 0.85,
+    ):
+        self.n_comparators = n_comparators
+        self.n_bits = n_bits
+
+        # Scale up the comparator to handle all the comparators
+        self.comparator = AladdinComparator(
+            n_bits=n_bits,
+            tech_node=tech_node,
+        )
+        self.comparator.energy_scale *= n_comparators
+        self.comparator.area_scale *= n_comparators
+
+        # Flip flops are used one bit at a time, so we only make one bit and scale
+        # the energy and latency
+        self.flip_flop = FlipFlop(
+            n_bits=1,
+            tech_node=tech_node,
+        )
+        self.flip_flop.energy_scale *= n_bits
+        self.flip_flop.latency_scale *= n_bits
+
+        # Zero counter is shared between all the comparators, so scale the energy
+        # and latency to activate with each one
+        self.zeros_counter = AladdinCounter(
+            n_bits=ceil(log2(n_comparators)),
+            tech_node=tech_node,
+        )
+        self.zeros_counter.energy_scale *= n_comparators
+        self.zeros_counter.latency_scale *= n_comparators
+
+        super().__init__(
+            subcomponents=[
+                self.comparator,
+                self.flip_flop,
+                self.zeros_counter,
+            ],
+        )
+
+        for subcomponent in self.subcomponents:
+            subcomponent.scale(
+                "voltage",
+                voltage,
+                0.85,
+                area_scale_function=linear,
+                energy_scale_function=quadratic,
+                latency_scale_function=reciprocal,
+                leak_power_scale_function=linear,
+            )
+            subcomponent.leak_power_scale *= 0.02  # Low-leakage technology
+
+    @action
+    def read(self) -> tuple[float, float]:
+        self.comparator.read()
+        self.flip_flop.read()
+        self.zeros_counter.read()
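Note: the scale("voltage", ...) call at the end rescales each subcomponent from its 0.85 V reference to the requested voltage. The exact hwcomponents.scaling API is not shown in this diff, but the named functions suggest semantics along these lines (a sketch under that assumption: each function maps the new and reference operating points to a multiplicative factor):

# Hypothetical restatement of the scaling rules named above.
def linear(new, ref):      return new / ref         # area, leakage
def quadratic(new, ref):   return (new / ref) ** 2  # dynamic energy ~ C*V^2
def reciprocal(new, ref):  return ref / new         # latency rises as V drops

v, v_ref = 0.7, 0.85   # scale from the 0.85 V reference down to 0.7 V
print(quadratic(v, v_ref))   # energy factor  ~0.678
print(reciprocal(v, v_ref))  # latency factor ~1.214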
examples/arches/compute_in_memory/isaac.yaml +233 -0
@@ -0,0 +1,233 @@
+# @inproceedings{10.1109/ISCA.2016.12,
+# author = {Shafiee, Ali and Nag, Anirban and Muralimanohar, Naveen and Balasubramonian, Rajeev and Strachan, John Paul and Hu, Miao and Williams, R. Stanley and Srikumar, Vivek},
+# title = {ISAAC: a convolutional neural network accelerator with in-situ analog arithmetic in crossbars},
+# year = {2016},
+# isbn = {9781467389471},
+# publisher = {IEEE Press},
+# url = {https://doi.org/10.1109/ISCA.2016.12},
+# doi = {10.1109/ISCA.2016.12},
+# abstract = {A number of recent efforts have attempted to design accelerators for popular machine learning algorithms, such as those involving convolutional and deep neural networks (CNNs and DNNs). These algorithms typically involve a large number of multiply-accumulate (dot-product) operations. A recent project, DaDianNao, adopts a near data processing approach, where a specialized neural functional unit performs all the digital arithmetic operations and receives input weights from adjacent eDRAM banks. This work explores an in-situ processing approach, where memristor crossbar arrays not only store input weights, but are also used to perform dot-product operations in an analog manner. While the use of crossbar memory as an analog dot-product engine is well known, no prior work has designed or characterized a full-fledged accelerator based on crossbars. In particular, our work makes the following contributions: (i) We design a pipelined architecture, with some crossbars dedicated for each neural network layer, and eDRAM buffers that aggregate data between pipeline stages. (ii) We define new data encoding techniques that are amenable to analog computations and that can reduce the high overheads of analog-to-digital conversion (ADC). (iii) We define the many supporting digital components required in an analog CNN accelerator and carry out a design space exploration to identify the best balance of memristor storage/compute, ADCs, and eDRAM storage on a chip. On a suite of CNN and DNN workloads, the proposed ISAAC architecture yields improvements of 14.8\texttimes{}, 5.5\texttimes{}, and 7.5\texttimes{} in throughput, energy, and computational density (respectively), relative to the state-of-the-art DaDianNao architecture.},
+# booktitle = {Proceedings of the 43rd International Symposium on Computer Architecture},
+# pages = {14–26},
+# numpages = {13},
+# keywords = {CNN, DNN, accelerator, analog, memristor, neural},
+# location = {Seoul, Republic of Korea},
+# series = {ISCA '16}
+# }
+
+{{include_text('_include.yaml')}}
+{{add_to_path('./memory_cells')}}
+
+arch:
+  arch_globals_dependent_on_workload:
+    <<: *variables_global
+    # ===========================================================================
+    # Encoding-dependent parameters
+    # ===========================================================================
+    encoded_input_bits: input_bits
+    encoded_weight_bits: weight_bits
+    encoded_output_bits: output_bits
+
+    input_encoding_func: offset_encode_hist
+    weight_encoding_func: offset_encode_hist
+
+    # For the accuracy model. Can in-array accumulation include signed values?
+    # Signed accumulation is not compatible with offset encoding (since offset
+    # encoding makes values non-negative).
+    signed_sum_across_inputs: False
+    signed_sum_across_weights: False
+
+    # ===========================================================================
+    # Architecture & CiM Array Structure
+    # ===========================================================================
+    # DEFINITIONS:
+    # - Cell: Smallest structure capable of storing memory. Note that a cell may
+    #   store more than one bit. For example, a cell consisting of an RRAM
+    #   device may store >1 bits, while a cell consisting of an SRAM
+    #   bitcell may store only 1 bit.
+    # - CiM Unit: Smallest structure capable of computing an analog MAC.
+    # - CiM Unit Width Cells:
+    #   Number of CiM unit cells that are accessed as one. These cells receive
+    #   one analog input and compute one analog MAC per timestep.
+    # - CiM Unit Depth Cells:
+    #   Number of independent groups of "CiM Unit Width" cells that form a CiM
+    #   unit. Each of these groups is independently addressable and must be
+    #   activated in a different timestep than the others.
+    cim_unit_width_cells: 1
+    cim_unit_depth_cells: 1
+    bits_per_cell: 2
+
+    # ===========================================================================
+    # Data Converters
+    # ===========================================================================
+    adc_resolution: 8
+    voltage_dac_resolution: 1
+    temporal_dac_resolution: 1
+
+    n_adc_per_bank: 1
+
+    # ===========================================================================
+    # Hardware
+    # ===========================================================================
+    cycle_period: 1e-9
+    read_pulse_width: 1e-9
+
+  extra_attributes_for_all_component_models:
+    <<: *cim_component_attributes
+    tech_node: tech_node
+    cycle_period: cycle_period
+
+  nodes:
+  - !Memory # Input buffer
+    name: InputBuffer
+    tensors: {keep: input}
+    size: MultiArrayFanout.get_fanout() * array_parallel_inputs * supported_input_bits
+    component_class: SmartBufferSRAM
+
+  - !Memory # Output buffer
+    name: OutputBuffer
+    tensors: {keep: output}
+    size: MultiArrayFanout.get_fanout() * array_parallel_outputs // min_weight_slices * supported_output_bits * 2
+    component_class: SmartBufferSRAM
+
+  - !ProcessingStage # Shift+add sums outputs from multiple slices
+    name: ShiftAdd
+    tensors: {keep: output}
+    direction: up
+    n_parallel_instances: MultiArrayFanout.get_fanout() # Match throughput with arrays
+    bits_per_action: output_bits / n_sliced_psums # n_sliced_psums reads to get an output
+    component_class: ISAACShiftAdd
+    extra_attributes_for_component_model:
+      n_bits: supported_output_bits
+      shift_register_n_bits: supported_output_bits * 2
+
+  - !Fanout # array: Independent array with memory elements and peripherals.
+    name: MultiArrayFanout
+    spatial:
+    - name: array
+      fanout: 8
+
+  - !ProcessingStage # ADC
+    name: ADC
+    tensors: {keep: output}
+    direction: up
+    bits_per_action: output_bits / n_sliced_psums # n_sliced_psums reads to get an output
+    component_class: ADC
+    energy_scale: adc_energy_scale
+    area_scale: adc_area_scale
+    extra_attributes_for_component_model:
+      throughput_scale: 1 / 100 # 100 cycles to process all outputs
+      throughput: 1 / cycle_period * cols_active_at_once * throughput_scale
+      n_bits: adc_resolution
+
+  - !ProcessingStage # Row drivers feed inputs onto the rows of the array
+    name: RowDrivers
+    tensors: {keep: input}
+    direction: down
+    bits_per_action: input_bits / n_input_slices # n_input_slices reads to send an input
+    component_class: ArrayRowDrivers
+
+  - !ProcessingStage # Column drivers precharge the array columns
+    name: ColumnDrivers
+    tensors: {keep: output}
+    direction: up
+    bits_per_action: output_bits / n_sliced_psums # n_sliced_psums reads to get an output
+    component_class: ArrayColumnDrivers
+
+  - !Fanout
+    name: ArrayFanout
+    spatial:
+    - name: array_reuse_input # Special name that determines array size
+      fanout: 128
+      may_reuse: input
+      reuse: input
+      min_usage: 1
+      usage_scale: n_weight_slices
+    - name: array_reuse_output # Special name that determines array size
+      fanout: 128
+      may_reuse: output
+      reuse: output
+      min_usage: 1
+
+  # This is the CiM unit that stores weights and computes MACs. Each CiM unit stores
+  # a different weight slice of up to cim_unit_width_cells bits. It may also store up
+  # to cim_unit_depth_cells independently-addressable weight slices, but may only
+  # compute MACs on one slice at a time. One of these components represents a
+  # collection of CiM units that together hold one weight.
+  - !Memory
+    name: CimUnit
+    tensors: {keep: weight, no_refetch_from_above: weight, force_memory_hierarchy_order: False}
+    size: cim_unit_width_cells * cim_unit_depth_cells * bits_per_cell * n_weight_slices
+    # Requires (n_weight_slices * n_input_slices) = n_sliced_psums reads to fully use
+    # one weight
+    bits_per_action: weight.bits_per_value / n_sliced_psums
+    # Bind together n_weight_slices instances to hold one weight
+    n_parallel_instances: n_weight_slices
+    component_class: MemoryCell
+    extra_attributes_for_component_model:
+      n_instances: cim_unit_width_cells * cim_unit_depth_cells
+
+  # We account for compute energy in the CimUnit reads
+  - !Compute
+    name: FreeCompute
+    component_class: Dummy
+    enabled: len(All) == 3
+
+# These variables pertain to the workload, microarch, and circuits. They should
+# be matched between architectures for a fair comparison. Furthermore, this
+# file should follow the same format for all architectures so that we can mix
+# and match architectures with different iso files.
+variables:
+  # ===========================================================================
+  # Workload, microarch, circuits. Things that should be matched
+  # between architectures when comparing.
+  # ===========================================================================
+  # Set by the CiM processor if these values are available in the workload.
+  # Otherwise, use the defaults here.
+  inputs_hist: [0, 0, 0, 3, 1, 0, 0]
+  weights_hist: [0, 1, 3, 4, 3, 1, 0]
+  outputs_hist: inputs_hist
+
+  ## Microarch ----------------------------------------------------------------
+  supported_input_bits: 8 # Maximum input bits supported by the arch.
+  supported_weight_bits: 8 # Maximum weight bits supported by the arch.
+  supported_output_bits: 8 # Maximum output bits supported by the arch.
+  min_supported_input_bits: 1 # Minimum input bits supported by the arch.
+  min_supported_weight_bits: 2 # Minimum weight bits supported by the arch.
+  min_supported_output_bits: 1 # Minimum output bits supported by the arch.
+
+  # Circuits ------------------------------------------------------------------
+  voltage: 1
+  tech_node: 32e-9 # meters
+  cell_config: "{{find_path('rram_isaac_isca_2016.yaml')}}"
+  voltage_energy_scale: voltage ** 2
+  voltage_latency_scale: voltage
+
+  # Calibration ---------------------------------------------------------------
+  adc_energy_scale: 1
+  adc_area_scale: 1
+  row_col_drivers_area_scale: 1
+
+
+# This workload is sized to get peak throughput & energy efficiency.
+workload:
+  rank_sizes:
+    M: 1
+    N: 16 * 8
+    K: 128
+
+  einsums:
+  - name: Matmul
+    tensor_accesses:
+    - {name: input, projection: [m, k], bits_per_value: 16}
+    - {name: weight, projection: [k, n], bits_per_value: 16}
+    - {name: output, projection: [m, n], output: True, bits_per_value: 16}
+
+  - name: Matmul2
+    tensor_accesses:
+    - {name: input2, projection: [m, k], bits_per_value: 32}
+    - {name: weight2, projection: [k, n], bits_per_value: 32}
+    - {name: output2, projection: [m, n], output: True, bits_per_value: 32}
+    renames: {input: input2, weight: weight2, output: output2}
+
+  renames: {} # Not needed for this workload
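Note: several derived variables used here (n_weight_slices, n_input_slices, n_sliced_psums) come from the included _include.yaml, which does not appear in this diff. Under the usual bit-slicing reading of CiM arrays (an assumption, not confirmed by this file), they would follow from the parameters above like this:

from math import ceil

# Assumed bit-slicing arithmetic; _include.yaml may define these differently.
supported_weight_bits = 8
supported_input_bits = 8
bits_per_cell = 2
cim_unit_width_cells = 1
voltage_dac_resolution = 1
temporal_dac_resolution = 1

# Each CiM unit slice holds bits_per_cell * cim_unit_width_cells weight bits.
n_weight_slices = ceil(
    supported_weight_bits / (bits_per_cell * cim_unit_width_cells)
)  # 4
# Inputs stream in one DAC-resolvable slice at a time.
n_input_slices = ceil(
    supported_input_bits / (voltage_dac_resolution * temporal_dac_resolution)
)  # 8
# Every (weight slice, input slice) pair yields one partial sum to shift+add,
# matching the in-file comment (n_weight_slices * n_input_slices) = n_sliced_psums.
n_sliced_psums = n_weight_slices * n_input_slices  # 32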
examples/arches/compute_in_memory/memory_cells/ (one of the 63-line SRAM cell configs; the exact file is not identifiable from this hunk) +63 -0
@@ -0,0 +1,63 @@
+# -----------------------------------------------------------------------------
+# Device type and top-level parameters
+
+# Device type
+-MemCellType: SRAM # SRAM or RRAM
+# -ReadMode: current # Comment this line for voltage
+-DeviceRoadmap -1LP 1HP 2LSTP: 2
+
+# Device size
+-CellArea (F^2): 24
+-CellAspectRatio: 1.25 # Width/Height
+
+# Calibration parameters
+-CellReadLeakEnergyMultiplier: 1
+-CellWriteEnergyMultiplier: 1
+-CellCapacitanceMultiplier: 0
+-CellCapacitanceAdjust (F): 0.174e-15 # Capacitance = (Nominal Capacitance) * Multiplier + Adjust
+
+# Used for RRAM with access transistors or SRAM
+-AccessCMOSWidth (F): 0
+
+# Affects NeuroSim-realized ADCs. NOTE: Most models use ADC plug-in ADCs and
+# this has little to no effect.
+-ReadMode: voltage # voltage or current
+
+# -----------------------------------------------------------------------------
+# SRAM-only parameters
+# SRAM configuration. Only used if MemCellType is SRAM
+-SRAMCellNMOSWidth (F): 2
+-SRAMCellPMOSWidth (F): 2
+-MinSenseVoltage (mV): 20
+
+# -----------------------------------------------------------------------------
+# RRAM-only parameters
+# RRAM configuration. Only used if MemCellType is RRAM
+-ResistanceOn (ohm): 625000
+-ResistanceOff (ohm): 10000000
+
+# Set parameters
+-SetVoltage (V): 2 # From PRIME
+-SetPulse (ns): 40
+# -SetCurrent (uA): 400 # OVERWRITES PREVIOUS LINES IF UNCOMMENTED
+# -SetEnergy (pJ): 8 # OVERWRITES PREVIOUS LINES IF UNCOMMENTED
+# -SetPower (uW): 800 # OVERWRITES PREVIOUS LINES IF UNCOMMENTED
+
+# Read parameters
+-ReadVoltage (V): 0.5 # From PRIME
+-ReadPulse (ns): 5
+# -ReadCurrent (uA): 40 # OVERWRITES PREVIOUS LINES IF UNCOMMENTED
+# -ReadEnergy (pJ): 0.008 # OVERWRITES PREVIOUS LINES IF UNCOMMENTED
+# -ReadPower (uW): 8 # OVERWRITES PREVIOUS LINES IF UNCOMMENTED
+
+# Access transistor parameters
+-AccessType: None # CMOS or None
+-AccessTransistorResistance (ohm): 1000
+-AccessVoltage (V): 0.1 # Access transistor voltage
+
+# -----------------------------------------------------------------------------
+# Other parameters that can be set via cell component attributes:
+
+# "voltage" is the global supply voltage
+# "threshold_voltage" is the global threshold voltage
+# "read_pulse_width" is the read pulse width
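Note: the calibration rule stated in the comment above, Capacitance = (Nominal Capacitance) * Multiplier + Adjust, means this config discards the nominal cell capacitance entirely (multiplier 0) and uses the adjust term alone. A one-line sketch with a made-up nominal value:

c_nominal = 1.0e-15   # hypothetical nominal cell capacitance (F); tool-derived in practice
multiplier = 0        # -CellCapacitanceMultiplier
adjust = 0.174e-15    # -CellCapacitanceAdjust (F)
c_cell = c_nominal * multiplier + adjust  # = 0.174 fF: the adjust term alone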