bpred 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bpred-0.2.0 → bpred-0.3.0}/CHANGELOG.md +14 -6
- {bpred-0.2.0 → bpred-0.3.0}/PKG-INFO +28 -12
- {bpred-0.2.0 → bpred-0.3.0}/README.md +25 -9
- {bpred-0.2.0 → bpred-0.3.0}/pyproject.toml +3 -3
- {bpred-0.2.0 → bpred-0.3.0}/src/bpred/__init__.py +5 -1
- {bpred-0.2.0 → bpred-0.3.0}/src/bpred/cli.py +18 -0
- bpred-0.3.0/src/bpred/local_history.py +161 -0
- bpred-0.3.0/tests/test_local_history.py +259 -0
- {bpred-0.2.0 → bpred-0.3.0}/.github/workflows/ci.yml +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/.gitignore +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/CLAUDE.md +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/CODE_OF_CONDUCT.md +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/CONTRIBUTING.md +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/LICENSE +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/SECURITY.md +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/assets/logo.png +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/docs/architecture.md +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/docs/charter.md +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/docs/logo-prompt.md +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/examples/README.md +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/examples/sample.trace +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/src/bpred/bimodal.py +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/src/bpred/counter.py +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/src/bpred/gshare.py +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/src/bpred/perceptron.py +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/src/bpred/py.typed +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/src/bpred/tournament.py +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/src/bpred/trace.py +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/tests/__init__.py +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/tests/test_bimodal.py +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/tests/test_counter.py +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/tests/test_gshare.py +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/tests/test_perceptron.py +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/tests/test_property.py +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/tests/test_tournament.py +0 -0
- {bpred-0.2.0 → bpred-0.3.0}/tests/test_trace.py +0 -0
|
@@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|
6
6
|
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.3.0] - 2026-06-23
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- `LocalHistoryPredictor`: Yeh and Patt (1991) PAg two-level adaptive predictor.
|
|
13
|
+
A per-branch local history table (BHT) indexed by PC feeds a shared pattern
|
|
14
|
+
history table (PHT) of 2-bit saturating counters indexed by the local history
|
|
15
|
+
pattern. Learns periodic per-branch patterns (for example strict alternating
|
|
16
|
+
T,N,T,N) that a bimodal predictor thrashes on; a period-k pattern is captured
|
|
17
|
+
once `history_bits >= k`.
|
|
18
|
+
Constructor: `LocalHistoryPredictor(history_bits=N, bht_size=B, pht_size=P)`.
|
|
19
|
+
Exported from `bpred` top-level package and available as the `local` CLI
|
|
20
|
+
subcommand.
|
|
21
|
+
|
|
8
22
|
## [0.2.0] - 2026-06-17
|
|
9
23
|
|
|
10
24
|
### Added
|
|
@@ -14,12 +28,6 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
|
|
|
14
28
|
Constructor: `PerceptronPredictor(history_length=H, table_size=N)`.
|
|
15
29
|
Exported from `bpred` top-level package.
|
|
16
30
|
|
|
17
|
-
### Notes
|
|
18
|
-
|
|
19
|
-
- PyPI publish is queued behind the new-project creation quota (currently
|
|
20
|
-
rate-limited). Build artifact passes `twine check`. Publish will follow once
|
|
21
|
-
the quota resets.
|
|
22
|
-
|
|
23
31
|
## [0.1.0] - 2026-06-17
|
|
24
32
|
|
|
25
33
|
### Added
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bpred
|
|
3
|
-
Version: 0.2.0
|
|
4
|
-
Summary: Pure-Python simulator of classical CPU branch predictors: bimodal, gshare, and tournament
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Pure-Python simulator of classical CPU branch predictors: bimodal, gshare, tournament, perceptron, and local-history
|
|
5
5
|
Project-URL: Homepage, https://github.com/amaar-mc/bpred
|
|
6
6
|
Project-URL: Repository, https://github.com/amaar-mc/bpred
|
|
7
7
|
Project-URL: Issues, https://github.com/amaar-mc/bpred/issues
|
|
@@ -28,7 +28,7 @@ License: MIT License
|
|
|
28
28
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
29
|
SOFTWARE.
|
|
30
30
|
License-File: LICENSE
|
|
31
|
-
Keywords: bimodal,branch-prediction,computer-architecture,cpu-simulator,education,gshare
|
|
31
|
+
Keywords: bimodal,branch-prediction,computer-architecture,cpu-simulator,education,gshare,perceptron,two-level-adaptive
|
|
32
32
|
Classifier: Development Status :: 3 - Alpha
|
|
33
33
|
Classifier: Intended Audience :: Education
|
|
34
34
|
Classifier: Intended Audience :: Science/Research
|
|
@@ -56,12 +56,13 @@ Description-Content-Type: text/markdown
|
|
|
56
56
|
|
|
57
57
|
Pure-Python simulator of classical CPU branch predictors for computer architecture education.
|
|
58
58
|
|
|
59
|
-
Implements four predictors from first principles with zero runtime dependencies:
|
|
59
|
+
Implements five predictors from first principles with zero runtime dependencies:
|
|
60
60
|
|
|
61
61
|
- **Bimodal** (Smith 1981) -- a table of n-bit saturating counters indexed by PC.
|
|
62
62
|
- **Gshare** (McFarling 1993) -- PC XOR global-history register indexes 2-bit counters.
|
|
63
63
|
- **Tournament** (McFarling 1993 / Alpha 21264) -- a meta-selector combining local and global sub-predictors.
|
|
64
64
|
- **Perceptron** (Jimenez and Lin 2001) -- a table of integer-weight perceptrons that can learn linearly-separable history patterns bimodal and gshare cannot capture.
|
|
65
|
+
- **Local-history / PAg** (Yeh and Patt 1991) -- a per-branch local history table feeds a shared pattern history table, learning periodic per-branch patterns that a bimodal predictor thrashes on.
|
|
65
66
|
|
|
66
67
|
Part of the same open-source computer architecture education series as [tomasulo](https://github.com/amaar-mc/tomasulo) (out-of-order execution) and scoreboarding.
|
|
67
68
|
|
|
@@ -71,18 +72,11 @@ Part of the same open-source computer architecture education series as [tomasulo
|
|
|
71
72
|
pip install bpred
|
|
72
73
|
```
|
|
73
74
|
|
|
74
|
-
PyPI publication is pending; install from source in the meantime:
|
|
75
|
-
|
|
76
|
-
```bash
|
|
77
|
-
git clone https://github.com/amaar-mc/bpred
|
|
78
|
-
cd bpred
|
|
79
|
-
pip install -e ".[dev]"
|
|
80
|
-
```
|
|
81
|
-
|
|
82
75
|
## Python API
|
|
83
76
|
|
|
84
77
|
```python
|
|
85
78
|
from bpred import BimodalPredictor, GsharePredictor, PerceptronPredictor, TournamentPredictor
|
|
79
|
+
from bpred import LocalHistoryPredictor
|
|
86
80
|
from bpred import run_trace, accuracy, mispredictions
|
|
87
81
|
|
|
88
82
|
# Bimodal: 2-bit counters, 1024-entry table
|
|
@@ -100,6 +94,9 @@ pred = TournamentPredictor(local=local, global_=global_, meta_bits=2)
|
|
|
100
94
|
# Perceptron: 12-bit history, 1024-entry table
|
|
101
95
|
pred = PerceptronPredictor(history_length=12, table_size=1024)
|
|
102
96
|
|
|
97
|
+
# Local-history (PAg): 8-bit per-branch history, 1024-entry BHT, 256-entry PHT
|
|
98
|
+
pred = LocalHistoryPredictor(history_bits=8, bht_size=1024, pht_size=256)
|
|
99
|
+
|
|
103
100
|
# Feed a trace
|
|
104
101
|
trace = [(0x1000, True), (0x1004, False), (0x1008, True)]
|
|
105
102
|
result = run_trace(pred, trace=trace)
|
|
@@ -123,11 +120,30 @@ that the predictor needs more warm-up branches to converge and the weights
|
|
|
123
120
|
grow without bound (in simulation; hardware clamps them to a fixed-point
|
|
124
121
|
range).
|
|
125
122
|
|
|
123
|
+
### Why use the local-history (PAg) predictor?
|
|
124
|
+
|
|
125
|
+
Bimodal predicts each branch from a single counter, so a branch whose outcome
|
|
126
|
+
follows a short repeating pattern -- the textbook `T, N, T, N, ...` of a loop
|
|
127
|
+
that runs an even number of times, for example -- makes the counter oscillate
|
|
128
|
+
and the predictor thrashes near 50%.
|
|
129
|
+
|
|
130
|
+
The local-history predictor (the PAg configuration of Yeh and Patt's two-level
|
|
131
|
+
adaptive scheme, 1991) gives every branch its own N-bit shift register of
|
|
132
|
+
recent outcomes in a branch history table (BHT). That local pattern then
|
|
133
|
+
indexes a shared pattern history table (PHT) of 2-bit counters, so each
|
|
134
|
+
distinct recent-history pattern gets its own counter. A period-k pattern is
|
|
135
|
+
learned to near-100% accuracy once `history_bits >= k`, because each phase of
|
|
136
|
+
the period maps to a different PHT entry. The first level is per-address
|
|
137
|
+
(`P`), the training is adaptive (`A`), and the second-level PHT is global
|
|
138
|
+
(`g`), which is what the name PAg encodes. The per-address-PHT variant (PAp)
|
|
139
|
+
is a natural extension left as future work.
|
|
140
|
+
|
|
126
141
|
## CLI
|
|
127
142
|
|
|
128
143
|
```
|
|
129
144
|
bpred bimodal --counter-bits 2 --table-size 1024 path/to/trace.trace
|
|
130
145
|
bpred gshare --history-bits 10 --table-size 1024 path/to/trace.trace
|
|
146
|
+
bpred local --history-bits 8 --bht-size 1024 --pht-size 256 path/to/trace.trace
|
|
131
147
|
bpred tournament \
|
|
132
148
|
--local-predictor bimodal --local-counter-bits 2 --local-table-size 1024 \
|
|
133
149
|
--global-predictor gshare --global-history-bits 10 --global-table-size 1024 \
|
|
@@ -6,12 +6,13 @@
|
|
|
6
6
|
|
|
7
7
|
Pure-Python simulator of classical CPU branch predictors for computer architecture education.
|
|
8
8
|
|
|
9
|
-
Implements four predictors from first principles with zero runtime dependencies:
|
|
9
|
+
Implements five predictors from first principles with zero runtime dependencies:
|
|
10
10
|
|
|
11
11
|
- **Bimodal** (Smith 1981) -- a table of n-bit saturating counters indexed by PC.
|
|
12
12
|
- **Gshare** (McFarling 1993) -- PC XOR global-history register indexes 2-bit counters.
|
|
13
13
|
- **Tournament** (McFarling 1993 / Alpha 21264) -- a meta-selector combining local and global sub-predictors.
|
|
14
14
|
- **Perceptron** (Jimenez and Lin 2001) -- a table of integer-weight perceptrons that can learn linearly-separable history patterns bimodal and gshare cannot capture.
|
|
15
|
+
- **Local-history / PAg** (Yeh and Patt 1991) -- a per-branch local history table feeds a shared pattern history table, learning periodic per-branch patterns that a bimodal predictor thrashes on.
|
|
15
16
|
|
|
16
17
|
Part of the same open-source computer architecture education series as [tomasulo](https://github.com/amaar-mc/tomasulo) (out-of-order execution) and scoreboarding.
|
|
17
18
|
|
|
@@ -21,18 +22,11 @@ Part of the same open-source computer architecture education series as [tomasulo
|
|
|
21
22
|
pip install bpred
|
|
22
23
|
```
|
|
23
24
|
|
|
24
|
-
PyPI publication is pending; install from source in the meantime:
|
|
25
|
-
|
|
26
|
-
```bash
|
|
27
|
-
git clone https://github.com/amaar-mc/bpred
|
|
28
|
-
cd bpred
|
|
29
|
-
pip install -e ".[dev]"
|
|
30
|
-
```
|
|
31
|
-
|
|
32
25
|
## Python API
|
|
33
26
|
|
|
34
27
|
```python
|
|
35
28
|
from bpred import BimodalPredictor, GsharePredictor, PerceptronPredictor, TournamentPredictor
|
|
29
|
+
from bpred import LocalHistoryPredictor
|
|
36
30
|
from bpred import run_trace, accuracy, mispredictions
|
|
37
31
|
|
|
38
32
|
# Bimodal: 2-bit counters, 1024-entry table
|
|
@@ -50,6 +44,9 @@ pred = TournamentPredictor(local=local, global_=global_, meta_bits=2)
|
|
|
50
44
|
# Perceptron: 12-bit history, 1024-entry table
|
|
51
45
|
pred = PerceptronPredictor(history_length=12, table_size=1024)
|
|
52
46
|
|
|
47
|
+
# Local-history (PAg): 8-bit per-branch history, 1024-entry BHT, 256-entry PHT
|
|
48
|
+
pred = LocalHistoryPredictor(history_bits=8, bht_size=1024, pht_size=256)
|
|
49
|
+
|
|
53
50
|
# Feed a trace
|
|
54
51
|
trace = [(0x1000, True), (0x1004, False), (0x1008, True)]
|
|
55
52
|
result = run_trace(pred, trace=trace)
|
|
@@ -73,11 +70,30 @@ that the predictor needs more warm-up branches to converge and the weights
|
|
|
73
70
|
grow without bound (in simulation; hardware clamps them to a fixed-point
|
|
74
71
|
range).
|
|
75
72
|
|
|
73
|
+
### Why use the local-history (PAg) predictor?
|
|
74
|
+
|
|
75
|
+
Bimodal predicts each branch from a single counter, so a branch whose outcome
|
|
76
|
+
follows a short repeating pattern -- the textbook `T, N, T, N, ...` of a loop
|
|
77
|
+
that runs an even number of times, for example -- makes the counter oscillate
|
|
78
|
+
and the predictor thrashes near 50%.
|
|
79
|
+
|
|
80
|
+
The local-history predictor (the PAg configuration of Yeh and Patt's two-level
|
|
81
|
+
adaptive scheme, 1991) gives every branch its own N-bit shift register of
|
|
82
|
+
recent outcomes in a branch history table (BHT). That local pattern then
|
|
83
|
+
indexes a shared pattern history table (PHT) of 2-bit counters, so each
|
|
84
|
+
distinct recent-history pattern gets its own counter. A period-k pattern is
|
|
85
|
+
learned to near-100% accuracy once `history_bits >= k`, because each phase of
|
|
86
|
+
the period maps to a different PHT entry. The first level is per-address
|
|
87
|
+
(`P`), the training is adaptive (`A`), and the second-level PHT is global
|
|
88
|
+
(`g`), which is what the name PAg encodes. The per-address-PHT variant (PAp)
|
|
89
|
+
is a natural extension left as future work.
|
|
90
|
+
|
|
76
91
|
## CLI
|
|
77
92
|
|
|
78
93
|
```
|
|
79
94
|
bpred bimodal --counter-bits 2 --table-size 1024 path/to/trace.trace
|
|
80
95
|
bpred gshare --history-bits 10 --table-size 1024 path/to/trace.trace
|
|
96
|
+
bpred local --history-bits 8 --bht-size 1024 --pht-size 256 path/to/trace.trace
|
|
81
97
|
bpred tournament \
|
|
82
98
|
--local-predictor bimodal --local-counter-bits 2 --local-table-size 1024 \
|
|
83
99
|
--global-predictor gshare --global-history-bits 10 --global-table-size 1024 \
|
|
@@ -4,12 +4,12 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "bpred"
|
|
7
|
-
version = "0.2.0"
|
|
8
|
-
description = "Pure-Python simulator of classical CPU branch predictors: bimodal, gshare, and tournament"
|
|
7
|
+
version = "0.3.0"
|
|
8
|
+
description = "Pure-Python simulator of classical CPU branch predictors: bimodal, gshare, tournament, perceptron, and local-history"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {file = "LICENSE"}
|
|
11
11
|
authors = [{name = "Amaar Chughtai", email = "amaardevx@gmail.com"}]
|
|
12
|
-
keywords = ["computer-architecture", "branch-prediction", "gshare", "bimodal", "cpu-simulator", "education"]
|
|
12
|
+
keywords = ["computer-architecture", "branch-prediction", "gshare", "bimodal", "perceptron", "two-level-adaptive", "cpu-simulator", "education"]
|
|
13
13
|
classifiers = [
|
|
14
14
|
"Development Status :: 3 - Alpha",
|
|
15
15
|
"Intended Audience :: Education",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""bpred -- Pure-Python CPU branch predictor simulator.
|
|
2
2
|
|
|
3
|
-
Provides four classical branch predictors and trace-driven simulation
|
|
3
|
+
Provides five classical branch predictors and trace-driven simulation
|
|
4
4
|
utilities for computer architecture education.
|
|
5
5
|
|
|
6
6
|
Predictors
|
|
@@ -13,6 +13,8 @@ TournamentPredictor
|
|
|
13
13
|
McFarling (1993) / Alpha 21264-style meta-selecting predictor.
|
|
14
14
|
PerceptronPredictor
|
|
15
15
|
Jimenez and Lin (2001) table of integer-weight perceptrons.
|
|
16
|
+
LocalHistoryPredictor
|
|
17
|
+
Yeh and Patt (1991) PAg two-level adaptive local-history predictor.
|
|
16
18
|
|
|
17
19
|
Functions
|
|
18
20
|
---------
|
|
@@ -26,6 +28,7 @@ mispredictions(*, trace_result)
|
|
|
26
28
|
|
|
27
29
|
from bpred.bimodal import BimodalPredictor
|
|
28
30
|
from bpred.gshare import GsharePredictor
|
|
31
|
+
from bpred.local_history import LocalHistoryPredictor
|
|
29
32
|
from bpred.perceptron import PerceptronPredictor
|
|
30
33
|
from bpred.tournament import TournamentPredictor
|
|
31
34
|
from bpred.trace import TraceResult, accuracy, mispredictions, run_trace
|
|
@@ -33,6 +36,7 @@ from bpred.trace import TraceResult, accuracy, mispredictions, run_trace
|
|
|
33
36
|
__all__ = [
|
|
34
37
|
"BimodalPredictor",
|
|
35
38
|
"GsharePredictor",
|
|
39
|
+
"LocalHistoryPredictor",
|
|
36
40
|
"PerceptronPredictor",
|
|
37
41
|
"TournamentPredictor",
|
|
38
42
|
"TraceResult",
|
|
@@ -9,6 +9,7 @@ from pathlib import Path
|
|
|
9
9
|
from bpred import (
|
|
10
10
|
BimodalPredictor,
|
|
11
11
|
GsharePredictor,
|
|
12
|
+
LocalHistoryPredictor,
|
|
12
13
|
TournamentPredictor,
|
|
13
14
|
accuracy,
|
|
14
15
|
mispredictions,
|
|
@@ -83,6 +84,17 @@ def _build_parser() -> argparse.ArgumentParser:
|
|
|
83
84
|
gshare.add_argument("--table-size", type=int, required=True)
|
|
84
85
|
gshare.add_argument("tracefile", type=Path)
|
|
85
86
|
|
|
87
|
+
# ------------------------------------------------------------------
|
|
88
|
+
# local (PAg)
|
|
89
|
+
# ------------------------------------------------------------------
|
|
90
|
+
local_p = sub.add_parser(
|
|
91
|
+
"local", help="Local-history PAg (Yeh and Patt 1991) predictor"
|
|
92
|
+
)
|
|
93
|
+
local_p.add_argument("--history-bits", type=int, required=True)
|
|
94
|
+
local_p.add_argument("--bht-size", type=int, required=True)
|
|
95
|
+
local_p.add_argument("--pht-size", type=int, required=True)
|
|
96
|
+
local_p.add_argument("tracefile", type=Path)
|
|
97
|
+
|
|
86
98
|
# ------------------------------------------------------------------
|
|
87
99
|
# tournament
|
|
88
100
|
# ------------------------------------------------------------------
|
|
@@ -121,6 +133,12 @@ def _build_predictor(args: argparse.Namespace) -> BranchPredictor:
|
|
|
121
133
|
history_bits=args.history_bits,
|
|
122
134
|
table_size=args.table_size,
|
|
123
135
|
)
|
|
136
|
+
if args.predictor == "local":
|
|
137
|
+
return LocalHistoryPredictor(
|
|
138
|
+
history_bits=args.history_bits,
|
|
139
|
+
bht_size=args.bht_size,
|
|
140
|
+
pht_size=args.pht_size,
|
|
141
|
+
)
|
|
124
142
|
if args.predictor == "tournament":
|
|
125
143
|
local: BranchPredictor = BimodalPredictor(
|
|
126
144
|
counter_bits=args.local_counter_bits,
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""Local-history two-level adaptive branch predictor (PAg).
|
|
2
|
+
|
|
3
|
+
Reference: T.-Y. Yeh and Y. N. Patt, "Two-Level Adaptive Training Branch
|
|
4
|
+
Prediction," in Proceedings of the 24th Annual International Symposium on
|
|
5
|
+
Microarchitecture (MICRO 24), pp. 51-61, 1991.
|
|
6
|
+
|
|
7
|
+
This implements the PAg configuration of the Yeh and Patt two-level scheme:
|
|
8
|
+
|
|
9
|
+
- A *per-address* branch history table (BHT) indexed by PC. Each BHT entry is
|
|
10
|
+
an N-bit shift register holding the recent taken/not-taken outcomes of that
|
|
11
|
+
specific branch (P = per-address first level).
|
|
12
|
+
- A single *global* pattern history table (PHT) of 2-bit saturating counters,
|
|
13
|
+
shared across all branches and indexed by the local history pattern read from
|
|
14
|
+
the BHT (g = global second level).
|
|
15
|
+
|
|
16
|
+
The "PAg" name decodes as: P (per-address first level) A (adaptive) g (global
|
|
17
|
+
second level). Contrast with the per-address second level "PAp" variant, noted
|
|
18
|
+
as future work below.
|
|
19
|
+
|
|
20
|
+
Indexing
|
|
21
|
+
--------
|
|
22
|
+
The local history register is an integer in [0, 2^history_bits - 1]. The BHT
|
|
23
|
+
is indexed by ``pc % bht_size``. The PHT is indexed by ``history % pht_size``;
|
|
24
|
+
with the natural sizing ``pht_size == 2**history_bits`` this is a direct,
|
|
25
|
+
collision-free mapping from every distinct history pattern to its own counter.
|
|
26
|
+
|
|
27
|
+
PAp (future work)
|
|
28
|
+
-----------------
|
|
29
|
+
A PAp predictor gives each branch its own private PHT instead of sharing one
|
|
30
|
+
global PHT. That is a strict generalisation (a 2-D PHT indexed by branch and
|
|
31
|
+
then by pattern) and is intentionally out of scope here to keep PAg focused.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
from bpred.counter import SaturatingCounter
|
|
37
|
+
|
|
38
|
+
_COUNTER_BITS = 2
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class LocalHistoryPredictor:
|
|
42
|
+
"""Two-level adaptive predictor with per-branch local history (PAg).
|
|
43
|
+
|
|
44
|
+
Each branch has its own ``history_bits``-wide local history register stored
|
|
45
|
+
in the branch history table (BHT). A single shared pattern history table
|
|
46
|
+
(PHT) of 2-bit saturating counters is indexed by that local history pattern.
|
|
47
|
+
|
|
48
|
+
predict(pc):
|
|
49
|
+
Read the branch's local history, index the PHT with it, and predict
|
|
50
|
+
taken iff the selected counter is in a taken state.
|
|
51
|
+
|
|
52
|
+
update(pc, taken):
|
|
53
|
+
Update the indexed PHT counter with the actual outcome, then shift the
|
|
54
|
+
outcome into that branch's local history register (most recent outcome
|
|
55
|
+
in the least-significant bit).
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
_bht: list[int]
|
|
59
|
+
_pht: list[SaturatingCounter]
|
|
60
|
+
_history_bits: int
|
|
61
|
+
_bht_size: int
|
|
62
|
+
_pht_size: int
|
|
63
|
+
_history_mask: int
|
|
64
|
+
|
|
65
|
+
def __init__(self, *, history_bits: int, bht_size: int, pht_size: int) -> None:
|
|
66
|
+
"""Create a LocalHistoryPredictor (PAg).
|
|
67
|
+
|
|
68
|
+
Args:
|
|
69
|
+
history_bits: Width N of each per-branch local history register.
|
|
70
|
+
Must be >= 1.
|
|
71
|
+
bht_size: Number of entries in the branch history table (BHT).
|
|
72
|
+
Must be >= 1. Indexed by ``pc % bht_size``.
|
|
73
|
+
pht_size: Number of 2-bit counters in the shared pattern history
|
|
74
|
+
table (PHT). Must be >= 1. Indexed by ``history % pht_size``;
|
|
75
|
+
use ``2**history_bits`` for a collision-free mapping.
|
|
76
|
+
"""
|
|
77
|
+
if history_bits < 1:
|
|
78
|
+
raise ValueError(f"history_bits must be >= 1, got {history_bits}")
|
|
79
|
+
if bht_size < 1:
|
|
80
|
+
raise ValueError(f"bht_size must be >= 1, got {bht_size}")
|
|
81
|
+
if pht_size < 1:
|
|
82
|
+
raise ValueError(f"pht_size must be >= 1, got {pht_size}")
|
|
83
|
+
|
|
84
|
+
self._history_bits = history_bits
|
|
85
|
+
self._bht_size = bht_size
|
|
86
|
+
self._pht_size = pht_size
|
|
87
|
+
self._history_mask = (1 << history_bits) - 1
|
|
88
|
+
# All local histories start empty (all not-taken).
|
|
89
|
+
self._bht = [0] * bht_size
|
|
90
|
+
# All PHT counters start weakly taken (neutral).
|
|
91
|
+
initial = 1 << (_COUNTER_BITS - 1)
|
|
92
|
+
self._pht = [
|
|
93
|
+
SaturatingCounter(bits=_COUNTER_BITS, initial=initial)
|
|
94
|
+
for _ in range(pht_size)
|
|
95
|
+
]
|
|
96
|
+
|
|
97
|
+
# ------------------------------------------------------------------
|
|
98
|
+
# Properties
|
|
99
|
+
# ------------------------------------------------------------------
|
|
100
|
+
|
|
101
|
+
@property
|
|
102
|
+
def history_bits(self) -> int:
|
|
103
|
+
"""Width N of each per-branch local history register."""
|
|
104
|
+
return self._history_bits
|
|
105
|
+
|
|
106
|
+
@property
|
|
107
|
+
def bht_size(self) -> int:
|
|
108
|
+
"""Number of entries in the branch history table."""
|
|
109
|
+
return self._bht_size
|
|
110
|
+
|
|
111
|
+
@property
|
|
112
|
+
def pht_size(self) -> int:
|
|
113
|
+
"""Number of counters in the shared pattern history table."""
|
|
114
|
+
return self._pht_size
|
|
115
|
+
|
|
116
|
+
@property
|
|
117
|
+
def table_size(self) -> int:
|
|
118
|
+
"""Pattern history table size (satisfies the BranchPredictor protocol)."""
|
|
119
|
+
return self._pht_size
|
|
120
|
+
|
|
121
|
+
# ------------------------------------------------------------------
|
|
122
|
+
# Internal helpers
|
|
123
|
+
# ------------------------------------------------------------------
|
|
124
|
+
|
|
125
|
+
def _bht_index(self, *, pc: int) -> int:
|
|
126
|
+
"""BHT index: pc mod bht_size."""
|
|
127
|
+
return pc % self._bht_size
|
|
128
|
+
|
|
129
|
+
def _local_history(self, *, pc: int) -> int:
|
|
130
|
+
"""Return the local history pattern for the branch at *pc*."""
|
|
131
|
+
return self._bht[self._bht_index(pc=pc)]
|
|
132
|
+
|
|
133
|
+
def _pht_index(self, *, history: int) -> int:
|
|
134
|
+
"""PHT index: history mod pht_size."""
|
|
135
|
+
return history % self._pht_size
|
|
136
|
+
|
|
137
|
+
def _shift_history(self, *, pc: int, taken: bool) -> None:
|
|
138
|
+
"""Shift *taken* into the LSB of the branch's local history register."""
|
|
139
|
+
idx = self._bht_index(pc=pc)
|
|
140
|
+
self._bht[idx] = ((self._bht[idx] << 1) | int(taken)) & self._history_mask
|
|
141
|
+
|
|
142
|
+
# ------------------------------------------------------------------
|
|
143
|
+
# Public interface
|
|
144
|
+
# ------------------------------------------------------------------
|
|
145
|
+
|
|
146
|
+
def predict(self, *, pc: int) -> bool:
|
|
147
|
+
"""Return the taken prediction for the branch at *pc*."""
|
|
148
|
+
history = self._local_history(pc=pc)
|
|
149
|
+
return self._pht[self._pht_index(history=history)].predict()
|
|
150
|
+
|
|
151
|
+
def update(self, *, pc: int, taken: bool) -> None:
|
|
152
|
+
"""Update the PHT counter, then shift the outcome into local history."""
|
|
153
|
+
history = self._local_history(pc=pc)
|
|
154
|
+
self._pht[self._pht_index(history=history)].update(taken=taken)
|
|
155
|
+
self._shift_history(pc=pc, taken=taken)
|
|
156
|
+
|
|
157
|
+
def __repr__(self) -> str:
|
|
158
|
+
return (
|
|
159
|
+
f"LocalHistoryPredictor(history_bits={self._history_bits}, "
|
|
160
|
+
f"bht_size={self._bht_size}, pht_size={self._pht_size})"
|
|
161
|
+
)
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""Tests for LocalHistoryPredictor (PAg, Yeh and Patt 1991)."""
|
|
2
|
+
|
|
3
|
+
from bpred.bimodal import BimodalPredictor
|
|
4
|
+
from bpred.local_history import LocalHistoryPredictor
|
|
5
|
+
from bpred.trace import accuracy, run_trace
|
|
6
|
+
|
|
7
|
+
# ------------------------------------------------------------------
|
|
8
|
+
# Local history register behaviour
|
|
9
|
+
# ------------------------------------------------------------------
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_local_history_starts_empty() -> None:
|
|
13
|
+
"""Every BHT entry starts at 0 (no recorded outcomes)."""
|
|
14
|
+
p = LocalHistoryPredictor(history_bits=4, bht_size=8, pht_size=16)
|
|
15
|
+
for pc in range(8):
|
|
16
|
+
assert p._local_history(pc=pc) == 0 # noqa: SLF001
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_local_history_shifts_lsb_first() -> None:
|
|
20
|
+
"""Outcomes shift into the LSB of the per-branch history register."""
|
|
21
|
+
p = LocalHistoryPredictor(history_bits=4, bht_size=8, pht_size=16)
|
|
22
|
+
p.update(pc=0, taken=True)
|
|
23
|
+
assert p._local_history(pc=0) == 0b0001 # noqa: SLF001
|
|
24
|
+
p.update(pc=0, taken=True)
|
|
25
|
+
assert p._local_history(pc=0) == 0b0011 # noqa: SLF001
|
|
26
|
+
p.update(pc=0, taken=False)
|
|
27
|
+
assert p._local_history(pc=0) == 0b0110 # noqa: SLF001
|
|
28
|
+
p.update(pc=0, taken=True)
|
|
29
|
+
assert p._local_history(pc=0) == 0b1101 # noqa: SLF001
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_local_history_capped_at_history_bits() -> None:
|
|
33
|
+
"""A branch's local history never exceeds 2^history_bits - 1."""
|
|
34
|
+
p = LocalHistoryPredictor(history_bits=3, bht_size=4, pht_size=8)
|
|
35
|
+
mask = (1 << 3) - 1
|
|
36
|
+
for _ in range(10):
|
|
37
|
+
p.update(pc=0, taken=True)
|
|
38
|
+
assert p._local_history(pc=0) <= mask # noqa: SLF001
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_per_branch_history_is_independent() -> None:
|
|
42
|
+
"""Two branches that map to distinct BHT entries keep separate histories."""
|
|
43
|
+
p = LocalHistoryPredictor(history_bits=4, bht_size=8, pht_size=16)
|
|
44
|
+
p.update(pc=0, taken=True)
|
|
45
|
+
p.update(pc=1, taken=False)
|
|
46
|
+
assert p._local_history(pc=0) == 0b0001 # noqa: SLF001
|
|
47
|
+
assert p._local_history(pc=1) == 0b0000 # noqa: SLF001
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# ------------------------------------------------------------------
|
|
51
|
+
# Periodic pattern learning (the PAg headline capability)
|
|
52
|
+
# ------------------------------------------------------------------
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_alternating_pattern_beats_bimodal() -> None:
|
|
56
|
+
"""A strict alternating T,N,T,N,... trace is learned to high accuracy.
|
|
57
|
+
|
|
58
|
+
With one bit of local history the PAg predictor distinguishes the two
|
|
59
|
+
phases of the period and reaches ~100% accuracy after warm-up. A bimodal
|
|
60
|
+
predictor with a single counter is stuck near 50% because it has no
|
|
61
|
+
history and always predicts the majority class.
|
|
62
|
+
"""
|
|
63
|
+
n = 400
|
|
64
|
+
alternating_trace = [(0x300, i % 2 == 0) for i in range(n)]
|
|
65
|
+
|
|
66
|
+
local = LocalHistoryPredictor(history_bits=1, bht_size=4, pht_size=2)
|
|
67
|
+
bimod = BimodalPredictor(counter_bits=2, table_size=1)
|
|
68
|
+
|
|
69
|
+
local_acc = accuracy(trace_result=run_trace(local, trace=alternating_trace))
|
|
70
|
+
bimod_acc = accuracy(trace_result=run_trace(bimod, trace=alternating_trace))
|
|
71
|
+
|
|
72
|
+
assert local_acc > 0.95, f"local accuracy too low: {local_acc:.3f}"
|
|
73
|
+
assert bimod_acc <= 0.55, f"bimodal unexpectedly good: {bimod_acc:.3f}"
|
|
74
|
+
assert local_acc > bimod_acc, (
|
|
75
|
+
f"local ({local_acc:.3f}) did not beat bimodal ({bimod_acc:.3f})"
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_length_k_pattern_captured_when_history_bits_ge_k() -> None:
|
|
80
|
+
"""A length-k periodic pattern is learned once history_bits >= k.
|
|
81
|
+
|
|
82
|
+
Pattern of period 4: T, T, N, N repeating. Predicting the next outcome
|
|
83
|
+
from the previous k outcomes requires distinguishing all positions in the
|
|
84
|
+
period, which needs at least k history bits. With history_bits == 4 the
|
|
85
|
+
PAg predictor reaches near-perfect accuracy after warm-up.
|
|
86
|
+
"""
|
|
87
|
+
period = [True, True, False, False]
|
|
88
|
+
k = len(period)
|
|
89
|
+
n = 400
|
|
90
|
+
trace = [(0x400, period[i % k]) for i in range(n)]
|
|
91
|
+
|
|
92
|
+
p = LocalHistoryPredictor(history_bits=k, bht_size=4, pht_size=1 << k)
|
|
93
|
+
acc = accuracy(trace_result=run_trace(p, trace=trace))
|
|
94
|
+
assert acc > 0.95, f"period-{k} pattern not learned: {acc:.3f}"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def test_short_history_cannot_capture_long_period() -> None:
|
|
98
|
+
"""history_bits below the period leaves the predictor unable to separate
|
|
99
|
+
distinct phases that share the same short suffix.
|
|
100
|
+
|
|
101
|
+
Period 4 pattern T,T,N,N with only 1 history bit: the suffixes '...T' and
|
|
102
|
+
'...N' each precede both a same and a different outcome, so a single shared
|
|
103
|
+
counter per pattern cannot reach the near-perfect accuracy of the
|
|
104
|
+
full-history case.
|
|
105
|
+
"""
|
|
106
|
+
period = [True, True, False, False]
|
|
107
|
+
n = 400
|
|
108
|
+
trace = [(0x500, period[i % 4]) for i in range(n)]
|
|
109
|
+
|
|
110
|
+
short = LocalHistoryPredictor(history_bits=1, bht_size=4, pht_size=2)
|
|
111
|
+
full = LocalHistoryPredictor(history_bits=4, bht_size=4, pht_size=16)
|
|
112
|
+
|
|
113
|
+
short_acc = accuracy(trace_result=run_trace(short, trace=trace))
|
|
114
|
+
full_acc = accuracy(trace_result=run_trace(full, trace=trace))
|
|
115
|
+
|
|
116
|
+
assert full_acc > 0.95
|
|
117
|
+
assert short_acc < full_acc
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# ------------------------------------------------------------------
|
|
121
|
+
# Strongly-biased branches
|
|
122
|
+
# ------------------------------------------------------------------
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def test_always_taken_eventually_accurate() -> None:
|
|
126
|
+
"""An always-taken branch reaches high accuracy after warm-up."""
|
|
127
|
+
p = LocalHistoryPredictor(history_bits=4, bht_size=16, pht_size=16)
|
|
128
|
+
trace = [(0x100, True)] * 50
|
|
129
|
+
acc = accuracy(trace_result=run_trace(p, trace=trace))
|
|
130
|
+
assert acc >= 0.9
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def test_always_not_taken_eventually_accurate() -> None:
|
|
134
|
+
"""An always-not-taken branch reaches high accuracy after warm-up."""
|
|
135
|
+
p = LocalHistoryPredictor(history_bits=4, bht_size=16, pht_size=16)
|
|
136
|
+
trace = [(0x200, False)] * 50
|
|
137
|
+
acc = accuracy(trace_result=run_trace(p, trace=trace))
|
|
138
|
+
assert acc >= 0.9
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# ------------------------------------------------------------------
|
|
142
|
+
# Counter saturation through the PHT
|
|
143
|
+
# ------------------------------------------------------------------
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def test_pht_counter_saturates() -> None:
|
|
147
|
+
"""Repeated taken outcomes drive the indexed PHT counter to its max."""
|
|
148
|
+
# history_bits=1, so an always-taken branch settles on history pattern 1
|
|
149
|
+
# and repeatedly hits PHT[1], saturating it at max_value (3 for 2-bit).
|
|
150
|
+
p = LocalHistoryPredictor(history_bits=1, bht_size=4, pht_size=2)
|
|
151
|
+
for _ in range(10):
|
|
152
|
+
p.update(pc=0, taken=True)
|
|
153
|
+
assert p._pht[1].value == p._pht[1].max_value # noqa: SLF001
|
|
154
|
+
assert p._pht[1].value == 3 # noqa: SLF001
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def test_pht_starts_weakly_taken() -> None:
|
|
158
|
+
"""All PHT counters start in the weakly-taken neutral state (value 2)."""
|
|
159
|
+
p = LocalHistoryPredictor(history_bits=2, bht_size=4, pht_size=4)
|
|
160
|
+
for counter in p._pht: # noqa: SLF001
|
|
161
|
+
assert counter.value == 2
|
|
162
|
+
assert counter.predict() is True
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# ------------------------------------------------------------------
|
|
166
|
+
# Table sizing
|
|
167
|
+
# ------------------------------------------------------------------
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def test_tables_sized_as_documented() -> None:
|
|
171
|
+
"""BHT and PHT are sized exactly as requested."""
|
|
172
|
+
p = LocalHistoryPredictor(history_bits=3, bht_size=8, pht_size=8)
|
|
173
|
+
assert len(p._bht) == 8 # noqa: SLF001
|
|
174
|
+
assert len(p._pht) == 8 # noqa: SLF001
|
|
175
|
+
assert p.history_bits == 3
|
|
176
|
+
assert p.bht_size == 8
|
|
177
|
+
assert p.pht_size == 8
|
|
178
|
+
assert p.table_size == 8
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# ------------------------------------------------------------------
|
|
182
|
+
# Interface conformance
|
|
183
|
+
# ------------------------------------------------------------------
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def test_predict_returns_bool() -> None:
|
|
187
|
+
"""predict() must return bool, matching the BranchPredictor protocol."""
|
|
188
|
+
p = LocalHistoryPredictor(history_bits=4, bht_size=16, pht_size=16)
|
|
189
|
+
assert isinstance(p.predict(pc=0x1000), bool)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def test_update_returns_none() -> None:
|
|
193
|
+
"""update() must return None."""
|
|
194
|
+
p = LocalHistoryPredictor(history_bits=4, bht_size=16, pht_size=16)
|
|
195
|
+
assert p.update(pc=0x1000, taken=True) is None
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def test_run_trace_compatible() -> None:
|
|
199
|
+
"""LocalHistoryPredictor is accepted by run_trace (protocol conformance)."""
|
|
200
|
+
p = LocalHistoryPredictor(history_bits=4, bht_size=16, pht_size=16)
|
|
201
|
+
trace = [(0x100, True), (0x104, False), (0x108, True)]
|
|
202
|
+
result = run_trace(p, trace=trace)
|
|
203
|
+
assert result.total == 3
|
|
204
|
+
assert len(result.predictions) == 3
|
|
205
|
+
assert all(isinstance(pred, bool) for pred in result.predictions)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def test_deterministic() -> None:
|
|
209
|
+
"""Same trace produces identical results on two fresh predictors."""
|
|
210
|
+
trace = [(i * 8, i % 3 != 0) for i in range(40)]
|
|
211
|
+
p1 = LocalHistoryPredictor(history_bits=4, bht_size=16, pht_size=16)
|
|
212
|
+
p2 = LocalHistoryPredictor(history_bits=4, bht_size=16, pht_size=16)
|
|
213
|
+
r1 = run_trace(p1, trace=trace)
|
|
214
|
+
r2 = run_trace(p2, trace=trace)
|
|
215
|
+
assert r1.predictions == r2.predictions
|
|
216
|
+
assert r1.correct == r2.correct
|
|
217
|
+
assert r1.hits == r2.hits
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
# ------------------------------------------------------------------
|
|
221
|
+
# Validation
|
|
222
|
+
# ------------------------------------------------------------------
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def test_invalid_history_bits_raises() -> None:
|
|
226
|
+
"""history_bits < 1 must raise ValueError."""
|
|
227
|
+
try:
|
|
228
|
+
LocalHistoryPredictor(history_bits=0, bht_size=4, pht_size=4)
|
|
229
|
+
assert False, "expected ValueError"
|
|
230
|
+
except ValueError:
|
|
231
|
+
pass
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def test_invalid_bht_size_raises() -> None:
|
|
235
|
+
"""bht_size < 1 must raise ValueError."""
|
|
236
|
+
try:
|
|
237
|
+
LocalHistoryPredictor(history_bits=4, bht_size=0, pht_size=4)
|
|
238
|
+
assert False, "expected ValueError"
|
|
239
|
+
except ValueError:
|
|
240
|
+
pass
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def test_invalid_pht_size_raises() -> None:
|
|
244
|
+
"""pht_size < 1 must raise ValueError."""
|
|
245
|
+
try:
|
|
246
|
+
LocalHistoryPredictor(history_bits=4, bht_size=4, pht_size=0)
|
|
247
|
+
assert False, "expected ValueError"
|
|
248
|
+
except ValueError:
|
|
249
|
+
pass
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def test_repr() -> None:
|
|
253
|
+
"""__repr__ includes class name and all three parameters."""
|
|
254
|
+
p = LocalHistoryPredictor(history_bits=4, bht_size=16, pht_size=16)
|
|
255
|
+
r = repr(p)
|
|
256
|
+
assert "LocalHistoryPredictor" in r
|
|
257
|
+
assert "history_bits=4" in r
|
|
258
|
+
assert "bht_size=16" in r
|
|
259
|
+
assert "pht_size=16" in r
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|