boltzmann9 0.1.4__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- boltzmann9/__init__.py +38 -0
- boltzmann9/__main__.py +4 -0
- boltzmann9/cli.py +389 -0
- boltzmann9/config.py +58 -0
- boltzmann9/data.py +145 -0
- boltzmann9/data_generator.py +234 -0
- boltzmann9/model.py +867 -0
- boltzmann9/pipeline.py +216 -0
- boltzmann9/preprocessor.py +627 -0
- boltzmann9/project.py +195 -0
- boltzmann9/run_utils.py +262 -0
- boltzmann9/tester.py +167 -0
- boltzmann9/utils.py +42 -0
- boltzmann9/visualization.py +115 -0
- {boltzmann9-0.1.4.dist-info → boltzmann9-0.1.7.dist-info}/METADATA +1 -1
- boltzmann9-0.1.7.dist-info/RECORD +19 -0
- boltzmann9-0.1.7.dist-info/top_level.txt +1 -0
- boltzmann9-0.1.4.dist-info/RECORD +0 -5
- boltzmann9-0.1.4.dist-info/top_level.txt +0 -1
- {boltzmann9-0.1.4.dist-info → boltzmann9-0.1.7.dist-info}/WHEEL +0 -0
- {boltzmann9-0.1.4.dist-info → boltzmann9-0.1.7.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
"""Synthetic data generation for Boltzmann Machine experiments.
|
|
2
|
+
|
|
3
|
+
This module generates synthetic time-series data using a discretized
|
|
4
|
+
Langevin equation (stochastic harmonic oscillator) for testing RBMs.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
import pandas as pd
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class GeneratorConfig:
    """Configuration for synthetic data generation.

    Attributes:
        n_samples: Number of time steps/samples to generate.
        dt: Time step size for discretization.
        r_min: Minimum allowed value for R.
        r_max: Maximum allowed value for R.
        k_bins: Number of bins for discretizing R.
        spring_k: Spring strength (how strongly R is pulled to equilibrium).
        sigma: Noise strength (stochastic forcing).
        eq_interval: Steps between equilibrium position updates.
        m0: Initial equilibrium value.
        sigma_eq: Size of random shift when equilibrium jumps.
        lookahead: Steps ahead to look for decision variable.

    Raises:
        ValueError: If any parameter is outside the range the generator can
            simulate (see ``__post_init__``).
    """

    n_samples: int = 5000
    dt: float = 0.1
    r_min: float = -2.0
    r_max: float = 2.0
    k_bins: int = 16
    spring_k: float = 5.0
    sigma: float = 1.0
    eq_interval: int = 100
    m0: float = 0.25
    sigma_eq: float = 0.0
    lookahead: int = 10

    def __post_init__(self) -> None:
        """Validate the configuration at construction time.

        Without these checks invalid values only surface later, inside the
        simulation, as unrelated errors (e.g. a modulo by ``eq_interval``
        of zero, or a base-2 logarithm of a non-positive ``k_bins``).
        """
        problems = []
        if self.n_samples < 0:
            problems.append(f"n_samples must be >= 0, got {self.n_samples!r}")
        if self.dt <= 0:
            problems.append(f"dt must be > 0, got {self.dt!r}")
        if self.r_min >= self.r_max:
            problems.append(
                f"r_min must be < r_max, got r_min={self.r_min!r}, "
                f"r_max={self.r_max!r}"
            )
        if self.k_bins < 1:
            problems.append(f"k_bins must be >= 1, got {self.k_bins!r}")
        if self.eq_interval < 1:
            problems.append(
                f"eq_interval must be >= 1, got {self.eq_interval!r}"
            )
        if self.sigma_eq < 0:
            problems.append(f"sigma_eq must be >= 0, got {self.sigma_eq!r}")
        if self.lookahead < 0:
            problems.append(f"lookahead must be >= 0, got {self.lookahead!r}")
        if problems:
            raise ValueError("Invalid GeneratorConfig: " + "; ".join(problems))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class SyntheticDataGenerator:
    """Generate synthetic stochastic time-series data.

    Uses a discretized Langevin equation for a harmonic oscillator:
        dR/dt = k * (m_t - R) + sigma * noise

    The continuous values are discretized into K bins and encoded as binary.
    """

    def __init__(self, config: Optional["GeneratorConfig"] = None):
        """Initialize generator with configuration.

        Args:
            config: Generator configuration. Uses defaults if None.
        """
        self.config = config or GeneratorConfig()
        self._setup_bins()

    def _setup_bins(self) -> None:
        """Set up bin edges, bin centers and the bit width of a bin index.

        Raises:
            ValueError: If the configured number of bins is below 1.
        """
        cfg = self.config
        k_bins = int(cfg.k_bins)
        if k_bins < 1:
            raise ValueError(f"k_bins must be >= 1, got {cfg.k_bins!r}")

        self.bin_edges = np.linspace(cfg.r_min, cfg.r_max, k_bins + 1)
        self.bin_centers = (self.bin_edges[:-1] + self.bin_edges[1:]) / 2
        # Bits needed to represent indices 0..k_bins-1. Integer arithmetic
        # avoids floating-point log2, and the floor of one bit keeps a
        # single-bin setup consistent with the one-character string that
        # bin_index_to_binary produces for index 0.
        self.n_bits = max(1, (k_bins - 1).bit_length())

    def round_to_nearest_bin(self, r_continuous: float) -> tuple[int, float]:
        """Round a continuous R value to the nearest bin.

        Args:
            r_continuous: Continuous R value.

        Returns:
            Tuple of (bin_index, bin_center_value) as built-in ``int`` and
            ``float``. Ties resolve to the lower index (``argmin``).
        """
        idx = int(np.argmin(np.abs(self.bin_centers - r_continuous)))
        return idx, float(self.bin_centers[idx])

    @staticmethod
    def bin_index_to_binary(idx: int, n_bits: int) -> str:
        """Convert bin index to binary string (MSB first).

        Args:
            idx: Bin index.
            n_bits: Minimum number of characters; shorter values are
                left-padded with zeros.

        Returns:
            Binary string representation.
        """
        return format(idx, f"0{n_bits}b")

    @staticmethod
    def binary_to_list(binary_str: str) -> list[int]:
        """Convert binary string to list of integers.

        Args:
            binary_str: Binary string representation.

        Returns:
            List of bit values (0 or 1).
        """
        return [int(bit) for bit in binary_str]

    def _update_equilibrium(self, m_prev: float) -> float:
        """Randomly move equilibrium and clip to valid range.

        Draws one normal sample (scale ``sigma_eq``) from NumPy's global
        random state.
        """
        cfg = self.config
        m_new = m_prev + np.random.normal(0.0, cfg.sigma_eq)
        return float(np.clip(m_new, cfg.r_min, cfg.r_max))

    def _step_r(self, r_prev: float, m_t: float) -> float:
        """One step of discretized Langevin equation.

        Adds the spring drift toward ``m_t`` and a noise term scaled by
        ``sigma * sqrt(dt)``, then clips the result to ``[r_min, r_max]``.
        Draws one standard-normal sample from NumPy's global random state.
        """
        cfg = self.config
        drift = cfg.spring_k * (m_t - r_prev) * cfg.dt
        diffusion = cfg.sigma * np.sqrt(cfg.dt) * np.random.normal()
        r_new = r_prev + drift + diffusion
        return float(np.clip(r_new, cfg.r_min, cfg.r_max))

    @staticmethod
    def _forward_looking_decision(r_current: float, r_future: float) -> int:
        """Decision rule based on forward return.

        Returns:
            1 if future >= current, else 0.
        """
        return 1 if (r_future - r_current) >= 0 else 0

    def generate(self, seed: Optional[int] = None) -> tuple[pd.DataFrame, pd.DataFrame]:
        """Generate synthetic data.

        Args:
            seed: Random seed for reproducibility. When given, NumPy's
                global random state is re-seeded (``np.random.seed``), which
                also affects other users of that global state.

        Returns:
            Tuple of (full_dataframe, simplified_dataframe):
            - full_dataframe: Complete simulation data with all columns.
              ``x`` is NaN for the last ``lookahead`` rows, where no future
              value exists.
            - simplified_dataframe: Only binary R_t, R_t+lookahead, and x.
              The future-bit columns are named ``R_t+<lookahead>_bit_<i>``.

        Raises:
            ValueError: If ``eq_interval`` is below 1 or ``lookahead`` is
                negative.
        """
        cfg = self.config
        n_bits = self.n_bits
        n_samples = cfg.n_samples
        lookahead = cfg.lookahead

        # Fail early with a clear message instead of a ZeroDivisionError in
        # the modulo below or silently wrapped negative list indices.
        if cfg.eq_interval < 1:
            raise ValueError(
                f"eq_interval must be >= 1, got {cfg.eq_interval!r}"
            )
        if lookahead < 0:
            raise ValueError(f"lookahead must be >= 0, got {lookahead!r}")

        if seed is not None:
            np.random.seed(seed)

        # Storage
        r_values = []
        r_discretized = []
        r_bin_indices = []
        r_binary_strings = []
        r_binary_lists = []
        m_values = []

        r_t = 0.0
        m_t = cfg.m0

        # Simulation loop. The equilibrium draw must stay ahead of the step
        # draw so that seeded runs reproduce the same random sequence.
        for t in range(n_samples):
            if t % cfg.eq_interval == 0 and t > 0:
                m_t = self._update_equilibrium(m_t)

            r_t = self._step_r(r_t, m_t)
            bin_idx, r_disc = self.round_to_nearest_bin(r_t)
            binary_str = self.bin_index_to_binary(bin_idx, n_bits)

            r_values.append(r_t)
            r_discretized.append(r_disc)
            r_bin_indices.append(bin_idx)
            r_binary_strings.append(binary_str)
            r_binary_lists.append(self.binary_to_list(binary_str))
            m_values.append(m_t)

        # Decision variable: compare each discretized value with the one
        # `lookahead` steps later; undefined (NaN) once the future runs out.
        x_values = [
            self._forward_looking_decision(
                r_discretized[t], r_discretized[t + lookahead]
            )
            if t + lookahead < n_samples
            else np.nan
            for t in range(n_samples)
        ]

        # Build full dataframe
        df = pd.DataFrame(
            {
                "t": np.arange(n_samples),
                "R_continuous": r_values,
                "R": r_discretized,
                "R_bin_index": r_bin_indices,
                "R_binary": r_binary_strings,
                "equilibrium": m_values,
                "x": x_values,
            }
        )

        for i in range(n_bits):
            df[f"R_bit_{i}"] = [bits[i] for bits in r_binary_lists]

        # Build simplified dataframe (binary only). Column names follow the
        # configured lookahead rather than a fixed "+10".
        current_cols = [f"R_t_bit_{i}" for i in range(n_bits)]
        future_cols = [f"R_t+{lookahead}_bit_{i}" for i in range(n_bits)]

        dataframe_rows = []
        for t in range(n_samples - lookahead):
            row = dict(zip(current_cols, r_binary_lists[t]))
            row.update(zip(future_cols, r_binary_lists[t + lookahead]))
            row["x"] = x_values[t]
            dataframe_rows.append(row)

        simplified_df = pd.DataFrame(dataframe_rows)

        return df, simplified_df

    def print_info(self) -> None:
        """Print information about the binary encoding."""
        cfg = self.config
        n_bits = self.n_bits

        print("=" * 60)
        print("BINARY ENCODING INFO")
        print("=" * 60)
        print(f"Number of bins (K): {cfg.k_bins}")
        print(f"Number of bits needed: {n_bits}")
        print("\nBin index to binary mapping:")
        for i in range(cfg.k_bins):
            print(
                f"  Bin {i}: {self.bin_index_to_binary(i, n_bits)} -> "
                f"R = {self.bin_centers[i]:.4f}"
            )
|