boltzmann9 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,234 +0,0 @@
1
- """Synthetic data generation for Boltzmann Machine experiments.
2
-
3
- This module generates synthetic time-series data using a discretized
4
- Langevin equation (stochastic harmonic oscillator) for testing RBMs.
5
- """
6
-
7
- from __future__ import annotations
8
-
9
- from dataclasses import dataclass
10
- from typing import Optional
11
-
12
- import numpy as np
13
- import pandas as pd
14
-
15
-
16
@dataclass
class GeneratorConfig:
    """Configuration for synthetic data generation.

    Attributes:
        n_samples: Number of time steps/samples to generate.
        dt: Time step size for discretization.
        r_min: Minimum allowed value for R.
        r_max: Maximum allowed value for R.
        k_bins: Number of bins for discretizing R.
        spring_k: Spring strength (how strongly R is pulled to equilibrium).
        sigma: Noise strength (stochastic forcing).
        eq_interval: Steps between equilibrium position updates.
        m0: Initial equilibrium value.
        sigma_eq: Size of random shift when equilibrium jumps.
        lookahead: Steps ahead to look for decision variable.

    Raises:
        ValueError: If a parameter is outside the range the generator can
            work with (see ``__post_init__``).
    """

    n_samples: int = 5000
    dt: float = 0.1
    r_min: float = -2.0
    r_max: float = 2.0
    k_bins: int = 16
    spring_k: float = 5.0
    sigma: float = 1.0
    eq_interval: int = 100
    m0: float = 0.25
    sigma_eq: float = 0.0
    lookahead: int = 10

    def __post_init__(self) -> None:
        """Fail fast on values that would otherwise break generation later.

        Raises:
            ValueError: If any of the checks below fails.
        """
        if self.n_samples < 0:
            raise ValueError(f"n_samples must be >= 0, got {self.n_samples}")
        if self.dt <= 0:
            # The diffusion term uses sqrt(dt); a negative step yields NaN.
            raise ValueError(f"dt must be > 0, got {self.dt}")
        if self.r_min >= self.r_max:
            # An empty or inverted range makes clipping and binning degenerate.
            raise ValueError(
                f"r_min must be < r_max, got r_min={self.r_min}, "
                f"r_max={self.r_max}"
            )
        if self.k_bins < 1:
            raise ValueError(f"k_bins must be >= 1, got {self.k_bins}")
        if self.eq_interval < 1:
            # The interval is used as a modulus; zero would divide by zero.
            raise ValueError(
                f"eq_interval must be >= 1, got {self.eq_interval}"
            )
        if self.sigma_eq < 0:
            # Used as the standard deviation of a normal draw.
            raise ValueError(f"sigma_eq must be >= 0, got {self.sigma_eq}")
        if self.lookahead < 0:
            raise ValueError(f"lookahead must be >= 0, got {self.lookahead}")
45
-
46
-
47
class SyntheticDataGenerator:
    """Generate synthetic stochastic time-series data.

    Uses a discretized Langevin equation for a harmonic oscillator:
        dR/dt = k * (m_t - R) + sigma * noise

    Each simulated value is clipped to ``[r_min, r_max]``, snapped to the
    nearest of K bin centers, and the bin index is encoded as a fixed-width
    binary string (MSB first).
    """

    def __init__(self, config: Optional["GeneratorConfig"] = None):
        """Initialize generator with configuration.

        Args:
            config: Generator configuration. Uses defaults if None.
        """
        # Explicit None check so a caller-supplied config is never replaced
        # merely because it happens to evaluate as falsy.
        self.config = config if config is not None else GeneratorConfig()
        self._setup_bins()

    def _setup_bins(self) -> None:
        """Set up bin edges, bin centers and the binary code width.

        Raises:
            ValueError: If ``k_bins`` is smaller than 1.
        """
        cfg = self.config
        if cfg.k_bins < 1:
            raise ValueError(f"k_bins must be >= 1, got {cfg.k_bins}")
        self.bin_edges = np.linspace(cfg.r_min, cfg.r_max, cfg.k_bins + 1)
        self.bin_centers = (self.bin_edges[:-1] + self.bin_edges[1:]) / 2
        # Bits needed for the largest index (k_bins - 1), computed with exact
        # integer arithmetic. At least one bit is kept so that a single-bin
        # setup still produces a one-character code and one bit column.
        self.n_bits = max(1, (int(cfg.k_bins) - 1).bit_length())

    def round_to_nearest_bin(self, r_continuous: float) -> tuple[int, float]:
        """Round a continuous R value to the nearest bin.

        Args:
            r_continuous: Continuous R value.

        Returns:
            Tuple of (bin_index, bin_center_value) as built-in ``int`` and
            ``float``. On an exact tie the lower bin index is returned
            (``np.argmin`` picks the first minimum).
        """
        idx = int(np.argmin(np.abs(self.bin_centers - r_continuous)))
        return idx, float(self.bin_centers[idx])

    @staticmethod
    def bin_index_to_binary(idx: int, n_bits: int) -> str:
        """Convert bin index to binary string (MSB first).

        Args:
            idx: Bin index.
            n_bits: Minimum number of bits; the result is zero-padded on the
                left to this width.

        Returns:
            Binary string representation.
        """
        return format(idx, f"0{n_bits}b")

    @staticmethod
    def binary_to_list(binary_str: str) -> list[int]:
        """Convert binary string to list of integers.

        Args:
            binary_str: Binary string representation.

        Returns:
            List of bit values (0 or 1), in the same order as the string.
        """
        return [int(bit) for bit in binary_str]

    def _update_equilibrium(self, m_prev: float) -> float:
        """Randomly move equilibrium and clip to valid range.

        Args:
            m_prev: Current equilibrium value.

        Returns:
            ``m_prev`` shifted by a normal draw with standard deviation
            ``sigma_eq``, clipped to ``[r_min, r_max]``.
        """
        cfg = self.config
        # The draw is made even when sigma_eq is 0 so the random stream does
        # not depend on the configuration.
        m_new = m_prev + np.random.normal(0.0, cfg.sigma_eq)
        return float(np.clip(m_new, cfg.r_min, cfg.r_max))

    def _step_r(self, r_prev: float, m_t: float) -> float:
        """Advance R by one step of the discretized Langevin equation.

        Args:
            r_prev: Value of R at the previous step.
            m_t: Equilibrium value for this step.

        Returns:
            The new value of R, clipped to ``[r_min, r_max]``.
        """
        cfg = self.config
        drift = cfg.spring_k * (m_t - r_prev) * cfg.dt
        diffusion = cfg.sigma * np.sqrt(cfg.dt) * np.random.normal()
        return float(np.clip(r_prev + drift + diffusion, cfg.r_min, cfg.r_max))

    @staticmethod
    def _forward_looking_decision(r_current: float, r_future: float) -> int:
        """Decision rule based on forward return.

        Args:
            r_current: Value at the current step.
            r_future: Value at the look-ahead step.

        Returns:
            1 if future >= current, else 0.
        """
        return 1 if (r_future - r_current) >= 0 else 0

    def generate(self, seed: Optional[int] = None) -> tuple[pd.DataFrame, pd.DataFrame]:
        """Generate synthetic data.

        Note:
            When ``seed`` is given, NumPy's global random state is reseeded
            via ``np.random.seed``; this affects other users of
            ``np.random`` in the same process.

        Args:
            seed: Random seed for reproducibility.

        Returns:
            Tuple of (full_dataframe, simplified_dataframe):
            - full_dataframe: One row per time step with columns ``t``,
              ``R_continuous``, ``R``, ``R_bin_index``, ``R_binary``,
              ``equilibrium``, ``x`` and ``R_bit_<i>``. ``x`` is NaN for
              the last ``lookahead`` rows, which have no future value.
            - simplified_dataframe: One row per step that has a future
              value, with columns ``R_t_bit_<i>``,
              ``R_t+<lookahead>_bit_<i>`` and ``x``.
        """
        if seed is not None:
            np.random.seed(seed)

        cfg = self.config
        n_bits = self.n_bits
        n_samples = cfg.n_samples
        lookahead = cfg.lookahead

        # Per-step storage
        r_values = []
        r_discretized = []
        r_bin_indices = []
        r_binary_strings = []
        r_binary_lists = []
        m_values = []

        r_t = 0.0
        m_t = cfg.m0

        # Simulation loop. The order of random draws (equilibrium update
        # first, then the R step) determines the output for a given seed.
        for t in range(n_samples):
            if t % cfg.eq_interval == 0 and t > 0:
                m_t = self._update_equilibrium(m_t)

            r_t = self._step_r(r_t, m_t)
            bin_idx, r_disc = self.round_to_nearest_bin(r_t)
            binary_str = self.bin_index_to_binary(bin_idx, n_bits)

            r_values.append(r_t)
            r_discretized.append(r_disc)
            r_bin_indices.append(bin_idx)
            r_binary_strings.append(binary_str)
            r_binary_lists.append(self.binary_to_list(binary_str))
            m_values.append(m_t)

        # Decision variable: compares discretized values `lookahead` steps
        # apart; undefined (NaN) where the future step is past the end.
        x_values = [
            self._forward_looking_decision(
                r_discretized[t], r_discretized[t + lookahead]
            )
            if t + lookahead < n_samples
            else np.nan
            for t in range(n_samples)
        ]

        # Build full dataframe
        df = pd.DataFrame(
            {
                "t": np.arange(n_samples),
                "R_continuous": r_values,
                "R": r_discretized,
                "R_bin_index": r_bin_indices,
                "R_binary": r_binary_strings,
                "equilibrium": m_values,
                "x": x_values,
            }
        )

        for i in range(n_bits):
            df[f"R_bit_{i}"] = [bits[i] for bits in r_binary_lists]

        # Build simplified dataframe (binary only). The future-value columns
        # are named after the configured lookahead rather than a fixed "10".
        future_prefix = f"R_t+{lookahead}"
        dataframe_rows = []
        for t in range(n_samples - lookahead):
            bits_now = r_binary_lists[t]
            bits_future = r_binary_lists[t + lookahead]
            row = {f"R_t_bit_{i}": bits_now[i] for i in range(n_bits)}
            row.update(
                {f"{future_prefix}_bit_{i}": bits_future[i] for i in range(n_bits)}
            )
            row["x"] = x_values[t]
            dataframe_rows.append(row)

        simplified_df = pd.DataFrame(dataframe_rows)

        return df, simplified_df

    def print_info(self) -> None:
        """Print the bin count, bit width and the index-to-binary mapping."""
        cfg = self.config
        n_bits = self.n_bits

        print("=" * 60)
        print("BINARY ENCODING INFO")
        print("=" * 60)
        print(f"Number of bins (K): {cfg.k_bins}")
        print(f"Number of bits needed: {n_bits}")
        print("\nBin index to binary mapping:")
        for i in range(cfg.k_bins):
            print(
                f" Bin {i}: {self.bin_index_to_binary(i, n_bits)} -> "
                f"R = {self.bin_centers[i]:.4f}"
            )