flux-bootstrap 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.3
2
+ Name: flux-bootstrap
3
+ Version: 0.1.0
4
+ Requires-Dist: aiohttp>=3.13.3
5
+ Requires-Dist: aiofiles>=25.1.0
6
+ Requires-Dist: pyyaml>=6.0.3
7
+ Requires-Dist: setproctitle>=1.3.7
8
+ Requires-Dist: click>=8.3.1
9
+ Requires-Python: >=3.13
@@ -0,0 +1,45 @@
1
# Project metadata and runtime dependencies (PEP 621).
[project]
name = "flux-bootstrap"
version = "0.1.0"
requires-python = ">=3.13"
dependencies = [
    "aiohttp>=3.13.3",
    "aiofiles>=25.1.0",
    "pyyaml>=6.0.3",
    "setproctitle>=1.3.7",
    "click>=8.3.1",
]

# Console entry point: `flux-bootstrap` invokes flux_bootstrap.main:run.
[project.scripts]
flux-bootstrap = "flux_bootstrap.main:run"

# Development-only tools (PEP 735 dependency groups).
[dependency-groups]
dev = [
    "ruff",
    "ty",
]

# Tell uv this is an installable package, not just a script project.
[tool.uv]
package = true

# Built with the uv build backend.
[build-system]
requires = ["uv_build>=0.9.22,<0.10.0"]
build-backend = "uv_build"

# Lint/format configuration; target matches requires-python above.
[tool.ruff]
target-version = "py313"
line-length = 88

[tool.ruff.lint]
select = [
    "E",    # pycodestyle errors
    "W",    # pycodestyle warnings
    "F",    # pyflakes
    "I",    # isort
    "B",    # flake8-bugbear
    "C4",   # flake8-comprehensions
    "UP",   # pyupgrade
    "ARG",  # flake8-unused-arguments
    "SIM",  # flake8-simplify
    "TCH",  # flake8-type-checking
]
@@ -0,0 +1,34 @@
1
+ """flux-bootstrap - Flux blockchain bootstrap downloader.
2
+
3
+ This library provides an easy way to download and extract Flux blockchain
4
+ bootstrap files from CDN with progress tracking, resume support, and automatic
5
+ CDN failover.
6
+
7
+ Example:
8
+ >>> from flux_bootstrap import download_bootstrap_async
9
+ >>> import asyncio
10
+ >>>
11
+ >>> async def main():
12
+ ... def on_progress(progress):
13
+ ... print(f"Progress: {progress['percent']:.1f}%")
14
+ ... success = await download_bootstrap_async(
15
+ ... "/path/to/destination",
16
+ ... progress_callback=on_progress
17
+ ... )
18
+ ... return success
19
+ >>> asyncio.run(main())
20
+ """
21
+
22
+ from flux_bootstrap.api import (
23
+ ProgressInfo,
24
+ download_bootstrap,
25
+ download_bootstrap_async,
26
+ )
27
+
28
+ __all__ = [
29
+ "download_bootstrap",
30
+ "download_bootstrap_async",
31
+ "ProgressInfo",
32
+ ]
33
+
34
+ __version__ = "0.1.0"
@@ -0,0 +1,183 @@
1
+ """Public library API for flux-downloader."""
2
+
3
+ import asyncio
4
+ import multiprocessing
5
+ import typing
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+
9
+ from flux_bootstrap.data_structures import DEFAULT_API_URL, DEFAULT_CDN_URL
10
+ from flux_bootstrap.main import _download_bootstrap_impl
11
+
12
+
13
@dataclass
class ProgressInfo:
    """Progress information for download callbacks.

    Mirrors the dict payload documented for ``progress_callback`` in
    ``download_bootstrap_async`` (same keys, same meanings).

    Attributes:
        bytes_downloaded: Total bytes downloaded across all parts
        total_bytes: Total expected bytes
        percent: Download percentage (0-100)
        speed_mbps: Current download speed in Mbps
        cdn_server: Current CDN hostname (e.g., "cdn-1.runonflux.io")
        current_part: Current part being downloaded (1-indexed)
        total_parts: Total number of parts
        source: Source type ("cdn")
    """

    bytes_downloaded: int
    total_bytes: int
    percent: float
    speed_mbps: float
    cdn_server: str | None
    current_part: int
    total_parts: int
    source: str
36
+
37
+
38
+ async def download_bootstrap_async(
39
+ destination: str | Path,
40
+ *,
41
+ api_url: str | None = None,
42
+ cdn_url: str | None = None,
43
+ parts_dir: str | Path | None = None,
44
+ progress_callback: typing.Callable[[dict[str, typing.Any]], None] | None = None,
45
+ cancellation_event: asyncio.Event | None = None,
46
+ ) -> bool:
47
+ """Download and extract Flux bootstrap files (async API).
48
+
49
+ This is the primary async API for library use. Downloads blockchain bootstrap
50
+ files from CDN in parts, verifies with SHA256, and extracts to destination.
51
+
52
+ Args:
53
+ destination: Directory where bootstrap will be extracted
54
+ api_url: Optional API endpoint (default: https://cdn.runonflux.io/fluxd/api/latest_bootstrap)
55
+ cdn_url: Optional CDN base URL (default: https://cdn.runonflux.io)
56
+ parts_dir: Optional directory for part files (default: <destination>/bootstrap_parts)
57
+ progress_callback: Optional callback for progress updates. Called with dict containing:
58
+ - bytes_downloaded: int
59
+ - total_bytes: int
60
+ - percent: float (0-100)
61
+ - speed_mbps: float
62
+ - cdn_server: str | None
63
+ - current_part: int (1-indexed)
64
+ - total_parts: int
65
+ - source: str ("cdn")
66
+ cancellation_event: Optional asyncio.Event to signal cancellation
67
+
68
+ Returns:
69
+ True if download and extraction succeeded, False otherwise
70
+
71
+ Raises:
72
+ ValueError: If destination is invalid
73
+ RuntimeError: If download or extraction fails critically
74
+
75
+ Example:
76
+ >>> async def main():
77
+ ... def on_progress(progress):
78
+ ... print(f"Progress: {progress['percent']:.1f}%")
79
+ ... success = await download_bootstrap_async(
80
+ ... "/path/to/destination",
81
+ ... progress_callback=on_progress
82
+ ... )
83
+ ... return success
84
+ >>> asyncio.run(main())
85
+ """
86
+ # Validate and normalize destination
87
+ if not destination:
88
+ raise ValueError("destination cannot be empty")
89
+
90
+ dest_path = Path(destination).resolve()
91
+
92
+ # Set defaults
93
+ if api_url is None:
94
+ api_url = DEFAULT_API_URL
95
+
96
+ if cdn_url is None:
97
+ cdn_url = DEFAULT_CDN_URL
98
+
99
+ if parts_dir is None:
100
+ parts_dir_path = dest_path / "bootstrap_parts"
101
+ else:
102
+ parts_dir_path = Path(parts_dir).resolve()
103
+
104
+ # Create multiprocessing.Event for internal use (workers need it)
105
+ mp_shutdown_event = multiprocessing.Event()
106
+
107
+ # If user provided asyncio.Event, monitor it and bridge to multiprocessing.Event
108
+ monitor_task: asyncio.Task | None = None
109
+ if cancellation_event:
110
+
111
+ async def monitor_cancellation():
112
+ """Monitor asyncio.Event and forward to multiprocessing.Event."""
113
+ await cancellation_event.wait()
114
+ mp_shutdown_event.set()
115
+
116
+ monitor_task = asyncio.create_task(monitor_cancellation())
117
+
118
+ try:
119
+ # Call internal implementation with library mode flags
120
+ result = await _download_bootstrap_impl(
121
+ destination=dest_path,
122
+ parts_dir=parts_dir_path,
123
+ api_url=api_url,
124
+ cdn_url=cdn_url,
125
+ shutdown_event=mp_shutdown_event,
126
+ setup_logging_flag=False, # Library mode: caller controls logging
127
+ set_process_title=False, # Library mode: don't change process title
128
+ progress_callback=progress_callback,
129
+ )
130
+ return result
131
+
132
+ finally:
133
+ # Cancel monitor task if it's still running
134
+ if monitor_task and not monitor_task.done():
135
+ monitor_task.cancel()
136
+ try:
137
+ await monitor_task
138
+ except asyncio.CancelledError:
139
+ pass
140
+
141
+
142
+ def download_bootstrap(
143
+ destination: str | Path,
144
+ **kwargs,
145
+ ) -> bool:
146
+ """Download and extract Flux bootstrap files (sync API).
147
+
148
+ Synchronous wrapper for download_bootstrap_async(). Uses asyncio.run() to
149
+ execute the async version.
150
+
151
+ Args:
152
+ destination: Directory where bootstrap will be extracted
153
+ **kwargs: Additional arguments passed to download_bootstrap_async()
154
+
155
+ Returns:
156
+ True if download and extraction succeeded, False otherwise
157
+
158
+ Raises:
159
+ ValueError: If destination is invalid
160
+ RuntimeError: If download or extraction fails critically, or if called
161
+ from within an existing event loop
162
+
163
+ Example:
164
+ >>> def on_progress(progress):
165
+ ... print(f"Progress: {progress['percent']:.1f}%")
166
+ >>> success = download_bootstrap(
167
+ ... "/path/to/destination",
168
+ ... progress_callback=on_progress
169
+ ... )
170
+ """
171
+ # Check if we're already in an event loop
172
+ try:
173
+ asyncio.get_running_loop()
174
+ raise RuntimeError(
175
+ "download_bootstrap() cannot be called from within an async "
176
+ "context. Use download_bootstrap_async() instead."
177
+ )
178
+ except RuntimeError as e:
179
+ if "no running event loop" not in str(e).lower():
180
+ raise
181
+
182
+ # Run async version
183
+ return asyncio.run(download_bootstrap_async(destination, **kwargs))
@@ -0,0 +1,250 @@
1
+ """
2
+ CDN failover management for flux-downloader.
3
+
4
+ Provides intelligent CDN failover when the main proxy CDN fails or is too slow.
5
+ Uses x-served-by header to identify which backend CDN served the request and
6
+ tries alternative backends.
7
+ """
8
+
9
+ import logging
10
+ import re
11
+
12
+
13
+ def extract_backend_from_header(header_value: str) -> str | None:
14
+ """
15
+ Parse x-served-by header to identify backend CDN.
16
+
17
+ Examples:
18
+ - "cdn-1.runonflux.io" → "cdn-1"
19
+ - "cache-cdn-2-xyz" → "cdn-2"
20
+ - "cdn-3" → "cdn-3"
21
+
22
+ Args:
23
+ header_value: Value of x-served-by or similar header
24
+
25
+ Returns:
26
+ Backend identifier ("cdn-1", "cdn-2", "cdn-3"), or None if not found
27
+ """
28
+ if not header_value:
29
+ return None
30
+
31
+ match = re.search(r"cdn-([123])", header_value.lower())
32
+ return f"cdn-{match.group(1)}" if match else None
33
+
34
+
35
+ class SlowDownloadError(Exception):
36
+ """
37
+ Raised when download speed is below threshold for too long.
38
+
39
+ This exception triggers CDN failover to try an alternative CDN.
40
+ """
41
+
42
+ def __init__(self, speed_mbps: float, served_by: str | None):
43
+ """
44
+ Initialize slow download error.
45
+
46
+ Args:
47
+ speed_mbps: Actual download speed in megabits per second
48
+ served_by: Backend CDN that served the request (if known)
49
+ """
50
+ self.speed_mbps = speed_mbps
51
+ self.served_by = served_by
52
+ super().__init__(
53
+ f"Download too slow: {speed_mbps:.2f} Mbps (served by: {served_by})"
54
+ )
55
+
56
+
57
+ class CDNFailoverStrategy:
58
+ """
59
+ Manages CDN failover strategy for a single part download.
60
+
61
+ Implements 2+1+1+1 attempt strategy:
62
+ - Phase 1 (proxy): 2 attempts on main proxy CDN
63
+ - Phase 2 (direct): 1 attempt each on 3 direct backend CDNs
64
+
65
+ Smart ordering: If proxy identified which backend served the request,
66
+ try that backend LAST in direct phase (other backends first).
67
+ """
68
+
69
+ def __init__(self, proxy_url: str, direct_urls: list[str]):
70
+ """
71
+ Initialize failover strategy.
72
+
73
+ Args:
74
+ proxy_url: Main proxy CDN URL (e.g., https://cdn.runonflux.io)
75
+ direct_urls: List of direct backend CDN URLs
76
+ """
77
+ self.proxy_url = proxy_url
78
+ self.direct_urls = direct_urls.copy() # Don't mutate original list
79
+
80
+ # State tracking
81
+ self.phase = "proxy" # "proxy" or "direct"
82
+ self.proxy_attempts = 0
83
+ self.direct_index = 0
84
+ self.last_served_by: str | None = None
85
+ self.direct_cdn_order: list[str] = []
86
+
87
+ # Attempt limits
88
+ self.max_proxy_attempts = 2
89
+ self.max_direct_attempts = len(direct_urls) # 1 per backend
90
+
91
+ def get_next_cdn(self) -> tuple[str | None, int, int]:
92
+ """
93
+ Get next CDN to try.
94
+
95
+ Returns:
96
+ Tuple of (cdn_url, attempt_number, max_attempts_for_this_cdn)
97
+ Returns (None, 0, 0) if all attempts exhausted
98
+ """
99
+ if self.phase == "proxy":
100
+ if self.proxy_attempts < self.max_proxy_attempts:
101
+ self.proxy_attempts += 1
102
+ return (self.proxy_url, self.proxy_attempts, self.max_proxy_attempts)
103
+ else:
104
+ # Transition to direct phase
105
+ self._initialize_direct_phase()
106
+ self.phase = "direct"
107
+ # Fall through to direct phase logic
108
+
109
+ if self.phase == "direct":
110
+ if self.direct_index < len(self.direct_cdn_order):
111
+ cdn_url = self.direct_cdn_order[self.direct_index]
112
+ self.direct_index += 1
113
+ # Each direct CDN gets 1 attempt
114
+ return (cdn_url, 1, 1)
115
+
116
+ # All attempts exhausted
117
+ return (None, 0, 0)
118
+
119
+ def _initialize_direct_phase(self) -> None:
120
+ """
121
+ Initialize direct CDN phase with smart ordering.
122
+
123
+ If last_served_by is known, put that backend LAST in the order.
124
+ Rationale: If proxy via cdn-2 failed, cdn-2 might be blocked for this user.
125
+ """
126
+ if not self.last_served_by:
127
+ # No backend identified, use default order
128
+ self.direct_cdn_order = self.direct_urls.copy()
129
+ return
130
+
131
+ # Find the backend URL that matches last_served_by
132
+ identified_backend_url = None
133
+ other_backends = []
134
+
135
+ for url in self.direct_urls:
136
+ backend_id = extract_backend_from_header(url)
137
+ if backend_id == self.last_served_by:
138
+ identified_backend_url = url
139
+ else:
140
+ other_backends.append(url)
141
+
142
+ # Smart ordering: try other backends first, identified backend last
143
+ if identified_backend_url:
144
+ self.direct_cdn_order = other_backends + [identified_backend_url]
145
+ logging.info(
146
+ f"CDN ordering: Trying {len(other_backends)} other backends before {self.last_served_by}"
147
+ )
148
+ else:
149
+ # Backend not found in direct URLs (shouldn't happen)
150
+ self.direct_cdn_order = self.direct_urls.copy()
151
+
152
+ def record_failure(
153
+ self, served_by_header: str | None, reason: str
154
+ ) -> None:
155
+ """
156
+ Record a failure for the current CDN attempt.
157
+
158
+ Args:
159
+ served_by_header: Value of x-served-by header (if available)
160
+ reason: Human-readable failure reason (for logging)
161
+ """
162
+ # Extract backend identifier from header
163
+ if served_by_header and self.phase == "proxy":
164
+ backend_id = extract_backend_from_header(served_by_header)
165
+ if backend_id:
166
+ self.last_served_by = backend_id
167
+ logging.debug(f"Identified backend from proxy: {backend_id}")
168
+
169
+ logging.debug(
170
+ f"CDN failure in {self.phase} phase: {reason} "
171
+ f"(served_by: {served_by_header or 'unknown'})"
172
+ )
173
+
174
+ def get_phase_summary(self) -> str:
175
+ """
176
+ Get human-readable summary of current phase.
177
+
178
+ Returns:
179
+ Summary string for logging
180
+ """
181
+ if self.phase == "proxy":
182
+ return f"proxy phase ({self.proxy_attempts}/{self.max_proxy_attempts})"
183
+ else:
184
+ return f"direct phase ({self.direct_index}/{len(self.direct_cdn_order)})"
185
+
186
+
187
class CDNManager:
    """Holds CDN configuration and hands out failover strategies.

    Bridges the main proxy CDN and the direct backend CDNs.
    """

    def __init__(
        self,
        proxy_url: str = "https://cdn.runonflux.io",
        direct_urls: list[str] | None = None,
    ):
        """Configure the manager.

        Args:
            proxy_url: Main proxy CDN URL
            direct_urls: List of direct backend CDN URLs (defaults to cdn-1/2/3)
        """
        self.proxy_url = proxy_url

        if direct_urls is not None:
            self.direct_urls = direct_urls
        else:
            # Standard Flux CDN backends.
            self.direct_urls = [
                "https://cdn-1.runonflux.io",
                "https://cdn-2.runonflux.io",
                "https://cdn-3.runonflux.io",
            ]

        logging.debug(
            f"CDN Manager initialized: proxy={proxy_url}, "
            f"direct={len(self.direct_urls)} backends"
        )

    def create_failover_strategy(self) -> CDNFailoverStrategy:
        """Build an independent failover strategy for one part download.

        Each part download gets its own instance so attempt state is
        never shared between parts.

        Returns:
            New CDNFailoverStrategy instance
        """
        return CDNFailoverStrategy(self.proxy_url, self.direct_urls)

    def get_proxy_url(self) -> str:
        """Return the main proxy CDN URL."""
        return self.proxy_url

    def get_direct_urls(self) -> list[str]:
        """Return a copy of the direct backend CDN URLs."""
        return self.direct_urls.copy()
@@ -0,0 +1,50 @@
1
+ """Data structures and constants for flux-downloader."""
2
+
3
+ from dataclasses import dataclass
4
+
5
+ # Download and streaming constants
6
+ DOWNLOAD_CHUNK_SIZE = 16 * 1024 * 1024 # 16MB chunks for HTTP streaming to disk
7
+ FIFO_CHUNK_SIZE = 64 * 1024 # 64KB chunks for FIFO writing (tar needs smaller chunks)
8
+ MAX_CONCURRENT_DOWNLOADS = 2 # Max concurrent part downloads
9
+ MAX_DOWNLOAD_ATTEMPTS = 3 # Max attempts per download (1 initial + 2 retries)
10
+ RETRY_DELAY_SECONDS = 5 # Delay between retry attempts
11
+ MAX_UNVERIFIED_PARTS = 4 # Max parts sent to worker but not yet verified
12
+
13
+ # Timeout configuration
14
+ CONNECT_TIMEOUT_SECONDS = 10 # Connection timeout (reduced from 30 for faster CDN failover)
15
+ SOCK_READ_TIMEOUT_SECONDS = 300 # Socket read timeout (5 minutes)
16
+
17
+ # Speed monitoring for CDN failover
18
+ MIN_SPEED_MBPS = 1.0 # Minimum acceptable speed in megabits per second
19
+ SPEED_CHECK_WINDOW_SECONDS = 120 # Speed monitoring window (2 minutes)
20
+
21
+ # File paths
22
+ DEFAULT_API_URL = "https://cdn.runonflux.io/fluxd/api/latest_bootstrap"
23
+ DEFAULT_CDN_URL = "https://cdn.runonflux.io"
24
+
25
+ # CDN failover configuration
26
+ DIRECT_CDN_URLS = [
27
+ "https://cdn-1.runonflux.io",
28
+ "https://cdn-2.runonflux.io",
29
+ "https://cdn-3.runonflux.io",
30
+ ]
31
+
32
+
33
+ @dataclass
34
+ class PartNotification:
35
+ """Notification sent from main process to worker when a part is complete."""
36
+
37
+ part_id: int # Part number (0-indexed)
38
+ filepath: str # Path to the downloaded part file
39
+ expected_sha256: str # Expected SHA256 checksum
40
+ size: int # Size in bytes
41
+ already_verified: bool = False # True if part already verified (resume scenario)
42
+
43
+
44
+ @dataclass
45
+ class VerificationResult:
46
+ """Result sent from worker back to main process after SHA256 verification."""
47
+
48
+ part_id: int # Part number (0-indexed)
49
+ verified: bool # True if SHA256 matched, False otherwise
50
+ actual_sha256: str # Actual SHA256 computed (for logging)