firecloud-devnet 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fc_mlops/__init__.py +3 -0
- fc_mlops/__main__.py +5 -0
- fc_mlops/anomaly.py +112 -0
- fc_mlops/artifact_store.py +111 -0
- fc_mlops/cli.py +190 -0
- fc_mlops/simulate_failure.py +100 -0
- fc_mlops/telemetry.py +72 -0
- fc_rag/__init__.py +3 -0
- fc_rag/cli.py +51 -0
- fc_rag/config.py +24 -0
- fc_rag/embedder.py +62 -0
- fc_rag/indexer.py +121 -0
- fc_rag/query_engine.py +79 -0
- fc_rag/requirements.txt +6 -0
- fc_rag/retriever.py +46 -0
- firecloud/__init__.py +17 -0
- firecloud/chunker.py +122 -0
- firecloud/cli.py +540 -0
- firecloud/crypto.py +269 -0
- firecloud/discovery.py +164 -0
- firecloud/distributor.py +269 -0
- firecloud/exceptions.py +41 -0
- firecloud/fec.py +87 -0
- firecloud/manifest.py +263 -0
- firecloud/network.py +90 -0
- firecloud/node.py +562 -0
- firecloud/storage.py +146 -0
- firecloud/sync.py +277 -0
- firecloud/transport.py +387 -0
- firecloud_devnet-0.1.0.dist-info/METADATA +158 -0
- firecloud_devnet-0.1.0.dist-info/RECORD +34 -0
- firecloud_devnet-0.1.0.dist-info/WHEEL +4 -0
- firecloud_devnet-0.1.0.dist-info/entry_points.txt +4 -0
- firecloud_devnet-0.1.0.dist-info/licenses/LICENSE +21 -0
firecloud/crypto.py
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
"""FireCloud cryptographic engine.
|
|
2
|
+
|
|
3
|
+
Provides chunk-level authenticated encryption (XChaCha20-Poly1305),
|
|
4
|
+
convergent chunk addressing (HMAC-SHA-256), integrity verification
|
|
5
|
+
(SHA-256), passphrase-protected keystore (scrypt + AES-256-GCM),
|
|
6
|
+
and HKDF-based sub-key derivation.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import hashlib
|
|
10
|
+
import hmac
|
|
11
|
+
|
|
12
|
+
from Crypto.Cipher import AES, ChaCha20_Poly1305
|
|
13
|
+
from Crypto.Random import get_random_bytes
|
|
14
|
+
from cryptography.hazmat.primitives import hashes
|
|
15
|
+
from cryptography.hazmat.primitives.kdf.hkdf import HKDF
|
|
16
|
+
from cryptography.hazmat.primitives.kdf.scrypt import Scrypt
|
|
17
|
+
|
|
18
|
+
from firecloud.exceptions import ChunkCorruptError, NetworkKeyError
|
|
19
|
+
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
# Constants
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
_XCHACHA_NONCE_LEN = 24 # XChaCha20-Poly1305 nonce length in bytes; prefixed to every encrypted chunk
|
|
25
|
+
_AES_GCM_NONCE_LEN = 12 # AES-256-GCM nonce length in bytes; stored after the salt in the keystore blob
|
|
26
|
+
_SCRYPT_SALT_LEN = 16  # random scrypt salt length in bytes; first field of the keystore blob
|
|
27
|
+
_KEY_LEN = 32 # 256-bit keys throughout (network key, scrypt output, HKDF default length)
|
|
28
|
+
|
|
29
|
+
# Scrypt cost parameters (interactive-grade, fast for dev/test).
# They are passed as n / r / p to Scrypt in _derive_scrypt_key.
# NOTE(review): changing any of them makes existing keystore blobs
# undecryptable, because the parameters are not stored in the blob.
|
|
30
|
+
_SCRYPT_N = 2**14
|
|
31
|
+
_SCRYPT_R = 8
|
|
32
|
+
_SCRYPT_P = 1
|
|
33
|
+
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
# Chunk encryption / decryption
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def encrypt_chunk(plaintext: bytes, key: bytes) -> bytes:
    """Seal *plaintext* with XChaCha20-Poly1305 under *key*.

    A fresh random nonce is drawn on every call, so the nonce is shipped
    in front of the ciphertext to make the blob self-contained.

    Args:
        plaintext: Arbitrary-length data (may be empty).
        key: 32-byte symmetric key.

    Returns:
        ``nonce (24 B) || ciphertext || auth_tag (16 B)``
    """
    fresh_nonce = get_random_bytes(_XCHACHA_NONCE_LEN)
    sealed, auth_tag = ChaCha20_Poly1305.new(
        key=key, nonce=fresh_nonce
    ).encrypt_and_digest(plaintext)
    return b"".join((fresh_nonce, sealed, auth_tag))
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def decrypt_chunk(encrypted: bytes, key: bytes) -> bytes:
    """Open a blob produced by :func:`encrypt_chunk`.

    Args:
        encrypted: ``nonce (24 B) || ciphertext || auth_tag (16 B)``
        key: 32-byte symmetric key (must match the encryption key).

    Returns:
        The original plaintext bytes.

    Raises:
        ChunkCorruptError: The blob is shorter than nonce + tag, or the
            cipher rejected it (wrong key, truncated data, or tampered
            ciphertext).
    """
    tag_len = 16
    total = len(encrypted)
    if total < _XCHACHA_NONCE_LEN + tag_len:
        raise ChunkCorruptError(
            "Encrypted payload too short to contain nonce + auth tag"
        )

    # Layout: [0, nonce) | [nonce, total - tag) | [total - tag, total)
    tag_start = total - tag_len
    nonce = encrypted[:_XCHACHA_NONCE_LEN]
    sealed = encrypted[_XCHACHA_NONCE_LEN:tag_start]
    auth_tag = encrypted[tag_start:]

    try:
        opener = ChaCha20_Poly1305.new(key=key, nonce=nonce)
        recovered = opener.decrypt_and_verify(sealed, auth_tag)
    except (ValueError, KeyError) as exc:
        raise ChunkCorruptError(
            "Chunk authentication failed — data may be corrupt or the key is wrong"
        ) from exc
    else:
        return recovered
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# ---------------------------------------------------------------------------
|
|
91
|
+
# Chunk addressing & integrity
|
|
92
|
+
# ---------------------------------------------------------------------------
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def derive_chunk_id(plaintext: bytes, hmac_key: bytes) -> str:
    """Return the keyed address of a chunk (HMAC-SHA-256, hex-encoded).

    The same ``(plaintext, hmac_key)`` pair always maps to the same
    address, while the address cannot be predicted without *hmac_key*,
    which prevents offline content guessing attacks.

    Args:
        plaintext: The chunk data.
        hmac_key: 32-byte HMAC key derived from the network key.

    Returns:
        Hex-encoded HMAC-SHA-256 digest (64 hex chars).
    """
    mac = hmac.digest(hmac_key, plaintext, hashlib.sha256)
    return mac.hex()
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def compute_integrity_hash(plaintext: bytes) -> str:
    """Return the SHA-256 digest used for post-decryption verification.

    Args:
        plaintext: The chunk data.

    Returns:
        Hex-encoded SHA-256 digest (64 hex chars).
    """
    hasher = hashlib.sha256()
    hasher.update(plaintext)
    return hasher.hexdigest()
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# ---------------------------------------------------------------------------
|
|
125
|
+
# Key generation
|
|
126
|
+
# ---------------------------------------------------------------------------
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def generate_network_key() -> bytes:
    """Create a new random network key.

    Returns:
        ``_KEY_LEN`` (32) bytes drawn from ``get_random_bytes``.
    """
    network_key = get_random_bytes(_KEY_LEN)
    return network_key
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# ---------------------------------------------------------------------------
|
|
139
|
+
# Keystore (passphrase-protected key wrapping)
|
|
140
|
+
# ---------------------------------------------------------------------------
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def encrypt_keystore(key: bytes, passphrase: str) -> bytes:
    """Wrap a network key under a passphrase (scrypt + AES-256-GCM).

    A random salt feeds scrypt to turn the passphrase into a wrapping
    key; the network key is then sealed with AES-GCM under a random
    nonce.  Both random values travel with the blob.

    Wire format::

        salt (16 B) || nonce (12 B) || ciphertext || tag (16 B)

    Args:
        key: The 32-byte network key to protect.
        passphrase: User-supplied passphrase.

    Returns:
        The encrypted keystore blob.
    """
    kdf_salt = get_random_bytes(_SCRYPT_SALT_LEN)
    kek = _derive_scrypt_key(passphrase, kdf_salt)

    gcm_nonce = get_random_bytes(_AES_GCM_NONCE_LEN)
    wrapped, auth_tag = AES.new(
        kek, AES.MODE_GCM, nonce=gcm_nonce
    ).encrypt_and_digest(key)

    return b"".join((kdf_salt, gcm_nonce, wrapped, auth_tag))
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def decrypt_keystore(encrypted: bytes, passphrase: str) -> bytes:
    """Unwrap a keystore blob produced by :func:`encrypt_keystore`.

    Args:
        encrypted: The encrypted keystore blob
            (``salt || nonce || ciphertext || tag``).
        passphrase: User-supplied passphrase.

    Returns:
        The 32-byte network key.

    Raises:
        NetworkKeyError: The blob is too short, or AES-GCM verification
            failed (wrong passphrase or corrupt keystore data).
    """
    tag_len = 16
    nonce_start = _SCRYPT_SALT_LEN
    body_start = nonce_start + _AES_GCM_NONCE_LEN
    if len(encrypted) < body_start + tag_len:  # salt+nonce+tag
        raise NetworkKeyError("Keystore data is too short or corrupt")

    tag_start = len(encrypted) - tag_len
    kdf_salt = encrypted[:nonce_start]
    gcm_nonce = encrypted[nonce_start:body_start]
    wrapped = encrypted[body_start:tag_start]
    auth_tag = encrypted[tag_start:]

    kek = _derive_scrypt_key(passphrase, kdf_salt)

    try:
        unwrapper = AES.new(kek, AES.MODE_GCM, nonce=gcm_nonce)
        network_key = unwrapper.decrypt_and_verify(wrapped, auth_tag)
    except (ValueError, KeyError) as exc:
        raise NetworkKeyError(
            "Failed to decrypt keystore — wrong passphrase or corrupt data"
        ) from exc
    else:
        return network_key
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
# ---------------------------------------------------------------------------
|
|
204
|
+
# HKDF sub-key derivation
|
|
205
|
+
# ---------------------------------------------------------------------------
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def derive_auth_token(key: bytes) -> bytes:
    """Expand *key* into the authentication token via HKDF-SHA256.

    Args:
        key: 32-byte network key (input keying material).

    Returns:
        32-byte derived token, bound to the ``firecloud-auth-token`` label.
    """
    label = b"firecloud-auth-token"
    return _hkdf_derive(key, info=label)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def derive_encryption_key(key: bytes) -> bytes:
    """Expand *key* into the encryption sub-key via HKDF-SHA256.

    Args:
        key: 32-byte network key (input keying material).

    Returns:
        32-byte derived key, bound to the ``firecloud-encryption-key`` label.
    """
    label = b"firecloud-encryption-key"
    return _hkdf_derive(key, info=label)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def derive_hmac_key(key: bytes) -> bytes:
    """Expand *key* into the HMAC sub-key via HKDF-SHA256.

    Args:
        key: 32-byte network key (input keying material).

    Returns:
        32-byte derived key, bound to the ``firecloud-hmac-key`` label.
    """
    label = b"firecloud-hmac-key"
    return _hkdf_derive(key, info=label)
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
# ---------------------------------------------------------------------------
|
|
245
|
+
# Internal helpers
|
|
246
|
+
# ---------------------------------------------------------------------------
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _hkdf_derive(ikm: bytes, *, info: bytes, length: int = _KEY_LEN) -> bytes:
    """Derive *length* bytes from *ikm* with salt-less HKDF-SHA256.

    The *info* label is what separates the different sub-keys derived
    from the same input keying material.
    """
    return HKDF(
        algorithm=hashes.SHA256(),
        length=length,
        salt=None,
        info=info,
    ).derive(ikm)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def _derive_scrypt_key(passphrase: str, salt: bytes) -> bytes:
    """Stretch *passphrase* into a 32-byte wrapping key with scrypt.

    The passphrase is UTF-8 encoded before derivation; cost parameters
    come from the module-level ``_SCRYPT_*`` constants.
    """
    secret = passphrase.encode("utf-8")
    stretcher = Scrypt(
        salt=salt,
        length=_KEY_LEN,
        n=_SCRYPT_N,
        r=_SCRYPT_R,
        p=_SCRYPT_P,
    )
    return stretcher.derive(secret)
|
firecloud/discovery.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""FireCloud Discovery Engine.
|
|
2
|
+
|
|
3
|
+
Implements mDNS peer discovery using zeroconf and config file-based peer listings
|
|
4
|
+
as a fallback for LAN discovery.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import json
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
import socket
|
|
11
|
+
from typing import Callable
|
|
12
|
+
|
|
13
|
+
from zeroconf import ServiceBrowser, ServiceInfo, ServiceListener, Zeroconf
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_local_ip() -> str:
    """Determine the local IPv4 address used for network traffic.

    A UDP socket is "connected" to an arbitrary private address and the
    local address the OS bound it to is read back.  Connecting a datagram
    socket only selects a route, so the target does not need to exist.

    Returns:
        The dotted-quad address of the outbound interface, or
        ``"127.0.0.1"`` when no socket can be created or no route is
        available.
    """
    try:
        # The context manager closes the socket on every path, including
        # a failing connect().
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as probe:
            probe.connect(("10.254.254.254", 1))
            return probe.getsockname()[0]
    except OSError:
        # Only socket/routing failures are expected here; anything else
        # is a programming error and should surface.
        return "127.0.0.1"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class FireCloudListener(ServiceListener):
    """mDNS Service Listener for FireCloud peers.

    Resolves announced ``_firecloud._tcp`` services and forwards peers that
    belong to the same network to the owning :class:`LANDiscovery`'s
    callbacks.  Service records come from arbitrary hosts on the LAN, so
    malformed records are dropped instead of raising inside the listener.
    """

    def __init__(self, discovery: "LANDiscovery") -> None:
        """Bind the listener to the discovery object that owns it.

        Args:
            discovery: Provides ``node_id``, ``network_id`` and the
                found/removed callbacks.
        """
        self.discovery = discovery

    def add_service(self, zc: Zeroconf, type_: str, name: str) -> None:
        """Handle a newly announced service."""
        self._resolve_and_process(zc, type_, name)

    def update_service(self, zc: Zeroconf, type_: str, name: str) -> None:
        """Handle a changed service record (treated like a new one)."""
        self._resolve_and_process(zc, type_, name)

    def remove_service(self, zc: Zeroconf, type_: str, name: str) -> None:
        """Report the node whose service disappeared.

        The node ID is the first label of the service instance name.
        """
        peer_node_id = name.split(".")[0]
        if self.discovery.on_removed_callback:
            self.discovery.on_removed_callback(peer_node_id)

    def _resolve_and_process(self, zc: Zeroconf, type_: str, name: str) -> None:
        """Resolve *name* and process the record if resolution succeeded."""
        info = zc.get_service_info(type_, name)
        if info:
            self._process_info(info)

    def _process_info(self, info: ServiceInfo) -> None:
        """Validate a resolved record and invoke the found-callback.

        The record is ignored when it is our own announcement, carries no
        (or an undecodable / foreign) ``network_id``, has no address, or
        has no port.
        """
        peer_node_id = info.name.split(".")[0]
        if peer_node_id == self.discovery.node_id:
            return  # Skip self

        # Parse properties
        net_id_bytes = info.properties.get(b"network_id")
        if not net_id_bytes:
            return

        try:
            net_id = net_id_bytes.decode("utf-8")
        except UnicodeDecodeError:
            # TXT records are attacker-controllable on a LAN; bytes that
            # are not valid UTF-8 cannot match our network ID.
            return
        if net_id != self.discovery.network_id:
            return  # Belong to a different network

        if not info.addresses:
            return

        port = info.port
        if port is None:
            return  # The callback contract requires an integer port

        # Convert IP address bytes to string
        ip = socket.inet_ntoa(info.addresses[0])

        if self.discovery.on_found_callback:
            self.discovery.on_found_callback(peer_node_id, ip, port)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class LANDiscovery:
    """Handles mDNS service registration and browsing on the LAN.

    ``start`` registers this node as ``<node_id>._firecloud._tcp.local.``
    and browses for other nodes; ``stop`` undoes both.  The blocking
    zeroconf calls run in the event loop's default executor.
    """

    def __init__(self, node_id: str, network_id: str, port: int) -> None:
        """Store identity and reset all zeroconf handles.

        Args:
            node_id: This node's ID; used as the service instance name.
            network_id: Advertised in the TXT record so peers can filter.
            port: TCP port advertised for this node.
        """
        self.node_id = node_id
        self.network_id = network_id
        self.port = port
        self.zeroconf: Zeroconf | None = None
        self.browser: ServiceBrowser | None = None
        self.service_info: ServiceInfo | None = None
        self.on_found_callback: Callable[[str, str, int], None] | None = None
        self.on_removed_callback: Callable[[str], None] | None = None

    async def start(self) -> None:
        """Register the node's mDNS service and start browsing for peers.

        Calling ``start`` while already started is a no-op.
        """
        # Use run_in_executor to avoid blocking the asyncio loop during Zeroconf init
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, self._sync_start)

    def _sync_start(self) -> None:
        """Blocking part of :meth:`start`.

        Handles are published on ``self`` only after every step succeeded;
        on failure the half-initialised ``Zeroconf`` instance is closed so
        its sockets are not leaked, and the error is re-raised.
        """
        if self.zeroconf is not None:
            # Already running: starting again would orphan the existing
            # Zeroconf instance and its registration.
            return

        zc = Zeroconf()
        try:
            info = ServiceInfo(
                "_firecloud._tcp.local.",
                f"{self.node_id}._firecloud._tcp.local.",
                addresses=[socket.inet_aton(get_local_ip())],
                port=self.port,
                properties={
                    b"version": b"0.1.0",
                    b"network_id": self.network_id.encode("utf-8"),
                },
            )
            zc.register_service(info)
            browser = ServiceBrowser(
                zc, "_firecloud._tcp.local.", FireCloudListener(self)
            )
        except Exception:
            zc.close()
            raise

        self.zeroconf = zc
        self.service_info = info
        self.browser = browser

    async def stop(self) -> None:
        """Stop browsing and unregister the service."""
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, self._sync_stop)

    def _sync_stop(self) -> None:
        """Blocking part of :meth:`stop`.

        State is cleared up front and ``close()`` runs in a ``finally``
        block, so a failing ``cancel``/``unregister_service`` can neither
        leak the Zeroconf sockets nor leave stale handles behind.  The
        original error still propagates to the caller.
        """
        browser, self.browser = self.browser, None
        zc, self.zeroconf = self.zeroconf, None
        info, self.service_info = self.service_info, None

        try:
            if browser:
                browser.cancel()
            if zc and info:
                zc.unregister_service(info)
        finally:
            if zc:
                zc.close()

    def on_peer_found(self, callback: Callable[[str, str, int], None]) -> None:
        """Set the callback for when a peer is found.

        Args:
            callback: Called with ``(peer_node_id, ip, port)``.
        """
        self.on_found_callback = callback

    def on_peer_removed(self, callback: Callable[[str], None]) -> None:
        """Set the callback for when a peer is removed.

        Args:
            callback: Called with the removed peer's node ID.
        """
        self.on_removed_callback = callback
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class PeerConfig:
|
|
143
|
+
"""Manages static peer lists loaded from and saved to a config file."""
|
|
144
|
+
|
|
145
|
+
def load(self, path: Path | str) -> list[tuple[str, int]]:
|
|
146
|
+
"""Load static peer endpoints from a JSON file."""
|
|
147
|
+
path = Path(path)
|
|
148
|
+
if not path.exists():
|
|
149
|
+
return []
|
|
150
|
+
try:
|
|
151
|
+
with open(path, "r") as f:
|
|
152
|
+
data = json.load(f)
|
|
153
|
+
return [(item[0], int(item[1])) for item in data]
|
|
154
|
+
except Exception:
|
|
155
|
+
return []
|
|
156
|
+
|
|
157
|
+
def save(self, path: Path | str, peers: list[tuple[str, int]]) -> None:
|
|
158
|
+
"""Save a list of static peer endpoints to a JSON file."""
|
|
159
|
+
path = Path(path)
|
|
160
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
161
|
+
# Convert tuples to lists for standard JSON encoding
|
|
162
|
+
data = [[host, port] for host, port in peers]
|
|
163
|
+
with open(path, "w") as f:
|
|
164
|
+
json.dump(data, f, indent=4)
|
firecloud/distributor.py
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
"""FireCloud Distributor Engine.
|
|
2
|
+
|
|
3
|
+
Decides the placement of chunks (local, replicated, or erasure coded)
|
|
4
|
+
based on the network peer count, and retrieves them, performing
|
|
5
|
+
erasure coding reconstruction if nodes are offline.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import math
|
|
9
|
+
import struct
|
|
10
|
+
|
|
11
|
+
from firecloud import fec
|
|
12
|
+
from firecloud.crypto import derive_chunk_id, compute_integrity_hash
|
|
13
|
+
from firecloud.exceptions import ChunkNotFoundError
|
|
14
|
+
from firecloud.manifest import ChunkInfo
|
|
15
|
+
|
|
16
|
+
# Message type passed to conn.send_message() when asking a peer to store a
# chunk or FEC share (payload: chunk ID bytes followed by the data).
# NOTE(review): presumably must match the value the transport layer
# dispatches on — confirm against firecloud/transport.py.
|
|
17
|
+
MSG_STORE_CHUNK = 0x10
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Distributor:
    """Orchestrates chunk distribution and retrieval strategies.

    Three strategies exist, picked from the current node count:

    * ``"local"`` – single node, everything stays in the local store.
    * ``"replication"`` – every chunk goes to two nodes, round-robin.
    * ``"erasure_coding"`` – all chunks are packed into one payload that
      is FEC-encoded into shares, one share per node round-robin.

    NOTE(review): ``retrieve`` re-derives the strategy from the *current*
    peer list.  If the peer count crossed a threshold since ``distribute``
    ran, the placement infos will be interpreted with the wrong strategy;
    persisting the strategy in the manifest would remove that ambiguity.
    """

    def __init__(
        self,
        peers: list[str],
        local_node_id: str,
        fec_enabled: bool = True,
        fec_threshold: int = 5,
    ) -> None:
        """Initialize the distributor.

        Args:
            peers: List of active peer node IDs.
            local_node_id: The ID of this local node.
            fec_enabled: Whether FEC is allowed.
            fec_threshold: The node count threshold to use FEC.
        """
        self.peers = peers
        self.local_node_id = local_node_id
        self.fec_enabled = fec_enabled
        self.fec_threshold = fec_threshold

    def get_strategy(self) -> str:
        """Determine the distribution strategy based on the node count.

        Returns:
            ``"local"``, ``"erasure_coding"`` or ``"replication"``.
        """
        total_nodes = len(self.peers) + 1  # peers plus this node
        if total_nodes < 2:
            return "local"
        if self.fec_enabled and total_nodes >= self.fec_threshold:
            return "erasure_coding"
        return "replication"

    async def distribute(self, chunks: list, transport) -> list[ChunkInfo]:
        """Distribute chunks across peers. Returns placement info.

        ``stored_on`` in the returned infos lists only nodes a store was
        actually issued to.  When none of the intended nodes is reachable
        the data is kept in the local store instead of being dropped.

        Args:
            chunks: List of Chunk objects containing encrypted data.
            transport: The transport client/manager containing connections.

        Returns:
            One ``ChunkInfo`` per chunk (local / replication) or one per
            FEC share (erasure coding).
        """
        strategy = self.get_strategy()
        all_nodes = [self.local_node_id] + self.peers

        if strategy == "erasure_coding":
            return await self._distribute_shares(chunks, all_nodes, transport)

        chunk_infos = []
        for c in chunks:
            if strategy == "local":
                targets = [self.local_node_id]
            else:
                # Store on 2 nodes (replication factor = 2) using round-robin
                targets = [
                    all_nodes[c.index % len(all_nodes)],
                    all_nodes[(c.index + 1) % len(all_nodes)],
                ]
            holders = await self._place(c.chunk_id, c.data, targets, transport)
            chunk_infos.append(
                ChunkInfo(
                    chunk_id=c.chunk_id,
                    integrity_hash=c.integrity_hash,
                    index=c.index,
                    size=len(c.data),
                    stored_on=holders,
                )
            )
        return chunk_infos

    async def retrieve(self, chunk_infos: list[ChunkInfo], transport) -> list[bytes]:
        """Retrieve chunks from peers, with FEC reconstruction if needed.

        Args:
            chunk_infos: List of ChunkInfo objects representing the placement of
                chunks or shares.
            transport: The transport client/manager containing connections.

        Returns:
            The encrypted chunk payloads in their original order.

        Raises:
            ChunkNotFoundError: A chunk could not be fetched from any node,
                too few FEC shares were available, or the reconstructed
                payload is malformed.
        """
        if self.get_strategy() != "erasure_coding":
            chunks_data = []
            for info in chunk_infos:
                chunk_data = await self._fetch(info, transport)
                if chunk_data is None:
                    raise ChunkNotFoundError(
                        f"Failed to retrieve chunk {info.chunk_id}"
                    )
                chunks_data.append(chunk_data)
            return chunks_data

        # erasure_coding strategy: chunk_infos are the N shares.
        n = len(chunk_infos)
        if n == 0:
            return []
        k = self._threshold_for(n)

        retrieved_shares = []  # list of (index, share_data)
        for info in chunk_infos:
            share_data = await self._fetch(info, transport)
            if share_data is not None:
                retrieved_shares.append((info.index, share_data))
            if len(retrieved_shares) >= k:
                break  # enough shares; skip the remaining round trips

        if len(retrieved_shares) < k:
            raise ChunkNotFoundError(
                f"Insufficient shares to reconstruct file: need {k}, got {len(retrieved_shares)}"
            )

        # Reconstruct original payload and split it back into chunks
        return self._split_payload(fec.decode(retrieved_shares, k))

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    async def _distribute_shares(
        self, chunks: list, all_nodes: list[str], transport
    ) -> list[ChunkInfo]:
        """FEC-encode all chunks into shares and spread them round-robin."""
        k = len(chunks)
        if k == 0:
            return []
        n = fec.compute_n(k)

        # Prepend a header containing the chunk count and each chunk's size
        header = struct.pack("!I", k) + b"".join(
            struct.pack("!I", len(c.data)) for c in chunks
        )
        payload = header + b"".join(c.data for c in chunks)

        # Encode into N shares
        shares = fec.encode(payload, k, n)
        hmac_key = transport.node.network.hmac_key

        chunk_infos = []
        for i, share_data in enumerate(shares):
            share_id = derive_chunk_id(share_data, hmac_key)
            holders = await self._place(
                share_id, share_data, [all_nodes[i % len(all_nodes)]], transport
            )
            # An info is recorded for every share: retrieve() derives K
            # from the number of infos and needs each share's index.
            chunk_infos.append(
                ChunkInfo(
                    chunk_id=share_id,
                    integrity_hash=compute_integrity_hash(share_data),
                    index=i,
                    size=len(share_data),
                    stored_on=holders,
                )
            )
        return chunk_infos

    async def _store_on(
        self, node_id: str, item_id: str, data: bytes, transport
    ) -> bool:
        """Issue a store on *node_id*; return False if it is unreachable."""
        if node_id == self.local_node_id:
            transport.node.chunk_store.store(item_id, data)
            return True
        conn = transport.node.connections.get(node_id)
        if not conn:
            return False
        await conn.send_message(MSG_STORE_CHUNK, item_id.encode("utf-8") + data)
        return True

    async def _place(
        self, item_id: str, data: bytes, targets: list[str], transport
    ) -> list[str]:
        """Store *data* on *targets* and return the nodes that took it.

        Falls back to the local store when no target was reachable, so a
        chunk or share is never silently dropped.
        """
        holders: list[str] = []
        for node_id in targets:
            if node_id in holders:
                continue
            if await self._store_on(node_id, item_id, data, transport):
                holders.append(node_id)
        if not holders:
            transport.node.chunk_store.store(item_id, data)
            holders.append(self.local_node_id)
        return holders

    async def _fetch(self, info: ChunkInfo, transport) -> bytes | None:
        """Fetch one chunk/share, or return None if no node has it.

        Order: the nodes named in ``stored_on``, then every other
        connected peer, then the local store as a last resort.  A falsy
        reply from a peer is treated as "not found".
        """
        store = transport.node.chunk_store
        connections = transport.node.connections
        asked = set()

        # Try primary stored nodes
        for node_id in info.stored_on:
            if node_id == self.local_node_id:
                if store.has(info.chunk_id):
                    return store.retrieve(info.chunk_id)
                continue
            conn = connections.get(node_id)
            if conn:
                asked.add(node_id)
                data = await conn.retrieve_chunk(info.chunk_id)
                if data:
                    return data

        # Fallback to other connections if primary is down
        for node_id, conn in connections.items():
            if node_id in asked:
                continue  # already answered "not found" a moment ago
            data = await conn.retrieve_chunk(info.chunk_id)
            if data:
                return data

        # Absolute local fallback
        if self.local_node_id not in info.stored_on and store.has(info.chunk_id):
            return store.retrieve(info.chunk_id)
        return None

    @staticmethod
    def _threshold_for(n: int) -> int:
        """Return the smallest K with ``ceil(K * 1.5) >= n``.

        NOTE(review): presumably the inverse of ``fec.compute_n`` — confirm
        the two stay in sync.
        """
        k = 1
        while math.ceil(k * 1.5) < n:
            k += 1
        return k

    @staticmethod
    def _split_payload(payload: bytes) -> list[bytes]:
        """Split a reconstructed FEC payload into the original chunks.

        Layout: ``count (u32) || size[count] (u32 each) || chunk data``,
        all big-endian.  Bytes after the last chunk are ignored.

        Raises:
            ChunkNotFoundError: The payload is shorter than its header
                claims.
        """
        if len(payload) < 4:
            raise ChunkNotFoundError(
                "Reconstructed payload is too short to contain a header"
            )
        (num_chunks,) = struct.unpack_from("!I", payload, 0)

        header_len = 4 + 4 * num_chunks
        if len(payload) < header_len:
            raise ChunkNotFoundError(
                "Reconstructed payload is truncated inside the size table"
            )
        sizes = struct.unpack_from(f"!{num_chunks}I", payload, 4)

        if header_len + sum(sizes) > len(payload):
            raise ChunkNotFoundError(
                "Reconstructed payload is shorter than the declared chunk sizes"
            )

        # Split payload into original encrypted chunks
        chunks_data = []
        offset = header_len
        for size in sizes:
            chunks_data.append(payload[offset : offset + size])
            offset += size
        return chunks_data
|