firecloud-devnet 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
firecloud/node.py ADDED
@@ -0,0 +1,562 @@
1
+ """FireCloud Node — orchestrates storage, transport, discovery, and sync.
2
+
3
+ The :class:`Node` is the primary user-facing object. It wires together
4
+ the chunk store, manifest, transport layer, mDNS discovery, and
5
+ distributor so that files can be uploaded, downloaded, deleted, and
6
+ synced across the LAN with a single method call.
7
+ """
8
+
9
+ import asyncio
10
+ import builtins
11
+ import json
12
+ import logging
13
+ import uuid
14
+ from dataclasses import asdict
15
+ from datetime import datetime, timezone
16
+ from pathlib import Path
17
+
18
+ from firecloud.chunker import Chunk, chunk_file, reassemble_chunks, compute_file_id
19
+ from firecloud.crypto import encrypt_chunk, decrypt_chunk, compute_integrity_hash
20
+ from firecloud.discovery import LANDiscovery
21
+ from firecloud.distributor import Distributor
22
+ from firecloud.exceptions import (
23
+ ChunkCorruptError,
24
+ )
25
+ from firecloud.manifest import FileEntry, Manifest
26
+ from firecloud.network import Network
27
+ from firecloud.storage import ChunkStore
28
+ from firecloud.transport import NodeClient, NodeServer, PeerConnection, MSG_SYNC_MANIFEST
29
+
30
+ logger = logging.getLogger("firecloud.node")  # module-level logger used by every Node method below
31
+
32
+
33
class Node:
    """Main FireCloud node that orchestrates all operations.

    Ties together:
    - :class:`~firecloud.storage.ChunkStore` for local chunk persistence
    - :class:`~firecloud.manifest.Manifest` for file metadata
    - :class:`~firecloud.transport.NodeServer` / :class:`~firecloud.transport.NodeClient`
      for peer-to-peer communication
    - :class:`~firecloud.discovery.LANDiscovery` for mDNS peer discovery
    - :class:`~firecloud.distributor.Distributor` for chunk placement
    """

    # Uploads ask the distributor for FEC once (local node + connected peers)
    # reaches this many nodes.
    FEC_MIN_NODES = 5
    # Seconds between heartbeat rounds.
    HEARTBEAT_INTERVAL = 30
    # Seconds between periodic manifest pushes.
    MANIFEST_SYNC_INTERVAL = 60

    def __init__(
        self,
        network: Network,
        storage_path: Path | str,
        port: int = 7474,
        max_storage: int | None = None,
        host: str = "0.0.0.0",
        node_id: str | None = None,
        enable_discovery: bool = True,
    ) -> None:
        """Initialise the node.

        Args:
            network: The :class:`~firecloud.network.Network` this node belongs to.
            storage_path: Root directory for chunk storage and metadata;
                created if it does not exist.
            port: TCP port to listen on.
            max_storage: Passed through to the chunk store as its byte limit.
                The meaning of ``None`` is defined by ``ChunkStore``.
            host: Interface address to bind the server to.
            node_id: Unique identifier for this node. When ``None`` the first
                16 hex characters of a random UUID4 are used.
            enable_discovery: Whether :meth:`start` should start mDNS discovery.
        """
        self.network = network
        self.storage_path = Path(storage_path)
        self.storage_path.mkdir(parents=True, exist_ok=True)
        self.port = port
        self.host = host
        self.enable_discovery = enable_discovery

        # Node identity
        self.node_id = node_id or uuid.uuid4().hex[:16]

        # Core subsystems — initialised eagerly so tests can inject mocks.
        chunks_dir = self.storage_path / "chunks"
        self.chunk_store = ChunkStore(chunks_dir, max_storage=max_storage)
        self.manifest = Manifest(self.storage_path)

        # Transport (created in start()).
        self._server: NodeServer | None = None
        self._client: NodeClient | None = None

        # Discovery (created in start() when enabled).
        self._discovery: LANDiscovery | None = None

        # Active peer connections keyed by peer node_id.
        self.connections: dict[str, PeerConnection] = {}

        # Known peer addresses: {node_id: (host, port)}
        self._known_peers: dict[str, tuple[str, int]] = {}

        # Periodic tasks
        self._heartbeat_task: asyncio.Task | None = None
        self._manifest_sync_task: asyncio.Task | None = None

        # Fire-and-forget tasks. The event loop only keeps weak references to
        # tasks, so we hold strong ones here until each task finishes.
        self._background_tasks: set[asyncio.Future] = set()

        # Running state
        self._running = False

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    async def start(self) -> None:
        """Start the node: server, client, discovery, heartbeat, and manifest sync.

        Calling this on an already running node is a no-op. A failure to
        start mDNS discovery is logged and otherwise ignored; a failure to
        start the TCP server propagates to the caller.
        """
        if self._running:
            return

        # Start TCP server; do not keep a half-started server around on failure.
        self._server = NodeServer(self, self.host, self.port)
        try:
            await self._server.start()
        except Exception:
            self._server = None
            raise

        # Start client
        self._client = NodeClient(self)

        # Start mDNS discovery (best effort)
        if self.enable_discovery:
            try:
                self._discovery = LANDiscovery(
                    self.node_id,
                    self.network.network_id,
                    self.port,
                )
                self._discovery.on_peer_found(self._on_peer_discovered)
                self._discovery.on_peer_removed(self._on_peer_removed)
                await self._discovery.start()
            except Exception as exc:
                logger.warning("mDNS discovery failed to start: %s", exc)
                self._discovery = None

        # The periodic loops poll ``_running``, so flip it before creating them.
        self._running = True
        self._heartbeat_task = asyncio.create_task(self._heartbeat_loop())
        self._manifest_sync_task = asyncio.create_task(self._manifest_sync_loop())

        logger.info(
            "Node %s started on %s:%s (network %s)",
            self.node_id,
            self.host,
            self.port,
            self.network.network_id,
        )

    async def stop(self) -> None:
        """Gracefully shut down the node.

        Cancels the periodic and background tasks, closes every peer
        connection, then stops the server and discovery. Calling this on a
        node that is not running is a no-op.
        """
        if not self._running:
            return
        self._running = False

        # Cancel periodic and background tasks and wait for them to unwind.
        # ``return_exceptions=True`` keeps one failing task from aborting the
        # shutdown, while cancellation of stop() itself still propagates.
        pending = [
            task
            for task in (
                self._heartbeat_task,
                self._manifest_sync_task,
                *self._background_tasks,
            )
            if task is not None and not task.done()
        ]
        for task in pending:
            task.cancel()
        if pending:
            await asyncio.gather(*pending, return_exceptions=True)
        self._heartbeat_task = None
        self._manifest_sync_task = None
        self._background_tasks.clear()

        # Close all peer connections (best effort)
        for peer_id, conn in list(self.connections.items()):
            try:
                await conn.close()
            except Exception as exc:
                logger.debug("Error closing connection to %s: %s", peer_id, exc)
        self.connections.clear()

        # Stop server
        if self._server:
            await self._server.stop()
            self._server = None

        # Stop discovery (best effort)
        if self._discovery:
            try:
                await self._discovery.stop()
            except Exception as exc:
                logger.debug("Error stopping mDNS discovery: %s", exc)
            self._discovery = None

        logger.info("Node %s stopped", self.node_id)

    # ------------------------------------------------------------------
    # File operations
    # ------------------------------------------------------------------

    async def upload(self, filepath: str | Path) -> str:
        """Upload a file to the network.

        Pipeline: read → chunk → encrypt → distribute → manifest.

        Args:
            filepath: Path to the local file to upload.

        Returns:
            The file_id produced by ``compute_file_id`` from the file and the
            network's HMAC key.

        Raises:
            builtins.FileNotFoundError: If *filepath* is not an existing file.

        Errors raised by the chunker, crypto layer, distributor or manifest
        (e.g. a storage-quota error) propagate unchanged.
        """
        filepath = Path(filepath)
        if not filepath.is_file():
            raise builtins.FileNotFoundError(f"File not found: {filepath}")

        hmac_key = self.network.hmac_key
        enc_key = self.network.encryption_key

        # 1. Compute the file-level ID
        file_id = compute_file_id(filepath, hmac_key)

        # 2. Chunk the file
        chunks = chunk_file(filepath, hmac_key)

        # 3. Encrypt each chunk; IDs and integrity hashes stay those of the
        #    plaintext chunk so download() can verify after decryption.
        encrypted_chunks = [
            Chunk(
                index=c.index,
                offset=c.offset,
                length=c.length,
                data=encrypt_chunk(c.data, enc_key),
                chunk_id=c.chunk_id,
                integrity_hash=c.integrity_hash,
            )
            for c in chunks
        ]

        # 4. Distribute
        peer_ids = list(self.connections)
        distributor = Distributor(
            peers=peer_ids,
            local_node_id=self.node_id,
            fec_enabled=len(peer_ids) + 1 >= self.FEC_MIN_NODES,
        )
        chunk_infos = await distributor.distribute(encrypted_chunks, self._client)

        # 5. Build manifest entry
        strategy = distributor.get_strategy()
        entry = FileEntry(
            file_id=file_id,
            name=filepath.name,
            size=filepath.stat().st_size,
            chunk_count=len(chunks),
            uploaded_at=datetime.now(timezone.utc).isoformat(),
            uploaded_by=self.node_id,
            chunks=chunk_infos,
            fec_enabled=(strategy == "erasure_coding"),
            replication_factor=2 if strategy == "replication" else 1,
        )
        self.manifest.add_file(entry)

        # 6. Sync manifest to peers
        await self._sync_manifest_to_peers()

        logger.info("Uploaded %s → %s", filepath.name, file_id)
        return file_id

    async def download(self, file_id: str, output: str | Path) -> None:
        """Download a file from the network.

        Pipeline: manifest → retrieve → decrypt → verify → reassemble → write.

        Args:
            file_id: The unique file identifier.
            output: Local path to write the reassembled file to. Missing
                parent directories are created.

        Raises:
            ChunkCorruptError: If a decrypted chunk's integrity hash does not
                match the manifest (checked only for non-FEC files).

        Errors from the manifest lookup, the distributor and decryption
        propagate unchanged.
        """
        output = Path(output)
        entry = self.manifest.get_file(file_id)

        enc_key = self.network.encryption_key

        # Retrieve encrypted chunks
        distributor = Distributor(
            peers=list(self.connections),
            local_node_id=self.node_id,
            fec_enabled=entry.fec_enabled,
        )
        encrypted_data_list = await distributor.retrieve(entry.chunks, self._client)

        # Decrypt and verify each chunk
        decrypted_chunks: list[Chunk] = []
        for i, enc_data in enumerate(encrypted_data_list):
            plaintext = decrypt_chunk(enc_data, enc_key)

            # When NOT using FEC, verify integrity against the manifest
            if not entry.fec_enabled and i < len(entry.chunks):
                expected_hash = entry.chunks[i].integrity_hash
                actual_hash = compute_integrity_hash(plaintext)
                if actual_hash != expected_hash:
                    raise ChunkCorruptError(
                        f"Integrity check failed for chunk {entry.chunks[i].chunk_id}"
                    )

            # Only index and data are filled in; the other fields are placeholders.
            decrypted_chunks.append(
                Chunk(
                    index=i,
                    offset=0,
                    length=len(plaintext),
                    data=plaintext,
                    chunk_id="",
                    integrity_hash="",
                )
            )

        # Reassemble and write
        reassembled = reassemble_chunks(decrypted_chunks)
        output.parent.mkdir(parents=True, exist_ok=True)
        output.write_bytes(reassembled)

        logger.info("Downloaded %s → %s", entry.name, output)

    async def delete(self, file_id: str) -> None:
        """Delete a file from the manifest and push the manifest to peers.

        Args:
            file_id: The file to delete.

        Errors raised by ``Manifest.delete_file`` propagate unchanged.
        """
        self.manifest.delete_file(file_id)
        await self._sync_manifest_to_peers()
        logger.info("Deleted file %s", file_id)

    def list_files(self) -> list[dict]:
        """Return the files reported by ``Manifest.list_files`` as plain dicts.

        Each dict contains: ``file_id``, ``name``, ``size``,
        ``chunk_count``, ``uploaded_at``, ``uploaded_by``,
        ``fec_enabled``, ``replication_factor``.
        """
        return [
            {
                "file_id": e.file_id,
                "name": e.name,
                "size": e.size,
                "chunk_count": e.chunk_count,
                "uploaded_at": e.uploaded_at,
                "uploaded_by": e.uploaded_by,
                "fec_enabled": e.fec_enabled,
                "replication_factor": e.replication_factor,
            }
            for e in self.manifest.list_files()
        ]

    # ------------------------------------------------------------------
    # Networking
    # ------------------------------------------------------------------

    async def connect(self, address: str) -> None:
        """Connect to a peer by ``host:port`` string.

        Args:
            address: Peer address in ``host:port`` format. A bracketed IPv6
                literal such as ``[::1]:7474`` is accepted.

        Raises:
            ValueError: If *address* is not ``host:port`` or the port is not
                an integer in 1..65535.
            RuntimeError: If no client exists yet (the node was not started).
        """
        host, sep, port_str = address.rpartition(":")
        # Strip the brackets of an IPv6 literal before handing it to the client.
        if host.startswith("[") and host.endswith("]"):
            host = host[1:-1]
        if not sep or not host:
            raise ValueError(
                f"Invalid peer address {address!r}; expected 'host:port'"
            )
        try:
            port = int(port_str)
        except ValueError:
            raise ValueError(
                f"Invalid port in peer address {address!r}"
            ) from None
        if not 0 < port < 65536:
            raise ValueError(f"Port out of range in peer address {address!r}")

        if self._client is None:
            raise RuntimeError("Node must be started before connecting to peers")

        peer_node_id = await self._client.connect(host, port)
        self._known_peers[peer_node_id] = (host, port)
        logger.info("Connected to peer %s at %s:%s", peer_node_id, host, port)

    def status(self) -> dict:
        """Return a status dict describing this node."""
        return {
            "node_id": self.node_id,
            "network_id": self.network.network_id,
            "host": self.host,
            "port": self.port,
            "running": self._running,
            "peers_connected": len(self.connections),
            "files_stored": len(self.manifest.list_files()),
            "chunks_stored": len(self.chunk_store.list_chunks()),
            "storage_used": self.chunk_store.used_bytes(),
            "storage_available": self.chunk_store.available_bytes(),
        }

    def peers(self) -> list[dict]:
        """Return known and connected peers, sorted by node id.

        Peers that are connected but have no recorded address are reported
        with host ``"unknown"`` and port ``0``.
        """
        result = []
        # Sorted so the output order is deterministic.
        for pid in sorted(set(self.connections) | set(self._known_peers)):
            addr = self._known_peers.get(pid)
            result.append({
                "node_id": pid,
                "host": addr[0] if addr else "unknown",
                "port": addr[1] if addr else 0,
                "connected": pid in self.connections,
            })
        return result

    # ------------------------------------------------------------------
    # Connection management (called by transport layer)
    # ------------------------------------------------------------------

    def register_connection(self, peer_node_id: str, conn: PeerConnection) -> None:
        """Register an active peer connection (called by transport)."""
        self.connections[peer_node_id] = conn
        logger.debug("Registered connection with peer %s", peer_node_id)

    def on_connection_closed(self, peer_node_id: str) -> None:
        """Handle a closed connection (called by PeerConnection).

        Drops the connection and, while the node is running, schedules
        re-replication of the chunks recorded as stored on that peer.
        """
        self.connections.pop(peer_node_id, None)
        logger.debug("Connection with peer %s closed", peer_node_id)
        if self._running:
            self._spawn_background(self._rereplicate_peer_chunks(peer_node_id))

    async def remove_node(self, node_id: str) -> None:
        """Explicitly remove a node from the network and trigger re-replication.

        Closes any connection to the node, forgets its address, and
        re-replicates the chunks the manifest records as stored on it.
        """
        conn = self.connections.pop(node_id, None)
        if conn:
            try:
                await conn.close()
            except Exception as exc:
                logger.debug("Error closing connection to %s: %s", node_id, exc)
        self._known_peers.pop(node_id, None)
        await self._rereplicate_peer_chunks(node_id)

    async def _rereplicate_peer_chunks(self, offline_node_id: str) -> None:
        """Scan manifest for chunks stored on the offline node and re-replicate them.

        Only replicated (non-FEC) files with a replication factor of at least
        two are considered. The manifest is pushed to peers once at the end
        if any chunk gained a replica.
        """
        active_peers = [pid for pid in self.connections if pid != offline_node_id]
        if not active_peers:
            logger.info("No active peers available for re-replication.")
            return

        # Placement preference: the local node first, then connected peers.
        all_nodes = [self.node_id, *active_peers]
        any_updated = False

        for entry in self.manifest.list_files():
            # FEC shares are not handled here; only plain replication is repaired.
            if entry.fec_enabled or entry.replication_factor < 2:
                continue

            updated = False
            for chunk_info in entry.chunks:
                if offline_node_id not in chunk_info.stored_on:
                    continue
                chunk_info.stored_on = [
                    nid for nid in chunk_info.stored_on if nid != offline_node_id
                ]
                if await self._restore_replicas(
                    chunk_info, entry.replication_factor, all_nodes
                ):
                    updated = True

            if updated:
                self.manifest.add_file(entry)
                any_updated = True

        if any_updated:
            await self._sync_manifest_to_peers()

    async def _restore_replicas(
        self, chunk_info, target_count: int, all_nodes: list[str]
    ) -> bool:
        """Place copies of one chunk until it has *target_count* holders.

        Each candidate node is tried at most once, so a node that keeps
        failing (full store, dropped connection) cannot stall the loop.

        Returns:
            ``True`` if at least one new replica was placed.
        """
        placed = False
        chunk_data = None
        tried: set[str] = set()

        while len(chunk_info.stored_on) < target_count:
            candidate = next(
                (
                    nid
                    for nid in all_nodes
                    if nid not in chunk_info.stored_on and nid not in tried
                ),
                None,
            )
            if candidate is None:
                break
            tried.add(candidate)

            # Fetch the chunk bytes lazily, and only once per chunk.
            if chunk_data is None:
                chunk_data = await self._fetch_chunk_copy(chunk_info)
                if chunk_data is None:
                    break

            if await self._place_replica(chunk_info.chunk_id, chunk_data, candidate):
                chunk_info.stored_on.append(candidate)
                placed = True
                logger.info(
                    "Re-replicated chunk %s... to %s",
                    chunk_info.chunk_id[:16],
                    candidate,
                )

        return placed

    async def _fetch_chunk_copy(self, chunk_info) -> bytes | None:
        """Return the chunk's stored bytes from the local store or a holder peer.

        Returns ``None`` when no surviving holder can supply the chunk.
        """
        chunk_id = chunk_info.chunk_id
        try:
            if self.chunk_store.has(chunk_id):
                return self.chunk_store.retrieve(chunk_id)
        except Exception as exc:
            # Fall back to the remote holders rather than abandoning the chunk.
            logger.warning("Local read of chunk %s failed: %s", chunk_id, exc)

        for nid in chunk_info.stored_on:
            conn = self.connections.get(nid)
            if conn is None:
                continue
            try:
                data = await conn.retrieve_chunk(chunk_id)
            except Exception as exc:
                logger.debug(
                    "Fetching chunk %s from %s failed: %s", chunk_id, nid, exc
                )
                continue
            # An empty/falsy reply is treated as "peer does not have it".
            if data:
                return data
        return None

    async def _place_replica(
        self, chunk_id: str, chunk_data: bytes, target: str
    ) -> bool:
        """Store one chunk copy on *target* (local node or connected peer).

        Returns:
            ``True`` on success, ``False`` if the target is unreachable or
            the store/send raised (the failure is logged).
        """
        try:
            if target == self.node_id:
                self.chunk_store.store(chunk_id, chunk_data)
                return True

            conn = self.connections.get(target)
            if conn is None:
                # The peer disconnected while re-replication was in progress.
                return False

            from firecloud.transport import MSG_STORE_CHUNK

            payload = chunk_id.encode("utf-8") + chunk_data
            await conn.send_message(MSG_STORE_CHUNK, payload)
            return True
        except Exception as exc:
            logger.warning(
                "Failed to re-replicate chunk %s to %s: %s", chunk_id, target, exc
            )
            return False

    def add_peer_discovered(self, node_id: str, host: str, port: int) -> None:
        """Record a newly discovered peer address (called by transport/discovery).

        The node's own id is ignored.
        """
        if node_id != self.node_id:
            self._known_peers[node_id] = (host, port)

    # ------------------------------------------------------------------
    # Background task bookkeeping
    # ------------------------------------------------------------------

    def _spawn_background(self, awaitable) -> asyncio.Future:
        """Schedule *awaitable* and keep a strong reference until it finishes."""
        task = asyncio.ensure_future(awaitable)
        self._background_tasks.add(task)
        task.add_done_callback(self._on_background_done)
        return task

    def _on_background_done(self, task: asyncio.Future) -> None:
        """Forget a finished background task and log any exception it raised."""
        self._background_tasks.discard(task)
        if task.cancelled():
            return
        exc = task.exception()
        if exc is not None:
            logger.warning("Background task failed: %s", exc)

    # ------------------------------------------------------------------
    # Discovery callbacks
    # ------------------------------------------------------------------

    def _on_peer_discovered(self, node_id: str, host: str, port: int) -> None:
        """Callback when mDNS discovers a peer — schedule auto-connect.

        Ignores this node itself and peers that are already connected.
        """
        if node_id == self.node_id or node_id in self.connections:
            return
        self._known_peers[node_id] = (host, port)
        self._spawn_background(self._try_connect(node_id, host, port))

    def _on_peer_removed(self, node_id: str) -> None:
        """Callback when mDNS detects a peer departure; closes its connection."""
        conn = self.connections.pop(node_id, None)
        if conn:
            self._spawn_background(conn.close())
        logger.debug("Peer %s removed via mDNS", node_id)

    async def _try_connect(self, node_id: str, host: str, port: int) -> None:
        """Try to connect to a discovered peer; failures are only logged."""
        try:
            if node_id not in self.connections:
                await self._client.connect(host, port)
        except Exception as exc:
            logger.debug(
                "Auto-connect to %s at %s:%s failed: %s", node_id, host, port, exc
            )

    # ------------------------------------------------------------------
    # Periodic tasks
    # ------------------------------------------------------------------

    async def _heartbeat_loop(self) -> None:
        """Send a heartbeat to every connected peer each ``HEARTBEAT_INTERVAL`` seconds.

        The payload is ``<node_id>|<UTC ISO-8601 timestamp>`` encoded as UTF-8.
        A failed send to one peer is logged and does not affect the others.
        """
        from firecloud.transport import MSG_HEARTBEAT

        while self._running:
            await asyncio.sleep(self.HEARTBEAT_INTERVAL)
            ts = datetime.now(timezone.utc).isoformat().encode("utf-8")
            payload = self.node_id.encode("utf-8") + b"|" + ts
            for peer_id, conn in list(self.connections.items()):
                try:
                    await conn.send_message(MSG_HEARTBEAT, payload)
                except Exception as exc:
                    logger.debug("Heartbeat to peer %s failed: %s", peer_id, exc)

    async def _manifest_sync_loop(self) -> None:
        """Push the manifest to all peers each ``MANIFEST_SYNC_INTERVAL`` seconds.

        A failing round is logged and the loop keeps running.
        """
        while self._running:
            await asyncio.sleep(self.MANIFEST_SYNC_INTERVAL)
            try:
                await self._sync_manifest_to_peers()
            except Exception as exc:
                logger.warning("Periodic manifest sync failed: %s", exc)

    async def _sync_manifest_to_peers(self) -> None:
        """Push the local manifest entries to all connected peers.

        Entries are serialised as a JSON list of dataclass dicts. Nothing is
        sent when the manifest has no entries; per-peer send failures are
        logged at debug level.
        """
        entries = self.manifest.to_entries()
        if not entries:
            return
        payload = json.dumps([asdict(e) for e in entries]).encode("utf-8")
        for conn in list(self.connections.values()):
            try:
                await conn.send_message(MSG_SYNC_MANIFEST, payload)
            except Exception as exc:
                logger.debug("Manifest sync failed for peer: %s", exc)