firecloud-devnet 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
firecloud/storage.py ADDED
@@ -0,0 +1,146 @@
1
+ """Local filesystem chunk storage with sharded directories and quota enforcement."""
2
+
3
+ import shutil
4
+ import threading
5
+ from pathlib import Path
6
+
7
+ from firecloud.exceptions import ChunkNotFoundError, StorageFullError
8
+
9
+
10
+ class ChunkStore:
11
+ """Thread-safe, sharded local storage for encrypted chunks.
12
+
13
+ Chunks are stored in a two-level directory tree sharded by the first two
14
+ hex characters of the chunk ID::
15
+
16
+ base_path/ab/abcdef0123456789...
17
+
18
+ A storage quota is enforced on every ``store()`` call. When *max_storage*
19
+ is ``None`` the quota defaults to 80 % of the free space reported by the OS
20
+ at construction time.
21
+ """
22
+
23
+ def __init__(self, base_path: Path | str, max_storage: int | None = None) -> None:
24
+ """Initialise the chunk store.
25
+
26
+ Args:
27
+ base_path: Root directory for chunk storage.
28
+ max_storage: Maximum bytes allowed. ``None`` means 80 % of the
29
+ available disk space at *base_path*.
30
+ """
31
+ self._base = Path(base_path)
32
+ self._base.mkdir(parents=True, exist_ok=True)
33
+ self._lock = threading.Lock()
34
+
35
+ if max_storage is not None:
36
+ self._max_storage = max_storage
37
+ else:
38
+ self._max_storage = int(shutil.disk_usage(self._base).free * 0.8)
39
+
40
+ # ------------------------------------------------------------------
41
+ # Public API
42
+ # ------------------------------------------------------------------
43
+
44
+ def store(self, chunk_id: str, data: bytes) -> None:
45
+ """Store an encrypted chunk on disk.
46
+
47
+ Args:
48
+ chunk_id: Hex string identifying the chunk.
49
+ data: Raw (already-encrypted) chunk bytes.
50
+
51
+ Raises:
52
+ StorageFullError: If storing *data* would exceed the quota.
53
+ """
54
+ with self._lock:
55
+ if self.used_bytes() + len(data) > self._max_storage:
56
+ raise StorageFullError(
57
+ f"Storing chunk {chunk_id} ({len(data)} bytes) would exceed "
58
+ f"the quota of {self._max_storage} bytes"
59
+ )
60
+ path = self._chunk_path(chunk_id)
61
+ path.parent.mkdir(parents=True, exist_ok=True)
62
+ path.write_bytes(data)
63
+
64
+ def retrieve(self, chunk_id: str) -> bytes:
65
+ """Retrieve a stored chunk by its ID.
66
+
67
+ Args:
68
+ chunk_id: Hex string identifying the chunk.
69
+
70
+ Returns:
71
+ The raw bytes of the chunk.
72
+
73
+ Raises:
74
+ ChunkNotFoundError: If the chunk is not in the store.
75
+ """
76
+ with self._lock:
77
+ path = self._chunk_path(chunk_id)
78
+ if not path.is_file():
79
+ raise ChunkNotFoundError(
80
+ f"Chunk {chunk_id} not found in store"
81
+ )
82
+ return path.read_bytes()
83
+
84
+ def delete(self, chunk_id: str) -> None:
85
+ """Delete a chunk from the store.
86
+
87
+ This is a no-op if the chunk does not exist.
88
+
89
+ Args:
90
+ chunk_id: Hex string identifying the chunk.
91
+ """
92
+ with self._lock:
93
+ path = self._chunk_path(chunk_id)
94
+ if path.is_file():
95
+ path.unlink()
96
+ # Clean up empty shard directory.
97
+ try:
98
+ path.parent.rmdir()
99
+ except OSError:
100
+ pass # Directory not empty — that's fine.
101
+
102
+ def has(self, chunk_id: str) -> bool:
103
+ """Check whether a chunk exists in the store.
104
+
105
+ Args:
106
+ chunk_id: Hex string identifying the chunk.
107
+
108
+ Returns:
109
+ ``True`` if the chunk is stored, ``False`` otherwise.
110
+ """
111
+ with self._lock:
112
+ return self._chunk_path(chunk_id).is_file()
113
+
114
+ def used_bytes(self) -> int:
115
+ """Return the total number of bytes consumed by stored chunks."""
116
+ total = 0
117
+ for path in self._base.rglob("*"):
118
+ if path.is_file():
119
+ total += path.stat().st_size
120
+ return total
121
+
122
+ def available_bytes(self) -> int:
123
+ """Return the number of bytes remaining before the quota is hit."""
124
+ return max(0, self._max_storage - self.used_bytes())
125
+
126
+ def list_chunks(self) -> list[str]:
127
+ """Return a list of all stored chunk IDs."""
128
+ chunks: list[str] = []
129
+ for shard_dir in sorted(self._base.iterdir()):
130
+ if not shard_dir.is_dir():
131
+ continue
132
+ for chunk_file in sorted(shard_dir.iterdir()):
133
+ if chunk_file.is_file():
134
+ chunks.append(chunk_file.name)
135
+ return chunks
136
+
137
+ # ------------------------------------------------------------------
138
+ # Internal helpers
139
+ # ------------------------------------------------------------------
140
+
141
+ def _chunk_path(self, chunk_id: str) -> Path:
142
+ """Return the sharded filesystem path for *chunk_id*.
143
+
144
+ Layout: ``base_path / chunk_id[:2] / chunk_id``
145
+ """
146
+ return self._base / chunk_id[:2] / chunk_id
firecloud/sync.py ADDED
@@ -0,0 +1,277 @@
1
+ """FireCloud Folder Sync — watchdog-based bi-directional folder synchronization.
2
+
3
+ Uses :pypi:`watchdog` to monitor a local folder for file changes and
4
+ automatically uploads / deletes files through the :class:`~firecloud.node.Node`.
5
+ Incoming files from remote peers are downloaded periodically by comparing the
6
+ manifest against the sync folder contents.
7
+ """
8
+
9
+ import asyncio
10
+ import logging
11
+ import threading
12
+ import time
13
+ from pathlib import Path
14
+ from typing import TYPE_CHECKING
15
+
16
+ from watchdog.events import FileSystemEvent, FileSystemEventHandler
17
+ from watchdog.observers import Observer
18
+
19
+ if TYPE_CHECKING:
20
+ from firecloud.node import Node
21
+
22
# Module-level logger shared by the event handler and the sync loops below.
logger = logging.getLogger("firecloud.sync")

# Debounce window — how long (in seconds) to wait after the last filesystem
# event before processing the change.  This handles editors that do
# write-to-temp-then-rename.  The same value is used as the polling interval
# of the debounce loop and as the minimum age an event must reach before it
# is handled.
_DEBOUNCE_SECONDS = 0.5
28
+
29
+
30
class _SyncEventHandler(FileSystemEventHandler):
    """Watchdog handler that relays file-level events to a ``FolderSync``.

    Directory events are ignored.  Every file event is recorded through
    ``FolderSync._schedule_event`` as ``"created"``, ``"modified"`` or
    ``"deleted"``; a move is recorded as a delete of the source followed by
    a create of the destination.
    """

    def __init__(self, sync: "FolderSync") -> None:
        super().__init__()
        self.sync = sync

    def on_created(self, event: FileSystemEvent) -> None:
        if event.is_directory:
            return
        self.sync._schedule_event("created", event.src_path)

    def on_modified(self, event: FileSystemEvent) -> None:
        if event.is_directory:
            return
        self.sync._schedule_event("modified", event.src_path)

    def on_deleted(self, event: FileSystemEvent) -> None:
        if event.is_directory:
            return
        self.sync._schedule_event("deleted", event.src_path)

    def on_moved(self, event: FileSystemEvent) -> None:
        if event.is_directory:
            return
        # A rename is modelled as "old name vanished, new name appeared".
        self.sync._schedule_event("deleted", event.src_path)
        if hasattr(event, "dest_path"):
            self.sync._schedule_event("created", event.dest_path)
54
+
55
+
56
+ class FolderSync:
57
+ """Watches a folder and syncs its contents through a FireCloud node.
58
+
59
+ - **Outbound:** local file changes (create / modify / delete) are uploaded
60
+ or tombstoned via the node.
61
+ - **Inbound:** new files in the manifest that are missing from the local
62
+ folder are downloaded periodically.
63
+ """
64
+
65
+ def __init__(self, node: "Node", folder: Path | str) -> None:
66
+ self.node = node
67
+ self.folder = Path(folder)
68
+ self.folder.mkdir(parents=True, exist_ok=True)
69
+
70
+ self._observer: Observer | None = None
71
+ self._running = False
72
+ self._incoming_task: asyncio.Task | None = None
73
+ self._debounce_task: asyncio.Task | None = None
74
+
75
+ # Pending events: path → (event_type, timestamp)
76
+ self._pending: dict[str, tuple[str, float]] = {}
77
+ self._pending_lock = threading.Lock()
78
+
79
+ # Track file_id ↔ filename mappings for delete propagation
80
+ self._name_to_id: dict[str, str] = {}
81
+ self._id_to_name: dict[str, str] = {}
82
+
83
+ # Files we are currently downloading — skip outbound re-upload
84
+ self._downloading: set[str] = set()
85
+
86
+ # ------------------------------------------------------------------
87
+ # Lifecycle
88
+ # ------------------------------------------------------------------
89
+
90
+ async def start(self) -> None:
91
+ """Start watching the folder for changes."""
92
+ if self._running:
93
+ return
94
+
95
+ # Seed name ↔ id mapping from existing manifest
96
+ self._rebuild_name_map()
97
+
98
+ self._running = True
99
+ self._observer = Observer()
100
+ handler = _SyncEventHandler(self)
101
+ self._observer.schedule(handler, str(self.folder), recursive=False)
102
+ self._observer.start()
103
+
104
+ # Background task to check for incoming files every 5 seconds
105
+ self._incoming_task = asyncio.create_task(self._incoming_loop())
106
+
107
+ # Background task to process debounced events
108
+ self._debounce_task = asyncio.create_task(self._debounce_loop())
109
+
110
+ logger.info(f"Folder sync started for {self.folder}")
111
+
112
+ async def stop(self) -> None:
113
+ """Stop watching the folder."""
114
+ if not self._running:
115
+ return
116
+ self._running = False
117
+
118
+ if self._observer:
119
+ self._observer.stop()
120
+ self._observer.join(timeout=2)
121
+ self._observer = None
122
+
123
+ for task in (self._incoming_task, self._debounce_task):
124
+ if task and not task.done():
125
+ task.cancel()
126
+ try:
127
+ await task
128
+ except asyncio.CancelledError:
129
+ pass
130
+
131
+ logger.info(f"Folder sync stopped for {self.folder}")
132
+
133
+ # ------------------------------------------------------------------
134
+ # Event scheduling (called from watchdog thread)
135
+ # ------------------------------------------------------------------
136
+
137
+ def _schedule_event(self, event_type: str, path: str) -> None:
138
+ """Record a filesystem event for debounced processing."""
139
+ # Skip files we are downloading ourselves
140
+ filename = Path(path).name
141
+ if filename in self._downloading:
142
+ return
143
+
144
+ with self._pending_lock:
145
+ self._pending[path] = (event_type, time.monotonic())
146
+
147
+ # ------------------------------------------------------------------
148
+ # Debounce loop (runs on asyncio loop)
149
+ # ------------------------------------------------------------------
150
+
151
+ async def _debounce_loop(self) -> None:
152
+ """Process filesystem events after the debounce window elapses."""
153
+ try:
154
+ while self._running:
155
+ await asyncio.sleep(_DEBOUNCE_SECONDS)
156
+ now = time.monotonic()
157
+
158
+ # Collect events that have settled
159
+ ready: list[tuple[str, str]] = []
160
+ with self._pending_lock:
161
+ for path, (event_type, ts) in list(self._pending.items()):
162
+ if now - ts >= _DEBOUNCE_SECONDS:
163
+ ready.append((path, event_type))
164
+ del self._pending[path]
165
+
166
+ for path, event_type in ready:
167
+ try:
168
+ await self._handle_event(event_type, path)
169
+ except Exception as exc:
170
+ logger.error(f"Sync event error for {path}: {exc}")
171
+ except asyncio.CancelledError:
172
+ pass
173
+
174
+ async def _handle_event(self, event_type: str, path: str) -> None:
175
+ """Process a single filesystem event."""
176
+ filepath = Path(path)
177
+ filename = filepath.name
178
+
179
+ if event_type in ("created", "modified"):
180
+ if filepath.is_file():
181
+ file_id = await self.node.upload(filepath)
182
+ self._name_to_id[filename] = file_id
183
+ self._id_to_name[file_id] = filename
184
+ logger.debug(f"Sync uploaded {filename} → {file_id}")
185
+
186
+ elif event_type == "deleted":
187
+ file_id = self._name_to_id.get(filename)
188
+ if file_id:
189
+ try:
190
+ await self.node.delete(file_id)
191
+ del self._name_to_id[filename]
192
+ del self._id_to_name[file_id]
193
+ logger.debug(f"Sync deleted {filename} ({file_id})")
194
+ except Exception:
195
+ pass
196
+
197
+ # ------------------------------------------------------------------
198
+ # Incoming file download loop
199
+ # ------------------------------------------------------------------
200
+
201
+ async def _incoming_loop(self) -> None:
202
+ """Periodically check the manifest for files missing from the folder."""
203
+ try:
204
+ while self._running:
205
+ await asyncio.sleep(5)
206
+ try:
207
+ await self._pull_incoming()
208
+ except Exception as exc:
209
+ logger.error(f"Incoming sync error: {exc}")
210
+ except asyncio.CancelledError:
211
+ pass
212
+
213
+ async def _pull_incoming(self) -> None:
214
+ """Download any manifest files that are not present locally or are newer on remote."""
215
+ # Group entries by filename and find the one with the highest Lamport timestamp
216
+ latest_entries = {}
217
+ for entry in self.node.manifest.list_files():
218
+ filename = entry.name
219
+ current = latest_entries.get(filename)
220
+ if current is None or entry.lamport_ts > current.lamport_ts:
221
+ latest_entries[filename] = entry
222
+
223
+ for filename, entry in latest_entries.items():
224
+ local_path = self.folder / filename
225
+
226
+ # If the file exists locally and we already have this file_id mapped, it is up to date
227
+ mapped_id = self._name_to_id.get(filename)
228
+ if local_path.exists() and mapped_id == entry.file_id:
229
+ continue
230
+
231
+ # If the file exists locally but corresponds to a different file_id
232
+ if local_path.exists() and mapped_id is not None:
233
+ # If the remote version is not newer than our locally mapped version, skip it
234
+ try:
235
+ local_entry = self.node.manifest.get_file(mapped_id)
236
+ if entry.lamport_ts <= local_entry.lamport_ts:
237
+ continue
238
+ except Exception:
239
+ pass
240
+
241
+ # Skip if we are currently downloading this file
242
+ if filename in self._downloading:
243
+ continue
244
+
245
+ # Already tracked by us (skip if it has been tombstoned / deleted)
246
+ if entry.file_id in self._id_to_name and entry.deleted:
247
+ continue
248
+
249
+ # Download from the network
250
+ try:
251
+ self._downloading.add(filename)
252
+ await self.node.download(entry.file_id, local_path)
253
+ self._name_to_id[filename] = entry.file_id
254
+ self._id_to_name[entry.file_id] = filename
255
+ logger.debug(f"Sync downloaded {filename} from network (latest file_id: {entry.file_id})")
256
+ except Exception as exc:
257
+ logger.error(f"Failed to download {filename}: {exc}")
258
+ finally:
259
+ self._downloading.discard(filename)
260
+
261
+ # ------------------------------------------------------------------
262
+ # Helpers
263
+ # ------------------------------------------------------------------
264
+
265
+ def _rebuild_name_map(self) -> None:
266
+ """Populate the name ↔ id mapping from the current manifest.
267
+
268
+ Only includes files that actually exist in the sync folder, so that
269
+ files uploaded by remote peers can be downloaded on first sync start.
270
+ """
271
+ self._name_to_id.clear()
272
+ self._id_to_name.clear()
273
+ for entry in self.node.manifest.list_files():
274
+ local_path = self.folder / entry.name
275
+ if local_path.exists():
276
+ self._name_to_id[entry.name] = entry.file_id
277
+ self._id_to_name[entry.file_id] = entry.name