hashserver 1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hash_file_response.py ADDED
@@ -0,0 +1,237 @@
1
+ import os
2
+ import stat
3
+ import time
4
+ import typing
5
+ from hashlib import sha3_256, sha256
6
+
7
+ import anyio
8
+
9
+ from starlette.background import BackgroundTask
10
+ from starlette.types import Receive, Scope, Send
11
+ from starlette.responses import FileResponse
12
+
13
# Supported checksum algorithms, keyed by their public wire names.
HASH_ALGORITHMS = {
    "sha3-256": sha3_256,
    "sha-256": sha256,
}
DEFAULT_HASH_ALGORITHM = "sha-256"
# Module-wide current selection; changed via set_hash_algorithm().
_current_hash_algorithm = DEFAULT_HASH_ALGORITHM
_hash_constructor = HASH_ALGORITHMS[DEFAULT_HASH_ALGORITHM]
20
+
21
+
22
def set_hash_algorithm(algorithm: str) -> None:
    """Select the module-wide hash algorithm by name.

    Raises:
        ValueError: if *algorithm* is not one of HASH_ALGORITHMS.
    """
    global _current_hash_algorithm, _hash_constructor
    try:
        constructor = HASH_ALGORITHMS[algorithm]
    except KeyError as exc:
        choices = ", ".join(HASH_ALGORITHMS)
        raise ValueError(
            f"Unsupported hash algorithm '{algorithm}'. Choose one of: {choices}"
        ) from exc
    _hash_constructor = constructor
    _current_hash_algorithm = algorithm
32
+
33
+
34
def get_hash_algorithm() -> str:
    """Return the name of the currently selected hash algorithm."""
    return _current_hash_algorithm
36
+
37
+
38
def parse_checksum(checksum) -> typing.Optional[str]:
    """Parse *checksum* and return it as a 64-character lowercase hex string.

    Accepts a 32-byte digest as ``bytes`` or its hex representation as
    ``str``. Returns ``None`` when *checksum* is ``None``.

    Raises:
        ValueError: if the input does not describe a 32-byte digest.
        TypeError: for any other input type.

    Adapted from the Seamless source code (fair use)"""
    if isinstance(checksum, bytes):
        checksum = checksum.hex()
    if isinstance(checksum, str):
        if len(checksum) % 2:
            raise ValueError("Wrong length")
        # Round-trip through bytes to validate and normalize hex digits.
        checksum = bytes.fromhex(checksum)

    if isinstance(checksum, bytes):
        if len(checksum) != 32:
            raise ValueError("Wrong length")
        return checksum.hex()

    if checksum is None:
        # Explicit None (was a bare `return`, contradicting the -> str hint).
        return None
    raise TypeError(type(checksum))
57
+
58
+
59
class HashFileResponse(FileResponse):
    """FileResponse that validates files against their checksum-derived filename.

    The file name equals the hex checksum of its content. Before the body
    is sent, the content is re-hashed and compared against the filename;
    on mismatch, lock files are awaited and the check retried once before
    failing with RuntimeError.
    """

    # True when buffers live under a two-character prefix subdirectory
    # (see PrefixHashFileResponse).
    _PREFIX = False

    # Seconds after which a lock file's mtime is considered stale.
    lock_timeout = 120
    # Read chunk size used when hashing the file.
    chunk_size = 640 * 1024

    def __init__(
        self,
        checksum: str,
        directory: str,
        status_code: int = 200,
        headers: typing.Optional[typing.Mapping[str, str]] = None,
        media_type: typing.Optional[str] = None,
        background: typing.Optional[BackgroundTask] = None,
        stat_result: typing.Optional[os.stat_result] = None,
        method: typing.Optional[str] = None,
        content_disposition_type: str = "attachment",
        extra_dirs: typing.Optional[typing.List[str]] = None,
    ) -> None:
        filename = parse_checksum(checksum)
        self.prefix = filename[:2]
        # Stat headers are always (re)computed in refresh_stat_headers;
        # a caller-provided stat_result is deliberately discarded.
        stat_result = None
        if self._PREFIX:
            path = os.path.join(directory, self.prefix, filename)
        else:
            path = os.path.join(directory, filename)
        super().__init__(
            path=path,
            status_code=status_code,
            headers=headers,
            media_type=media_type,
            background=background,
            filename=filename,
            stat_result=stat_result,
            method=method,
            content_disposition_type=content_disposition_type,
        )
        self.directory = directory
        self.extra_dirs = extra_dirs
        # Each extra (read-only) directory advertises "prefix" layout via
        # a .HASHSERVER_PREFIX sentinel file; otherwise it is "flat".
        extra_dirs_layout = {}
        # BUG FIX: extra_dirs defaults to None; original iterated it
        # unconditionally and raised TypeError for the default case.
        for extra_dir in extra_dirs or ():
            prefix_file = os.path.join(extra_dir, ".HASHSERVER_PREFIX")
            if os.path.exists(prefix_file):
                layout = "prefix"
            else:
                layout = "flat"
            extra_dirs_layout[extra_dir] = layout
        self.extra_dirs_layout = extra_dirs_layout

    async def refresh_stat_headers(self):
        """Locate the file (falling back to extra dirs) and refresh the
        content-length / last-modified / etag headers from a fresh stat.

        Returns the os.stat_result. Raises FileNotFoundError when the
        file is missing everywhere, RuntimeError when it is not a
        regular file.
        """
        if self.extra_dirs and not await anyio.Path(self.path).exists():
            for extra_dir in self.extra_dirs:
                layout = self.extra_dirs_layout[extra_dir]
                if layout == "prefix":
                    path0 = os.path.join(extra_dir, self.prefix, self.filename)
                else:
                    path0 = os.path.join(extra_dir, self.filename)
                if await anyio.Path(path0).exists():
                    self.path = path0
                    break

        try:
            stat_result = await anyio.to_thread.run_sync(os.stat, self.path)
            # Starlette's MutableHeaders.__delitem__ is a no-op for
            # absent keys, so these are safe even on the first refresh.
            del self.headers["content-length"]
            del self.headers["last-modified"]
            del self.headers["etag"]

            self.set_stat_headers(stat_result)
        except FileNotFoundError:
            raise FileNotFoundError(
                f"File at path {self.path} does not exist."
            ) from None
        else:
            mode = stat_result.st_mode
            if not stat.S_ISREG(mode):
                raise RuntimeError(f"File at path {self.path} is not a file.")
        return stat_result

    async def _until_no_lock(self, lockpaths):
        """Sleep until every path in *lockpaths* is absent or stale
        (mtime older than ``lock_timeout`` seconds)."""
        for lockpath in lockpaths:
            while 1:
                try:
                    lock_stat_result = await anyio.to_thread.run_sync(
                        os.stat, lockpath
                    )
                except FileNotFoundError:
                    break
                lock_mtime = lock_stat_result.st_mtime
                if time.time() - lock_mtime > self.lock_timeout:
                    # Stale lock, presumably left behind by a dead writer.
                    break
                await anyio.sleep(1)

    async def until_no_lock(self):
        """Wait for the directory-level and per-file lock files to clear."""
        lockpaths = [os.path.join(self.directory, ".LOCK")]
        if self.path is not None:
            lockpaths.append(self.path + ".LOCK")
        return await self._until_no_lock(lockpaths)

    async def calculate_checksum(self):
        """Return checksum for the configured algorithm."""
        checksum = _hash_constructor()
        async with await anyio.open_file(self.path, mode="rb") as file:
            more_body = True
            while more_body:
                chunk = await file.read(self.chunk_size)
                checksum.update(chunk)
                # A short read means EOF was reached.
                more_body = len(chunk) == self.chunk_size

        checksum = checksum.digest().hex()
        return checksum

    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
        """Verify the file's checksum (waiting on locks and retrying once),
        then delegate the actual transfer to FileResponse."""
        if self.stat_result is None:
            try:
                stat_result = await self.refresh_stat_headers()
            except FileNotFoundError:
                # The file may still be mid-upload: wait on locks, retry.
                await self.until_no_lock()
                stat_result = await self.refresh_stat_headers()
            self.stat_result = stat_result

        checksum = await self.calculate_checksum()
        if checksum != self.filename:
            # Possibly read during a concurrent write: wait and re-check.
            await self.until_no_lock()
            stat_result = await self.refresh_stat_headers()
            self.stat_result = stat_result
            checksum2 = await self.calculate_checksum()
            if checksum2 != self.filename:
                raise RuntimeError(
                    f"File corruption: file at path {self.path} does not have the correct {_current_hash_algorithm} checksum."
                )

        await super().__call__(scope=scope, receive=receive, send=send)
191
+
192
+
193
class PrefixHashFileResponse(HashFileResponse):
    """Same as HashFileResponse but files are stored under a two-character prefix.

    File has the same name as checksum.
    File is stored as $PREFIX/$CHECKSUM, where $PREFIX is the first two
    characters of $CHECKSUM
    """

    _PREFIX = True

    def __init__(
        self,
        checksum: str,
        directory: str,
        status_code: int = 200,
        headers: typing.Optional[typing.Mapping[str, str]] = None,
        media_type: typing.Optional[str] = None,
        background: typing.Optional[BackgroundTask] = None,
        stat_result: typing.Optional[os.stat_result] = None,
        method: typing.Optional[str] = None,
        content_disposition_type: str = "attachment",
        extra_dirs: typing.Optional[typing.List[str]] = None,
    ) -> None:

        super().__init__(
            checksum=checksum,
            directory=directory,
            status_code=status_code,
            headers=headers,
            media_type=media_type,
            background=background,
            stat_result=stat_result,
            method=method,
            content_disposition_type=content_disposition_type,
            extra_dirs=extra_dirs,
        )
        # Mark the directory as prefix-layout. BUG FIX: only create the
        # sentinel when missing, instead of rewriting it on every single
        # response (matches the startup logic in hashserver.py).
        prefix_file = os.path.join(directory, ".HASHSERVER_PREFIX")
        if not os.path.exists(prefix_file):
            with open(prefix_file, mode="wb") as f:
                f.write(b"1\n")

    async def until_no_lock(self):
        """Wait for the per-prefix directory lock and the per-file lock."""
        lockpaths = [os.path.join(self.directory, self.prefix, ".LOCK")]
        if self.path is not None:
            lockpaths.append(self.path + ".LOCK")
        return await self._until_no_lock(lockpaths)
@@ -0,0 +1,174 @@
1
+ Metadata-Version: 2.4
2
+ Name: hashserver
3
+ Version: 1.0
4
+ Summary: Simple FastAPI-based hash server
5
+ Author: Sjoerd de Vries
6
+ License-Expression: MIT
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE.txt
10
+ Requires-Dist: fastapi
11
+ Requires-Dist: uvicorn[standard]
12
+ Requires-Dist: typing-extensions
13
+ Dynamic: license-file
14
+
15
+ # Hashserver
16
+
17
+ A lightweight, content-addressed file server over HTTP.
18
+
19
+ Hashserver stores and serves opaque binary buffers keyed by their cryptographic checksum. You PUT a buffer with its checksum in the URL; you GET it back by the same checksum. There are no filenames, no directories, no metadata — just content and its hash.
20
+
21
+ The hash algorithm is configurable: SHA-256 (default) or SHA3-256.
22
+
23
+ ## Why content-addressed storage?
24
+
25
+ Content-addressed storage (CAS) is a well-established pattern used by Git, IPFS, Docker registries, and many other systems. Identifying data by its cryptographic hash gives you automatic deduplication, trivially verifiable integrity, and strong reproducibility guarantees.
26
+
27
+ Hashserver brings these benefits to any project that needs a simple HTTP-based buffer store. It is intentionally minimal: a single ASGI application backed by a directory of files, designed to be easy to deploy, easy to integrate, and easy to reason about.
28
+
29
+ ## Relationship to Seamless
30
+
31
+ Hashserver was originally developed as the buffer-serving component of [Seamless](https://github.com/sjdv1982/seamless), a framework for reproducible, reactive computational workflows. In Seamless, all data — inputs, source code, and results — is represented as a tree of checksums, and hashserver provides the storage layer that maps those checksums back to actual data.
32
+
33
+ However, **hashserver has no dependency on Seamless** and no knowledge of it. It is a generic content-addressed file server that is useful in any context where you need to store and retrieve buffers by hash — caching layers, artifact stores, reproducible pipelines, or your own CAS-backed application. It is published as an independent PyPI package for exactly this reason.
34
+
35
+ ## Features
36
+
37
+ - **Content-addressed**: buffers are stored and retrieved by their cryptographic checksum.
38
+ - **Configurable hash algorithm**: SHA-256 (default) or SHA3-256, selected at startup.
39
+ - **Integrity-verified reads**: every buffer is re-checksummed on GET to detect corruption.
40
+ - **Prefix directory layout**: by default, buffers are stored under a two-character prefix subdirectory (e.g. `ab/ab3f7c...`) to avoid filesystem performance problems with large flat directories. A flat layout is also supported.
41
+ - **Extra read-only directories**: additional buffer directories can be mounted as fallback read sources.
42
+ - **Promises**: a client can announce that a buffer will be uploaded soon via `PUT /promise/{checksum}`. Other clients reading that checksum will wait for the upload rather than getting a 404.
43
+ - **Concurrent-safe**: in-flight PUT requests are tracked so concurrent GETs and batch queries return consistent results. Lock files are respected for external writers.
44
+ - **Multiple instances**: several hashserver processes can safely share the same buffer directory.
45
+ - **Lightweight**: built on FastAPI/Starlette — no database, no external services.
46
+ - **Flexible deployment**: run as a CLI tool, under any ASGI server, or via Docker Compose.
47
+
48
+ ## Installation
49
+
50
+ ```bash
51
+ pip install hashserver
52
+ ```
53
+
54
+ Or with conda:
55
+
56
+ ```bash
57
+ mamba env create --file environment.yml
58
+ conda activate hashserver
59
+ ```
60
+
61
+ ## Quick start
62
+
63
+ Serve buffers from a local directory:
64
+
65
+ ```bash
66
+ hashserver ./my-buffers
67
+ ```
68
+
69
+ This starts the server under uvicorn on port 8000. Run `hashserver -h` for all options.
70
+
71
+ ### Storing and retrieving a buffer
72
+
73
+ ```bash
74
+ # Start a writable server
75
+ hashserver ./my-buffers --writable
76
+
77
+ # Compute the SHA-256 checksum and upload
78
+ CHECKSUM=$(python3 -c "
79
+ import hashlib, sys
80
+ print(hashlib.sha256(open(sys.argv[1],'rb').read()).hexdigest())
81
+ " myfile.bin)
82
+ curl -X PUT --data-binary @myfile.bin http://localhost:8000/$CHECKSUM
83
+
84
+ # Download
85
+ curl -O http://localhost:8000/$CHECKSUM
86
+ ```
87
+
88
+ To use SHA3-256 instead, start the server with `--hash-algorithm sha3-256` and hash your files with `hashlib.sha3_256`.
89
+
90
+ ## API
91
+
92
+ ### Retrieving buffers
93
+
94
+ **`GET /{checksum}`** — Retrieve a buffer by its hex checksum. The server verifies the checksum before sending the response. Returns the raw buffer (200), or 404 if not found.
95
+
96
+ ### Storing buffers
97
+
98
+ Requires `--writable`.
99
+
100
+ **`PUT /{checksum}`** — Upload a buffer. The request body is the raw data; the server verifies that its checksum matches the URL. Returns 200 on success, 201 if the buffer already existed, or 400 on checksum mismatch.
101
+
102
+ **`PUT /promise/{checksum}`** — Announce that a buffer will be uploaded soon. Returns 202 with the promise TTL. While a promise is active, GET requests for that checksum will wait rather than returning 404, and `/has` queries will report the checksum as present.
103
+
104
+ ### Querying availability
105
+
106
+ **`GET /has`** — Batch existence check. Send a JSON list of checksums in the request body. Returns a JSON list of booleans. Includes both on-disk buffers and active promises.
107
+
108
+ **`GET /has-now`** — Same as `/has`, but excludes promises — only reports buffers that are already on disk.
109
+
110
+ **`GET /buffer-length`** — Batch size query. Send a JSON list of checksums in the request body. Returns a JSON list of integers: the buffer size in bytes, or 0 if not present. Promised checksums are reported as `true`.
111
+
112
+ ### Health
113
+
114
+ **`GET /healthcheck`** — Returns "OK". Useful for load balancer probes.
115
+
116
+ ## Configuration
117
+
118
+ ### CLI flags
119
+
120
+ | Flag | Description | Default |
121
+ |------|-------------|---------|
122
+ | `directory` | Buffer storage directory (positional, required) | — |
123
+ | `--writable` | Enable PUT endpoints | off |
124
+ | `--hash-algorithm` | Hash algorithm: `sha3-256` or `sha-256` | `sha-256` |
125
+ | `--layout` | Directory layout: `prefix` or `flat` | `prefix` |
126
+ | `--extra-dirs` | Semicolon-separated list of extra read-only buffer directories | — |
127
+ | `--host` | Listen address | `127.0.0.1` |
128
+ | `--port` | Listen port | `8000` |
129
+ | `--port-range START END` | Pick a random free port in range (mutually exclusive with `--port`) | — |
130
+ | `--status-file` | JSON file for reporting server status | — |
131
+ | `--timeout` | Shut down after this many seconds of inactivity | — |
132
+
133
+ ### Environment variables
134
+
135
+ When running under an external ASGI server (e.g. `uvicorn hashserver:app`), configure via environment variables instead:
136
+
137
+ | Variable | Equivalent flag |
138
+ |----------|----------------|
139
+ | `HASHSERVER_DIRECTORY` | `directory` |
140
+ | `HASHSERVER_WRITABLE` | `--writable` (set to `1` or `true`) |
141
+ | `HASHSERVER_HASH_ALGORITHM` | `--hash-algorithm` |
142
+ | `HASHSERVER_LAYOUT` | `--layout` |
143
+ | `HASHSERVER_EXTRA_DIRS` | `--extra-dirs` |
144
+
145
+ ### Docker Compose
146
+
147
+ ```bash
148
+ export HASHSERVER_PORT=8000
149
+ export HASHSERVER_HOST=0.0.0.0
150
+ export HASHSERVER_DIRECTORY=./buffers
151
+ export HASHSERVER_WRITABLE=1
152
+ docker compose up -d
153
+ ```
154
+
155
+ Container user/group ID can be set with `HASHSERVER_USER_ID` and `HASHSERVER_GROUP_ID` (both default to 0).
156
+
157
+ ## Directory layouts
158
+
159
+ In **prefix** layout (the default), a buffer with checksum `ab3f7c...` is stored as `<directory>/ab/ab3f7c...`. A sentinel file `.HASHSERVER_PREFIX` is written to the directory. This avoids performance issues when storing large numbers of buffers.
160
+
161
+ In **flat** layout, the same buffer is stored as `<directory>/ab3f7c...`.
162
+
163
+ Extra directories auto-detect their layout by checking for the `.HASHSERVER_PREFIX` sentinel.
164
+
165
+ ## Running tests
166
+
167
+ ```bash
168
+ pip install requests
169
+ pytest tests/
170
+ ```
171
+
172
+ ## License
173
+
174
+ See [LICENSE.txt](LICENSE.txt).
@@ -0,0 +1,8 @@
1
+ hash_file_response.py,sha256=3H5snJYz8FWiIfIugI5i-zQPhgF0jUVjr2GDRrX7DX8,8247
2
+ hashserver.py,sha256=LDAYpZI7plcT6I5Lpe-nfFQwC3T54u-Z2-by2vPpBEY,26715
3
+ hashserver-1.0.dist-info/licenses/LICENSE.txt,sha256=aFUunT7WYX_fR7ryljRBKqoltt4dSYe-PB15Hz9GeyA,1117
4
+ hashserver-1.0.dist-info/METADATA,sha256=xc1WzqyX1Bb6Y7IlU9PIVUiLyz7_AYf4IPVtGVZTa3Y,7630
5
+ hashserver-1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
6
+ hashserver-1.0.dist-info/entry_points.txt,sha256=APGs23yr75suYAn6nQFAmAUL0GC1MARS6SFntYzhdvo,47
7
+ hashserver-1.0.dist-info/top_level.txt,sha256=oBgEDscAxsuQKVjeI30QOwRavz0iM5MLWh__ffn3mB0,30
8
+ hashserver-1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ hashserver = hashserver:main
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Author: Sjoerd de Vries, MBI platform.
4
+ Copyright (c) 2023-2026 CNRS.
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
@@ -0,0 +1,2 @@
1
+ hash_file_response
2
+ hashserver
hashserver.py ADDED
@@ -0,0 +1,865 @@
1
+ import os
2
+ import sys
3
+ import argparse
4
+ import random
5
+ import socket
6
+ import json
7
+ import asyncio
8
+ import logging
9
+ import aiofiles
10
+ import aiofiles.os
11
+ import aiofiles.tempfile
12
+ import contextlib
13
+ import copy
14
+ from dataclasses import dataclass
15
+ from typing import Iterable, List, Optional, Set, Union
16
+ from fastapi import FastAPI, Path, Body, Request
17
+ from fastapi.middleware.cors import CORSMiddleware
18
+ from fastapi.responses import Response, JSONResponse
19
+ from fastapi.exceptions import RequestValidationError
20
+ from fastapi.encoders import jsonable_encoder
21
+ from starlette.requests import ClientDisconnect
22
+
23
+ from functools import partial
24
+
25
+ from hash_file_response import (
26
+ parse_checksum,
27
+ HashFileResponse,
28
+ PrefixHashFileResponse,
29
+ HASH_ALGORITHMS,
30
+ DEFAULT_HASH_ALGORITHM,
31
+ set_hash_algorithm,
32
+ )
33
+
34
+ from typing_extensions import Annotated
35
+ from pydantic.functional_validators import BeforeValidator
36
+
37
+ import anyio
38
+ import pathlib
39
+ import time
40
+
41
# FastAPI parameter type: checksum strings are normalized and validated
# by parse_checksum before reaching the endpoint body.
Checksum = Annotated[str, BeforeValidator(parse_checksum)]

# Hash constructor in use; reassigned by configure_hash_algorithm().
checksum_constructor = HASH_ALGORITHMS[DEFAULT_HASH_ALGORITHM]

STATUS_FILE_WAIT_TIMEOUT = 20.0
INACTIVITY_CHECK_INTERVAL = 1.0


# Shared state for the optional inactivity-shutdown feature
# (see setup_inactivity_timeout and the record_last_request middleware).
INACTIVITY_STATE = {
    "timeout": None,
    "last_request": None,
    "task": None,
    "server": None,
}

aiofiles_chmod = aiofiles.os.wrap(os.chmod)  # aiofiles.os lacks chmod
57
+
58
+
59
def calculate_checksum(buffer):
    """Return checksum in the configured hash algorithm."""
    digest = checksum_constructor(buffer).digest()
    return digest.hex()
62
+
63
+
64
def calculate_checksum_stream():
    """Return a fresh incremental hash object for the configured algorithm."""
    return checksum_constructor()
66
+
67
+
68
def wait_for_status_file(path: str, timeout: float = STATUS_FILE_WAIT_TIMEOUT):
    """Poll *path* until it exists and parses as JSON, then return it.

    Exits the process with status 1 when the file does not appear within
    *timeout* seconds, is not valid JSON, or does not hold a JSON object.
    """
    deadline = time.monotonic() + timeout
    while True:
        try:
            with open(path, "r", encoding="utf-8") as status_stream:
                contents = json.load(status_stream)
        except FileNotFoundError:
            if time.monotonic() >= deadline:
                print(
                    f"Status file '{path}' not found after {int(timeout)} seconds",
                    file=sys.stderr,
                )
                sys.exit(1)
            time.sleep(0.1)
            continue
        except json.JSONDecodeError as exc:
            print(
                f"Status file '{path}' is not valid JSON: {exc}",
                file=sys.stderr,
            )
            sys.exit(1)
        break

    if not isinstance(contents, dict):
        print(
            f"Status file '{path}' must contain a JSON object",
            file=sys.stderr,
        )
        sys.exit(1)

    return contents
99
+
100
+
101
class StatusFileTracker:
    """Reports server status ("running"/"failed") by atomically rewriting
    a JSON status file."""

    def __init__(self, path: str, base_contents: dict, port: int):
        self.path = path
        self._base_contents = dict(base_contents)
        self.port = port
        self.running_written = False

    def _write(self, payload: dict):
        # Write to a sibling temp file and atomically replace, so readers
        # never observe a partially written status file.
        tmp_path = f"{self.path}.tmp"
        with open(tmp_path, "w", encoding="utf-8") as status_stream:
            json.dump(payload, status_stream)
            status_stream.write("\n")
        os.replace(tmp_path, self.path)

    def write_running(self):
        """Publish the "running" status together with the chosen port."""
        payload = dict(self._base_contents, port=self.port, status="running")
        self._write(payload)
        self._base_contents = payload
        self.running_written = True

    def write_failed(self):
        """Publish the "failed" status (used when startup aborts)."""
        self._write(dict(self._base_contents, status="failed"))
127
+
128
+
129
def raise_startup_error(exc: BaseException):
    """Record a startup failure in the status file (if any), then raise *exc*."""
    tracker = status_tracker
    if tracker and not tracker.running_written:
        tracker.write_failed()
    raise exc
133
+
134
+
135
def configure_hash_algorithm(algorithm: str):
    """Select *algorithm* for this module and for hash_file_response.

    Aborts startup (via raise_startup_error) for an unknown name.
    """
    global checksum_constructor
    try:
        checksum_constructor = HASH_ALGORITHMS[algorithm]
    except KeyError:
        supported = ", ".join(HASH_ALGORITHMS.keys())
        raise_startup_error(
            RuntimeError(f"--hash-algorithm must be one of: {supported}")
        )
    set_hash_algorithm(algorithm)
146
+
147
+
148
def configure_lock_timeout(lock_timeout_seconds: float):
    """Apply the stale-lock wait time to both response classes.

    Aborts startup (via raise_startup_error) for non-positive values.
    """
    if lock_timeout_seconds <= 0:
        raise_startup_error(RuntimeError("--lock-timeout must be a positive number"))
    for response_class in (HashFileResponse, PrefixHashFileResponse):
        response_class.lock_timeout = lock_timeout_seconds
153
+
154
+
155
def setup_inactivity_timeout(timeout_seconds: float, server):
    # Arrange for `server` to shut down after `timeout_seconds` without
    # HTTP requests. The record_last_request middleware refreshes
    # INACTIVITY_STATE["last_request"]; a background task polls it.
    INACTIVITY_STATE["timeout"] = timeout_seconds
    INACTIVITY_STATE["server"] = server

    async def monitor_inactivity():
        # Poll the last-request timestamp once per INACTIVITY_CHECK_INTERVAL
        # and flag the server for exit once the timeout has elapsed.
        try:
            while True:
                await asyncio.sleep(INACTIVITY_CHECK_INTERVAL)
                last_request = INACTIVITY_STATE.get("last_request")
                if last_request is None:
                    continue
                if time.monotonic() - last_request >= timeout_seconds:
                    # NOTE(review): setting should_exit looks like the uvicorn
                    # graceful-shutdown flag — confirm `server` is a uvicorn Server.
                    server.should_exit = True
                    break
        except asyncio.CancelledError:
            raise

    async def start_monitor():
        # App startup: seed the timestamp and spawn the monitor task.
        INACTIVITY_STATE["last_request"] = time.monotonic()
        loop = asyncio.get_running_loop()
        INACTIVITY_STATE["task"] = loop.create_task(monitor_inactivity())

    async def stop_monitor():
        # App shutdown: cancel the monitor task and reset all shared state.
        task = INACTIVITY_STATE.get("task")
        if task:
            task.cancel()
            with contextlib.suppress(asyncio.CancelledError):
                await task
        INACTIVITY_STATE["task"] = None
        INACTIVITY_STATE["last_request"] = None
        INACTIVITY_STATE["server"] = None
        INACTIVITY_STATE["timeout"] = None

    app.add_event_handler("startup", start_monitor)
    app.add_event_handler("shutdown", stop_monitor)
190
+
191
+
192
def pick_random_free_port(host: str, start: int, end: int) -> int:
    """Return a random port in [start, end] that can currently be bound on *host*.

    Raises:
        RuntimeError: for an invalid range, or when no port in the range
            is free.

    Note: the port may be taken by another process between this probe and
    the caller's actual bind (inherent TOCTOU race).
    """
    if start < 0 or end > 65535:
        raise RuntimeError("--port-range values must be between 0 and 65535")
    if start > end:
        raise RuntimeError("--port-range START must be less than or equal to END")

    # Shuffle the candidate list instead of rejection-sampling randint:
    # each port is probed at most once and the loop is bounded by the
    # range size rather than by random collisions.
    candidates = list(range(start, end + 1))
    random.shuffle(candidates)
    for port in candidates:
        try:
            # Probe by binding a throwaway listening socket.
            with socket.create_server((host, port), reuse_port=False):
                pass
        except OSError:
            continue
        return port

    raise RuntimeError(f"No free port available in range {start}-{end}")
213
+
214
+
215
DEFAULT_LOCK_TIMEOUT = 120.0
CHUNK_SIZE = 640 * 1024  # for now, hardcoded
PROMISE_TTL_SECONDS = 10 * 60.0

# Module-level configuration state, filled in below either from
# environment variables (ASGI deployment) or from argparse (CLI mode).
env = os.environ
as_commandline_tool = True
status_tracker = None
status_file_path = None
status_file_contents = None
timeout_seconds = None
225
+
226
# ASGI deployment path: HASHSERVER_DIRECTORY marks env-based configuration.
if "HASHSERVER_DIRECTORY" in os.environ:
    directory = os.environ["HASHSERVER_DIRECTORY"]
    writable = False
    if "HASHSERVER_WRITABLE" in os.environ:
        env_writable = os.environ["HASHSERVER_WRITABLE"]
        # NOTE(review): assert is stripped under -O; invalid values would
        # then silently fall through to writable=False.
        assert env_writable.lower() in ("true", "false", "0", "1", ""), env_writable
        if env_writable.lower() in ("true", "1"):
            writable = True
    as_commandline_tool = False
236
+ extra_dirs: list[str] = []
237
+ extra_dirs0 = os.environ.get("HASHSERVER_EXTRA_DIRS")
238
+ if extra_dirs0:
239
+
240
+ def _filt(d):
241
+ d = d.strip()
242
+ if d == '""' or d == "''":
243
+ return ""
244
+
245
+ extra_dirs00 = [_filt(d) for d in extra_dirs0.split(";")]
246
+ extra_dirs = [d for d in extra_dirs00 if d]
247
+
248
+ layout = os.environ.get("HASHSERVER_LAYOUT", "prefix")
249
+ status_file_path = None
250
+ status_file_contents = None
251
+ timeout_seconds = None
252
+ algorithm = os.environ.get("HASHSERVER_HASH_ALGORITHM", DEFAULT_HASH_ALGORITHM)
253
+ configure_hash_algorithm(algorithm)
254
+ lock_timeout = os.environ.get("HASHSERVER_LOCK_TIMEOUT")
255
+ if lock_timeout is not None:
256
+ try:
257
+ configure_lock_timeout(float(lock_timeout))
258
+ except ValueError:
259
+ raise_startup_error(
260
+ RuntimeError("HASHSERVER_LOCK_TIMEOUT must be a number")
261
+ )
262
+
263
+ else:
264
+ if (
265
+ len(sys.argv)
266
+ and sys.argv[0].find("uvicorn") > -1
267
+ and not os.path.isdir(sys.argv[0])
268
+ ):
269
+ print(
270
+ "Running hashserver under uvicorn CLI requires at least HASHSERVER_DIRECTORY to be defined",
271
+ file=sys.stderr,
272
+ )
273
+ exit(1)
274
+ parser = argparse.ArgumentParser()
275
+ parser.add_argument(
276
+ "directory",
277
+ help="""Directory where buffers are located.
278
+
279
+ Buffers have the same file name as their checksum (sha3-256 by default).""",
280
+ )
281
+ parser.add_argument(
282
+ "--extra-dirs",
283
+ help="""Extra directories where read-only buffers are located.
284
+
285
+ This must be a list of directories separated by semi-colons (;).
286
+ If not specified, this argument is read from HASHSERVER_EXTRA_DIRS, if present""",
287
+ )
288
+
289
+ parser.add_argument(
290
+ "--writable",
291
+ action="store_true",
292
+ help="Allow HTTP PUT requests",
293
+ )
294
+
295
+ port_group = parser.add_mutually_exclusive_group()
296
+ port_group.add_argument(
297
+ "--port",
298
+ type=int,
299
+ help="Network port",
300
+ )
301
+ port_group.add_argument(
302
+ "--port-range",
303
+ type=int,
304
+ nargs=2,
305
+ metavar=("START", "END"),
306
+ help="Inclusive port range to select a random free port from",
307
+ )
308
+
309
+ parser.add_argument(
310
+ "--host",
311
+ type=str,
312
+ help="Network host",
313
+ default="127.0.0.1",
314
+ )
315
+
316
+ parser.add_argument(
317
+ "--layout",
318
+ type=str,
319
+ help="""Directory layout.
320
+ One of:
321
+ - "flat".
322
+ A buffer with checksum CS is stored as file "$DIRECTORY/$CS".
323
+
324
+ - "prefix".
325
+ A buffer with checksum CS is stored as file "$DIRECTORY/$PREFIX/$CS",
326
+ where PREFIX is the first two characters of CS.
327
+
328
+ """,
329
+ default="prefix",
330
+ )
331
+
332
+ parser.add_argument(
333
+ "--hash-algorithm",
334
+ type=str,
335
+ choices=tuple(HASH_ALGORITHMS.keys()),
336
+ default=DEFAULT_HASH_ALGORITHM,
337
+ help="Hash algorithm used for checksum calculations (default: %(default)s)",
338
+ )
339
+ parser.add_argument(
340
+ "--lock-timeout",
341
+ type=float,
342
+ default=DEFAULT_LOCK_TIMEOUT,
343
+ help="Wait this many seconds for stale lock files (default: %(default)s)",
344
+ )
345
+
346
+ parser.add_argument(
347
+ "--status-file",
348
+ type=str,
349
+ help="JSON file used to report server status",
350
+ )
351
+
352
+ parser.add_argument(
353
+ "--timeout",
354
+ type=float,
355
+ help="Stop the server after this many seconds of inactivity",
356
+ )
357
+
358
+ args = parser.parse_args()
359
+ directory = args.directory
360
+ writable = args.writable
361
+ extra_dirs = args.extra_dirs
362
+ configure_hash_algorithm(args.hash_algorithm)
363
+ configure_lock_timeout(args.lock_timeout)
364
+ status_file_path = args.status_file
365
+ timeout_seconds = args.timeout
366
+ if status_file_path:
367
+ status_file_contents = wait_for_status_file(status_file_path)
368
+ status_tracker = StatusFileTracker(
369
+ status_file_path, status_file_contents, args.port
370
+ )
371
+ if timeout_seconds is not None and timeout_seconds <= 0:
372
+ raise_startup_error(RuntimeError("--timeout must be a positive number"))
373
+ if not extra_dirs:
374
+ extra_dirs = os.environ.get("HASHSERVER_EXTRA_DIRS")
375
+ if extra_dirs:
376
+ extra_dirs = [d.strip() for d in extra_dirs.split(";")]
377
+ else:
378
+ extra_dirs = []
379
+ layout = args.layout
380
+ if args.port_range:
381
+ start, end = args.port_range
382
+ try:
383
+ selected_port = pick_random_free_port(args.host, start, end)
384
+ except BaseException as exc:
385
+ raise_startup_error(exc)
386
+ else:
387
+ selected_port = args.port if args.port is not None else 8000
388
+ args.port = selected_port
389
+ if status_tracker:
390
+ status_tracker.port = selected_port
391
+
392
+
393
# Validate the buffer directory before serving anything.
# NOTE(review): FileExistsError is an odd choice for "does not exist" /
# "not a directory" — FileNotFoundError / NotADirectoryError would fit;
# kept as-is since callers may match on the current type.
if not os.path.exists(directory):
    raise_startup_error(FileExistsError(f"Directory '{directory}' does not exist"))
if not os.path.isdir(directory):
    raise_startup_error(FileExistsError(f"Directory '{directory}' is not a directory"))

# Best effort: setgid + group-writable so multiple instances can share it.
try:
    os.chmod(directory, 0o3775)
except Exception:
    pass

if layout not in ("flat", "prefix"):
    raise_startup_error(RuntimeError("Layout must be 'flat' or 'prefix'"))

# Best effort: drop the sentinel file that advertises prefix layout.
if layout == "prefix":
    prefix_file = os.path.join(directory, ".HASHSERVER_PREFIX")
    try:
        if not os.path.exists(prefix_file):
            with open(prefix_file, "wb") as f:
                f.write(b"1\n")
    except Exception:
        pass

app = FastAPI()
LOGGER = logging.getLogger("hashserver")
417
+
418
+
419
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request, exc):
    """Return the first request-validation error as a 400 JSON response."""
    # Use the documented errors() accessor instead of the fragile
    # exc.args[0][0] indexing, and tolerate an empty error list.
    errors = exc.errors()
    inner_exc = jsonable_encoder(errors[0]) if errors else None
    if isinstance(inner_exc, dict):
        # "ctx" may hold non-serializable objects; drop it from the payload.
        inner_exc.pop("ctx", None)
    return JSONResponse(
        status_code=400,
        content={"message": "Invalid data", "exception": inner_exc},
    )
428
+
429
+
430
@app.exception_handler(FileNotFoundError)
async def filenotfound_exception_handler(request, exc):
    """Map missing buffers to a plain 404 response."""
    return Response(content="Not found", status_code=404)
433
+
434
+
435
@app.exception_handler(RuntimeError)
async def runtime_exception_handler(request, exc):
    """Report runtime errors (e.g. checksum mismatches) as 400 JSON."""
    message = f"{exc}"
    return JSONResponse(content={"message": message}, status_code=400)
441
+
442
+
443
@app.middleware("http")
async def record_last_request(request: Request, call_next):
    """Stamp the time of the most recent request (feeds the inactivity timer)."""

    def _mark() -> None:
        # Only track when an inactivity timeout is configured; checked on
        # every call in case the setting changes mid-request.
        if INACTIVITY_STATE["timeout"] is not None:
            INACTIVITY_STATE["last_request"] = time.monotonic()

    _mark()
    response = await call_next(request)
    _mark()
    return response
451
+
452
+
453
@app.get("/buffer-length")
async def buffer_length(checksums: Annotated[List[Checksum], Body()]) -> JSONResponse:
    """Return the byte length of each requested buffer (0 when absent).

    In-flight uploads of the same checksums are awaited first.  Checksums
    that are merely promised (not yet uploaded) are reported as True.
    """
    checksums2 = [parse_checksum(checksum) for checksum in checksums]
    await _wait_for_current_put_requests(checksums2)
    curr_results = [0] * len(checksums)

    async def stat_all(paths):
        # Stat all candidate paths concurrently; missing files are skipped.
        futures = [anyio.Path(path).stat() for _, path in paths]
        result0 = await asyncio.gather(*futures, return_exceptions=True)
        # `st` (not `stat`) avoids shadowing the stdlib stat module.
        for (nr, path), st in zip(paths, result0):
            if isinstance(st, Exception):
                continue
            curr_results[nr] = st.st_size

    # Primary directory (prefix layout stores buffers under a 2-hex-digit
    # subdirectory of their checksum).
    paths = []
    for nr, checksum in enumerate(checksums2):
        assert isinstance(checksum, str)
        if layout == "prefix":
            prefix = checksum[:2]
            path = os.path.join(directory, prefix, checksum)
        else:
            path = os.path.join(directory, checksum)
        paths.append((nr, path))

    await stat_all(paths)

    # Fall back to the extra (flat-layout) directories for anything still
    # unresolved.  BUGFIX: rebuild `paths` for every extra dir instead of
    # accumulating — the old code re-statted all previous candidates on
    # each pass and its `if not len(paths): break` could never fire.
    for extra_dir in extra_dirs:
        paths = []
        for nr, checksum in enumerate(checksums2):
            if curr_results[nr]:
                continue
            paths.append((nr, os.path.join(extra_dir, checksum)))
        if not paths:
            break
        await stat_all(paths)

    # Promised checksums count as present.
    promised = await _promise_registry.promised_indices(checksums2)
    for idx in promised:
        curr_results[idx] = True

    return curr_results
497
+
498
+
499
async def _has(checksums: List[Checksum], include_promises: bool) -> List[bool]:
    """Return a presence flag for each checksum.

    A checksum counts as present when its buffer is on disk (main directory,
    then the extra directories) or an upload of it is currently in flight.
    With *include_promises* true, promised-but-pending checksums also count.
    """
    parsed = [parse_checksum(checksum) for checksum in checksums]
    found = [False] * len(checksums)

    # Anything currently being uploaded is reported as present immediately.
    for idx, checksum in enumerate(parsed):
        if checksum in _current_put_requests:
            found[idx] = True

    async def exists_all(candidates):
        # Run all existence checks concurrently; errors are treated as absent.
        checks = [anyio.Path(p).exists() for _, p in candidates]
        outcomes = await asyncio.gather(*checks, return_exceptions=True)
        for (nr, _), outcome in zip(candidates, outcomes):
            if isinstance(outcome, Exception):
                continue
            if outcome:
                found[nr] = True

    # Candidates in the main directory (prefix layout nests by 2 hex digits).
    candidates = []
    for nr, checksum in enumerate(parsed):
        assert isinstance(checksum, str)
        if found[nr]:
            continue
        if layout == "prefix":
            main_path = os.path.join(directory, checksum[:2], checksum)
        else:
            main_path = os.path.join(directory, checksum)
        candidates.append((nr, main_path))

    if candidates:
        await exists_all(candidates)

    # Extra directories are flat; stop as soon as everything is resolved.
    for extra_dir in extra_dirs:
        candidates = [
            (nr, os.path.join(extra_dir, checksum))
            for nr, checksum in enumerate(parsed)
            if not found[nr]
        ]
        if not candidates:
            break
        await exists_all(candidates)

    if include_promises:
        for idx in await _promise_registry.promised_indices(parsed):
            found[idx] = True

    return found
552
+
553
+
554
@app.get("/has")
async def has(checksums: Annotated[List[Checksum], Body()]) -> JSONResponse:
    """Presence check that also honours outstanding promises."""
    result = await _has(checksums, include_promises=True)
    return result
557
+
558
+
559
@app.get("/has-now")
async def has_now(checksums: Annotated[List[Checksum], Body()]) -> JSONResponse:
    """Presence check for bytes actually on disk, ignoring promises.

    Renamed from ``has``: the original duplicate definition silently rebound
    the module-level name of the ``/has`` handler.  The route path is
    unchanged, so the HTTP interface is unaffected.
    """
    return await _has(checksums, include_promises=False)
562
+
563
+
564
class PromiseAwareResponseMixin:
    """File-response mixin that retries while a promise is outstanding.

    When serving raises FileNotFoundError, wait for the promise on this
    checksum to resolve (i.e. the upload to complete) and retry; once no
    live promise remains, re-raise the error.
    """

    def __init__(self, *, checksum: str, **kwargs):
        self._promise_checksum = checksum
        super().__init__(checksum=checksum, **kwargs)

    async def __call__(self, scope, receive, send):
        while True:
            try:
                return await super().__call__(scope, receive, send)
            except FileNotFoundError:
                # wait_for() returns False when the promise is gone/expired.
                if not await _promise_registry.wait_for(self._promise_checksum):
                    raise
578
+
579
+
580
class PromiseAwareHashFileResponse(PromiseAwareResponseMixin, HashFileResponse):
    """Flat-layout file response with promise-aware retry."""


class PromiseAwarePrefixHashFileResponse(
    PromiseAwareResponseMixin, PrefixHashFileResponse
):
    """Prefix-layout file response with promise-aware retry."""


# Response class per directory layout, used by the GET /{checksum} handler.
_response_classes_get_file = {
    "flat": PromiseAwareHashFileResponse,
    "prefix": PromiseAwarePrefixHashFileResponse,
}
594
+
595
+
596
@app.get("/healthcheck")
async def healthcheck() -> Response:
    """Liveness probe: always answers with the plain body ``OK``."""
    return Response(content="OK")
599
+
600
+
601
@app.get("/{checksum}")
async def get_file(checksum: Annotated[Checksum, Path()]) -> HashFileResponse:
    """Serve the buffer identified by *checksum*.

    Waits for any in-flight upload of the same checksum, then delegates to
    the layout-appropriate promise-aware file response.
    """
    parsed = parse_checksum(checksum)
    LOGGER.info("GET %s", parsed)
    await _wait_for_current_put_requests((parsed,))
    response_class = _response_classes_get_file[layout]
    return response_class(directory=directory, checksum=parsed, extra_dirs=extra_dirs)
611
+
612
+
613
async def promise(checksum: Annotated[Checksum, Path()]) -> JSONResponse:
    """Register a promise that *checksum* will be uploaded soon (HTTP 202)."""
    parsed = parse_checksum(checksum)
    await _promise_registry.add(parsed)
    payload = {"checksum": parsed, "expires_in": PROMISE_TTL_SECONDS}
    return JSONResponse(status_code=202, content=payload)
620
+
621
+
622
# Checksums with an upload currently in flight, plus the condition used to
# wake waiters when one finishes (see _wait_for_current_put_requests).
_current_put_requests: set[str] = set()
_current_put_condition = asyncio.Condition()


@dataclass
class _PromiseEntry:
    """Bookkeeping for one outstanding upload promise."""

    event: asyncio.Event  # set when the promised buffer arrives
    expires_at: float  # time.monotonic() deadline for the promise
631
+
632
class PromiseRegistry:
    """Tracks checksums that a client has promised to upload soon.

    Each promise holds an asyncio.Event (set once the buffer arrives) and a
    monotonic-clock expiry.  All state is guarded by one asyncio.Lock, and
    expired entries are purged lazily on every access.
    """

    def __init__(self, ttl_seconds: float = PROMISE_TTL_SECONDS):
        self._ttl_seconds = ttl_seconds
        self._promises: dict[str, _PromiseEntry] = {}
        self._lock = asyncio.Lock()

    def _cleanup_locked(self, now: Optional[float] = None) -> None:
        # Caller must hold self._lock.  Drop every entry whose deadline passed.
        if now is None:
            now = time.monotonic()
        expired = [
            cs for cs, entry in self._promises.items() if entry.expires_at <= now
        ]
        for checksum in expired:
            self._promises.pop(checksum, None)

    async def add(self, checksum: str) -> float:
        """Register (or refresh) a promise; return its new expiry time."""
        now = time.monotonic()
        expires_at = now + self._ttl_seconds
        async with self._lock:
            self._cleanup_locked(now)
            entry = self._promises.get(checksum)
            if entry is None:
                entry = _PromiseEntry(asyncio.Event(), expires_at)
                self._promises[checksum] = entry
            else:
                # Re-promising extends the deadline but keeps the same event,
                # so existing waiters are still woken by resolve().
                entry.expires_at = expires_at
        return expires_at

    async def resolve(self, checksum: str) -> None:
        """Mark a promised buffer as arrived: wake waiters, drop the entry."""
        async with self._lock:
            entry = self._promises.pop(checksum, None)
            if entry:
                entry.event.set()

    async def promised_indices(self, checksums: List[str]) -> Set[int]:
        """Return the indices in *checksums* that have a live promise."""
        async with self._lock:
            self._cleanup_locked()
            promised = {idx for idx, cs in enumerate(checksums) if cs in self._promises}
        return promised

    async def wait_for(self, checksum: str) -> bool:
        """Wait until the promise for *checksum* resolves.

        Returns True when the upload arrived, False when there is no live
        promise (absent or expired).  Loops on timeout so that a refreshed
        deadline (re-promise) keeps the caller waiting.
        """
        while True:
            async with self._lock:
                self._cleanup_locked()
                entry = self._promises.get(checksum)
                if entry is None:
                    return False
                timeout = entry.expires_at - time.monotonic()
                if timeout <= 0:
                    self._promises.pop(checksum, None)
                    return False
                event = entry.event
            try:
                # Wait outside the lock so resolve() can acquire it.
                await asyncio.wait_for(event.wait(), timeout)
                return True
            except asyncio.TimeoutError:
                async with self._lock:
                    current = self._promises.get(checksum)
                    if current is not entry:
                        # Entry was replaced meanwhile; re-evaluate from scratch.
                        continue
                    remaining = current.expires_at - time.monotonic()
                    if remaining <= 0:
                        self._promises.pop(checksum, None)
                        return False
                # Deadline was extended in the meantime; keep waiting.
                continue
697
+
698
+
699
_promise_registry = PromiseRegistry()


async def _wait_for_current_put_requests(checksums: Iterable[str]) -> None:
    """Block until none of *checksums* has an upload currently in flight."""
    # A bare str/bytes is treated as a single checksum, not an iterable of chars.
    if isinstance(checksums, (str, bytes)):
        wanted = {checksums}
    else:
        wanted = set(checksums)
    async with _current_put_condition:
        while _current_put_requests.intersection(wanted):
            await _current_put_condition.wait()
710
+
711
+
712
async def put_file(checksum: Annotated[Checksum, Path()], rq: Request) -> Response:
    """Store the request body as the buffer for *checksum*.

    The body is streamed into a temporary file in the target directory while
    its checksum is computed; only if it matches is the file atomically moved
    into place.  Concurrent PUTs of the same checksum are deduplicated via
    _current_put_requests (second caller gets 202).  Responses: 201 if the
    buffer already existed, 202 if an upload is in progress, 400 on checksum
    mismatch or client disconnect, 200 "OK" on success.
    """

    checksum_str = parse_checksum(checksum)
    LOGGER.info("PUT %s start", checksum_str)

    # Compute the final on-disk path for the configured layout.
    if layout == "prefix":
        prefix = checksum_str[:2]
        target_dir = os.path.join(directory, prefix)
    else:
        target_dir = directory
    path = os.path.join(target_dir, checksum_str)

    # Create the prefix subdirectory on demand (group-writable, best effort).
    if layout == "prefix":
        target_directory = anyio.Path(target_dir)
        if not await target_directory.exists():
            await target_directory.mkdir(exist_ok=True)
            await aiofiles_chmod(target_dir, 0o3775)
    # Content-addressed store: an existing file is by definition correct.
    if await aiofiles.ospath.exists(path):
        LOGGER.info("PUT %s already exists", checksum_str)
        await aiofiles_chmod(path, 0o444)
        await _promise_registry.resolve(checksum_str)
        return Response(status_code=201)

    ok = False
    added_to_put_requests = False
    cs_stream = calculate_checksum_stream()
    temp_path = None
    buffer_checksum = None
    try:
        # Claim this checksum; a concurrent PUT gets 202 instead of racing.
        async with _current_put_condition:
            if checksum_str in _current_put_requests:
                LOGGER.info("PUT %s already in progress", checksum_str)
                return Response(status_code=202)
            _current_put_requests.add(checksum_str)
            added_to_put_requests = True
        # Stream the body to a temp file in the same directory so the final
        # os.replace() is an atomic same-filesystem rename.
        async with aiofiles.tempfile.NamedTemporaryFile(
            dir=target_dir,
            prefix=checksum_str + "-",
            delete=False,
        ) as file:
            async for chunk in rq.stream():
                cs_stream.update(chunk)
                await file.write(chunk)
            buffer_checksum = cs_stream.hexdigest()
            temp_path = file.name
        if buffer_checksum != checksum_str:
            LOGGER.warning("PUT %s incorrect checksum", checksum_str)
            return Response(status_code=400, content="Incorrect checksum")
        if not await aiofiles.ospath.exists(path):
            try:
                await aiofiles.os.replace(temp_path, path)
            except Exception:
                # If someone else won the race the file now exists; only
                # propagate when the destination is still missing.
                if not await aiofiles.ospath.exists(path):
                    raise
        ok = True
        try:
            # Stored buffers are immutable; read-only is best effort.
            await aiofiles_chmod(path, 0o444)
        except Exception:
            pass

    except ClientDisconnect:
        LOGGER.warning("PUT %s client disconnected", checksum_str)
        return Response(status_code=400)

    finally:
        # Release the claim and wake any waiters, whatever happened above.
        if added_to_put_requests:
            async with _current_put_condition:
                _current_put_requests.remove(checksum_str)
                _current_put_condition.notify_all()
        # The temp file was either renamed away (unlink -> FileNotFoundError)
        # or is leftover garbage; remove it best-effort.
        if temp_path is not None:
            try:
                await aiofiles.os.unlink(temp_path)
            except FileNotFoundError:
                pass
            except Exception:
                pass
        # On failure, do not leave a partial/incorrect file at the final path.
        if added_to_put_requests and not ok:
            try:
                pathlib.Path(path).unlink()
            except FileNotFoundError:
                pass

    if ok:
        LOGGER.info("PUT %s completed", checksum_str)
        await _promise_registry.resolve(checksum_str)
    return Response(content="OK")
798
+
799
+
800
# Mutating endpoints are only registered when the server is writable;
# otherwise put_file/promise stay plain (unreachable) coroutines.
if writable:
    put_file = app.put("/{checksum}")(put_file)
    promise = app.put("/promise/{checksum}")(promise)

# Allow cross-origin access from anywhere: the server only exposes
# content-addressed, checksum-verified data.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
810
+
811
+
812
def main() -> int:
    """Console-script entry point shim.

    The server launch itself is a side effect of importing this module;
    this function only supplies a zero exit status.
    """
    return 0
815
+
816
+
817
+ def _timestamped_log_config():
818
+ try:
819
+ from uvicorn.config import LOGGING_CONFIG
820
+ except Exception: # pragma: no cover - uvicorn import guard
821
+ return None
822
+
823
+ log_config = copy.deepcopy(LOGGING_CONFIG)
824
+ formatters = log_config.get("formatters", {})
825
+ for name in ("default", "access"):
826
+ formatter = formatters.get(name)
827
+ if not formatter:
828
+ continue
829
+ fmt = formatter.get("fmt")
830
+ if fmt:
831
+ formatter["fmt"] = f"%(asctime)s {fmt}"
832
+ else:
833
+ formatter["fmt"] = "%(asctime)s %(message)s"
834
+ return log_config
835
+
836
+
837
# When invoked as a command-line tool, run uvicorn ourselves; otherwise an
# external ASGI launcher imports `app` and this block is skipped.
if as_commandline_tool:
    import uvicorn

    log_config = _timestamped_log_config()
    config_kwargs = dict(app=app, port=args.port, host=args.host)
    if log_config is not None:
        config_kwargs["log_config"] = log_config
    config = uvicorn.Config(**config_kwargs)
    server = uvicorn.Server(config)

    if status_tracker:

        # Write the "running" status file once the event loop is up.
        @app.on_event("startup")
        async def _hashserver_status_file_running():
            await anyio.to_thread.run_sync(status_tracker.write_running)

    if timeout_seconds is not None:
        setup_inactivity_timeout(timeout_seconds, server)

    print("OK")
    try:
        server.run()
    except BaseException:
        # If startup never reached "running", record the failure before
        # propagating (covers KeyboardInterrupt/SystemExit too).
        if status_tracker and not status_tracker.running_written:
            status_tracker.write_failed()
        raise
else:
    # uvicorn (or some other ASGI launcher) will take care of it
    pass