nab-python 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nab_python/__init__.py +1 -0
- nab_python/_build/__init__.py +1 -0
- nab_python/_build/env.py +364 -0
- nab_python/_build/errors.py +17 -0
- nab_python/_build/runner.py +254 -0
- nab_python/_lockfile/__init__.py +1 -0
- nab_python/_lockfile/builder.py +339 -0
- nab_python/_lockfile/disjointness.py +207 -0
- nab_python/_lockfile/pylock.py +323 -0
- nab_python/_lockfile/requirements.py +121 -0
- nab_python/_packaging_provider.py +98 -0
- nab_python/_provider/__init__.py +1 -0
- nab_python/_provider/build_remote.py +95 -0
- nab_python/_provider/extras.py +231 -0
- nab_python/_provider/listing.py +442 -0
- nab_python/_provider/lookahead.py +156 -0
- nab_python/_provider/metadata_resolver.py +450 -0
- nab_python/_provider/priority.py +174 -0
- nab_python/_provider/sources.py +215 -0
- nab_python/_testing/__init__.py +1 -0
- nab_python/_testing/coordinator_fake.py +240 -0
- nab_python/_vcs_admission.py +209 -0
- nab_python/_vendor/__init__.py +6 -0
- nab_python/_vendor/packaging/LICENSE +3 -0
- nab_python/_vendor/packaging/LICENSE.APACHE +177 -0
- nab_python/_vendor/packaging/LICENSE.BSD +23 -0
- nab_python/_vendor/packaging/PROVENANCE.md +73 -0
- nab_python/_vendor/packaging/__init__.py +15 -0
- nab_python/_vendor/packaging/_elffile.py +108 -0
- nab_python/_vendor/packaging/_manylinux.py +265 -0
- nab_python/_vendor/packaging/_musllinux.py +88 -0
- nab_python/_vendor/packaging/_parser.py +394 -0
- nab_python/_vendor/packaging/_structures.py +33 -0
- nab_python/_vendor/packaging/_tokenizer.py +196 -0
- nab_python/_vendor/packaging/dependency_groups.py +302 -0
- nab_python/_vendor/packaging/direct_url.py +325 -0
- nab_python/_vendor/packaging/errors.py +94 -0
- nab_python/_vendor/packaging/licenses/__init__.py +186 -0
- nab_python/_vendor/packaging/licenses/_spdx.py +799 -0
- nab_python/_vendor/packaging/markers.py +506 -0
- nab_python/_vendor/packaging/metadata.py +964 -0
- nab_python/_vendor/packaging/py.typed +0 -0
- nab_python/_vendor/packaging/pylock.py +910 -0
- nab_python/_vendor/packaging/ranges.py +1803 -0
- nab_python/_vendor/packaging/requirements.py +132 -0
- nab_python/_vendor/packaging/specifiers.py +1141 -0
- nab_python/_vendor/packaging/tags.py +929 -0
- nab_python/_vendor/packaging/utils.py +296 -0
- nab_python/_vendor/packaging/version.py +1230 -0
- nab_python/build_backend.py +184 -0
- nab_python/config.py +805 -0
- nab_python/download.py +170 -0
- nab_python/fetch.py +827 -0
- nab_python/lockfile.py +238 -0
- nab_python/metadata.py +145 -0
- nab_python/provider.py +1235 -0
- nab_python/py.typed +0 -0
- nab_python/requirements_file.py +180 -0
- nab_python/resolve.py +497 -0
- nab_python/universal/__init__.py +1 -0
- nab_python/universal/matrix.py +235 -0
- nab_python/universal/provider.py +214 -0
- nab_python/universal/reresolve.py +310 -0
- nab_python/universal/resolve.py +508 -0
- nab_python/universal/validate.py +439 -0
- nab_python/universal/wheel_selection.py +327 -0
- nab_python/workspace.py +214 -0
- nab_python-0.0.1.dist-info/METADATA +49 -0
- nab_python-0.0.1.dist-info/RECORD +71 -0
- nab_python-0.0.1.dist-info/WHEEL +4 -0
- nab_python-0.0.1.dist-info/licenses/LICENSE +21 -0
nab_python/fetch.py
ADDED
|
@@ -0,0 +1,827 @@
|
|
|
1
|
+
"""Channel-based async I/O coordinator for nab-python.
|
|
2
|
+
|
|
3
|
+
Mirrors uv's architecture: a sync resolver on the main thread sends
|
|
4
|
+
fetch requests through a queue to an async fetcher on a background
|
|
5
|
+
thread. An InMemoryIndex is shared state between both.
|
|
6
|
+
|
|
7
|
+
The cross-thread bridge is built from the standard library only:
|
|
8
|
+
the fetcher thread owns an ``asyncio.Queue`` and the sync side
|
|
9
|
+
schedules puts on the loop with ``call_soon_threadsafe``.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import asyncio
|
|
15
|
+
import enum
|
|
16
|
+
import logging
|
|
17
|
+
import threading
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from typing import TYPE_CHECKING, Any
|
|
20
|
+
|
|
21
|
+
from nab_index.cache import CacheBackend, NullCache, OnDiskCache
|
|
22
|
+
from nab_index.cached_client import CachedAsyncSimpleClient
|
|
23
|
+
from nab_index.client import SdistFile, WheelFile
|
|
24
|
+
from nab_index.local_index import LocalIndexClient
|
|
25
|
+
from nab_index.multi_index import IndexConfig, MultiIndexClient
|
|
26
|
+
|
|
27
|
+
from ._vendor.packaging.markers import Marker
|
|
28
|
+
from ._vendor.packaging.utils import canonicalize_name
|
|
29
|
+
|
|
30
|
+
if TYPE_CHECKING:
|
|
31
|
+
from collections.abc import Sequence
|
|
32
|
+
|
|
33
|
+
from typing_extensions import Self
|
|
34
|
+
|
|
35
|
+
__all__ = [
|
|
36
|
+
"DEFAULT_INDEX_NAME",
|
|
37
|
+
"DEFAULT_INDEX_URL",
|
|
38
|
+
"FetchCoordinator",
|
|
39
|
+
"FetchKind",
|
|
40
|
+
"FetchRequest",
|
|
41
|
+
"InMemoryIndex",
|
|
42
|
+
"IndexOverride",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Name and URL of the single index the coordinator falls back to when the
# caller passes no ``indexes`` list.
DEFAULT_INDEX_NAME = "pypi"
DEFAULT_INDEX_URL = "https://pypi.org/simple/"

# Maximum time, in seconds, the main thread waits for the fetcher thread to
# drain its queue and exit on :meth:`FetchCoordinator.shutdown`.  It is passed
# as the ``timeout`` of ``Thread.join``.
_COORDINATOR_JOIN_TIMEOUT_SECONDS = 10
|
|
52
|
+
|
|
53
|
+
if TYPE_CHECKING:
|
|
54
|
+
from pathlib import Path
|
|
55
|
+
|
|
56
|
+
from nab_index.transport import AsyncHttpTransport
|
|
57
|
+
|
|
58
|
+
logger = logging.getLogger(__name__)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class FetchKind(enum.Enum):
    """Tag naming which of the four fetch operations a request asks for."""

    # Package file listing (see ``FetchCoordinator.request_listing``).
    LISTING = "listing"
    # Wheel metadata for one ``(package, version)``.
    METADATA = "metadata"
    # PKG-INFO extraction from an sdist.
    SDIST = "sdist"
    # Raw bytes of an sdist archive.
    SDIST_ARCHIVE = "sdist-archive"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass(frozen=True, slots=True)
|
|
71
|
+
class IndexOverride:
|
|
72
|
+
"""Per-package index routing rule.
|
|
73
|
+
|
|
74
|
+
``name`` is the package name (canonicalised internally). ``index``
|
|
75
|
+
is the *name* of an :class:`IndexConfig` declared in the
|
|
76
|
+
coordinator's ordered list. ``marker`` is an optional :pep:`508`
|
|
77
|
+
marker text; ``None`` or the empty string mean "always applies".
|
|
78
|
+
Multiple overrides for the same name are evaluated in declared
|
|
79
|
+
order; the first one whose marker is True wins, later entries are
|
|
80
|
+
ignored.
|
|
81
|
+
"""
|
|
82
|
+
|
|
83
|
+
name: str
|
|
84
|
+
index: str
|
|
85
|
+
marker: str | None = None
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _resolve_overrides(
    overrides: list[IndexOverride],
    marker_environment: dict[str, str] | None,
) -> dict[str, str]:
    """Collapse the override rules into ``{canonical_name: index_name}``.

    Rules are visited in declaration order and the first applicable
    rule for a package wins; once a package has an entry, later rules
    for it are skipped without being evaluated.  A rule with no marker
    (``None`` or ``""``) always applies.  A rule with a marker applies
    only when ``marker_environment`` is given and the marker evaluates
    true against it; with a ``None`` environment such rules never match.
    """
    routed: dict[str, str] = {}
    for rule in overrides:
        package = canonicalize_name(rule.name)
        if package in routed:
            continue
        unconditional = rule.marker in (None, "")
        # Short-circuiting keeps the marker from being parsed when the
        # rule is unconditional or there is no environment to test it on.
        if unconditional or (
            marker_environment is not None
            and Marker(rule.marker).evaluate(marker_environment)
        ):
            routed[package] = rule.index
    return routed
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@dataclass(frozen=True, slots=True)
|
|
115
|
+
class FetchRequest:
|
|
116
|
+
"""A single fetch request, carried across the sync->async boundary."""
|
|
117
|
+
|
|
118
|
+
kind: FetchKind
|
|
119
|
+
package: str
|
|
120
|
+
version: str | None = None
|
|
121
|
+
url: str | None = None
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@dataclass
class _Pending:
    """Rendezvous record for one in-flight fetch.

    ``event`` is a fresh, unset :class:`threading.Event` per instance;
    ``result`` starts as ``None`` and is filled in by the ``store_*``
    method that also sets the event.
    """

    event: threading.Event = field(default_factory=threading.Event)
    result: Any = None
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class InMemoryIndex:
    """Lock-guarded, in-process store for everything the fetcher retrieves.

    The async fetcher calls the ``store_*`` methods; the sync provider
    calls the ``get_*`` / ``has_*`` methods.  Every access takes the
    same mutex.
    """

    def __init__(self) -> None:
        """Build an index with every table empty."""
        self._lock = threading.Lock()

        # Raw fetch results.
        self._listings: dict[str, list[WheelFile | SdistFile]] = {}
        self._listing_indexes: dict[str, str] = {}
        self._metadata: dict[tuple[str, str], str | None] = {}
        self._sdist_pyproject: dict[tuple[str, str], str | None] = {}
        self._sdist_archives: dict[tuple[str, str], bytes | None] = {}

        # Waiters keyed by "<kind>:<package>[:<version>]".
        self._pending: dict[str, _Pending] = {}

        # Parsed metadata depends only on the underlying text, so one
        # parse is shared by all tuple providers in universal mode.
        self._parsed_metadata: dict[tuple[str, str], Any] = {}

        # Sdist metadata after reconciliation, i.e. once PEP 643 dynamic
        # deps were resolved through the bundled pyproject.toml fallback
        # or a PEP 517 backend call.  Sharing it across tuples keeps
        # universal mode from re-augmenting (or, worse, re-building)
        # the same sdist once per tuple.
        self._resolved_sdist_metadata: dict[tuple[str, str], Any] = {}

    def _notify_waiter(self, pending_key: str, value: Any) -> None:
        """Give ``value`` to the waiter under ``pending_key`` and wake it.

        Does nothing when no waiter is registered.  The caller must
        already hold ``self._lock``.
        """
        waiter = self._pending.get(pending_key)
        if waiter is None:
            return
        waiter.result = value
        waiter.event.set()

    def get_listing(self, package: str) -> list[WheelFile | SdistFile] | None:
        """Look up the stored listing of ``package``; ``None`` if there is none."""
        with self._lock:
            return self._listings.get(package)

    def store_listing(
        self, package: str, data: Sequence[WheelFile | SdistFile]
    ) -> None:
        """Record the listing of ``package`` and wake whoever waits for it.

        ``data`` is typed as a (covariant) Sequence so a homogeneous
        ``list[WheelFile]`` is accepted; a list copy of it is what gets
        stored and handed to the waiter.
        """
        files = list(data)
        with self._lock:
            self._listings[package] = files
            self._notify_waiter(f"listing:{package}", files)

    def store_listing_index(self, package: str, index_name: str) -> None:
        """Remember that ``index_name`` is the configured index that served ``package``."""
        with self._lock:
            self._listing_indexes[package] = index_name

    def get_listing_index(self, package: str) -> str | None:
        """Name of the configured index that served ``package``, else ``None``."""
        with self._lock:
            return self._listing_indexes.get(package)

    def get_metadata(self, package: str, version: str) -> str | None:
        """Stored metadata text, or ``None`` when nothing is stored yet."""
        with self._lock:
            return self._metadata.get((package, version))

    def has_metadata(self, package: str, version: str) -> bool:
        """Tell whether a metadata result (even ``None``) has been stored."""
        with self._lock:
            return (package, version) in self._metadata

    def store_metadata(self, package: str, version: str, data: str | None) -> None:
        """Record metadata text (``None`` marks a failed fetch) and wake the waiter."""
        with self._lock:
            self._metadata[(package, version)] = data
            self._notify_waiter(f"metadata:{package}:{version}", data)

    def store_sdist_metadata(
        self, package: str, version: str, data: str | None
    ) -> None:
        """Record sdist-derived PKG-INFO in the shared metadata slot.

        PKG-INFO is core-metadata-equivalent, so wheel and sdist results
        share the ``_metadata`` entry.  Only the pending key differs
        (``sdist:`` prefix), which lets an sdist request run alongside,
        or after, a wheel metadata request that failed.
        """
        with self._lock:
            self._metadata[(package, version)] = data
            self._notify_waiter(f"sdist:{package}:{version}", data)

    def store_sdist_pyproject(self, package: str, version: str, data: str) -> None:
        """Record an sdist's pyproject.toml text for the static-metadata fallback.

        The fetcher stores both PKG-INFO and pyproject.toml when it
        downloads an sdist; the provider consults this slot when PKG-INFO
        declares dependencies as :pep:`643` Dynamic.  Nothing is written
        for a missing pyproject.toml, so the read path cannot tell
        "missing" from "never fetched".
        """
        with self._lock:
            self._sdist_pyproject[(package, version)] = data

    def get_sdist_pyproject(self, package: str, version: str) -> str | None:
        """Stored sdist pyproject.toml text; ``None`` if absent or unfetched."""
        with self._lock:
            return self._sdist_pyproject.get((package, version))

    def store_sdist_archive(
        self, package: str, version: str, data: bytes | None
    ) -> None:
        """Record sdist archive bytes (``None`` marks a failed fetch) and wake the waiter."""
        with self._lock:
            self._sdist_archives[(package, version)] = data
            self._notify_waiter(f"sdist-archive:{package}:{version}", data)

    def get_sdist_archive(self, package: str, version: str) -> bytes | None:
        """Stored sdist archive bytes; ``None`` if absent or failed."""
        with self._lock:
            return self._sdist_archives.get((package, version))

    def get_or_create_pending(self, key: str) -> tuple[_Pending, bool]:
        """Fetch the waiter for ``key``, creating it on first use.

        Returns ``(pending, already_existed)``.
        """
        with self._lock:
            try:
                return self._pending[key], True
            except KeyError:
                created = _Pending()
                self._pending[key] = created
                return created, False

    def get_parsed_metadata(self, package: str, version: str) -> Any | None:
        """Stored parsed :class:`WheelMetadata`, or ``None``.

        :meth:`get_metadata` yields the raw text; this yields the
        dataclass parsed from it.  The entry exists only after a caller
        parsed the text and passed the result to
        :meth:`store_parsed_metadata`.
        """
        with self._lock:
            return self._parsed_metadata.get((package, version))

    def store_parsed_metadata(self, package: str, version: str, metadata: Any) -> None:
        """Keep a parsed :class:`WheelMetadata` for later tuple lookups.

        Sharing across tuples is sound: the parsed object is read-only
        and determined solely by the text.  Anything tuple-specific
        (marker evaluation, extras admission) is done by layers above
        this cache.
        """
        with self._lock:
            self._parsed_metadata[(package, version)] = metadata

    def pop_parsed_metadata(self, package: str, version: str) -> Any | None:
        """Drop the parsed metadata of one key and return what was dropped.

        ``None`` is returned when there was no entry.  The re-resolve
        path calls this after overriding the raw text so the parsed view
        read by downstream classification is invalidated.
        """
        slot = (package, version)
        with self._lock:
            removed = self._parsed_metadata.pop(slot, None)
            # The reconciled (post-augment / post-build) view derives
            # from the raw parse, so it must go whenever the parse does.
            self._resolved_sdist_metadata.pop(slot, None)
            return removed

    def get_resolved_sdist_metadata(self, package: str, version: str) -> Any | None:
        """Stored post-reconciliation sdist metadata, or ``None``.

        The value comes from
        :func:`nab_python._provider.metadata_resolver.resolve_dynamic_sdist`,
        i.e. a :pep:`621` pyproject augmentation or a PEP 517 backend
        invocation.  Either way it is a deterministic function of the
        sdist content under nab's build inputs, so universal-mode tuples
        share it instead of repeating the work.
        """
        with self._lock:
            return self._resolved_sdist_metadata.get((package, version))

    def store_resolved_sdist_metadata(
        self, package: str, version: str, metadata: Any
    ) -> None:
        """Keep reconciled sdist metadata so other tuples can reuse it."""
        with self._lock:
            self._resolved_sdist_metadata[(package, version)] = metadata
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
# Queue items: single request, batch of requests, or None (shutdown).
# A batch travels as one queue entry, so the fetcher receives all of its
# requests in the same ``get`` and dispatches them together.
_QueueItem = FetchRequest | list[FetchRequest] | None
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
class FetchCoordinator:
|
|
337
|
+
"""Bridges the sync provider and the async fetcher.
|
|
338
|
+
|
|
339
|
+
Cross-thread communication uses an ``asyncio.Queue`` that lives
|
|
340
|
+
in the fetcher thread's event loop. Sync callers schedule puts
|
|
341
|
+
on the loop via ``loop.call_soon_threadsafe``. Batch submissions
|
|
342
|
+
(lists) guarantee all items arrive together for concurrent
|
|
343
|
+
processing.
|
|
344
|
+
|
|
345
|
+
Use as a context manager:
|
|
346
|
+
with FetchCoordinator(transport) as coordinator:
|
|
347
|
+
...
|
|
348
|
+
"""
|
|
349
|
+
|
|
350
|
+
PREFETCH_METADATA_COUNT = 10
|
|
351
|
+
|
|
352
|
+
def __init__(
|
|
353
|
+
self,
|
|
354
|
+
transport: AsyncHttpTransport,
|
|
355
|
+
*,
|
|
356
|
+
indexes: list[IndexConfig] | None = None,
|
|
357
|
+
max_concurrency: int = 50,
|
|
358
|
+
cache_dir: Path | None = None,
|
|
359
|
+
cache_backend: CacheBackend | None = None,
|
|
360
|
+
offline: bool = False,
|
|
361
|
+
index_overrides: list[IndexOverride] | None = None,
|
|
362
|
+
marker_environment: dict[str, str] | None = None,
|
|
363
|
+
) -> None:
|
|
364
|
+
"""Create a coordinator that wraps ``transport``.
|
|
365
|
+
|
|
366
|
+
``indexes`` is the ordered list of :class:`IndexConfig` records;
|
|
367
|
+
order is significant (presence-based first-index walks them
|
|
368
|
+
left-to-right). When omitted, defaults to
|
|
369
|
+
``[IndexConfig("pypi", "https://pypi.org/simple/")]``. Each
|
|
370
|
+
index name must be unique across the list.
|
|
371
|
+
|
|
372
|
+
``index_overrides`` adds per-package routing rules; an entry's
|
|
373
|
+
``index`` field names one of the configured indexes and its
|
|
374
|
+
optional ``marker`` is evaluated against ``marker_environment``.
|
|
375
|
+
|
|
376
|
+
``cache_backend`` wins over ``cache_dir`` if both are given;
|
|
377
|
+
otherwise ``cache_dir`` enables a per-index :class:`OnDiskCache`
|
|
378
|
+
and ``None`` falls back to a :class:`NullCache`. Passing an
|
|
379
|
+
explicit ``cache_backend`` together with more than one entry in
|
|
380
|
+
``indexes`` is rejected: each index needs its own cache.
|
|
381
|
+
"""
|
|
382
|
+
if indexes is None:
|
|
383
|
+
indexes = [IndexConfig(DEFAULT_INDEX_NAME, DEFAULT_INDEX_URL)]
|
|
384
|
+
if not indexes:
|
|
385
|
+
msg = "indexes must contain at least one IndexConfig"
|
|
386
|
+
raise ValueError(msg)
|
|
387
|
+
names = [idx.name for idx in indexes]
|
|
388
|
+
if len(names) != len(set(names)):
|
|
389
|
+
duplicates = sorted({n for n in names if names.count(n) > 1})
|
|
390
|
+
msg = f"duplicate index names: {duplicates}"
|
|
391
|
+
raise ValueError(msg)
|
|
392
|
+
self.indexes = list(indexes)
|
|
393
|
+
self._transport = transport
|
|
394
|
+
self._max_concurrency = max_concurrency
|
|
395
|
+
self._offline = offline
|
|
396
|
+
if cache_backend is not None and len(indexes) > 1:
|
|
397
|
+
msg = (
|
|
398
|
+
"explicit cache_backend is incompatible with more than one"
|
|
399
|
+
" index: pass cache_dir so each index gets its own cache."
|
|
400
|
+
)
|
|
401
|
+
raise ValueError(msg)
|
|
402
|
+
if cache_backend is not None:
|
|
403
|
+
self._cache: CacheBackend = cache_backend
|
|
404
|
+
elif cache_dir is not None:
|
|
405
|
+
self._cache = OnDiskCache(cache_dir, indexes[0].url)
|
|
406
|
+
else:
|
|
407
|
+
self._cache = NullCache()
|
|
408
|
+
self._cache_dir = cache_dir
|
|
409
|
+
self._index_overrides = list(index_overrides or [])
|
|
410
|
+
self._marker_environment = (
|
|
411
|
+
dict(marker_environment) if marker_environment is not None else None
|
|
412
|
+
)
|
|
413
|
+
self.index = InMemoryIndex()
|
|
414
|
+
self._thread: threading.Thread | None = None
|
|
415
|
+
self._started = False
|
|
416
|
+
self._crashed = False
|
|
417
|
+
self._loop: asyncio.AbstractEventLoop | None = None
|
|
418
|
+
self._async_q: asyncio.Queue[_QueueItem] | None = None
|
|
419
|
+
self._queue_ready = threading.Event()
|
|
420
|
+
|
|
421
|
+
def __enter__(self) -> Self:
    """Start the fetcher thread and return self.

    Delegates to :meth:`start`, so entering an already-started
    coordinator does not spawn a second thread.
    """
    self.start()
    return self
|
|
425
|
+
|
|
426
|
+
def __exit__(self, *args: object) -> None:
    """Shut the fetcher thread down on context exit.

    The exception details in ``args`` are ignored and ``None`` is
    returned, so an exception raised in the ``with`` body propagates.
    """
    self.shutdown()
|
|
429
|
+
|
|
430
|
+
def start(self) -> None:
|
|
431
|
+
"""Start the fetcher thread (idempotent)."""
|
|
432
|
+
if self._started:
|
|
433
|
+
return
|
|
434
|
+
self._started = True
|
|
435
|
+
self._thread = threading.Thread(
|
|
436
|
+
target=self._run_loop,
|
|
437
|
+
daemon=True,
|
|
438
|
+
name="nab-fetcher",
|
|
439
|
+
)
|
|
440
|
+
self._thread.start()
|
|
441
|
+
self._queue_ready.wait()
|
|
442
|
+
|
|
443
|
+
def shutdown(self) -> None:
    """Signal the fetcher thread to exit and join it.

    A no-op when no thread is running.  Otherwise the ``None``
    sentinel is queued, the thread is joined for at most
    ``_COORDINATOR_JOIN_TIMEOUT_SECONDS`` seconds, and the coordinator
    is marked as stopped so it can be started again.

    If the fetcher has already exited (for example after a crash) its
    event loop is closed and scheduling the sentinel raises
    :class:`RuntimeError`.  That error is swallowed here: there is
    nothing left to signal, and letting it escape from ``__exit__``
    would mask the exception that actually ended the ``with`` block.
    """
    thread = self._thread
    if thread is None:
        return
    try:
        self._submit(None)
    except RuntimeError:
        # ``call_soon_threadsafe`` refuses to schedule on a closed loop.
        logger.debug("Fetcher loop already closed; shutdown sentinel not sent")
    thread.join(timeout=_COORDINATOR_JOIN_TIMEOUT_SECONDS)
    self._thread = None
    self._started = False
|
|
451
|
+
|
|
452
|
+
def _submit(self, item: _QueueItem) -> None:
|
|
453
|
+
"""Schedule ``item`` on the fetcher loop's queue from any thread."""
|
|
454
|
+
loop = self._loop
|
|
455
|
+
queue = self._async_q
|
|
456
|
+
if loop is None or queue is None:
|
|
457
|
+
return
|
|
458
|
+
loop.call_soon_threadsafe(queue.put_nowait, item)
|
|
459
|
+
|
|
460
|
+
def _check_alive(self) -> None:
|
|
461
|
+
if self._crashed:
|
|
462
|
+
msg = "Fetcher thread crashed, see log for details"
|
|
463
|
+
raise RuntimeError(msg)
|
|
464
|
+
|
|
465
|
+
def request_listing(self, package: str) -> threading.Event:
|
|
466
|
+
"""Request a listing fetch; return an event set when the result lands."""
|
|
467
|
+
self._check_alive()
|
|
468
|
+
if self.index.get_listing(package) is not None:
|
|
469
|
+
done = threading.Event()
|
|
470
|
+
done.set()
|
|
471
|
+
return done
|
|
472
|
+
key = f"listing:{package}"
|
|
473
|
+
pending, existed = self.index.get_or_create_pending(key)
|
|
474
|
+
if not existed:
|
|
475
|
+
self._submit(FetchRequest(kind=FetchKind.LISTING, package=package))
|
|
476
|
+
return pending.event
|
|
477
|
+
|
|
478
|
+
def request_metadata(self, package: str, version: str, url: str) -> threading.Event:
|
|
479
|
+
"""Request a wheel-metadata fetch for ``(package, version)``."""
|
|
480
|
+
self._check_alive()
|
|
481
|
+
if self.index.has_metadata(package, version):
|
|
482
|
+
done = threading.Event()
|
|
483
|
+
done.set()
|
|
484
|
+
return done
|
|
485
|
+
key = f"metadata:{package}:{version}"
|
|
486
|
+
pending, existed = self.index.get_or_create_pending(key)
|
|
487
|
+
if not existed:
|
|
488
|
+
self._submit(
|
|
489
|
+
FetchRequest(
|
|
490
|
+
kind=FetchKind.METADATA,
|
|
491
|
+
package=package,
|
|
492
|
+
version=version,
|
|
493
|
+
url=url,
|
|
494
|
+
)
|
|
495
|
+
)
|
|
496
|
+
return pending.event
|
|
497
|
+
|
|
498
|
+
def request_wheel_metadata(
|
|
499
|
+
self, package: str, version: str, wheel_filename: str, url: str
|
|
500
|
+
) -> threading.Event:
|
|
501
|
+
"""Request metadata for one specific wheel of ``(package, version)``.
|
|
502
|
+
|
|
503
|
+
Used by the universal-resolution validation pass to fetch each
|
|
504
|
+
tuple's chosen wheel metadata separately from the resolver's
|
|
505
|
+
baseline. Cached under the sentinel key ``f"{version}#{wheel_filename}"``
|
|
506
|
+
so the resolver-time cache (keyed on plain ``version``) is
|
|
507
|
+
untouched. Goes through the same async transport and shares
|
|
508
|
+
connection pooling.
|
|
509
|
+
"""
|
|
510
|
+
self._check_alive()
|
|
511
|
+
sentinel_version = f"{version}#{wheel_filename}"
|
|
512
|
+
if self.index.has_metadata(package, sentinel_version):
|
|
513
|
+
done = threading.Event()
|
|
514
|
+
done.set()
|
|
515
|
+
return done
|
|
516
|
+
key = f"metadata:{package}:{sentinel_version}"
|
|
517
|
+
pending, existed = self.index.get_or_create_pending(key)
|
|
518
|
+
if not existed:
|
|
519
|
+
self._submit(
|
|
520
|
+
FetchRequest(
|
|
521
|
+
kind=FetchKind.METADATA,
|
|
522
|
+
package=package,
|
|
523
|
+
version=sentinel_version,
|
|
524
|
+
url=url,
|
|
525
|
+
)
|
|
526
|
+
)
|
|
527
|
+
return pending.event
|
|
528
|
+
|
|
529
|
+
def request_sdist(self, package: str, version: str, url: str) -> threading.Event:
|
|
530
|
+
"""Request sdist PKG-INFO extraction.
|
|
531
|
+
|
|
532
|
+
Uses a separate ``sdist:`` pending key so it can fire even
|
|
533
|
+
when a prior wheel metadata fetch already cached ``None``
|
|
534
|
+
for the same ``(package, version)`` slot.
|
|
535
|
+
"""
|
|
536
|
+
self._check_alive()
|
|
537
|
+
key = f"sdist:{package}:{version}"
|
|
538
|
+
pending, existed = self.index.get_or_create_pending(key)
|
|
539
|
+
if not existed:
|
|
540
|
+
self._submit(
|
|
541
|
+
FetchRequest(
|
|
542
|
+
kind=FetchKind.SDIST,
|
|
543
|
+
package=package,
|
|
544
|
+
version=version,
|
|
545
|
+
url=url,
|
|
546
|
+
)
|
|
547
|
+
)
|
|
548
|
+
return pending.event
|
|
549
|
+
|
|
550
|
+
def request_sdist_archive(
|
|
551
|
+
self, package: str, version: str, url: str
|
|
552
|
+
) -> threading.Event:
|
|
553
|
+
"""Request the raw bytes of an sdist archive.
|
|
554
|
+
|
|
555
|
+
Used by the :class:`~nab_python.provider.BuildPolicy.BUILD_REMOTE`
|
|
556
|
+
path; the bytes are extracted to a temp dir and handed to a
|
|
557
|
+
PEP 517 backend. Stored under a separate pending key so it can
|
|
558
|
+
run concurrently with a PKG-INFO fetch for the same version.
|
|
559
|
+
"""
|
|
560
|
+
self._check_alive()
|
|
561
|
+
key = f"sdist-archive:{package}:{version}"
|
|
562
|
+
pending, existed = self.index.get_or_create_pending(key)
|
|
563
|
+
if not existed:
|
|
564
|
+
self._submit(
|
|
565
|
+
FetchRequest(
|
|
566
|
+
kind=FetchKind.SDIST_ARCHIVE,
|
|
567
|
+
package=package,
|
|
568
|
+
version=version,
|
|
569
|
+
url=url,
|
|
570
|
+
)
|
|
571
|
+
)
|
|
572
|
+
return pending.event
|
|
573
|
+
|
|
574
|
+
def request_metadata_batch(
|
|
575
|
+
self, items: list[tuple[str, str, str]]
|
|
576
|
+
) -> list[tuple[str, str, threading.Event]]:
|
|
577
|
+
"""Submit a batch of metadata requests as a single queue item.
|
|
578
|
+
|
|
579
|
+
All requests in the batch reach the fetcher together so they
|
|
580
|
+
are processed concurrently.
|
|
581
|
+
"""
|
|
582
|
+
self._check_alive()
|
|
583
|
+
results: list[tuple[str, str, threading.Event]] = []
|
|
584
|
+
batch: list[FetchRequest] = []
|
|
585
|
+
for package, version, url in items:
|
|
586
|
+
if self.index.has_metadata(package, version):
|
|
587
|
+
done = threading.Event()
|
|
588
|
+
done.set()
|
|
589
|
+
results.append((package, version, done))
|
|
590
|
+
continue
|
|
591
|
+
key = f"metadata:{package}:{version}"
|
|
592
|
+
pending, existed = self.index.get_or_create_pending(key)
|
|
593
|
+
if not existed:
|
|
594
|
+
batch.append(
|
|
595
|
+
FetchRequest(
|
|
596
|
+
kind=FetchKind.METADATA,
|
|
597
|
+
package=package,
|
|
598
|
+
version=version,
|
|
599
|
+
url=url,
|
|
600
|
+
)
|
|
601
|
+
)
|
|
602
|
+
results.append((package, version, pending.event))
|
|
603
|
+
if batch:
|
|
604
|
+
self._submit(batch)
|
|
605
|
+
return results
|
|
606
|
+
|
|
607
|
+
def _run_loop(self) -> None:
    """Thread target: run the async fetcher to completion.

    Any ``Exception`` escaping the fetcher is logged with its traceback
    and recorded in ``_crashed`` rather than propagated, so that
    ``_check_alive`` can report the failure to later callers.
    """
    try:
        asyncio.run(self._async_fetcher())
    except Exception:
        logger.exception("Fetcher thread crashed")
        self._crashed = True
|
|
613
|
+
|
|
614
|
+
def _build_client(
    self,
) -> CachedAsyncSimpleClient | LocalIndexClient | MultiIndexClient:
    """Construct the index client used for this run.

    With exactly one index and no effective override the result is the
    plain per-index client (a :class:`CachedAsyncSimpleClient`, or a
    :class:`LocalIndexClient` for ``file://``).  In every other case a
    :class:`MultiIndexClient` is assembled from one client per
    configured index, all sharing the coordinator's transport but each
    with its own per-URL cache.
    """
    routing = _resolve_overrides(
        self._index_overrides, self._marker_environment
    )
    if not routing and len(self.indexes) == 1:
        return self._build_index_client(self.indexes[0].url)

    per_index: dict[str, CachedAsyncSimpleClient | LocalIndexClient] = {
        cfg.name: self._build_index_client(cfg.url) for cfg in self.indexes
    }
    # Index names in configuration order.
    walk_order = [cfg.name for cfg in self.indexes]
    return MultiIndexClient(per_index, walk_order, routing)
|
|
640
|
+
|
|
641
|
+
def _build_index_client(
    self,
    url: str,
) -> CachedAsyncSimpleClient | LocalIndexClient:
    """Construct the client for the single index at ``url``.

    A ``file://`` URL gets a :class:`LocalIndexClient`, which does no
    caching because the filesystem already is the cache.  Any other
    URL gets a :class:`CachedAsyncSimpleClient`.  Its backend is a
    per-URL :class:`OnDiskCache` when ``cache_dir`` was configured,
    and the coordinator-wide cache otherwise.
    """
    if url.startswith("file://"):
        return LocalIndexClient(url)
    backend: CacheBackend = (
        self._cache
        if self._cache_dir is None
        else OnDiskCache(self._cache_dir, url)
    )
    return CachedAsyncSimpleClient(
        self._transport, backend, url, offline=self._offline
    )
|
|
665
|
+
|
|
666
|
+
async def _async_fetcher(self) -> None:
    """Run the fetch loop: pull queued requests and dispatch them as tasks.

    Publishes the running loop and a fresh queue on ``self`` and then
    sets ``self._queue_ready``. After that it blocks on the queue;
    every item received is handed to :meth:`_dispatch`, followed by a
    non-blocking drain of whatever else is already queued.

    A ``None`` item ends the loop, with two distinct outcomes:

    * received by the blocking ``get()``: outstanding tasks are
      awaited to completion;
    * encountered while draining: outstanding tasks are cancelled,
      then awaited, and the method returns.

    The client from ``self._build_client()`` is closed in every case.
    """
    self._loop = asyncio.get_running_loop()
    inbox: asyncio.Queue[_QueueItem] = asyncio.Queue()
    self._async_q = inbox
    # Signal readiness only after the queue attribute is published.
    self._queue_ready.set()

    limiter = asyncio.Semaphore(self._max_concurrency)
    in_flight: set[asyncio.Task] = set()

    client = self._build_client()
    try:
        item = await inbox.get()
        while item is not None:
            self._dispatch(item, client, limiter, in_flight)

            # Drain anything already queued without yielding to the loop.
            while True:
                try:
                    queued = inbox.get_nowait()
                except asyncio.QueueEmpty:
                    break
                if queued is None:
                    # Sentinel found mid-drain: abandon outstanding work.
                    for job in in_flight:
                        job.cancel()
                    await asyncio.gather(*in_flight, return_exceptions=True)
                    return
                self._dispatch(queued, client, limiter, in_flight)

            item = await inbox.get()

        # Sentinel received while idle: let outstanding work finish.
        if in_flight:
            await asyncio.gather(*in_flight, return_exceptions=True)
    finally:
        await client.aclose()
|
|
701
|
+
|
|
702
|
+
def _dispatch(
    self,
    item: FetchRequest | list[FetchRequest],
    client: CachedAsyncSimpleClient | LocalIndexClient | MultiIndexClient,
    sem: asyncio.Semaphore,
    tasks: set[asyncio.Task],
) -> None:
    """Spawn one :meth:`_handle` task per request in ``item``.

    ``item`` may be a single request or a list of them. Each created
    task is added to ``tasks`` and removes itself from that set once
    it is done.
    """
    # Treat a lone request as a batch of one so both shapes share a loop.
    batch = item if isinstance(item, list) else [item]
    for request in batch:
        job = asyncio.create_task(self._handle(client, request, sem))
        tasks.add(job)
        job.add_done_callback(tasks.discard)
|
|
719
|
+
|
|
720
|
+
async def _handle(
    self,
    client: CachedAsyncSimpleClient | LocalIndexClient | MultiIndexClient,
    req: FetchRequest,
    sem: asyncio.Semaphore,
) -> None:
    """Run one fetch request while holding ``sem``.

    The request kind selects the fetch method: listing, metadata,
    sdist, or (for any other kind) sdist archive. If the fetch raises
    an ``Exception``, the failure is logged and an empty result is
    stored for the request (``[]`` for a listing, ``None`` otherwise).
    """
    async with sem:
        kind = req.kind
        try:
            if kind is FetchKind.LISTING:
                fetch = self._fetch_listing
            elif kind is FetchKind.METADATA:
                fetch = self._fetch_metadata
            elif kind is FetchKind.SDIST:
                fetch = self._fetch_sdist
            else:
                fetch = self._fetch_sdist_archive
            await fetch(client, req)
        except Exception:
            logger.exception("Fetch failed: %s %s", kind.value, req.package)
            # Store an empty result so that anything waiting on this
            # request is released; otherwise the resolver would hang on
            # event.wait() whenever a transitive dependency 404s or the
            # fetch fails in any other way.
            if kind is FetchKind.LISTING:
                self.index.store_listing(req.package, [])
                return

            assert req.version is not None
            if kind is FetchKind.METADATA:
                store_empty = self.index.store_metadata
            elif kind is FetchKind.SDIST:
                store_empty = self.index.store_sdist_metadata
            else:
                store_empty = self.index.store_sdist_archive
            store_empty(req.package, req.version, None)
|
|
751
|
+
|
|
752
|
+
async def _fetch_listing(
    self,
    client: CachedAsyncSimpleClient | LocalIndexClient | MultiIndexClient,
    req: FetchRequest,
) -> None:
    """Fetch and store the file listing for ``req.package``.

    After storing the listing and recording which index served it,
    metadata requests are queued for up to
    ``self.PREFETCH_METADATA_COUNT`` wheels that advertise a metadata
    URL, taken from the end of the listing. A count of zero or less
    disables the prefetch.
    """
    files = await client.get_files(req.package)
    self.index.store_listing(req.package, files)
    self._record_serving_index(client, req.package)

    # Guard the slice below: ``seq[-0:]`` is the *whole* sequence, so a
    # count of 0 would otherwise prefetch metadata for every wheel
    # instead of none.
    prefetch_count = self.PREFETCH_METADATA_COUNT
    if prefetch_count <= 0:
        return

    # auto-prefetch metadata for newest candidates (files are oldest-first)
    wheels_with_meta: list[tuple[WheelFile, str]] = [
        (f, f.metadata_url)
        for f in files
        if isinstance(f, WheelFile) and f.metadata_url is not None
    ]
    for w, metadata_url in wheels_with_meta[-prefetch_count:]:
        self.request_metadata(req.package, w.version, metadata_url)
|
|
769
|
+
|
|
770
|
+
def _record_serving_index(
    self,
    client: CachedAsyncSimpleClient | LocalIndexClient | MultiIndexClient,
    package: str,
) -> None:
    """Store the name of the index that served ``package``'s listing.

    For a :class:`MultiIndexClient` the name comes from
    ``client.route_for(package)``. When that returns ``None``, or when
    ``client`` is any other client type, the name of the first
    configured index is stored instead, so a name is always recorded.
    """
    served_by = (
        client.route_for(package)
        if isinstance(client, MultiIndexClient)
        else None
    )
    if served_by is None:
        # No route known (or only one client): use the first configured index.
        served_by = self.indexes[0].name
    self.index.store_listing_index(package, served_by)
|
|
790
|
+
|
|
791
|
+
async def _fetch_metadata(
    self,
    client: CachedAsyncSimpleClient | LocalIndexClient | MultiIndexClient,
    req: FetchRequest,
) -> None:
    """Fetch the metadata text for ``req`` and store it.

    ``req.version`` must be set. When ``req.url`` is ``None`` nothing
    is fetched and ``None`` is stored as the metadata.
    """
    assert req.version is not None
    text = None
    if req.url is not None:
        text = await client.get_metadata_text(req.package, req.version, req.url)
    self.index.store_metadata(req.package, req.version, text)
|
|
802
|
+
|
|
803
|
+
async def _fetch_sdist(
    self,
    client: CachedAsyncSimpleClient | LocalIndexClient | MultiIndexClient,
    req: FetchRequest,
) -> None:
    """Fetch the sdist files for ``req`` and store them.

    ``req.version`` and ``req.url`` must both be set. The first value
    returned by ``client.get_sdist_files`` is always stored as the
    sdist metadata; the second (the pyproject value) is stored only
    when it is not ``None``.
    """
    assert req.version is not None
    assert req.url is not None
    package, version = req.package, req.version
    pkg_info, pyproject = await client.get_sdist_files(package, version, req.url)
    self.index.store_sdist_metadata(package, version, pkg_info)
    if pyproject is None:
        return
    self.index.store_sdist_pyproject(package, version, pyproject)
|
|
816
|
+
|
|
817
|
+
async def _fetch_sdist_archive(
    self,
    client: CachedAsyncSimpleClient | LocalIndexClient | MultiIndexClient,
    req: FetchRequest,
) -> None:
    """Fetch the sdist archive for ``req`` and store it.

    ``req.version`` must be set. When ``req.url`` is ``None`` nothing
    is fetched and ``None`` is stored as the archive.
    """
    assert req.version is not None
    data = None
    if req.url is not None:
        data = await client.get_sdist_archive(req.package, req.version, req.url)
    self.index.store_sdist_archive(req.package, req.version, data)
|