fc-data 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. datasmith/__init__.py +330 -0
  2. datasmith/__init__.pyi +194 -0
  3. datasmith/agents/__init__.py +31 -0
  4. datasmith/agents/classifiers.py +272 -0
  5. datasmith/agents/codex.py +25 -0
  6. datasmith/agents/config.py +108 -0
  7. datasmith/agents/extractors.py +197 -0
  8. datasmith/agents/installed/README.md +52 -0
  9. datasmith/agents/installed/__init__.py +22 -0
  10. datasmith/agents/installed/base.py +240 -0
  11. datasmith/agents/installed/claude.py +134 -0
  12. datasmith/agents/installed/codex.py +91 -0
  13. datasmith/agents/installed/gemini.py +118 -0
  14. datasmith/agents/installed/none.py +27 -0
  15. datasmith/agents/sandbox.py +547 -0
  16. datasmith/agents/synthesizer.py +439 -0
  17. datasmith/agents/templates/AGENTS.md.j2 +150 -0
  18. datasmith/agents/templates/sandbox_verify.py +428 -0
  19. datasmith/docker/__init__.py +31 -0
  20. datasmith/docker/context.py +112 -0
  21. datasmith/docker/images.py +158 -0
  22. datasmith/docker/publish.py +56 -0
  23. datasmith/docker/templates/Dockerfile.base +26 -0
  24. datasmith/docker/templates/Dockerfile.pr +42 -0
  25. datasmith/docker/templates/Dockerfile.repo +11 -0
  26. datasmith/docker/templates/docker_build_base.sh +780 -0
  27. datasmith/docker/templates/docker_build_env.sh +309 -0
  28. datasmith/docker/templates/docker_build_final.sh +106 -0
  29. datasmith/docker/templates/docker_build_pkg.sh +99 -0
  30. datasmith/docker/templates/docker_build_run.sh +124 -0
  31. datasmith/docker/templates/entrypoint.sh +62 -0
  32. datasmith/docker/templates/parser.py +1405 -0
  33. datasmith/docker/templates/profile.sh +199 -0
  34. datasmith/docker/templates/pytest_runner.py +692 -0
  35. datasmith/docker/templates/run-tests.sh +197 -0
  36. datasmith/docker/verifiers.py +131 -0
  37. datasmith/filters.py +154 -0
  38. datasmith/github/__init__.py +22 -0
  39. datasmith/github/client.py +333 -0
  40. datasmith/github/hooks.py +50 -0
  41. datasmith/github/links.py +110 -0
  42. datasmith/github/models.py +206 -0
  43. datasmith/github/render.py +173 -0
  44. datasmith/github/search.py +66 -0
  45. datasmith/github/templates/comment.md.j2 +5 -0
  46. datasmith/github/templates/final.md.j2 +66 -0
  47. datasmith/github/templates/issues.md.j2 +21 -0
  48. datasmith/github/templates/repo.md.j2 +1 -0
  49. datasmith/preflight.py +162 -0
  50. datasmith/publish/__init__.py +13 -0
  51. datasmith/publish/huggingface.py +104 -0
  52. datasmith/publish/pipeline.py +60 -0
  53. datasmith/publish/records.py +91 -0
  54. datasmith/py.typed +1 -0
  55. datasmith/resolution/__init__.py +14 -0
  56. datasmith/resolution/blocklist.py +145 -0
  57. datasmith/resolution/cache.py +120 -0
  58. datasmith/resolution/constants.py +277 -0
  59. datasmith/resolution/dependency_resolver.py +174 -0
  60. datasmith/resolution/git_utils.py +378 -0
  61. datasmith/resolution/import_analyzer.py +66 -0
  62. datasmith/resolution/metadata_parser.py +412 -0
  63. datasmith/resolution/models.py +41 -0
  64. datasmith/resolution/orchestrator.py +522 -0
  65. datasmith/resolution/package_filters.py +312 -0
  66. datasmith/resolution/python_manager.py +110 -0
  67. datasmith/runners/__init__.py +15 -0
  68. datasmith/runners/base.py +112 -0
  69. datasmith/runners/classify_prs.py +48 -0
  70. datasmith/runners/render_problems.py +113 -0
  71. datasmith/runners/resolve_packages.py +66 -0
  72. datasmith/runners/scrape_commits.py +166 -0
  73. datasmith/runners/scrape_repos.py +44 -0
  74. datasmith/runners/synthesize_images.py +310 -0
  75. datasmith/update/__init__.py +5 -0
  76. datasmith/update/cli.py +169 -0
  77. datasmith/update/offline.py +173 -0
  78. datasmith/update/pipeline.py +497 -0
  79. datasmith/utils/__init__.py +18 -0
  80. datasmith/utils/core.py +67 -0
  81. datasmith/utils/db.py +156 -0
  82. datasmith/utils/tokens.py +65 -0
  83. fc_data-0.2.0.dist-info/METADATA +441 -0
  84. fc_data-0.2.0.dist-info/RECORD +87 -0
  85. fc_data-0.2.0.dist-info/WHEEL +4 -0
  86. fc_data-0.2.0.dist-info/entry_points.txt +2 -0
  87. fc_data-0.2.0.dist-info/licenses/LICENSE +28 -0
@@ -0,0 +1,780 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ REQUESTED_PY_VERSION="${PY_VERSION-}"
5
+
6
# Print the command-line synopsis on stderr and terminate the script.
#   $1  exit status to terminate with (defaults to 1, i.e. "bad usage").
usage() {
    local exit_code="${1:-1}"
    printf '%s\n' "Usage: $0 [--py-version <major.minor>|<major.minor>]" >&2
    exit "$exit_code"
}
11
+
12
# Consume the command line. The Python version may arrive either through
# --py-version <value> or as a single bare positional argument; a positional
# is only accepted while no version has been chosen yet (REQUESTED_PY_VERSION
# may already have been seeded from the PY_VERSION environment variable).
while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            usage 0
            ;;
        --py-version)
            if [[ $# -lt 2 ]]; then
                echo "--py-version flag requires a value" >&2
                usage
            fi
            REQUESTED_PY_VERSION="$2"
            shift 2
            ;;
        *)
            if [[ -n "$REQUESTED_PY_VERSION" ]]; then
                echo "Unexpected argument: $1" >&2
                usage
            fi
            REQUESTED_PY_VERSION="$1"
            shift
            ;;
    esac
done

# An explicitly requested version must look like <major>.<minor>.
if [[ -n "$REQUESTED_PY_VERSION" && ! "$REQUESTED_PY_VERSION" =~ ^[0-9]+\.[0-9]+$ ]]; then
    echo "Invalid Python version '$REQUESTED_PY_VERSION'; expected format <major>.<minor> (e.g. 3.8)" >&2
    exit 1
fi
43
+
44
+ # -------- Helpers installed for all shells --------
45
# Write /etc/profile.d/asv_utils.sh: shell helpers and build-tuning exports
# that later scripts (and this one) source.
#
# The heredoc is quoted ('EOF'), so nothing below is expanded at install time;
# every ${...} is evaluated by the shell that sources the generated file.
install_profile_helpers() {
    cat >/etc/profile.d/asv_utils.sh <<'EOF'
# asv_utils.sh — login/interactive shell helpers for ASV builds
export MAMBA_ROOT_PREFIX="${MAMBA_ROOT_PREFIX:-/opt/conda}"

# Initialize micromamba for bash shells (no-op if not present)
if command -v micromamba >/dev/null 2>&1; then
  eval "$(micromamba shell hook --shell=bash)"
fi

# Find and cd into the first directory that contains an asv.*.json
cd_asv_json_dir() {
  local match
  # -print -quit stops at the first hit without a pipe, so callers running
  # under `set -o pipefail` never see a spurious SIGPIPE failure from find.
  match=$(find . -type f -name "asv.*.json" -print -quit)
  if [[ -n "$match" ]]; then
    cd "$(dirname "$match")" || echo "Failed to change directory"
  else
    echo "No 'asv.*.json' file found in current directory or subdirectories."
    return 1
  fi
}

# Return just the conf filename (e.g., asv.conf.json)
asv_conf_name() {
  local f
  f=$(find . -type f -name "asv.*.json" -print -quit)
  if [[ -z "$f" ]]; then
    return 1
  fi
  basename "$f"
}

write_vars() {
  local key="$1" value="${2-}"  # default to empty if unset to avoid set -u crash
  mkdir -p /etc/asv_env
  # Safely append a properly quoted export line:
  printf 'export %s=%q\n' "$key" "$value" >> /etc/profile.d/asv_build_vars.sh
}

# Build performance knobs (overridable)
export MAKEFLAGS="${MAKEFLAGS:--j$(nproc)}"
export CMAKE_BUILD_PARALLEL_LEVEL="${CMAKE_BUILD_PARALLEL_LEVEL:-$(nproc)}"
export NPY_NUM_BUILD_JOBS="${NPY_NUM_BUILD_JOBS:-$(nproc)}"

# Shared uv cache to speed repeated builds
export UV_CACHE_DIR="${UV_CACHE_DIR:-/opt/uvcache}"
# Best effort: this file is sourced by every login shell, including ones that
# cannot write here or that run with `set -e`.
mkdir -p "$UV_CACHE_DIR" 2>/dev/null || true

# Legacy pip cache (keeping for compatibility)
export PIP_CACHE_DIR="${PIP_CACHE_DIR:-/opt/pipcache}"
mkdir -p "$PIP_CACHE_DIR" 2>/dev/null || true
EOF
}
95
+
96
+ # -------- Persisted build variables --------
97
# Persist the list of Python versions this image was built for.
#   $1  space-separated list of <major>.<minor> versions.
#
# The value is stored verbatim in /etc/asv_env/py_versions and appended as an
# export to /etc/profile.d/asv_build_vars.sh. The export is written with
# printf %q (the same way write_vars does it) so the value survives being
# re-read by a shell regardless of the characters it contains.
write_build_vars() {
    local py_versions="$1"

    mkdir -p /etc/asv_env
    printf '%s\n' "$py_versions" > /etc/asv_env/py_versions

    # Exported for every future shell (pkg script, interactive, etc.)
    {
        echo "# Auto-generated during docker_build_env.sh"
        printf 'export ASV_PY_VERSIONS=%q\n' "$py_versions"
    } >> /etc/profile.d/asv_build_vars.sh
}
109
+
110
# Append install-related variables (extras/specs) so the follow-up script can
# use them.
#   $1  extras list to persist as ALL_EXTRAS
#   $2  setup.py command line to persist as SAVED_SETUPPY_CMD
#
# Both values are free-form text. They are written with printf %q because
# interpolating them into a double-quoted `export X="..."` line would corrupt
# the generated file as soon as a value contained a quote, `$` or backtick.
append_install_vars() {
    local extras_all="$1"
    local setuppy_cmd="$2"

    mkdir -p /etc/asv_env
    printf '%s\n' "$extras_all" > /etc/asv_env/extras_all
    printf '%s\n' "$setuppy_cmd" > /etc/asv_env/setuppy_cmd

    # Export for future shells
    {
        printf 'export ALL_EXTRAS=%q\n' "$extras_all"
        printf 'export SAVED_SETUPPY_CMD=%q\n' "$setuppy_cmd"
    } >> /etc/profile.d/asv_build_vars.sh
}
125
+
126
+ # -------- Install a reusable smoke-check CLI --------
127
+ install_smokecheck() {
128
+ cat >/usr/local/bin/asv_smokecheck.py <<'PY'
129
+ #!/usr/bin/env python
130
+ import argparse, importlib, pathlib, sys
131
+ import importlib.machinery as mach
132
+
133
+ def _strip_ext_suffix(filename: str) -> str:
134
+ # Remove the *full* extension suffix, e.g.
135
+ # ".cpython-310-x86_64-linux-gnu.so", ".abi3.so", ".pyd", etc.
136
+ for suf in mach.EXTENSION_SUFFIXES:
137
+ if filename.endswith(suf):
138
+ return filename[:-len(suf)]
139
+ # Fallback: drop last extension and any remaining ABI tag after the first dot
140
+ stem = pathlib.Path(filename).stem
141
+ return stem.split(".", 1)[0]
142
+
143
def import_and_version(name: str):
    """Import module *name* and print a one-line confirmation.

    The line reports the module's ``__version__`` attribute, or ``unknown``
    when the module does not define one.  Import errors propagate.
    """
    module = importlib.import_module(name)
    version = getattr(module, "__version__", "unknown")
    print(f"{name} imported ok; __version__={version}")
147
+
148
def probe_compiled(name: str, max_ext: int = 10):
    """Try to import up to *max_ext* compiled submodules of package *name*.

    Searches the first entry of the package's ``__path__`` recursively for
    ``*.so`` and ``*.pyd`` files, converts each file path into a dotted module
    name and imports it.  Failures are collected and printed as a warning;
    they never raise.  Modules without ``__path__`` are skipped with a notice.
    """
    module = importlib.import_module(name)
    if not hasattr(module, "__path__"):
        print("No package __path__ (likely a single-module dist); skipping compiled probe.")
        return

    package_dir = pathlib.Path(list(module.__path__)[0])
    binaries = [*package_dir.rglob("*.so"), *package_dir.rglob("*.pyd")]

    failures = []
    for binary in binaries[:max_ext]:
        *parents, filename = binary.relative_to(package_dir).parts
        # The file name carries an ABI/extension suffix; keep only the module name.
        dotted = ".".join([name, *parents, _strip_ext_suffix(filename)])
        try:
            importlib.import_module(dotted)
        except Exception as exc:
            failures.append((dotted, str(exc)))

    if not failures:
        print("Compiled submodules (if any) import ok")
        return

    print("WARNING: Some compiled submodules failed to import:")
    for dotted, message in failures:
        print(" -", dotted, "->", message)
171
+
172
def main():
    """Command-line entry point for the smoke check.

    Imports the package named by ``--import-name``, probes its compiled
    submodules and, with ``--pytest-smoke``, runs a quick pytest pass in
    ``--repo-root`` when that directory looks like it has tests.  Exits with
    pytest's return code if the pytest run fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--import-name", required=True)
    parser.add_argument("--repo-root", default=".")
    parser.add_argument("--pytest-smoke", action="store_true",
                        help="Run a quick pytest smoke: -k 'not slow' --maxfail=1")
    parser.add_argument("--max-ext", type=int, default=10)
    args = parser.parse_args()

    # Callers sometimes pass the name with shell quoting still attached.
    # Clean it once and use the same value for both checks, so the compiled
    # probe does not fail on a name the plain import accepted.
    import_name = args.import_name.strip("\"' ")
    import_and_version(import_name)
    probe_compiled(import_name, max_ext=args.max_ext)

    if args.pytest_smoke:
        import subprocess

        repo_root = pathlib.Path(args.repo_root)
        markers = ("tests", "pytest.ini", "pyproject.toml")
        if any((repo_root / marker).exists() for marker in markers):
            print("Running pytest smoke...")
            rc = subprocess.call(
                [sys.executable, "-m", "pytest", "-q", "-k", "not slow", "--maxfail=1"],
                cwd=args.repo_root,
            )
            if rc != 0:
                sys.exit(rc)
        else:
            print("No tests detected; skipping pytest smoke.")
    print("Smokecheck OK ✅")
194
+
195
+ if __name__ == "__main__":
196
+ main()
197
+ PY
198
+ chmod +x /usr/local/bin/asv_smokecheck.py
199
+ }
200
+ install_smokecheck
201
+
202
+ # -------- Install an import-name detector CLI --------
203
+ install_detect_import_name() {
204
+ cat >/usr/local/bin/detect_import_name <<'PY'
205
+ #!/usr/bin/env python
206
+ import argparse, pathlib, re, sys, subprocess, configparser, json
207
+
208
+ # --- optional TOML loader (py3.11+: tomllib; else tomli if available) ---
209
+ try:
210
+ import tomllib as toml
211
+ except Exception:
212
+ try:
213
+ import tomli as toml
214
+ except Exception:
215
+ toml = None
216
+
217
# Known distribution/repository name -> top-level import name.  Identity
# entries are listed on purpose: they cover the package names typically
# queried, so the lookup succeeds even when nothing can be inferred from
# the source tree.
EXCEPTIONS = {
    # --- common dist->import mismatches ---
    "scikit-learn": "sklearn",
    "opencv-python": "cv2",
    "pyyaml": "yaml",
    "beautifulsoup4": "bs4",
    "pillow": "PIL",
    "mysqlclient": "MySQLdb",
    "psycopg2-binary": "psycopg2",
    "opencv-contrib-python": "cv2",
    "protobuf": "google",  # top-level package
    "apache-beam": "apache_beam",

    # --- core scientific stack ---
    "numpy": "numpy",
    "pandas": "pandas",
    "scipy": "scipy",
    "scikit-image": "skimage",
    "pywt": "pywt",
    "xarray": "xarray",
    "bottleneck": "bottleneck",
    "h5py": "h5py",
    "networkx": "networkx",
    "shapely": "shapely",
    "dask": "dask",
    "distributed": "distributed",
    "joblib": "joblib",
    "astropy": "astropy",
    "pymc3": "pymc3",

    # --- ML / stats / optimization / viz ---
    "optuna": "optuna",
    "arviz": "arviz",
    "pymc": "pymc",
    "kedro": "kedro",
    "modin": "modin",
    "napari": "napari",
    "deepchecks": "deepchecks",
    "voyager": "voyager",  # spotify/voyager
    "warp": "warp",  # NVIDIA/warp
    "newton": "newton",  # newton-physics/newton

    # --- domain / ecosystem libs ---
    "geopandas": "geopandas",
    "cartopy": "cartopy",
    "iris": "iris",
    "anndata": "anndata",
    "scanpy": "scanpy",
    "sunpy": "sunpy",
    "pvlib-python": "pvlib",
    "PyBaMM": "pybamm",
    "momepy": "momepy",
    "satpy": "satpy",
    "pydicom": "pydicom",
    "pynetdicom": "pynetdicom",

    # --- file formats / IO / infra ---
    "asdf": "asdf",
    "arrow": "pyarrow",  # apache/arrow
    "ArcticDB": "arcticdb",
    "arctic": "arctic",

    # --- web / frameworks / utils ---
    "django-components": "django_components",
    "h11": "h11",
    "tqdm": "tqdm",
    "rich": "rich",
    "posthog": "posthog",
    "datalad": "datalad",
    "ipyparallel": "ipyparallel",

    # --- numerical / symbolic / control ---
    "autograd": "autograd",
    "python-control": "control",
    "loopy": "loopy",
    "thermo": "thermo",
    "chempy": "chempy",
    "adaptive": "adaptive",

    # --- scientific image / signal ---
    "metric-learn": "metric_learn",

    # --- quantum / physics ---
    "Cirq": "cirq",
    "memray": "memray",
    "devito": "devito",

    # --- bio / chem / data ---
    "sourmash": "sourmash",
    "dipy": "dipy",

    # --- protocol buffers / codegen / outlines ---
    "python-betterproto": "betterproto",
    "outlines": "outlines",

    # --- DS viz / raster ---
    "datashader": "datashader",
    "xarray-spatial": "xarray_spatial",

    # --- misc ---
    "enlighten": "enlighten",
    "xorbits": "xorbits",
    "lmfit-py": "lmfit",
    "mdanalysis": "MDAnalysis",
    "nilearn": "nilearn",
}


# Directory names that never hold the importable package itself.
EXCLUDE_DIRS = {
    # VCS metadata and tool environments
    ".git", ".hg", ".svn", ".tox", ".nox", ".venv", "venv",
    # build output and caches
    "build", "dist", "__pycache__", ".mypy_cache", ".pytest_cache",
    # documentation, samples and test suites
    "docs", "doc", "site", "examples", "benchmarks", "tests", "testing",
}
335
+
336
+ def _norm(s: str) -> str:
337
+ return re.sub(r"[-_.]+", "", s).lower()
338
+
339
def read_pyproject(root: pathlib.Path):
    """Return the parsed ``pyproject.toml`` under *root* as a dict.

    Returns ``{}`` when no TOML parser could be imported, the file does not
    exist, or it cannot be read/parsed (best effort, never raises).
    """
    pyproject_path = root / "pyproject.toml"
    if not toml or not pyproject_path.exists():
        return {}
    try:
        return toml.loads(pyproject_path.read_text(encoding="utf-8"))
    except Exception:
        # Unreadable or malformed file: behave as if it were absent.
        return {}
348
+
349
def read_setup_cfg(root: pathlib.Path):
    """Return a ``ConfigParser`` loaded from ``setup.cfg`` under *root*.

    The parser is returned empty when the file is missing or cannot be
    parsed, so callers can always query it with ``has_section``.
    """
    parser = configparser.ConfigParser()
    cfg_path = root / "setup.cfg"
    if not cfg_path.exists():
        return parser
    try:
        parser.read(cfg_path, encoding="utf-8")
    except Exception:
        # Malformed setup.cfg: keep whatever was parsed before the error.
        pass
    return parser
358
+
359
def dist_name_from_config(pyproject, setup_cfg):
    """Return the distribution name declared in the project metadata.

    Sources are consulted in this order and the first non-empty value wins:

    1. PEP 621 ``[project] name`` in pyproject.toml
    2. ``[metadata] name`` in setup.cfg
    3. ``[tool.poetry] name`` in pyproject.toml (Poetry projects that have
       not migrated to PEP 621)

    Returns ``None`` when none of them declares a name.  ``setup.py`` is not
    executed here.
    """
    name = (pyproject.get("project", {}) or {}).get("name")
    if not name and setup_cfg.has_section("metadata"):
        name = setup_cfg.get("metadata", "name", fallback=None)
    if not name:
        poetry = (pyproject.get("tool", {}) or {}).get("poetry", {}) or {}
        if isinstance(poetry, dict):
            name = poetry.get("name") or name
    return name
368
+
369
def package_roots_from_config(root, pyproject, setup_cfg):
    """Return the existing directories under which top-level packages may live.

    Always includes *root*.  Additional directories come from:

    * ``[tool.setuptools.package-dir]`` in pyproject.toml (every mapped dir)
    * ``[options] package_dir`` in setup.cfg, one mapping per line, e.g.
      ``= src``, ``"" = "src"`` or ``pkg = lib/pkg``
    * ``[options.packages.find] where`` in setup.cfg

    Only the directory side of a ``name = dir`` mapping is used; surrounding
    quotes and trailing ``#``/``;`` comments are ignored.  Paths that do not
    exist are dropped from the result.  The order of the returned list is
    unspecified.
    """
    roots = {root}

    def add_root(rel_path):
        # Ignore non-string values and empty paths (the latter mean *root*).
        if not isinstance(rel_path, str):
            return
        cleaned = rel_path.strip().strip("\"'").strip()
        if not cleaned:
            return
        try:
            roots.add((root / cleaned).resolve())
        except (OSError, RuntimeError, ValueError):
            pass

    # pyproject: [tool.setuptools.package-dir] "" = "src"
    pkgdir = ((pyproject.get("tool", {}) or {}).get("setuptools", {}) or {}).get("package-dir", {})
    if isinstance(pkgdir, dict):
        for target in pkgdir.values():
            add_root(target)

    # setup.cfg [options] package_dir
    if setup_cfg.has_section("options"):
        raw = setup_cfg.get("options", "package_dir", fallback=None)
        if raw:
            for line in raw.splitlines():
                line = re.split(r"[#;]", line, maxsplit=1)[0].strip()
                if line:
                    # "name = dir" -> "dir"; a bare "dir" is taken as-is.
                    add_root(line.rpartition("=")[2])

    # setup.cfg [options.packages.find] where
    if setup_cfg.has_section("options.packages.find"):
        where = setup_cfg.get("options.packages.find", "where", fallback=None)
        if where:
            for entry in re.split(r"[,\s]+", where):
                add_root(entry)

    return [r for r in roots if r.exists()]
401
+
402
def explicit_modules_from_config(pyproject, setup_cfg):
    """Return the sorted module/package names declared explicitly in config.

    Reads ``py-modules``/``packages`` lists from ``[tool.setuptools]`` in
    pyproject.toml and ``py_modules``/``packages`` from ``[options]`` in
    setup.cfg.  setup.cfg directives such as ``find:`` and
    ``find_namespace:`` request automatic discovery rather than naming a
    package, so any token ending in ``:`` is skipped.
    """
    mods = set()

    # pyproject (tool.setuptools) py-modules / packages
    st = ((pyproject.get("tool", {}) or {}).get("setuptools", {}) or {})
    for key in ("py-modules", "packages"):
        val = st.get(key)
        if isinstance(val, list):
            mods.update(item for item in val if isinstance(item, str))

    # setup.cfg [options] py_modules / packages
    if setup_cfg.has_section("options"):
        for key in ("py_modules", "packages"):
            raw = setup_cfg.get("options", key, fallback=None)
            if not raw:
                continue
            for tok in re.split(r"[\s,]+", raw.strip()):
                if tok and not tok.endswith(":"):
                    mods.add(tok)

    return sorted(mods)
419
+
420
def read_top_level_from_egg_info(root):
    """Return the names listed in the first usable ``top_level.txt`` under *root*.

    All ``*.egg-info`` directories are searched (recursively) before any
    ``*.dist-info`` directory.  Blank lines are ignored.  Returns ``None``
    when no metadata directory provides a non-empty ``top_level.txt``.
    """
    # Editable installs often leave ./<name>.egg-info; local builds may leave dist-info.
    for pattern in ("*.egg-info", "*.dist-info"):
        for meta_dir in root.rglob(pattern):
            listing = meta_dir / "top_level.txt"
            if not listing.exists():
                continue
            try:
                text = listing.read_text(encoding="utf-8")
                names = [entry.strip() for entry in text.splitlines() if entry.strip()]
            except Exception:
                continue
            if names:
                return names
    return None
442
+
443
def walk_candidates(roots):
    """Return the sorted set of plausible top-level import names under *roots*.

    For every search root, each directory containing an ``__init__.py``
    contributes the first path component *below that root*; standalone
    ``*.py`` files directly in the root (the ``py_modules`` case) contribute
    their stem, except ``setup``.

    Packages are skipped when any component of their path relative to the
    search root is in ``EXCLUDE_DIRS``, or when the top-level name starts
    with ``_``.  The exclusion test deliberately looks only at the relative
    path: a checkout that happens to live under a directory called
    ``build``, ``tests``, ``site`` and so on must not exclude everything.
    """
    cands = set()
    for r in roots:
        for init_file in r.rglob("__init__.py"):
            try:
                rel = init_file.parent.relative_to(r)
            except ValueError:
                continue
            if not rel.parts:
                # __init__.py directly in the search root: no package name.
                continue
            if any(part in EXCLUDE_DIRS for part in rel.parts):
                continue
            top = rel.parts[0]
            if top.startswith("_"):
                # usually private tooling
                continue
            cands.add(top)

        # standalone modules at top-level of roots (py_modules case)
        for mod in r.glob("*.py"):
            if mod.stem != "setup":
                cands.add(mod.stem)
    return sorted(cands)
472
+
473
def score_candidates(cands, dist_name):
    """Rank import-name candidates by how well they match *dist_name*.

    Returns ``(ordered, scores)`` where ``ordered`` is *cands* sorted by
    descending score (ties broken alphabetically) and ``scores`` maps each
    candidate to its score:

    * +100 if it matches the ``EXCEPTIONS`` entry for the distribution
    * +80  if it equals the distribution name after normalisation
    * +20  if either normalised name is a prefix of the other
    * +0..10 bonus for short names (``10 - len(name)``, floored at 0)
    """
    dist_key = _norm(dist_name or "")
    n_dist = _norm(dist_name) if dist_name else None

    # Exact lower-case hit first, then any key that matches after
    # normalisation (covers capitalisation and separator differences).
    preferred = EXCEPTIONS.get(dist_name.lower()) if dist_name else None
    for key, import_name in EXCEPTIONS.items():
        if _norm(key) == dist_key:
            preferred = import_name
    preferred_key = _norm(preferred) if preferred else None

    def rate(candidate):
        key = _norm(candidate)
        points = max(0, 10 - len(candidate))
        if preferred and key == preferred_key:
            points += 100
        if n_dist:
            if key == n_dist:
                points += 80
            if key.startswith(n_dist) or n_dist.startswith(key):
                points += 20
        return points

    scores = {candidate: rate(candidate) for candidate in cands}
    ordered = sorted(cands, key=lambda candidate: (-scores.get(candidate, 0), candidate))
    return ordered, scores
497
+
498
def detect(root: str, return_all=False):
    """Detect the top-level import name(s) of the project at *root*.

    Strategy, in order:

    1. ``top_level.txt`` from egg-info/dist-info, if present (best signal).
    2. Otherwise merge explicitly declared modules/packages with names found
       by walking the configured package roots.
    3. If that yields nothing, guess from the distribution name: a known
       ``EXCEPTIONS`` entry, else the name with ``-``/``.`` replaced by ``_``.

    Returns a list: every candidate, best first, when *return_all* is true
    (for step 1 the names in file order), otherwise just the best one.  An
    empty list means nothing could be determined.
    """
    root = pathlib.Path(root).resolve()

    pyproject = read_pyproject(root)
    setup_cfg = read_setup_cfg(root)
    dist_name = dist_name_from_config(pyproject, setup_cfg)

    # 1) top_level.txt (best signal if present)
    top = read_top_level_from_egg_info(root)
    if top:
        if return_all:
            return top
        # If multiple, score them
        ordered, _ = score_candidates(top, dist_name or "")
        return [ordered[0]]

    # 2) explicit declarations (py_modules / packages lists)
    explicit = explicit_modules_from_config(pyproject, setup_cfg)

    # 3) find correct search roots (src layout, package_dir, etc.)
    roots = package_roots_from_config(root, pyproject, setup_cfg)

    # 4) walk code to infer candidates
    walked = walk_candidates(roots)

    # merge explicit + walked, keeping order and dropping duplicates
    cands = list(dict.fromkeys(explicit + walked))

    # 5) fallback from dist name heuristics/exceptions if still empty
    if not cands and dist_name:
        # EXCEPTIONS has mixed-case keys (e.g. "PyBaMM"), so compare
        # normalised names instead of relying on a lower-cased lookup.
        wanted = _norm(dist_name)
        known = next((v for k, v in EXCEPTIONS.items() if _norm(k) == wanted), None)
        cands = [known or re.sub(r"[-\.]+", "_", dist_name)]

    if not cands:
        return []

    ordered, _ = score_candidates(cands, dist_name or "")
    return ordered if return_all else [ordered[0]]
542
+
543
def main():
    """CLI entry point: print the detected import name for a repository.

    Prints the single best name, or a JSON list of all candidates with
    ``--all``.  Exits with status 1 when nothing could be detected.
    """
    parser = argparse.ArgumentParser(description="Detect the top-level Python import name for a repo.")
    parser.add_argument("--repo-root", default=".", help="Path to repository root")
    parser.add_argument("--all", action="store_true", help="Print all plausible names (JSON list)")
    options = parser.parse_args()

    detected = detect(options.repo_root, return_all=options.all)
    if not detected:
        sys.exit(1)

    output = json.dumps(detected) if options.all else detected[0]
    print(output)
556
+
557
+ if __name__ == "__main__":
558
+ main()
559
+ PY
560
+ chmod +x /usr/local/bin/detect_import_name
561
+ }
562
+
563
+ install_detect_import_name
564
+
565
+ install_detect_extras() {
566
+ cat >/usr/local/bin/detect_extras <<'PY'
567
+ #!/usr/bin/env python
568
+ """
569
+ Emit space-separated extras discovered in a repo.
570
+ Sources:
571
+ - pyproject.toml -> [project.optional-dependencies] / [tool.poetry.extras]
572
+ - setup.cfg -> [options.extras_require]
573
+ - setup.py -> via `egg_info` then parse *.egg-info/{PKG-INFO,requires.txt}
574
+ """
575
+ import argparse, pathlib, sys, subprocess, configparser, re
576
+ try:
577
+ import tomllib as toml
578
+ except Exception:
579
+ try:
580
+ import tomli as toml
581
+ except Exception:
582
+ toml = None
583
+
584
def read_pyproject(root: pathlib.Path):
    """Load ``pyproject.toml`` from *root* and return it as a dict.

    An empty dict is returned if there is no TOML parser available, no such
    file, or the file cannot be read or parsed.
    """
    candidate = root / "pyproject.toml"
    if toml and candidate.exists():
        try:
            content = candidate.read_text(encoding="utf-8")
            return toml.loads(content)
        except Exception:
            # Treat an unreadable/invalid file the same as a missing one.
            return {}
    return {}
592
+
593
def read_setup_cfg(root: pathlib.Path):
    """Parse ``setup.cfg`` in *root* into a ``ConfigParser``.

    A parser object is always returned; it is empty if the file is absent
    and may be partially filled if parsing fails midway.
    """
    config = configparser.ConfigParser()
    source = root / "setup.cfg"
    if source.exists():
        try:
            config.read(source, encoding="utf-8")
        except Exception:
            # Best effort: a broken setup.cfg must not stop extras detection.
            pass
    return config
602
+
603
def extras_from_pyproject(pyproject):
    """Return the set of extras names declared in a parsed pyproject.toml.

    Combines the keys of ``[project.optional-dependencies]`` (PEP 621) and
    ``[tool.poetry.extras]``.  Missing or null tables count as empty.
    """
    project_table = pyproject.get("project", {}) or {}
    pep621_extras = project_table.get("optional-dependencies", {}) or {}

    tool_table = pyproject.get("tool", {}) or {}
    poetry_table = tool_table.get("poetry", {}) or {}
    poetry_extras = poetry_table.get("extras", {}) or {}

    return set(pep621_extras.keys()) | set(poetry_extras.keys())
611
+
612
def extras_from_setup_cfg(setup_cfg):
    """Return the option names of ``[options.extras_require]`` as a set.

    Returns an empty set when the section is absent.  Names come back as
    ConfigParser reports them (lower-cased by default).
    """
    section = "options.extras_require"
    if not setup_cfg.has_section(section):
        return set()
    return set(setup_cfg.options(section))
618
+
619
def ensure_egg_info(root: pathlib.Path):
    """Run ``setup.py -q egg_info`` in *root* if a ``setup.py`` exists.

    The command runs with the current interpreter, its output is discarded
    and its exit status is ignored; failure to launch it is swallowed too.
    This is purely a best-effort way to get ``*.egg-info`` metadata written.
    """
    if not (root / "setup.py").exists():
        return
    command = [sys.executable, "setup.py", "-q", "egg_info"]
    try:
        subprocess.run(
            command,
            cwd=root,
            check=False,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except Exception:
        pass
626
+
627
def extras_from_egg_info(root: pathlib.Path):
    """Return the extras names recorded in ``*.egg-info`` directly under *root*.

    Two files are consulted in each egg-info directory:

    * ``PKG-INFO``: every ``Provides-Extra: <name>`` header.
    * ``requires.txt``: every ``[section]`` header.  Section headers have
      the form ``[extra]``, ``[extra:marker]`` or ``[:marker]``; only the
      part before the first ``:`` is the extra name, and an empty name marks
      a conditional *base* requirement rather than an extra, so it is
      skipped.

    Unreadable files are ignored.
    """
    names = set()
    for egg_info in root.glob("*.egg-info"):
        pkg_info = egg_info / "PKG-INFO"
        if pkg_info.exists():
            try:
                for line in pkg_info.read_text(encoding="utf-8", errors="ignore").splitlines():
                    if line.startswith("Provides-Extra:"):
                        extra = line.split(":", 1)[1].strip()
                        if extra:
                            names.add(extra)
            except OSError:
                pass

        requires = egg_info / "requires.txt"
        if requires.exists():
            try:
                for line in requires.read_text(encoding="utf-8", errors="ignore").splitlines():
                    header = re.match(r"^\[(.+)\]$", line.strip())
                    if not header:
                        continue
                    # Drop the environment marker that may follow the name.
                    extra = header.group(1).split(":", 1)[0].strip()
                    if extra:
                        names.add(extra)
            except OSError:
                pass
    return names
648
+
649
def main():
    """CLI entry point: print the repo's extras as one space-separated line.

    Extras are gathered from pyproject.toml and setup.cfg, and additionally
    from egg-info metadata when the repo has a ``setup.py``.  Names are
    printed sorted; nothing at all is printed when there are none.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--repo-root", default=".")
    repo_root = pathlib.Path(parser.parse_args().repo_root).resolve()

    pyproject = read_pyproject(repo_root)
    setup_cfg = read_setup_cfg(repo_root)
    extras = extras_from_pyproject(pyproject) | extras_from_setup_cfg(setup_cfg)

    if (repo_root / "setup.py").exists():
        ensure_egg_info(repo_root)
        extras |= extras_from_egg_info(repo_root)

    # Print space-separated (sorted) list; empty output if none
    if extras:
        print(" ".join(sorted(extras)))
    else:
        print("", end="")
671
+
672
+ if __name__ == "__main__":
673
+ main()
674
+ PY
675
+ chmod +x /usr/local/bin/detect_extras
676
+ }
677
+ install_detect_extras
678
+
679
# -------- System bootstrap (idempotent) --------
# When run from a bare image (e.g. ubuntu:22.04) these install the
# toolchain that Dockerfile.base otherwise provides inline.
# Each guard checks whether the tool is already present so the block
# is a no-op when the Dockerfile already set things up.
#
# Commands are written as separate statements rather than `a && b && c`
# chains: under `set -e` a failure in a non-final member of an && list does
# NOT abort the script, so a failed `apt-get update` would go unnoticed.

export DEBIAN_FRONTEND=noninteractive

if ! command -v cmake >/dev/null 2>&1; then
    echo "[docker_build_base] Installing system packages..."
    apt-get update
    apt-get install -y --no-install-recommends \
        build-essential gcc g++ gfortran git curl wget ca-certificates \
        jq cmake ninja-build libopenmpi-dev libgeos-dev pkg-config \
        graphviz libgraphviz-dev libpq-dev \
        libgl1 libegl1 libglib2.0-0 libxkbcommon0 libdbus-1-3 \
        libfontconfig1 libxrender1 libxext6 \
        python3 python3-pip python3-dev
    rm -rf /var/lib/apt/lists/*
fi

if ! command -v rustc >/dev/null 2>&1; then
    echo "[docker_build_base] Installing Rust toolchain..."
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
    export PATH="/root/.cargo/bin:${PATH}"
fi

export MAMBA_ROOT_PREFIX="${MAMBA_ROOT_PREFIX:-/opt/conda}"

if ! command -v micromamba >/dev/null 2>&1; then
    echo "[docker_build_base] Installing micromamba..."
    # Pick the release matching the build host; anything unrecognised keeps
    # the historical linux-64 download.
    case "$(uname -m)" in
        aarch64|arm64) mamba_platform="linux-aarch64" ;;
        ppc64le)       mamba_platform="linux-ppc64le" ;;
        *)             mamba_platform="linux-64" ;;
    esac
    curl -Ls "https://micro.mamba.pm/api/micromamba/${mamba_platform}/latest" \
        | tar -xvj -C /usr/local/bin --strip-components=1 bin/micromamba
    micromamba shell init --shell=bash --root-prefix="$MAMBA_ROOT_PREFIX"
fi

# Honour an overridden MAMBA_ROOT_PREFIX instead of assuming /opt/conda.
export PATH="${MAMBA_ROOT_PREFIX}/bin:/root/.cargo/bin:/root/.local/bin:${PATH}"

if ! command -v uv >/dev/null 2>&1; then
    echo "[docker_build_base] Installing uv..."
    curl -LsSf https://astral.sh/uv/install.sh | sh
fi

# Ensure base conda env has python + git + asv.
# grep reads its whole input (no -q) so the upstream commands cannot be
# killed by SIGPIPE, which `set -o pipefail` would report as a failure.
if ! micromamba env list 2>/dev/null | awk '{print $1}' | grep -x "base" >/dev/null; then
    micromamba install -y -p "$MAMBA_ROOT_PREFIX" -c conda-forge \
        python=3.10 git asv pyperf mamba conda libmambapy
    micromamba clean --all --yes
fi

mkdir -p /workspace /output
729
+
730
# -------- Script body --------
# Installs the shell helpers, then creates one micromamba environment per
# requested Python version (asv_<version>) with a common build toolchain,
# test tooling and ASV.

install_profile_helpers
# shellcheck disable=SC1091
source /etc/profile.d/asv_utils.sh

# Ensure base micromamba is active for introspecting ASV config
micromamba activate base

# Minimal tools in base to parse metadata (pyproject & egg-info)
micromamba install -y -n base -c conda-forge python tomli setuptools >/dev/null

# Create the per-version envs with common build deps & ASV
if [[ -n "$REQUESTED_PY_VERSION" ]]; then
    PY_VERSIONS="$REQUESTED_PY_VERSION"
    echo "[docker_build_base] Restricting micromamba env creation to Python $PY_VERSIONS"
else
    PY_VERSIONS="3.7 3.8 3.9 3.10 3.11 3.12"
    echo "[docker_build_base] Building micromamba envs for Python versions: $PY_VERSIONS"
fi
write_build_vars "$PY_VERSIONS"

# PY_VERSIONS is intentionally unquoted: it is a space-separated list.
for version in $PY_VERSIONS; do
    ENV_NAME="asv_${version}"

    # No `grep -q` here: an early-exiting grep can SIGPIPE the producers,
    # which pipefail reports as failure and would wrongly trigger a
    # `micromamba create` over an environment that already exists.
    if ! micromamba env list | awk '{print $1}' | grep -x "$ENV_NAME" >/dev/null; then
        micromamba create -y -n "$ENV_NAME" -c conda-forge "python=$version"
    fi

    # Generic toolchain useful for many compiled projects (installed once here)
    micromamba install -y -n "$ENV_NAME" -c conda-forge \
        pip git conda mamba "libmambapy<=1.9.9" \
        cython fakeredis threadpoolctl \
        compilers meson-python cmake ninja pkg-config tomli

    # Old interpreters (python<3.9) get hypothesis<5 and the released asv;
    # newer ones get current hypothesis and asv from its git repository.
    PYTHON_LT_39=$(micromamba run -n "$ENV_NAME" python -c 'import sys; print(sys.version_info < (3,9))')
    # Follow MAMBA_ROOT_PREFIX so a relocated conda root still resolves.
    PYTHON_BIN="${MAMBA_ROOT_PREFIX:-/opt/conda}/envs/$ENV_NAME/bin/python"
    if [ "$PYTHON_LT_39" = "True" ]; then
        # uv pip install --python "$PYTHON_BIN" "Cython<3" "setuptools<70" "wheel>=0.38" >/dev/null 2>&1 || true
        uv pip install --python "$PYTHON_BIN" "hypothesis<5" pytest versioneer >/dev/null 2>&1 || true
        # uv pip install --python "$PYTHON_BIN" --upgrade pip "setuptools>79" wheel pytest asv
        uv pip install --python "$PYTHON_BIN" --upgrade asv
    else
        uv pip install --python "$PYTHON_BIN" hypothesis pytest versioneer >/dev/null 2>&1 || true
        # uv pip install --python "$PYTHON_BIN" --upgrade pip "setuptools>79" wheel pytest
        uv pip install --python "$PYTHON_BIN" git+https://github.com/airspeed-velocity/asv
    fi

done

echo "Base environment setup complete."