alibuild 1.17.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. alibuild-1.17.19.data/scripts/aliBuild +137 -0
  2. alibuild-1.17.19.data/scripts/aliDeps +7 -0
  3. alibuild-1.17.19.data/scripts/aliDoctor +7 -0
  4. alibuild-1.17.19.data/scripts/alienv +344 -0
  5. alibuild-1.17.19.data/scripts/pb +7 -0
  6. alibuild-1.17.19.dist-info/METADATA +78 -0
  7. alibuild-1.17.19.dist-info/RECORD +74 -0
  8. alibuild-1.17.19.dist-info/WHEEL +5 -0
  9. alibuild-1.17.19.dist-info/licenses/LICENSE.md +674 -0
  10. alibuild-1.17.19.dist-info/top_level.txt +5 -0
  11. alibuild_helpers/__init__.py +21 -0
  12. alibuild_helpers/_version.py +21 -0
  13. alibuild_helpers/analytics.py +120 -0
  14. alibuild_helpers/args.py +493 -0
  15. alibuild_helpers/build.py +1209 -0
  16. alibuild_helpers/build_template.sh +314 -0
  17. alibuild_helpers/clean.py +83 -0
  18. alibuild_helpers/cmd.py +154 -0
  19. alibuild_helpers/deps.py +116 -0
  20. alibuild_helpers/doctor.py +195 -0
  21. alibuild_helpers/git.py +104 -0
  22. alibuild_helpers/init.py +103 -0
  23. alibuild_helpers/log.py +132 -0
  24. alibuild_helpers/scm.py +31 -0
  25. alibuild_helpers/sl.py +62 -0
  26. alibuild_helpers/sync.py +693 -0
  27. alibuild_helpers/templating_plugin.py +18 -0
  28. alibuild_helpers/utilities.py +662 -0
  29. alibuild_helpers/workarea.py +179 -0
  30. debian/changelog +11 -0
  31. debian/compat +1 -0
  32. debian/control +14 -0
  33. debian/copyright +10 -0
  34. debian/files +1 -0
  35. debian/rules +7 -0
  36. docs/README.md +1 -0
  37. docs/SUPPORT +3 -0
  38. docs/docs/alice_logo.png +0 -0
  39. docs/docs/deps.png +0 -0
  40. docs/docs/index.md +75 -0
  41. docs/docs/quick.md +89 -0
  42. docs/docs/reference.md +430 -0
  43. docs/docs/stylesheets/extra.css +9 -0
  44. docs/docs/troubleshooting.md +346 -0
  45. docs/docs/user.md +413 -0
  46. docs/mkdocs.yml +37 -0
  47. templates/alibuild_to_please.jnj +63 -0
  48. tests/test_analytics.py +42 -0
  49. tests/test_args.py +119 -0
  50. tests/test_build.py +426 -0
  51. tests/test_clean.py +154 -0
  52. tests/test_cmd.py +73 -0
  53. tests/test_deps.py +79 -0
  54. tests/test_doctor.py +128 -0
  55. tests/test_git.py +48 -0
  56. tests/test_hashing.py +67 -0
  57. tests/test_init.py +103 -0
  58. tests/test_log.py +50 -0
  59. tests/test_packagelist.py +235 -0
  60. tests/test_parseRecipe.py +132 -0
  61. tests/test_sync.py +332 -0
  62. tests/test_utilities.py +383 -0
  63. tests/test_workarea.py +101 -0
  64. tests/testdist/broken1.sh +1 -0
  65. tests/testdist/broken2.sh +1 -0
  66. tests/testdist/broken3.sh +3 -0
  67. tests/testdist/broken4.sh +2 -0
  68. tests/testdist/broken5.sh +2 -0
  69. tests/testdist/broken6.sh +2 -0
  70. tests/testdist/broken7.sh +5 -0
  71. tests/testdist/clobber-initdotsh.sh +4 -0
  72. tests/testdist/defaults-o2.sh +10 -0
  73. tests/testdist/delete-etc.sh +4 -0
  74. tests/testdist/tracking-env.sh +6 -0
alibuild_helpers/sync.py (added)
@@ -0,0 +1,693 @@
+ """Sync backends for alibuild."""
+
+ import glob
+ import os
+ import os.path
+ import re
+ import sys
+ import time
+ import requests
+ from requests.exceptions import RequestException
+
+ from alibuild_helpers.cmd import execute
+ from alibuild_helpers.log import debug, info, error, dieOnError, ProgressPrint
+ from alibuild_helpers.utilities import resolve_store_path, resolve_links_path, symlink
+
+
+ def remote_from_url(read_url, write_url, architecture, work_dir, insecure=False):
+   """Parse remote store URLs and return the correct RemoteSync instance for them."""
+   if read_url.startswith("http"):
+     return HttpRemoteSync(read_url, architecture, work_dir, insecure)
+   if read_url.startswith("s3://"):
+     return S3RemoteSync(read_url, write_url, architecture, work_dir)
+   if read_url.startswith("b3://"):
+     return Boto3RemoteSync(read_url, write_url, architecture, work_dir)
+   if read_url.startswith("cvmfs://"):
+     return CVMFSRemoteSync(read_url, None, architecture, work_dir)
+   if read_url:
+     return RsyncRemoteSync(read_url, write_url, architecture, work_dir)
+   return NoRemoteSync()
+
+
+ class NoRemoteSync:
+   """Helper class which does not do anything to sync"""
+   def fetch_symlinks(self, spec) -> None:
+     pass
+   def fetch_tarball(self, spec) -> None:
+     pass
+   def upload_symlinks_and_tarball(self, spec) -> None:
+     pass
+
+ class PartialDownloadError(Exception):
+   def __init__(self, downloaded, size) -> None:
+     self.downloaded = downloaded
+     self.size = size
+   def __str__(self):
+     return "only %d out of %d bytes downloaded" % (self.downloaded, self.size)
+
+
49
+ class HttpRemoteSync:
50
+ def __init__(self, remoteStore, architecture, workdir, insecure) -> None:
51
+ self.remoteStore = remoteStore
52
+ self.writeStore = ""
53
+ self.architecture = architecture
54
+ self.workdir = workdir
55
+ self.insecure = insecure
56
+ self.httpTimeoutSec = 15
57
+ self.httpConnRetries = 4
58
+ self.httpBackoff = 0.4
59
+
60
+ def getRetry(self, url, dest=None, returnResult=False, log=True, session=None, progress=debug):
61
+ get = session.get if session is not None else requests.get
62
+ for i in range(0, self.httpConnRetries):
63
+ if i > 0:
64
+ pauseSec = self.httpBackoff * (2 ** (i - 1))
65
+ debug("GET %s failed: retrying in %.2f", url, pauseSec)
66
+ time.sleep(pauseSec)
67
+ # If the download has failed, enable debug output, even if it was
68
+ # disabled before. We disable debug output for e.g. symlink downloads
69
+ # to make sure the output log isn't overwhelmed. If the download
70
+ # failed, we want to know about it, though. Note that aliBuild has to
71
+ # be called with --debug for this to take effect.
72
+ log = True
73
+ try:
74
+ if log:
75
+ debug("GET %s: processing (attempt %d/%d)", url, i+1, self.httpConnRetries)
76
+ if dest or returnResult:
77
+ # Destination specified -- file (dest) or buffer (returnResult).
78
+ # Use requests in stream mode
79
+ resp = get(url, stream=True, verify=not self.insecure, timeout=self.httpTimeoutSec)
80
+ size = int(resp.headers.get("content-length", "-1"))
81
+ downloaded = 0
82
+ reportTime = time.time()
83
+ result = []
84
+
85
+ try:
86
+ destFp = open(dest+".tmp", "wb") if dest else None
87
+ for chunk in filter(bool, resp.iter_content(chunk_size=32768)):
88
+ if destFp:
89
+ destFp.write(chunk)
90
+ if returnResult:
91
+ result.append(chunk)
92
+ downloaded += len(chunk)
93
+ if log and size != -1:
94
+ now = time.time()
95
+ if downloaded == size:
96
+ progress("[100%%] Download complete")
97
+ elif now - reportTime > 1:
98
+ progress("[%.0f%%] downloaded...", 100 * downloaded / size)
99
+ reportTime = now
100
+ finally:
101
+ if destFp:
102
+ destFp.close()
103
+
104
+ if size not in (downloaded, -1):
105
+ raise PartialDownloadError(downloaded, size)
106
+ if dest:
107
+ os.rename(dest+".tmp", dest) # we should not have errors here
108
+ return b''.join(result) if returnResult else True
109
+ else:
110
+ # For CERN S3 we need to construct the JSON ourself...
111
+ s3Request = re.match("https://s3.cern.ch/swift/v1[/]+([^/]*)/(.*)$", url)
112
+ if s3Request:
113
+ [bucket, prefix] = s3Request.groups()
114
+ url = "https://s3.cern.ch/swift/v1/%s/?prefix=%s" % (bucket, prefix.lstrip("/"))
115
+ resp = get(url, verify=not self.insecure, timeout=self.httpTimeoutSec)
116
+ if resp.status_code == 404:
117
+ # No need to retry any further
118
+ return None
119
+ resp.raise_for_status()
120
+ return [{"name": os.path.basename(x), "type": "file"}
121
+ for x in resp.text.split()]
122
+ else:
123
+ # No destination specified: JSON request
124
+ resp = get(url, verify=not self.insecure, timeout=self.httpTimeoutSec)
125
+ if resp.status_code == 404:
126
+ # No need to retry any further
127
+ return None
128
+ resp.raise_for_status()
129
+ return resp.json()
130
+ except (RequestException,ValueError,PartialDownloadError) as e:
131
+ if i == self.httpConnRetries-1:
132
+ error("GET %s failed: %s", url, e)
133
+ if dest:
134
+ try:
135
+ os.unlink(dest+".tmp")
136
+ except:
137
+ pass
138
+ return None
139
+
+   def fetch_tarball(self, spec) -> None:
+     # Check for any existing tarballs we can use instead of fetching new ones.
+     for pkg_hash in spec["remote_hashes"]:
+       try:
+         have_tarballs = os.listdir(os.path.join(
+           self.workdir, resolve_store_path(self.architecture, pkg_hash)))
+       except OSError:  # store path not readable
+         continue
+       for tarball in have_tarballs:
+         if re.match(r"^{package}-{version}-[0-9]+\.{arch}\.tar\.gz$".format(
+             package=re.escape(spec["package"]),
+             version=re.escape(spec["version"]),
+             arch=re.escape(self.architecture),
+         ), os.path.basename(tarball)):
+           debug("Previously downloaded tarball for %s with hash %s, reusing",
+                 spec["package"], pkg_hash)
+           return
+
+     with requests.Session() as session:
+       debug("Updating remote store for package %s; trying hashes %s",
+             spec["package"], ", ".join(spec["remote_hashes"]))
+       store_path = use_tarball = None
+       # Find the first tarball that matches any possible hash and fetch it.
+       for pkg_hash in spec["remote_hashes"]:
+         store_path = resolve_store_path(self.architecture, pkg_hash)
+         tarballs = self.getRetry("%s/%s/" % (self.remoteStore, store_path),
+                                  session=session)
+         if tarballs:
+           use_tarball = tarballs[0]["name"]
+           break
+
+       if store_path is None or use_tarball is None:
+         debug("Nothing fetched for %s (%s)", spec["package"],
+               ", ".join(spec["remote_hashes"]))
+         return
+
+       os.makedirs(os.path.join(self.workdir, store_path), exist_ok=True)
+
+       destPath = os.path.join(self.workdir, store_path, use_tarball)
+       if not os.path.isfile(destPath):  # do not download twice
+         progress = ProgressPrint("Downloading tarball for %s@%s" %
+                                  (spec["package"], spec["version"]))
+         progress("[0%%] Starting download of %s", use_tarball)  # initialise progress bar
+         self.getRetry("/".join((self.remoteStore, store_path, use_tarball)),
+                       destPath, session=session, progress=progress)
+         progress.end("done")
+
+   def fetch_symlinks(self, spec) -> None:
+     links_path = resolve_links_path(self.architecture, spec["package"])
+     os.makedirs(os.path.join(self.workdir, links_path), exist_ok=True)
+
+     # If we already have a symlink we can use, don't update the list. This
+     # speeds up rebuilds significantly.
+     if any(f"/{pkg_hash[:2]}/{pkg_hash}/" in target
+            for target in (os.readlink(os.path.join(self.workdir, links_path, link))
+                           for link in os.listdir(os.path.join(self.workdir, links_path)))
+            for pkg_hash in spec["remote_hashes"]):
+       debug("Found symlink for %s@%s, not updating", spec["package"], spec["version"])
+       return
+
+     with requests.Session() as session:
+       # Fetch manifest file with initial symlinks. This file is updated
+       # regularly; we use it to avoid many small network requests.
+       manifest = self.getRetry("%s/%s.manifest" % (self.remoteStore, links_path),
+                                returnResult=True, session=session)
+       symlinks = {
+         linkname.decode("utf-8"): target.decode("utf-8")
+         for linkname, sep, target in (line.partition(b"\t")
+                                       for line in manifest.splitlines())
+         if sep and linkname and target
+       }
+       # Now add any remaining symlinks that aren't in the manifest yet. There
+       # should always be relatively few of these, as the separate network
+       # requests are a bit expensive.
+       for link in self.getRetry("%s/%s/" % (self.remoteStore, links_path),
+                                 session=session):
+         linkname = link["name"]
+         if linkname in symlinks:
+           # This symlink is already present in the manifest.
+           continue
+         if os.path.islink(os.path.join(self.workdir, links_path, linkname)):
+           # We have this symlink locally. With local revisions, we won't produce
+           # revisions that will conflict with remote revisions unless we upload
+           # them anyway, so there's no need to redownload.
+           continue
+         # This symlink isn't in the manifest yet, and we don't have it locally,
+         # so download it individually.
+         symlinks[linkname] = \
+           self.getRetry("/".join((self.remoteStore, links_path, linkname)),
+                         returnResult=True, log=False, session=session) \
+           .decode("utf-8").rstrip("\r\n")
+       for linkname, target in symlinks.items():
+         symlink("../../" + target.lstrip("./"),
+                 os.path.join(self.workdir, links_path, linkname))
+
+   def upload_symlinks_and_tarball(self, spec) -> None:
+     pass
+
+
+ class RsyncRemoteSync:
+   """Helper class to sync package build directory using RSync."""
+
+   def __init__(self, remoteStore, writeStore, architecture, workdir) -> None:
+     self.remoteStore = re.sub("^ssh://", "", remoteStore)
+     self.writeStore = re.sub("^ssh://", "", writeStore)
+     self.architecture = architecture
+     self.workdir = workdir
+
+   def fetch_tarball(self, spec) -> None:
+     info("Downloading tarball for %s@%s, if available", spec["package"], spec["version"])
+     debug("Updating remote store for package %s with hashes %s", spec["package"],
+           ", ".join(spec["remote_hashes"]))
+     err = execute("""\
+     for storePath in {storePaths}; do
+       # Only get the first matching tarball. If there are multiple with the
+       # same hash, we only need one and they should be interchangeable.
+       if tars=$(rsync -s --list-only "{remoteStore}/$storePath/{pkg}-{ver}-*.{arch}.tar.gz" 2>/dev/null) &&
+          # Strip away the metadata in rsync's file listing, leaving only the first filename.
+          tar=$(echo "$tars" | sed -rn '1s#[- a-z0-9,/]* [0-9]{{2}}:[0-9]{{2}}:[0-9]{{2}} ##p') &&
+          mkdir -p "{workDir}/$storePath" &&
+          # If we already have a file with the same name, assume it's up to date
+          # with the remote. In reality, we'll have unpacked, relocated and
+          # repacked the tarball from the remote, so the file differs, but
+          # there's no point in downloading the one from the remote again.
+          rsync -vW --ignore-existing "{remoteStore}/$storePath/$tar" "{workDir}/$storePath/"
+       then
+         break
+       fi
+     done
+     """.format(pkg=spec["package"], ver=spec["version"], arch=self.architecture,
+                remoteStore=self.remoteStore,
+                workDir=self.workdir,
+                storePaths=" ".join(resolve_store_path(self.architecture, pkg_hash)
+                                    for pkg_hash in spec["remote_hashes"])))
+     dieOnError(err, "Unable to fetch tarball from specified store.")
+
+   def fetch_symlinks(self, spec) -> None:
+     links_path = resolve_links_path(self.architecture, spec["package"])
+     os.makedirs(os.path.join(self.workdir, links_path), exist_ok=True)
+     err = execute("rsync -rlvW --delete {remote_store}/{links_path}/ {workdir}/{links_path}/".format(
+       remote_store=self.remoteStore,
+       links_path=links_path,
+       workdir=self.workdir,
+     ))
+     dieOnError(err, "Unable to fetch symlinks from specified store.")
+
+   def upload_symlinks_and_tarball(self, spec) -> None:
+     if not self.writeStore:
+       return
+     dieOnError(execute("""\
+     set -e
+     cd {workdir}
+     tarball={package}-{version}-{revision}.{arch}.tar.gz
+     rsync -avR --ignore-existing "{links_path}/$tarball" {remote}/
+     for link_dir in dist dist-direct dist-runtime; do
+       rsync -avR --ignore-existing "TARS/{arch}/$link_dir/{package}/{package}-{version}-{revision}/" {remote}/
+     done
+     rsync -avR --ignore-existing "{store_path}/$tarball" {remote}/
+     """.format(
+       workdir=self.workdir,
+       remote=self.remoteStore,
+       store_path=resolve_store_path(self.architecture, spec["hash"]),
+       links_path=resolve_links_path(self.architecture, spec["package"]),
+       arch=self.architecture,
+       package=spec["package"],
+       version=spec["version"],
+       revision=spec["revision"],
+     )), "Unable to upload tarball.")
+
+ class CVMFSRemoteSync:
+   """Sync package build directories from CVMFS or a similar
+   FS-based deployment. The tarball is created on the fly with a single
+   symlink to the remote store in it, so that unpacking it really
+   means unpacking the symlink to the wanted package.
+   """
+
+   def __init__(self, remoteStore, writeStore, architecture, workdir) -> None:
+     self.remoteStore = re.sub("^cvmfs://", "", remoteStore)
+     # We do not support uploading directly to CVMFS, for obvious
+     # reasons.
+     assert(writeStore is None)
+     self.writeStore = None
+     self.architecture = architecture
+     self.workdir = workdir
+
+   def fetch_tarball(self, spec) -> None:
+     info("Downloading tarball for %s@%s-%s, if available", spec["package"], spec["version"], spec["revision"])
+     # If we already have a tarball with any equivalent hash, don't check the remote store.
+     for pkg_hash in spec["remote_hashes"] + spec["local_hashes"]:
+       store_path = resolve_store_path(self.architecture, pkg_hash)
+       pattern = os.path.join(self.workdir, store_path, "%s-*.tar.gz" % spec["package"])
+       if glob.glob(pattern):
+         info("Reusing existing tarball for %s@%s", spec["package"], pkg_hash)
+         return
+     info("Could not find prebuilt tarball for %s@%s-%s, will be rebuilt",
+          spec["package"], spec["version"], spec["revision"])
+
+   def fetch_symlinks(self, spec) -> None:
+     # When using CVMFS, we create the symlinks locally by reading the packages published in the remote store.
+     info("Fetching available build hashes for %s, from %s", spec["package"], self.remoteStore)
+     links_path = resolve_links_path(self.architecture, spec["package"])
+     os.makedirs(os.path.join(self.workdir, links_path), exist_ok=True)
+
+     cvmfs_architecture = re.sub(r"slc(\d+)_x86-64", r"el\1-x86_64", self.architecture)
+     err = execute("""\
+     set -x
+     # Exit without error in case we do not have any package published
+     test -d "{remote_store}/{cvmfs_architecture}/Packages/{package}" || exit 0
+     mkdir -p "{workDir}/{links_path}"
+     for install_path in $(find "{remote_store}/{cvmfs_architecture}/Packages/{package}" -type d -mindepth 1 -maxdepth 1); do
+       full_version="${{install_path##*/}}"
+       tarball={package}-$full_version.{architecture}.tar.gz
+       pkg_hash=$(cat "${{install_path}}/.build-hash" || jq -r '.package.hash' <${{install_path}}/.meta.json)
+       if [ "X$pkg_hash" = X ]; then
+         continue
+       fi
+       ln -sf ../../{architecture}/store/${{pkg_hash:0:2}}/$pkg_hash/$tarball "{workDir}/{links_path}/$tarball"
+       # Create the dummy tarball, if it does not exist
+       test -f "{workDir}/{architecture}/store/${{pkg_hash:0:2}}/$pkg_hash/$tarball" && continue
+       mkdir -p "{workDir}/INSTALLROOT/$pkg_hash/{architecture}/{package}"
+       find "{remote_store}/{cvmfs_architecture}/Packages/{package}/$full_version" ! -name etc -maxdepth 1 -mindepth 1 -exec ln -sf {{}} "{workDir}/INSTALLROOT/$pkg_hash/{architecture}/{package}/" \;
+       cp -fr "{remote_store}/{cvmfs_architecture}/Packages/{package}/$full_version/etc" "{workDir}/INSTALLROOT/$pkg_hash/{architecture}/{package}/etc"
+       mkdir -p "{workDir}/TARS/{architecture}/store/${{pkg_hash:0:2}}/$pkg_hash"
+       tar -C "{workDir}/INSTALLROOT/$pkg_hash" -czf "{workDir}/TARS/{architecture}/store/${{pkg_hash:0:2}}/$pkg_hash/$tarball" .
+       rm -rf "{workDir}/INSTALLROOT/$pkg_hash"
+     done
+     """.format(
+       workDir=self.workdir,
+       architecture=self.architecture,
+       cvmfs_architecture=cvmfs_architecture,
+       package=spec["package"],
+       remote_store=self.remoteStore,
+       links_path=links_path,
+     ))
+
+   def upload_symlinks_and_tarball(self, spec) -> None:
+     dieOnError(True, "CVMFS backend does not support uploading directly")
+
+ class S3RemoteSync:
+   """Sync package build directory from and to S3 using s3cmd.
+
+   s3cmd must be installed separately in order for this to work.
+   """
+
+   def __init__(self, remoteStore, writeStore, architecture, workdir) -> None:
+     self.remoteStore = re.sub("^s3://", "", remoteStore)
+     self.writeStore = re.sub("^s3://", "", writeStore)
+     self.architecture = architecture
+     self.workdir = workdir
+
+   def fetch_tarball(self, spec) -> None:
+     info("Downloading tarball for %s@%s, if available", spec["package"], spec["version"])
+     debug("Updating remote store for package %s with hashes %s",
+           spec["package"], ", ".join(spec["remote_hashes"]))
+     err = execute("""\
+     for storePath in {storePaths}; do
+       # For the first store path that contains tarballs, fetch them, and skip
+       # any possible later tarballs (we only need one).
+       if [ -n "$(s3cmd ls -s -v --host s3.cern.ch --host-bucket {b}.s3.cern.ch \
+                  "s3://{b}/$storePath/")" ]; then
+         s3cmd --no-check-md5 sync -s -v --host s3.cern.ch --host-bucket {b}.s3.cern.ch \
+           "s3://{b}/$storePath/" "{workDir}/$storePath/" 2>&1 || :
+         break
+       fi
+     done
+     """.format(
+       workDir=self.workdir,
+       b=self.remoteStore,
+       storePaths=" ".join(resolve_store_path(self.architecture, pkg_hash)
+                           for pkg_hash in spec["remote_hashes"]),
+     ))
+     dieOnError(err, "Unable to fetch tarball from specified store.")
+
+   def fetch_symlinks(self, spec) -> None:
+     err = execute("""\
+     mkdir -p "{workDir}/{linksPath}"
+     find "{workDir}/{linksPath}" -type l -delete
+     curl -sL "https://s3.cern.ch/swift/v1/{b}/{linksPath}.manifest" |
+       while IFS='\t' read -r symlink target; do
+         ln -sf "../../${{target#../../}}" "{workDir}/{linksPath}/$symlink" || true
+       done
+     for x in $(curl -sL "https://s3.cern.ch/swift/v1/{b}/?prefix={linksPath}/"); do
+       # Skip already existing symlinks -- these were from the manifest.
+       # (We delete leftover symlinks from previous runs above.)
+       [ -L "{workDir}/{linksPath}/$(basename "$x")" ] && continue
+       ln -sf "$(curl -sL "https://s3.cern.ch/swift/v1/{b}/$x" | sed -r 's,^(\\.\\./\\.\\./)?,../../,')" \
+         "{workDir}/{linksPath}/$(basename "$x")" || true
+     done
+     """.format(
+       b=self.remoteStore,
+       linksPath=resolve_links_path(self.architecture, spec["package"]),
+       workDir=self.workdir,
+     ))
+     dieOnError(err, "Unable to fetch symlinks from specified store.")
+
+   def upload_symlinks_and_tarball(self, spec) -> None:
+     if not self.writeStore:
+       return
+     dieOnError(execute("""\
+     set -e
+     put () {{
+       s3cmd put -s -v --host s3.cern.ch --host-bucket {bucket}.s3.cern.ch "$@" 2>&1
+     }}
+     tarball={package}-{version}-{revision}.{arch}.tar.gz
+     cd {workdir}
+
+     # First, upload "main" symlink, to reserve this revision number, in case
+     # the below steps fail.
+     readlink "{links_path}/$tarball" | sed 's|^\\.\\./\\.\\./||' |
+       put - "s3://{bucket}/{links_path}/$tarball"
+
+     # Then, upload dist symlink trees -- these must be in place before the main
+     # tarball.
+     find TARS/{arch}/{{dist,dist-direct,dist-runtime}}/{package}/{package}-{version}-{revision}/ \
+          -type l | while read -r link; do
+       hashedurl=$(readlink "$link" | sed 's|.*/\\.\\./TARS|TARS|')
+       echo "$hashedurl" |
+         put --skip-existing -q -P \\
+             --add-header="x-amz-website-redirect-location:\
+ https://s3.cern.ch/swift/v1/{bucket}/$hashedurl" \\
+             - "s3://{bucket}/$link" 2>&1
+     done
+
+     # Finally, upload the tarball.
+     put "{store_path}/$tarball" s3://{bucket}/{store_path}/
+     """.format(
+       workdir=self.workdir,
+       bucket=self.remoteStore,
+       store_path=resolve_store_path(self.architecture, spec["hash"]),
+       links_path=resolve_links_path(self.architecture, spec["package"]),
+       arch=self.architecture,
+       package=spec["package"],
+       version=spec["version"],
+       revision=spec["revision"],
+     )), "Unable to upload tarball.")
+
+
+ class Boto3RemoteSync:
+   """Sync package build directory from and to S3 using boto3.
+
+   As boto3 doesn't support Python 2, this class can only be used under Python
+   3. boto3 is only imported at __init__ time, so if this class is never
+   instantiated, boto3 doesn't have to be installed.
+
+   This class has the advantage over S3RemoteSync that it uses the same
+   connection to S3 every time, while s3cmd must establish a new connection each
+   time.
+   """
+
+   def __init__(self, remoteStore, writeStore, architecture, workdir) -> None:
+     self.remoteStore = re.sub("^b3://", "", remoteStore)
+     self.writeStore = re.sub("^b3://", "", writeStore)
+     self.architecture = architecture
+     self.workdir = workdir
+     self._s3_init()
+
+   def _s3_init(self) -> None:
+     # This is a separate method so that we can patch it out for unit tests.
+     # Import boto3 here, so that if we don't use this remote store, we don't
+     # have to install it in the first place.
+     try:
+       import boto3
+     except ImportError:
+       error("boto3 must be installed to use %s", Boto3RemoteSync)
+       sys.exit(1)
+
+     try:
+       self.s3 = boto3.client("s3", endpoint_url="https://s3.cern.ch",
+                              aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
+                              aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"])
+     except KeyError:
+       error("you must pass the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY env "
+             "variables to aliBuild in order to use the S3 remote store")
+       sys.exit(1)
+
+   def _s3_listdir(self, dirname):
+     """List keys of items under dirname in the read bucket."""
+     pages = self.s3.get_paginator("list_objects_v2") \
+                    .paginate(Bucket=self.remoteStore, Delimiter="/",
+                              Prefix=dirname.rstrip("/") + "/")
+     return (item["Key"] for pg in pages for item in pg.get("Contents", ()))
+
+   def _s3_key_exists(self, key):
+     """Return whether the given key exists in the write bucket already."""
+     from botocore.exceptions import ClientError
+     try:
+       self.s3.head_object(Bucket=self.writeStore, Key=key)
+     except ClientError as err:
+       if err.response["Error"]["Code"] == "404":
+         return False
+       raise
+     return True
+
+   def fetch_tarball(self, spec) -> None:
+     debug("Updating remote store for package %s with hashes %s", spec["package"],
+           ", ".join(spec["remote_hashes"]))
+
+     # If we already have a tarball with any equivalent hash, don't check S3.
+     for pkg_hash in spec["remote_hashes"]:
+       store_path = resolve_store_path(self.architecture, pkg_hash)
+       if glob.glob(os.path.join(self.workdir, store_path, "%s-*.tar.gz" % spec["package"])):
+         debug("Reusing existing tarball for %s@%s", spec["package"], pkg_hash)
+         return
+
+     for pkg_hash in spec["remote_hashes"]:
+       store_path = resolve_store_path(self.architecture, pkg_hash)
+
+       # We don't already have a tarball with the hash that we need, so download
+       # the first existing one from the remote, if possible. (Downloading more
+       # than one is a waste of time as they should be equivalent and we only
+       # ever use one anyway.)
+       for tarball in self._s3_listdir(store_path):
+         debug("Fetching tarball %s", tarball)
+         progress = ProgressPrint("Downloading tarball for %s@%s" %
+                                  (spec["package"], spec["version"]))
+         progress("[0%%] Starting download of %s", tarball)  # initialise progress bar
+         # Create containing directory locally. (exist_ok= is python3-specific.)
+         os.makedirs(os.path.join(self.workdir, store_path), exist_ok=True)
+         meta = self.s3.head_object(Bucket=self.remoteStore, Key=tarball)
+         total_size = int(meta.get("ContentLength", 0))
+         self.s3.download_file(
+           Bucket=self.remoteStore, Key=tarball,
+           Filename=os.path.join(self.workdir, store_path, os.path.basename(tarball)),
+           Callback=lambda num_bytes: progress("[%d/%d] bytes transferred", num_bytes, total_size),
+         )
+         progress.end("done")
+         return
+
+     debug("Remote has no tarballs for %s with hashes %s", spec["package"],
+           ", ".join(spec["remote_hashes"]))
+
+   def fetch_symlinks(self, spec) -> None:
+     from botocore.exceptions import ClientError
+     links_path = resolve_links_path(self.architecture, spec["package"])
+     os.makedirs(os.path.join(self.workdir, links_path), exist_ok=True)
+
+     # Remove existing symlinks: we'll fetch the ones from the remote next.
+     parent = os.path.join(self.workdir, links_path)
+     for fname in os.listdir(parent):
+       path = os.path.join(parent, fname)
+       if os.path.islink(path):
+         os.unlink(path)
+
+     # Fetch symlink manifest and create local symlinks to match.
+     debug("Fetching symlink manifest")
+     n_symlinks = 0
+     try:
+       manifest = self.s3.get_object(Bucket=self.remoteStore, Key=links_path + ".manifest")
+     except ClientError as exc:
+       debug("Could not fetch manifest: %s", exc)
+     else:
+       for line in manifest["Body"].iter_lines():
+         link_name, has_sep, target = line.rstrip(b"\n").partition(b"\t")
+         if not has_sep:
+           debug("Ignoring malformed line in manifest: %r", line)
+           continue
+         if not target.startswith(b"../../"):
+           target = b"../../" + target
+         target = os.fsdecode(target)
+         link_path = os.path.join(self.workdir, links_path, os.fsdecode(link_name))
+         symlink(target, link_path)
+         n_symlinks += 1
+       debug("Got %d entries in manifest", n_symlinks)
+
+     # Create remote symlinks that aren't in the manifest yet.
+     debug("Looking for symlinks not in manifest")
+     for link_key in self._s3_listdir(links_path):
+       link_path = os.path.join(self.workdir, link_key)
+       if os.path.islink(link_path):
+         continue
+       debug("Fetching leftover symlink %s", link_key)
+       resp = self.s3.get_object(Bucket=self.remoteStore, Key=link_key)
+       target = os.fsdecode(resp["Body"].read()).rstrip("\n")
+       if not target.startswith("../../"):
+         target = "../../" + target
+       symlink(target, link_path)
+
+   def upload_symlinks_and_tarball(self, spec) -> None:
+     if not self.writeStore:
+       return
+
+     dist_symlinks = {}
+     for link_dir in ("dist", "dist-direct", "dist-runtime"):
+       link_dir = "TARS/{arch}/{link_dir}/{package}/{package}-{version}-{revision}" \
+         .format(arch=self.architecture, link_dir=link_dir, **spec)
+
+       debug("Comparing dist symlinks against S3 from %s", link_dir)
+
+       symlinks = []
+       for fname in os.listdir(os.path.join(self.workdir, link_dir)):
+         link_key = os.path.join(link_dir, fname)
+         path = os.path.join(self.workdir, link_key)
+         if os.path.islink(path):
+           hash_path = re.sub(r"^(\.\./)*", "", os.readlink(path))
+           symlinks.append((link_key, hash_path))
+
+       # To make sure there are no conflicts, see if anything already exists in
+       # our symlink directory.
+       symlinks_existing = frozenset(self._s3_listdir(link_dir))
+
+       # If all the symlinks we would upload already exist, skip uploading. We
+       # probably just downloaded a prebuilt package earlier, and it already has
+       # symlinks available.
+       if all(link_key in symlinks_existing for link_key, _ in symlinks):
+         debug("All %s symlinks already exist on S3, skipping upload", link_dir)
+         continue
+
+       # Excluding our own symlinks (above), if there is anything in our link_dir
+       # on the remote, something else is uploading symlinks (or already has)!
+       dieOnError(symlinks_existing,
+                  "Conflicts detected in %s on S3; aborting: %s" %
+                  (link_dir, ", ".join(sorted(symlinks_existing))))
+
+       dist_symlinks[link_dir] = symlinks
+
+     tarball = "{package}-{version}-{revision}.{architecture}.tar.gz" \
+       .format(architecture=self.architecture, **spec)
+     tar_path = os.path.join(resolve_store_path(self.architecture, spec["hash"]),
+                             tarball)
+     link_path = os.path.join(resolve_links_path(self.architecture, spec["package"]),
+                              tarball)
+     tar_exists = self._s3_key_exists(tar_path)
+     link_exists = self._s3_key_exists(link_path)
+     if tar_exists and link_exists:
+       debug("%s exists on S3 already, not uploading", tarball)
+       return
+     dieOnError(tar_exists or link_exists,
+                "%s already exists on S3 but %s does not, aborting!" %
+                (tar_path if tar_exists else link_path,
+                 link_path if tar_exists else tar_path))
+
+     debug("Uploading tarball and symlinks for %s %s-%s (%s) to S3",
+           spec["package"], spec["version"], spec["revision"], spec["hash"])
+
+     # Upload the smaller file first, so that any parallel uploads are more
+     # likely to find it and fail.
+     self.s3.put_object(Bucket=self.writeStore, Key=link_path,
+                        Body=os.readlink(os.path.join(self.workdir, link_path))
+                        .lstrip("./").encode("utf-8"))
+
+     # Second, upload dist symlinks. These should be in place before the main
+     # tarball, to avoid races in the publisher.
+     for link_dir, symlinks in dist_symlinks.items():
+       for link_key, hash_path in symlinks:
+         self.s3.put_object(Bucket=self.writeStore,
+                            Key=link_key,
+                            Body=os.fsencode(hash_path),
+                            ACL="public-read",
+                            WebsiteRedirectLocation=hash_path)
+       debug("Uploaded %d dist symlinks to S3 from %s",
+             len(symlinks), link_dir)
+
+     self.s3.upload_file(Bucket=self.writeStore, Key=tar_path,
+                         Filename=os.path.join(self.workdir, tar_path))
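For orientation, here is a minimal usage sketch (not part of the package) of how a caller might select and drive one of these sync backends through remote_from_url. The spec dictionary and the concrete URL, architecture, and hash values below are illustrative assumptions, not values taken from aliBuild itself; only the function and method names come from the file above.

    # Hypothetical driver for the sync backends above; all values are placeholders.
    from alibuild_helpers.sync import remote_from_url

    syncer = remote_from_url(read_url="https://s3.cern.ch/swift/v1/alibuild-repo",  # assumed endpoint
                             write_url="",                # empty: read-only remote
                             architecture="slc8_x86-64",
                             work_dir="sw",
                             insecure=False)

    # Illustrative spec; the real one is assembled elsewhere in alibuild_helpers.
    spec = {"package": "zlib", "version": "v1.2.13", "revision": "1",
            "hash": "0123abcd", "remote_hashes": ["0123abcd"], "local_hashes": []}

    syncer.fetch_symlinks(spec)               # mirror TARS/<arch>/<package>/ symlinks locally
    syncer.fetch_tarball(spec)                # download a matching tarball, if the remote has one
    syncer.upload_symlinks_and_tarball(spec)  # no-op for the read-only HTTP backend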