siliconcompiler 0.34.1__py3-none-any.whl → 0.34.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. siliconcompiler/__init__.py +23 -4
  2. siliconcompiler/__main__.py +1 -7
  3. siliconcompiler/_metadata.py +1 -1
  4. siliconcompiler/apps/_common.py +104 -23
  5. siliconcompiler/apps/sc.py +4 -8
  6. siliconcompiler/apps/sc_dashboard.py +6 -4
  7. siliconcompiler/apps/sc_install.py +10 -6
  8. siliconcompiler/apps/sc_issue.py +7 -5
  9. siliconcompiler/apps/sc_remote.py +1 -1
  10. siliconcompiler/apps/sc_server.py +9 -14
  11. siliconcompiler/apps/sc_show.py +7 -6
  12. siliconcompiler/apps/smake.py +130 -94
  13. siliconcompiler/apps/utils/replay.py +4 -7
  14. siliconcompiler/apps/utils/summarize.py +3 -5
  15. siliconcompiler/asic.py +420 -0
  16. siliconcompiler/checklist.py +25 -2
  17. siliconcompiler/cmdlineschema.py +534 -0
  18. siliconcompiler/constraints/__init__.py +17 -0
  19. siliconcompiler/constraints/asic_component.py +378 -0
  20. siliconcompiler/constraints/asic_floorplan.py +449 -0
  21. siliconcompiler/constraints/asic_pins.py +489 -0
  22. siliconcompiler/constraints/asic_timing.py +517 -0
  23. siliconcompiler/core.py +10 -35
  24. siliconcompiler/data/templates/tcl/manifest.tcl.j2 +8 -0
  25. siliconcompiler/dependencyschema.py +96 -202
  26. siliconcompiler/design.py +327 -241
  27. siliconcompiler/filesetschema.py +250 -0
  28. siliconcompiler/flowgraph.py +298 -106
  29. siliconcompiler/fpga.py +124 -1
  30. siliconcompiler/library.py +331 -0
  31. siliconcompiler/metric.py +327 -92
  32. siliconcompiler/metrics/__init__.py +7 -0
  33. siliconcompiler/metrics/asic.py +245 -0
  34. siliconcompiler/metrics/fpga.py +220 -0
  35. siliconcompiler/package/__init__.py +391 -67
  36. siliconcompiler/package/git.py +92 -16
  37. siliconcompiler/package/github.py +114 -22
  38. siliconcompiler/package/https.py +79 -16
  39. siliconcompiler/packageschema.py +341 -16
  40. siliconcompiler/pathschema.py +255 -0
  41. siliconcompiler/pdk.py +566 -1
  42. siliconcompiler/project.py +1460 -0
  43. siliconcompiler/record.py +38 -1
  44. siliconcompiler/remote/__init__.py +5 -2
  45. siliconcompiler/remote/client.py +11 -6
  46. siliconcompiler/remote/schema.py +5 -23
  47. siliconcompiler/remote/server.py +41 -54
  48. siliconcompiler/report/__init__.py +3 -3
  49. siliconcompiler/report/dashboard/__init__.py +48 -14
  50. siliconcompiler/report/dashboard/cli/__init__.py +99 -21
  51. siliconcompiler/report/dashboard/cli/board.py +364 -179
  52. siliconcompiler/report/dashboard/web/__init__.py +90 -12
  53. siliconcompiler/report/dashboard/web/components/__init__.py +219 -240
  54. siliconcompiler/report/dashboard/web/components/flowgraph.py +49 -26
  55. siliconcompiler/report/dashboard/web/components/graph.py +139 -100
  56. siliconcompiler/report/dashboard/web/layouts/__init__.py +29 -1
  57. siliconcompiler/report/dashboard/web/layouts/_common.py +38 -2
  58. siliconcompiler/report/dashboard/web/layouts/vertical_flowgraph.py +39 -26
  59. siliconcompiler/report/dashboard/web/layouts/vertical_flowgraph_node_tab.py +50 -50
  60. siliconcompiler/report/dashboard/web/layouts/vertical_flowgraph_sac_tabs.py +49 -46
  61. siliconcompiler/report/dashboard/web/state.py +141 -14
  62. siliconcompiler/report/dashboard/web/utils/__init__.py +79 -16
  63. siliconcompiler/report/dashboard/web/utils/file_utils.py +74 -11
  64. siliconcompiler/report/dashboard/web/viewer.py +25 -1
  65. siliconcompiler/report/report.py +5 -2
  66. siliconcompiler/report/summary_image.py +29 -11
  67. siliconcompiler/scheduler/__init__.py +9 -1
  68. siliconcompiler/scheduler/docker.py +81 -4
  69. siliconcompiler/scheduler/run_node.py +37 -20
  70. siliconcompiler/scheduler/scheduler.py +211 -36
  71. siliconcompiler/scheduler/schedulernode.py +394 -60
  72. siliconcompiler/scheduler/send_messages.py +77 -29
  73. siliconcompiler/scheduler/slurm.py +76 -12
  74. siliconcompiler/scheduler/taskscheduler.py +142 -21
  75. siliconcompiler/schema/__init__.py +0 -4
  76. siliconcompiler/schema/baseschema.py +338 -59
  77. siliconcompiler/schema/editableschema.py +14 -6
  78. siliconcompiler/schema/journal.py +28 -17
  79. siliconcompiler/schema/namedschema.py +22 -14
  80. siliconcompiler/schema/parameter.py +89 -28
  81. siliconcompiler/schema/parametertype.py +2 -0
  82. siliconcompiler/schema/parametervalue.py +258 -15
  83. siliconcompiler/schema/safeschema.py +25 -2
  84. siliconcompiler/schema/schema_cfg.py +23 -19
  85. siliconcompiler/schema/utils.py +2 -2
  86. siliconcompiler/schema_obj.py +24 -5
  87. siliconcompiler/tool.py +1131 -265
  88. siliconcompiler/tools/bambu/__init__.py +41 -0
  89. siliconcompiler/tools/builtin/concatenate.py +2 -2
  90. siliconcompiler/tools/builtin/minimum.py +2 -1
  91. siliconcompiler/tools/builtin/mux.py +2 -1
  92. siliconcompiler/tools/builtin/nop.py +2 -1
  93. siliconcompiler/tools/builtin/verify.py +2 -1
  94. siliconcompiler/tools/klayout/__init__.py +95 -0
  95. siliconcompiler/tools/openroad/__init__.py +289 -0
  96. siliconcompiler/tools/openroad/scripts/apr/preamble.tcl +3 -0
  97. siliconcompiler/tools/openroad/scripts/apr/sc_detailed_route.tcl +7 -2
  98. siliconcompiler/tools/openroad/scripts/apr/sc_global_route.tcl +8 -4
  99. siliconcompiler/tools/openroad/scripts/apr/sc_init_floorplan.tcl +9 -5
  100. siliconcompiler/tools/openroad/scripts/common/write_images.tcl +5 -1
  101. siliconcompiler/tools/slang/__init__.py +1 -1
  102. siliconcompiler/tools/slang/elaborate.py +2 -1
  103. siliconcompiler/tools/vivado/scripts/sc_run.tcl +1 -1
  104. siliconcompiler/tools/vivado/scripts/sc_syn_fpga.tcl +8 -1
  105. siliconcompiler/tools/vivado/syn_fpga.py +6 -0
  106. siliconcompiler/tools/vivado/vivado.py +35 -2
  107. siliconcompiler/tools/vpr/__init__.py +150 -0
  108. siliconcompiler/tools/yosys/__init__.py +369 -1
  109. siliconcompiler/tools/yosys/scripts/procs.tcl +0 -1
  110. siliconcompiler/toolscripts/_tools.json +5 -10
  111. siliconcompiler/utils/__init__.py +66 -0
  112. siliconcompiler/utils/flowgraph.py +2 -2
  113. siliconcompiler/utils/issue.py +2 -1
  114. siliconcompiler/utils/logging.py +14 -0
  115. siliconcompiler/utils/multiprocessing.py +256 -0
  116. siliconcompiler/utils/showtools.py +10 -0
  117. {siliconcompiler-0.34.1.dist-info → siliconcompiler-0.34.3.dist-info}/METADATA +6 -6
  118. {siliconcompiler-0.34.1.dist-info → siliconcompiler-0.34.3.dist-info}/RECORD +122 -115
  119. {siliconcompiler-0.34.1.dist-info → siliconcompiler-0.34.3.dist-info}/entry_points.txt +3 -0
  120. siliconcompiler/schema/cmdlineschema.py +0 -250
  121. siliconcompiler/schema/packageschema.py +0 -101
  122. siliconcompiler/toolscripts/rhel8/install-slang.sh +0 -40
  123. siliconcompiler/toolscripts/rhel9/install-slang.sh +0 -40
  124. siliconcompiler/toolscripts/ubuntu20/install-slang.sh +0 -47
  125. siliconcompiler/toolscripts/ubuntu22/install-slang.sh +0 -37
  126. siliconcompiler/toolscripts/ubuntu24/install-slang.sh +0 -37
  127. {siliconcompiler-0.34.1.dist-info → siliconcompiler-0.34.3.dist-info}/WHEEL +0 -0
  128. {siliconcompiler-0.34.1.dist-info → siliconcompiler-0.34.3.dist-info}/licenses/LICENSE +0 -0
  129. {siliconcompiler-0.34.1.dist-info → siliconcompiler-0.34.3.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,11 @@
1
- import shutil
1
+ """
2
+ This module provides a Git-based resolver for SiliconCompiler packages.
2
3
 
4
+ It defines the `GitResolver` class, which is responsible for cloning remote
5
+ Git repositories into a local cache, checking out specific references (like
6
+ branches, tags, or commit hashes), and managing the cached repository's state.
7
+ """
8
+ import shutil
3
9
  import os.path
4
10
 
5
11
  from git import Repo, GitCommandError
@@ -7,6 +13,15 @@ from siliconcompiler.package import RemoteResolver
7
13
 
8
14
 
9
15
  def get_resolver():
16
+ """
17
+ Returns a dictionary mapping Git-related URI schemes to the GitResolver class.
18
+
19
+ This function is used by the resolver system to discover and register this
20
+ resolver for handling git, git+https, git+ssh, and ssh protocols.
21
+
22
+ Returns:
23
+ dict: A dictionary mapping scheme names to the GitResolver class.
24
+ """
10
25
  return {
11
26
  "git": GitResolver,
12
27
  "git+https": GitResolver,
@@ -16,10 +31,32 @@ def get_resolver():
16
31
 
17
32
 
18
33
  class GitResolver(RemoteResolver):
34
+ """
35
+ A resolver for fetching data from remote Git repositories.
36
+
37
+ This class handles cloning repositories, checking out specific references,
38
+ and managing the local cache. It supports authentication via environment
39
+ tokens (e.g., GITHUB_TOKEN) for HTTPS and assumes SSH keys are configured
40
+ for SSH-based URLs.
41
+ """
42
+
19
43
  def __init__(self, name, root, source, reference=None):
44
+ """
45
+ Initializes the GitResolver.
46
+ """
20
47
  super().__init__(name, root, source, reference)
21
48
 
22
49
  def check_cache(self):
50
+ """
51
+ Checks if a valid, clean Git repository exists at the cache path.
52
+
53
+ This method verifies that the path points to a valid Git repository
54
+ and warns the user if the repository is "dirty" (has untracked files
55
+ or uncommitted changes). If the repository is corrupted, it is removed.
56
+
57
+ Returns:
58
+ bool: True if a valid repository exists, False otherwise.
59
+ """
23
60
  if os.path.exists(self.cache_path):
24
61
  try:
25
62
  repo = Repo(self.cache_path)
@@ -33,9 +70,21 @@ class GitResolver(RemoteResolver):
33
70
  return False
34
71
 
35
72
  def __get_token_env(self):
73
+ """
74
+ Searches for a Git authentication token in predefined environment variables.
75
+
76
+ The search order is:
77
+ 1. GITHUB_<PACKAGE_NAME>_TOKEN (e.g., GITHUB_SKYWATER130_TOKEN)
78
+ 2. GITHUB_TOKEN
79
+ 3. GIT_TOKEN
80
+
81
+ Returns:
82
+ str or None: The found token, or None if no token is set.
83
+ """
36
84
  token_name = self.name.upper()
37
- for tok in ('#', '$', '&', '-', '=', '!', '/'):
38
- token_name = token_name.replace(tok, '')
85
+ # Sanitize package name for environment variable compatibility
86
+ for char in ('#', '$', '&', '-', '=', '!', '/'):
87
+ token_name = token_name.replace(char, '')
39
88
 
40
89
  search_env = (
41
90
  f'GITHUB_{token_name}_TOKEN',
@@ -43,40 +92,67 @@ class GitResolver(RemoteResolver):
43
92
  'GIT_TOKEN'
44
93
  )
45
94
 
46
- token = None
47
95
  for env in search_env:
48
- token = os.environ.get(env, None)
49
-
96
+ token = os.environ.get(env)
50
97
  if token:
51
98
  return token
52
99
  return None
53
100
 
54
101
  @property
55
102
  def git_path(self):
56
- if self.urlscheme == "git+ssh":
57
- return f"ssh://{self.urlpath}{self.urlparse.path}"
58
- if self.urlscheme == "ssh":
59
- return self.source
103
+ """
104
+ Constructs the final Git URL for cloning.
105
+
106
+ This method handles different URL schemes and automatically injects
107
+ an authentication token into HTTPS URLs if a token is found in the
108
+ environment.
109
+
110
+ Returns:
111
+ str: The fully-formed URL ready for `git clone`.
112
+ """
113
+ if self.urlscheme == "git+ssh" or self.urlscheme == "ssh":
114
+ # Reconstruct the original SSH URL
115
+ return self.source.replace('git+', '')
116
+
117
+ # For HTTPS, inject token if available
60
118
  url = self.urlparse
61
- if not url.username and self.__get_token_env():
62
- url = url._replace(netloc=f'{self.__get_token_env()}@{url.hostname}')
119
+ token = self.__get_token_env()
120
+ if not url.username and token:
121
+ url = url._replace(netloc=f'{token}@{url.hostname}')
122
+ # Ensure the scheme is HTTPS
63
123
  url = url._replace(scheme='https')
64
124
  return url.geturl()
65
125
 
66
126
  def resolve_remote(self):
127
+ """
128
+ Fetches the remote repository and checks out the specified reference.
129
+
130
+ This method performs the `git clone` operation, followed by `git checkout`
131
+ on the specified branch, tag, or commit. It also initializes all submodules.
132
+
133
+ Raises:
134
+ RuntimeError: If authentication fails.
135
+ GitCommandError: For other Git-related errors.
136
+ """
67
137
  try:
68
138
  path = self.git_path
69
139
  self.logger.info(f'Cloning {self.name} data from {path}')
70
140
  repo = Repo.clone_from(path, self.cache_path, recurse_submodules=True)
141
+
71
142
  self.logger.info(f'Checking out {self.reference}')
72
143
  repo.git.checkout(self.reference)
144
+
145
+ self.logger.info('Updating submodules')
73
146
  for submodule in repo.submodules:
74
147
  submodule.update(recursive=True, init=True, force=True)
75
148
  except GitCommandError as e:
76
- if 'Permission denied' in repr(e):
149
+ if 'Permission denied' in repr(e) or 'could not read Username' in repr(e):
77
150
  if self.urlscheme in ('ssh', 'git+ssh'):
78
- raise RuntimeError('Failed to authenticate. Please setup your git ssh.')
79
- elif self.urlscheme in ('git', 'git+https'):
80
- raise RuntimeError('Failed to authenticate. Please use a token or ssh.')
151
+ raise RuntimeError('Failed to authenticate with Git. Please ensure your SSH '
152
+ 'keys are set up correctly.')
153
+ else: # 'git', 'git+https'
154
+ raise RuntimeError('Failed to authenticate with Git. Please provide a token '
155
+ 'via GITHUB_TOKEN or use an SSH URL.')
81
156
  else:
157
+ # Re-raise other Git errors
82
158
  raise e
@@ -1,3 +1,9 @@
1
+ """
2
+ This module provides a GitHub-based resolver for SiliconCompiler packages.
3
+
4
+ It defines the `GithubResolver` class, which is responsible for downloading
5
+ release assets from public or private GitHub repositories.
6
+ """
1
7
  import os
2
8
 
3
9
  from github import Github, Auth
@@ -7,6 +13,15 @@ from siliconcompiler.package.https import HTTPResolver
7
13
 
8
14
 
9
15
  def get_resolver():
16
+ """
17
+ Returns a dictionary mapping GitHub URI schemes to the GithubResolver class.
18
+
19
+ This function is used by the resolver system to discover and register this
20
+ resolver for handling `github` and `github+private` protocols.
21
+
22
+ Returns:
23
+ dict: A dictionary mapping scheme names to the GithubResolver class.
24
+ """
10
25
  return {
11
26
  "github": GithubResolver,
12
27
  "github+private": GithubResolver
@@ -14,61 +29,129 @@ def get_resolver():
14
29
 
15
30
 
16
31
  class GithubResolver(HTTPResolver):
32
+ """
33
+ A resolver for fetching release assets from GitHub repositories.
34
+
35
+ This class extends the `HTTPResolver` to interact with the GitHub API
36
+ for locating and downloading release assets. It supports both public
37
+ and private repositories.
38
+
39
+ The expected source URI format is:
40
+ `github://<owner>/<repository>/<release_tag>/<asset_name>`
41
+
42
+ For private repositories, the scheme should be `github+private://` and
43
+ a GitHub token must be provided via environment variables.
44
+ """
45
+
17
46
  def __init__(self, name, root, source, reference=None):
47
+ """
48
+ Initializes the GithubResolver.
49
+ """
18
50
  super().__init__(name, root, source, reference)
19
51
 
20
52
  if len(self.gh_path) != 4:
21
53
  raise ValueError(
22
- f"{self.source} is not in the proper form: "
23
- "<owner>/<repository>/<version>/<artifact>")
54
+ f"'{self.source}' is not in the proper form: "
55
+ "github://<owner>/<repository>/<version>/<artifact>")
24
56
 
25
57
  @property
26
58
  def gh_path(self):
59
+ """
60
+ Parses the source URL into its constituent GitHub parts.
61
+
62
+ Returns:
63
+ tuple: A tuple containing (owner, repository, release_tag, asset_name).
64
+ """
27
65
  return self.urlpath, *self.urlparse.path.split("/")[1:]
28
66
 
29
67
  @property
30
68
  def download_url(self):
31
- url_parts = self.gh_path
69
+ """
70
+ Determines the direct download URL for the GitHub release asset.
32
71
 
72
+ This method first attempts to find the asset in a public repository.
73
+ If that fails (e.g., with an `UnknownObjectException`), it then tries
74
+ to find it in a private repository, which requires authentication.
75
+ The `github+private` scheme forces an authenticated private lookup directly.
76
+
77
+ Returns:
78
+ str: The direct URL to download the asset.
79
+ """
80
+ url_parts = self.gh_path
33
81
  repository = "/".join(url_parts[0:2])
34
82
  release = url_parts[2]
35
83
  artifact = url_parts[3]
36
84
 
37
85
  if self.urlscheme == "github+private":
38
- return self.__get_release_url(repository, release, artifact, True)
86
+ return self.__get_release_url(repository, release, artifact, private=True)
39
87
 
40
88
  try:
41
- return self.__get_release_url(repository, release, artifact, False)
89
+ # First, try as a public repository
90
+ return self.__get_release_url(repository, release, artifact, private=False)
42
91
  except UnknownObjectException:
43
- return self.__get_release_url(repository, release, artifact, True)
92
+ # If public access fails, try as a private repository
93
+ self.logger.info("Could not find public release, trying private.")
94
+ return self.__get_release_url(repository, release, artifact, private=True)
44
95
 
45
96
  def __get_release_url(self, repository, release, artifact, private: bool):
97
+ """
98
+ Uses the GitHub API to find the download URL for a specific release asset.
99
+
100
+ Also handles special cases for downloading source code archives (`.zip`
101
+ or `.tar.gz`).
102
+
103
+ Args:
104
+ repository (str): The repository name in 'owner/repo' format.
105
+ release (str): The release tag (e.g., 'v1.0.0').
106
+ artifact (str): The filename of the asset to download.
107
+ private (bool): If True, use an authenticated API client.
108
+
109
+ Returns:
110
+ str: The direct download URL for the asset.
111
+
112
+ Raises:
113
+ ValueError: If the specified release or asset cannot be found.
114
+ """
115
+ # Handle standard source code archive names
46
116
  if artifact == f"{release}.zip":
47
117
  return f"https://github.com/{repository}/archive/refs/tags/{release}.zip"
48
118
  if artifact == f"{release}.tar.gz":
49
119
  return f"https://github.com/{repository}/archive/refs/tags/{release}.tar.gz"
50
120
 
121
+ # Use the GitHub API for other assets
51
122
  repo = self.__gh(private).get_repo(repository)
52
123
 
53
124
  if not release:
54
125
  release = repo.get_latest_release().tag_name
126
+ self.logger.info(f"No release specified, using latest: {release}")
55
127
 
56
- url = None
57
- for repo_release in repo.get_releases():
58
- if repo_release.tag_name == release:
59
- for asset in repo_release.assets:
60
- if asset.name == artifact:
61
- url = asset.url
62
-
63
- if not url:
64
- raise ValueError(f'Unable to find release asset: {repository}/{release}/{artifact}')
128
+ repo_release = repo.get_release(release)
129
+ if repo_release:
130
+ for asset in repo_release.assets:
131
+ if asset.name == artifact:
132
+ return asset.url
65
133
 
66
- return url
134
+ raise ValueError(f'Unable to find release asset: {repository}/{release}/{artifact}')
67
135
 
68
136
  def __get_gh_auth(self):
137
+ """
138
+ Searches for a GitHub authentication token in predefined environment variables.
139
+
140
+ The search order is:
141
+ 1. GITHUB_<PACKAGE_NAME>_TOKEN
142
+ 2. GITHUB_TOKEN
143
+ 3. GIT_TOKEN
144
+
145
+ Returns:
146
+ str: The found token.
147
+
148
+ Raises:
149
+ ValueError: If no token can be found in the environment.
150
+ """
69
151
  token_name = self.name.upper()
70
- for tok in ('#', '$', '&', '-', '=', '!', '/'):
71
- token_name = token_name.replace(tok, '')
152
+ # Sanitize package name for environment variable compatibility
153
+ for char in ('#', '$', '&', '-', '=', '!', '/'):
154
+ token_name = token_name.replace(char, '')
72
155
 
73
156
  search_env = (
74
157
  f'GITHUB_{token_name}_TOKEN',
@@ -78,19 +161,28 @@ class GithubResolver(HTTPResolver):
78
161
 
79
162
  token = None
80
163
  for env in search_env:
81
- token = os.environ.get(env, None)
82
-
164
+ token = os.environ.get(env)
83
165
  if token:
84
166
  break
85
167
 
86
168
  if not token:
87
- raise ValueError('Unable to determine authorization token for GitHub, '
88
- 'please set one of the following environmental variables: '
169
+ raise ValueError('Unable to determine authorization token for GitHub. '
170
+ 'Please set one of the following environment variables: '
89
171
  f'{", ".join(search_env)}')
90
172
 
91
173
  return token
92
174
 
93
175
  def __gh(self, private: bool) -> Github:
176
+ """
177
+ Initializes the PyGithub client.
178
+
179
+ Args:
180
+ private (bool): If True, initializes the client with an authentication
181
+ token. Otherwise, initializes an unauthenticated client.
182
+
183
+ Returns:
184
+ Github: An initialized PyGithub client instance.
185
+ """
94
186
  if private:
95
187
  return Github(auth=Auth.Token(self.__get_gh_auth()))
96
188
  else:
@@ -1,3 +1,9 @@
1
+ """
2
+ This module provides a generic HTTP/HTTPS resolver for SiliconCompiler packages.
3
+
4
+ It defines the `HTTPResolver` class, which is responsible for downloading
5
+ and unpacking archives (TAR or ZIP) from a given URL.
6
+ """
1
7
  import requests
2
8
  import shutil
3
9
  import tarfile
@@ -12,6 +18,15 @@ from siliconcompiler.package import RemoteResolver
12
18
 
13
19
 
14
20
  def get_resolver():
21
+ """
22
+ Returns a dictionary mapping HTTP schemes to the HTTPResolver class.
23
+
24
+ This function is used by the resolver system to discover and register this
25
+ resolver for handling `http` and `https` protocols.
26
+
27
+ Returns:
28
+ dict: A dictionary mapping scheme names to the HTTPResolver class.
29
+ """
15
30
  return {
16
31
  "http": HTTPResolver,
17
32
  "https": HTTPResolver
@@ -19,23 +34,62 @@ def get_resolver():
19
34
 
20
35
 
21
36
  class HTTPResolver(RemoteResolver):
37
+ """
38
+ A resolver for fetching and unpacking data from HTTP/HTTPS URLs.
39
+
40
+ This class downloads a file from a URL, automatically determines if it's a
41
+ gzipped tarball or a zip file, and extracts its contents into the local
42
+ cache. It also includes special handling to flatten the directory structure
43
+ of archives downloaded from GitHub.
44
+ """
45
+
22
46
  def check_cache(self):
47
+ """
48
+ Checks if the data has already been cached.
49
+
50
+ For this resolver, the cache is considered valid if the target cache
51
+ directory simply exists.
52
+
53
+ Returns:
54
+ bool: True if the cache path exists, False otherwise.
55
+ """
23
56
  return os.path.exists(self.cache_path)
24
57
 
25
58
  @property
26
59
  def download_url(self):
60
+ """
61
+ Constructs the final download URL.
62
+
63
+ If the source URL ends with a '/', it appends the reference
64
+ (e.g., version) and a `.tar.gz` extension.
65
+
66
+ Returns:
67
+ str: The fully-formed URL to download from.
68
+ """
27
69
  data_url = self.source
28
70
  if data_url.endswith('/'):
29
71
  data_url = f"{data_url}{self.reference}.tar.gz"
30
72
  return data_url
31
73
 
32
74
  def resolve_remote(self):
75
+ """
76
+ Fetches the remote archive, unpacks it, and stores it in the cache.
77
+
78
+ This method downloads the file, detects the archive type (tar.gz or zip),
79
+ and extracts it. It includes special logic to handle the extra top-level
80
+ directory that GitHub often includes in its source archives.
81
+
82
+ Raises:
83
+ FileNotFoundError: If the download fails (e.g., 404 error).
84
+ """
33
85
  data_url = self.download_url
34
86
 
35
87
  headers = {}
88
+ # Use GIT_TOKEN for authentication if available, primarily for GitHub raw downloads.
36
89
  auth_token = os.environ.get('GIT_TOKEN', self.urlparse.username)
37
90
  if auth_token:
38
91
  headers['Authorization'] = f'token {auth_token}'
92
+ # GitHub release assets require a specific Accept header.
39
93
  if "github" in data_url:
40
94
  headers['Accept'] = 'application/octet-stream'
41
95
 
@@ -43,25 +97,33 @@ class HTTPResolver(RemoteResolver):
43
97
 
44
98
  response = requests.get(data_url, stream=True, headers=headers)
45
99
  if not response.ok:
46
- raise FileNotFoundError(f'Failed to download {self.name} data source.')
100
+ raise FileNotFoundError(f'Failed to download {self.name} data source from {data_url}. '
101
+ f'Status code: {response.status_code}')
47
102
 
48
103
  os.makedirs(self.cache_path, exist_ok=True)
49
104
 
105
+ # Download content into an in-memory buffer
50
106
  fileobj = BytesIO(response.content)
107
+
108
+ # Attempt to extract as a tarball, fall back to zip
51
109
  try:
52
- with tarfile.open(fileobj=fileobj, mode='r|gz') as tar_ref:
110
+ with tarfile.open(fileobj=fileobj, mode='r:gz') as tar_ref:
53
111
  tar_ref.extractall(path=self.cache_path)
54
112
  except tarfile.ReadError:
55
113
  fileobj.seek(0)
56
- # Try as zip
57
- with zipfile.ZipFile(fileobj) as zip_ref:
58
- zip_ref.extractall(path=self.cache_path)
59
-
114
+ try:
115
+ with zipfile.ZipFile(fileobj) as zip_ref:
116
+ zip_ref.extractall(path=self.cache_path)
117
+ except zipfile.BadZipFile:
118
+ raise TypeError(f"Could not extract file from {data_url}. "
119
+ "File is not a valid tar.gz or zip archive.")
120
+
121
+ # --- GitHub-specific directory flattening ---
122
+ # GitHub archives often have a single top-level directory like 'repo-v1.0'.
123
+ # This logic moves the contents of that directory up one level for a cleaner cache.
60
124
  if 'github' in data_url and len(os.listdir(self.cache_path)) == 1:
61
- # Github inserts one folder at the highest level of the tar file
62
- # this compensates for this behavior
125
+ # Heuristically determine the name of the top-level directory
63
126
  gh_url = urlparse(data_url)
64
-
65
127
  repo = gh_url.path.split('/')[2]
66
128
 
67
129
  gh_ref = gh_url.path.split('/')[-1]
@@ -78,10 +140,11 @@ class HTTPResolver(RemoteResolver):
78
140
  gh_ref = gh_ref[1:]
79
141
 
80
142
  github_folder = f"{repo}-{gh_ref}"
81
-
82
- if github_folder in os.listdir(self.cache_path):
83
- # This moves all files one level up
84
- git_path = os.path.join(self.cache_path, github_folder)
85
- for data_file in os.listdir(git_path):
86
- shutil.move(os.path.join(git_path, data_file), self.cache_path)
87
- os.removedirs(git_path)
143
+ potential_path = os.path.join(self.cache_path, github_folder)
144
+
145
+ if os.path.isdir(potential_path):
146
+ # Move all files from the subdirectory to the cache root
147
+ for data_file in os.listdir(potential_path):
148
+ shutil.move(os.path.join(potential_path, data_file), self.cache_path)
149
+ # Clean up the now-empty directory
150
+ os.rmdir(potential_path)