dayhoff-tools 1.4.19__py3-none-any.whl → 1.4.21__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of dayhoff-tools might be problematic.
- dayhoff_tools/cli/main.py +9 -5
- dayhoff_tools/cli/utility_commands.py +0 -261
- dayhoff_tools/warehouse.py +224 -21
- {dayhoff_tools-1.4.19.dist-info → dayhoff_tools-1.4.21.dist-info}/METADATA +1 -1
- {dayhoff_tools-1.4.19.dist-info → dayhoff_tools-1.4.21.dist-info}/RECORD +7 -7
- {dayhoff_tools-1.4.19.dist-info → dayhoff_tools-1.4.21.dist-info}/WHEEL +0 -0
- {dayhoff_tools-1.4.19.dist-info → dayhoff_tools-1.4.21.dist-info}/entry_points.txt +0 -0
dayhoff_tools/cli/main.py
CHANGED
@@ -3,18 +3,23 @@
 import sys
 
 import typer
+
 from dayhoff_tools.cli.cloud_commands import aws_app, gcp_app
 from dayhoff_tools.cli.engine_commands import engine_app, studio_app
 from dayhoff_tools.cli.utility_commands import (
-    add_to_warehouse_typer,
     build_and_upload_wheel,
     delete_local_branch,
-    get_ancestry,
-    import_from_warehouse_typer,
     install_dependencies,
     test_github_actions_locally,
     update_dependencies,
 )
+from dayhoff_tools.warehouse import (
+    _warn_if_gcp_default_sa,
+    add_to_warehouse_typer,
+    get_ancestry,
+    get_from_warehouse_typer,
+    import_from_warehouse_typer,
+)
 
 app = typer.Typer()
 
@@ -33,6 +38,7 @@ app.command("update", help="Update dayhoff-tools (or all deps) and sync environm
 app.command("gha")(test_github_actions_locally)
 app.command("wadd")(add_to_warehouse_typer)
 app.command("wancestry")(get_ancestry)
+app.command("wget")(get_from_warehouse_typer)
 app.command("wimport")(import_from_warehouse_typer)
 
 # Cloud commands
@@ -114,8 +120,6 @@ def deploy_command(
         config = yaml.safe_load(f)
 
         if config.get("cloud") == "gcp":
-            from dayhoff_tools.cli.utility_commands import _warn_if_gcp_default_sa
-
             _warn_if_gcp_default_sa(force_prompt=True)
     except Exception as e:
         # Don't block deployment if config can't be read or other errors occur
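Note, not part of the upstream diff: the main.py hunks above stop importing the warehouse helpers from dayhoff_tools.cli.utility_commands and pull them from dayhoff_tools.warehouse instead, alongside the new get_from_warehouse_typer command. A minimal sketch of what that relocation looks like from calling code, assuming only the module and function names shown in this diff (the file path in the example is hypothetical):

    # Sketch based on the import changes above; not part of the package itself.
    # 1.4.19 (old location):
    #   from dayhoff_tools.cli.utility_commands import _warn_if_gcp_default_sa, get_ancestry
    # 1.4.21 (new location):
    from dayhoff_tools.warehouse import _warn_if_gcp_default_sa, get_ancestry

    # Warn (and optionally prompt) when the active GCP principal is the default
    # VM service account or the cached credentials look stale.
    _warn_if_gcp_default_sa(force_prompt=True)

    # Print an ancestry entry for a .dvc file produced by `dvc import`.
    get_ancestry("data/imports/example.dvc")  # hypothetical path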
dayhoff_tools/cli/utility_commands.py
CHANGED

@@ -14,64 +14,6 @@ import yaml
 # Import cloud helper lazily inside functions to avoid heavy deps at module load
 
 
-def _warn_if_gcp_default_sa(force_prompt: bool = False) -> None:
-    """Warn the user when the active gcloud principal is the default VM service
-    account. See detailed docstring later in file (duplicate for early
-    availability)."""
-
-    from dayhoff_tools.cli import cloud_commands as _cc
-
-    try:
-        impersonation = _cc._get_current_gcp_impersonation()
-        user = _cc._get_current_gcp_user()
-        active = impersonation if impersonation != "None" else user
-        short = _cc._get_short_name(active)
-
-        # Determine if user creds are valid
-        auth_valid = _cc._is_gcp_user_authenticated()
-    except Exception:
-        # If any helper errors out, don't block execution
-        return
-
-    problem_type = None  # "default_sa" | "stale"
-    if short == "default VM service account":
-        problem_type = "default_sa"
-    elif not auth_valid:
-        problem_type = "stale"
-
-    if problem_type is None:
-        return  # Everything looks good
-
-    YELLOW = getattr(_cc, "YELLOW", "\033[0;33m")
-    BLUE = getattr(_cc, "BLUE", "\033[0;36m")
-    RED = getattr(_cc, "RED", "\033[0;31m")
-    NC = getattr(_cc, "NC", "\033[0m")
-
-    if problem_type == "default_sa":
-        msg_body = (
-            f"You are currently authenticated as the *default VM service account*.\n"
-            f" This will block gsutil/DVC access to private buckets (e.g. warehouse)."
-        )
-    else:  # stale creds
-        msg_body = (
-            f"Your GCP credentials appear to be *expired/stale*.\n"
-            f" Re-authenticate to refresh the access token."
-        )
-
-    print(
-        f"{YELLOW}⚠ {msg_body}{NC}\n"
-        f"{YELLOW} Run {BLUE}dh gcp login{YELLOW} or {BLUE}dh gcp use-devcon{YELLOW} before retrying.{NC}",
-        file=sys.stderr,
-    )
-
-    if force_prompt and sys.stdin.isatty() and sys.stdout.isatty():
-        import questionary
-
-        if not questionary.confirm("Proceed anyway?", default=False).ask():
-            print(f"{RED}Aborted due to unsafe GCP credentials.{NC}", file=sys.stderr)
-            raise SystemExit(1)
-
-
 def test_github_actions_locally():
     """Run the script test_pytest_in_github_actions_container.sh.sh."""
     script_path = ".devcontainer/scripts/test_pytest_in_github_actions_container.sh"
@@ -83,163 +25,6 @@ def test_github_actions_locally():
         print(f"Error occurred while running the script: {e}")
 
 
-def get_ancestry(filepath: str) -> None:
-    """Take a .dvc file created from import, and generate an ancestry entry
-    that can be manually copied into other .dvc files."""
-    with open(filepath, "r") as file:
-        assert filepath.endswith(".dvc"), "ERROR: Not a .dvc file"
-        ancestor_content = yaml.safe_load(file)
-
-        error_msg = "Unexpected file structure. Are you sure this is a .dvc file generated from `dvc import`?"
-        assert "deps" in ancestor_content, error_msg
-
-        error_msg = "Please only reference data imported from main branches."
-        assert "rev" not in ancestor_content["deps"][0]["repo"], error_msg
-
-        ancestor_info = {
-            "name": os.path.basename(ancestor_content["outs"][0]["path"]),
-            "file_md5_hash": ancestor_content["outs"][0]["md5"],
-            "size": ancestor_content["outs"][0]["size"],
-            "repo_url": ancestor_content["deps"][0]["repo"]["url"],
-            "repo_path": ancestor_content["deps"][0]["path"],
-            "commit_hash": ancestor_content["deps"][0]["repo"]["rev_lock"],
-        }
-        print()
-        yaml.safe_dump(
-            [ancestor_info], sys.stdout, default_flow_style=False, sort_keys=False
-        )
-
-
-def import_from_warehouse_typer() -> None:
-    """Import a file from warehouse.
-
-    Emits an early warning if the active GCP credentials are the *default VM
-    service account* because this will prevent DVC/gsutil from accessing the
-    warehouse bucket. The user can abort the command when running
-    interactively.
-    """
-
-    # Early-exit guard for wrong GCP credentials
-    _warn_if_gcp_default_sa(force_prompt=True)
-
-    # Import only when the function is called
-    import questionary
-    from dayhoff_tools.warehouse import import_from_warehouse
-
-    # Check if we're in Lightning Studio
-    if _is_lightning_studio():
-        # Lightning Studio behavior
-        _check_dvc_initialized()
-        _configure_studio_cache()
-
-        # Ensure we're running from repo root using REPO_ROOT env var
-        repo_root = os.environ.get("REPO_ROOT")
-        if not repo_root:
-            raise Exception(
-                "REPO_ROOT environment variable not set. Make sure you're in a repo with an active UV virtual environment."
-            )
-
-        current_dir = os.getcwd()
-        if current_dir != repo_root:
-            raise Exception(
-                f"This command must be run from the repo root. "
-                f"Current directory: {current_dir}, Expected: {repo_root}"
-            )
-    else:
-        # Original devcontainer behavior - ensure execution from root
-        cwd = Path(os.getcwd())
-        if cwd.parent.name != "workspaces" or str(cwd.parent.parent) != cwd.root:
-            raise Exception(
-                f"This command must be executed from the repo's root directory (/workspaces/reponame). Current directory: {cwd}"
-            )
-
-    # Use questionary for prompts instead of typer
-    warehouse_path = questionary.text("Warehouse path:").ask()
-
-    # Provide multiple-choice options for output folder
-    output_folder_choice = questionary.select(
-        "Output folder:",
-        choices=["data/imports", "same_as_warehouse", "Custom path..."],
-    ).ask()
-
-    # If custom path is selected, ask for the path
-    if output_folder_choice == "Custom path...":
-        output_folder = questionary.text("Enter custom output folder:").ask()
-    else:
-        output_folder = output_folder_choice
-
-    branch = questionary.text("Branch (default: main):", default="main").ask()
-
-    final_path = import_from_warehouse(
-        warehouse_path=warehouse_path,
-        output_folder=output_folder,
-        branch=branch,
-    )
-
-
-def add_to_warehouse_typer() -> None:
-    """Add a new data file to warehouse and enrich its generated .dvc file.
-
-    As with *dh wimport*, this command fails when the user is logged in with
-    the default VM service account. A guard therefore warns the user first
-    and allows them to abort interactively.
-    """
-
-    # Early-exit guard for wrong GCP credentials
-    _warn_if_gcp_default_sa(force_prompt=True)
-
-    # Import only when the function is called
-    import questionary
-    from dayhoff_tools.warehouse import add_to_warehouse
-
-    # Check if we're in Lightning Studio
-    if _is_lightning_studio():
-        # Lightning Studio behavior
-        _check_dvc_initialized()
-        _configure_studio_cache()
-
-        # Ensure we're running from repo root using REPO_ROOT env var
-        repo_root = os.environ.get("REPO_ROOT")
-        if not repo_root:
-            raise Exception(
-                "REPO_ROOT environment variable not set. Make sure you're in a repo with an active UV virtual environment."
-            )
-
-        current_dir = os.getcwd()
-        if current_dir != repo_root:
-            raise Exception(
-                f"This command must be run from the repo root. "
-                f"Current directory: {current_dir}, Expected: {repo_root}"
-            )
-    else:
-        # Original devcontainer behavior - ensure execution from root
-        cwd = Path(os.getcwd())
-        if cwd.parent.name != "workspaces" or str(cwd.parent.parent) != cwd.root:
-            raise Exception(
-                f"This command must be executed from the repo's root directory (/workspaces/reponame). Current directory: {cwd}"
-            )
-
-    # Prompt for the data file path
-    warehouse_path = questionary.text("Data file to be registered:").ask()
-
-    # Prompt for the ancestor .dvc file paths
-    ancestor_dvc_paths = []
-    print("\nEnter the path of all ancestor .dvc files (or hit Enter to finish).")
-    print("These files must be generated by `dvc import` or `dh wimport`.")
-    while True:
-        ancestor_path = questionary.text("Ancestor path: ").ask()
-        if ancestor_path:
-            ancestor_dvc_paths.append(ancestor_path)
-        else:
-            print()
-            break
-
-    dvc_path = add_to_warehouse(
-        warehouse_path=warehouse_path,
-        ancestor_dvc_paths=ancestor_dvc_paths,
-    )
-
-
 def delete_local_branch(branch_name: str, folder_path: str):
     """Delete a local Git branch after fetching with pruning.
 
@@ -684,49 +469,3 @@ def update_dependencies(
     except Exception as e:
         print(f"An unexpected error occurred: {e}")
         sys.exit(1)
-
-
-# ----------------------
-# Lightning Studio Commands
-# ----------------------
-
-
-def _is_lightning_studio() -> bool:
-    """Check if we're running in Lightning Studio environment."""
-    return os.path.exists("/teamspace/studios/this_studio")
-
-
-def _check_dvc_initialized() -> None:
-    """Check if DVC is initialized in the current directory."""
-    if not os.path.exists(".dvc"):
-        raise Exception(
-            "DVC is not initialized in this repository. Run 'dvc init' first."
-        )
-
-
-def _configure_studio_cache() -> None:
-    """Configure DVC to use studio-level cache if not already configured."""
-    studio_cache_dir = "/teamspace/studios/this_studio/.dvc_cache"
-
-    # Create cache directory if it doesn't exist
-    os.makedirs(studio_cache_dir, exist_ok=True)
-
-    # Check current cache configuration
-    try:
-        result = subprocess.run(
-            ["dvc", "cache", "dir"], capture_output=True, text=True, check=True
-        )
-        current_cache = result.stdout.strip()
-
-        if current_cache != studio_cache_dir:
-            print(
-                f"Configuring DVC cache to use studio-level directory: {studio_cache_dir}"
-            )
-            # Use --local flag to save in .dvc/config.local (git-ignored)
-            subprocess.run(
-                ["dvc", "cache", "dir", studio_cache_dir, "--local"], check=True
-            )
-            print("✅ DVC cache configured for Lightning Studio (in .dvc/config.local)")
-    except subprocess.CalledProcessError:
-        # If cache dir command fails, try to set it anyway
-        subprocess.run(["dvc", "cache", "dir", studio_cache_dir, "--local"], check=True)
dayhoff_tools/warehouse.py
CHANGED
@@ -1,5 +1,6 @@
 import os
 import subprocess
+import sys
 from datetime import datetime
 from io import StringIO
 from pathlib import Path
@@ -7,6 +8,66 @@ from zoneinfo import ZoneInfo
 
 from ruamel.yaml import YAML
 
+# Import cloud helper lazily inside functions to avoid heavy deps at module load
+
+
+def _warn_if_gcp_default_sa(force_prompt: bool = False) -> None:
+    """Warn the user when the active gcloud principal is the default VM service
+    account. See detailed docstring later in file (duplicate for early
+    availability)."""
+
+    from dayhoff_tools.cli import cloud_commands as _cc
+
+    try:
+        impersonation = _cc._get_current_gcp_impersonation()
+        user = _cc._get_current_gcp_user()
+        active = impersonation if impersonation != "None" else user
+        short = _cc._get_short_name(active)
+
+        # Determine if user creds are valid
+        auth_valid = _cc._is_gcp_user_authenticated()
+    except Exception:
+        # If any helper errors out, don't block execution
+        return
+
+    problem_type = None  # "default_sa" | "stale"
+    if short == "default VM service account":
+        problem_type = "default_sa"
+    elif not auth_valid:
+        problem_type = "stale"
+
+    if problem_type is None:
+        return  # Everything looks good
+
+    YELLOW = getattr(_cc, "YELLOW", "\033[0;33m")
+    BLUE = getattr(_cc, "BLUE", "\033[0;36m")
+    RED = getattr(_cc, "RED", "\033[0;31m")
+    NC = getattr(_cc, "NC", "\033[0m")
+
+    if problem_type == "default_sa":
+        msg_body = (
+            f"You are currently authenticated as the *default VM service account*.\n"
+            f" This will block gsutil/DVC access to private buckets (e.g. warehouse)."
+        )
+    else:  # stale creds
+        msg_body = (
+            f"Your GCP credentials appear to be *expired/stale*.\n"
+            f" Re-authenticate to refresh the access token."
+        )
+
+    print(
+        f"{YELLOW}⚠ {msg_body}{NC}\n"
+        f"{YELLOW} Run {BLUE}dh gcp login{YELLOW} or {BLUE}dh gcp use-devcon{YELLOW} before retrying.{NC}",
+        file=sys.stderr,
+    )
+
+    if force_prompt and sys.stdin.isatty() and sys.stdout.isatty():
+        import questionary
+
+        if not questionary.confirm("Proceed anyway?", default=False).ask():
+            print(f"{RED}Aborted due to unsafe GCP credentials.{NC}", file=sys.stderr)
+            raise SystemExit(1)
+
 
 def human_readable_size(size_bytes):
     """Convert size in bytes to a human-readable format"""
@@ -282,27 +343,6 @@ def add_to_warehouse(
     # Change the working directory to the warehouse folder
     os.chdir("warehouse")
 
-    # Configure DVC cache for Lightning Studio if needed
-    if os.path.exists("/teamspace/studios/this_studio"):
-        studio_cache_dir = "/teamspace/studios/this_studio/.dvc_cache"
-        os.makedirs(studio_cache_dir, exist_ok=True)
-        try:
-            result = subprocess.run(
-                ["dvc", "cache", "dir"], capture_output=True, text=True, check=True
-            )
-            current_cache = result.stdout.strip()
-            if current_cache != studio_cache_dir:
-                subprocess.run(
-                    ["dvc", "cache", "dir", studio_cache_dir, "--local"], check=True
-                )
-                print(
-                    f"✅ Configured warehouse DVC cache for Lightning Studio: {studio_cache_dir}"
-                )
-        except subprocess.CalledProcessError:
-            subprocess.run(
-                ["dvc", "cache", "dir", studio_cache_dir, "--local"], check=True
-            )
-
     # Add and push the data file
     subprocess.run(["dvc", "add", warehouse_path], check=True)
 
@@ -437,3 +477,166 @@ def get_from_warehouse(
     subprocess.run(command, check=True)
 
     return final_path
+
+
+def get_ancestry(filepath: str) -> None:
+    """Take a .dvc file created from import, and generate an ancestry entry
+    that can be manually copied into other .dvc files."""
+    with open(filepath, "r") as file:
+        assert filepath.endswith(".dvc"), "ERROR: Not a .dvc file"
+        import yaml
+
+        ancestor_content = yaml.safe_load(file)
+
+        error_msg = "Unexpected file structure. Are you sure this is a .dvc file generated from `dvc import`?"
+        assert "deps" in ancestor_content, error_msg
+
+        error_msg = "Please only reference data imported from main branches."
+        assert "rev" not in ancestor_content["deps"][0]["repo"], error_msg
+
+        ancestor_info = {
+            "name": os.path.basename(ancestor_content["outs"][0]["path"]),
+            "file_md5_hash": ancestor_content["outs"][0]["md5"],
+            "size": ancestor_content["outs"][0]["size"],
+            "repo_url": ancestor_content["deps"][0]["repo"]["url"],
+            "repo_path": ancestor_content["deps"][0]["path"],
+            "commit_hash": ancestor_content["deps"][0]["repo"]["rev_lock"],
+        }
+        print()
+        yaml.safe_dump(
+            [ancestor_info], sys.stdout, default_flow_style=False, sort_keys=False
+        )
+
+
+def import_from_warehouse_typer() -> None:
+    """Import a file from warehouse.
+
+    Emits an early warning if the active GCP credentials are the *default VM
+    service account* because this will prevent DVC/gsutil from accessing the
+    warehouse bucket. The user can abort the command when running
+    interactively.
+    """
+
+    # Early-exit guard for wrong GCP credentials
+    _warn_if_gcp_default_sa(force_prompt=True)
+
+    # Import only when the function is called
+    import questionary
+
+    # Ensure execution from root directory
+    cwd = Path(os.getcwd())
+    if cwd.parent.name != "workspaces" or str(cwd.parent.parent) != cwd.root:
+        raise Exception(
+            f"This command must be executed from the repo's root directory (/workspaces/reponame). Current directory: {cwd}"
+        )
+
+    # Use questionary for prompts instead of typer
+    warehouse_path = questionary.text("Warehouse path:").ask()
+
+    # Provide multiple-choice options for output folder
+    output_folder_choice = questionary.select(
+        "Output folder:",
+        choices=["data/imports", "same_as_warehouse", "Custom path..."],
+    ).ask()
+
+    # If custom path is selected, ask for the path
+    if output_folder_choice == "Custom path...":
+        output_folder = questionary.text("Enter custom output folder:").ask()
+    else:
+        output_folder = output_folder_choice
+
+    branch = questionary.text("Branch (default: main):", default="main").ask()
+
+    final_path = import_from_warehouse(
+        warehouse_path=warehouse_path,
+        output_folder=output_folder,
+        branch=branch,
+    )
+
+
+def get_from_warehouse_typer() -> None:
+    """Get a file from warehouse using `dvc get`.
+
+    Emits an early warning if the active GCP credentials are the *default VM
+    service account* because this will prevent DVC/gsutil from accessing the
+    warehouse bucket. The user can abort the command when running
+    interactively.
+    """
+
+    # Early-exit guard for wrong GCP credentials
+    _warn_if_gcp_default_sa(force_prompt=True)
+
+    # Import only when the function is called
+    import questionary
+
+    # Ensure execution from root directory
+    cwd = Path(os.getcwd())
+    if cwd.parent.name != "workspaces" or str(cwd.parent.parent) != cwd.root:
+        raise Exception(
+            f"This command must be executed from the repo's root directory (/workspaces/reponame). Current directory: {cwd}"
+        )
+
+    # Use questionary for prompts instead of typer
+    warehouse_path = questionary.text("Warehouse path:").ask()
+
+    # Provide multiple-choice options for output folder
+    output_folder_choice = questionary.select(
+        "Output folder:",
+        choices=["data/imports", "same_as_warehouse", "Custom path..."],
+    ).ask()
+
+    # If custom path is selected, ask for the path
+    if output_folder_choice == "Custom path...":
+        output_folder = questionary.text("Enter custom output folder:").ask()
+    else:
+        output_folder = output_folder_choice
+
+    branch = questionary.text("Branch (default: main):", default="main").ask()
+
+    final_path = get_from_warehouse(
+        warehouse_path=warehouse_path,
+        output_folder=output_folder,
+        branch=branch,
+    )
+
+
+def add_to_warehouse_typer() -> None:
+    """Add a new data file to warehouse and enrich its generated .dvc file.
+
+    As with *dh wimport*, this command fails when the user is logged in with
+    the default VM service account. A guard therefore warns the user first
+    and allows them to abort interactively.
+    """
+
+    # Early-exit guard for wrong GCP credentials
+    _warn_if_gcp_default_sa(force_prompt=True)
+
+    # Import only when the function is called
+    import questionary
+
+    # Ensure execution from root directory
+    cwd = Path(os.getcwd())
+    if cwd.parent.name != "workspaces" or str(cwd.parent.parent) != cwd.root:
+        raise Exception(
+            f"This command must be executed from the repo's root directory (/workspaces/reponame). Current directory: {cwd}"
+        )
+
+    # Prompt for the data file path
+    warehouse_path = questionary.text("Data file to be registered:").ask()
+
+    # Prompt for the ancestor .dvc file paths
+    ancestor_dvc_paths = []
+    print("\nEnter the path of all ancestor .dvc files (or hit Enter to finish).")
+    print("These files must be generated by `dvc import` or `dh wimport`.")
+    while True:
+        ancestor_path = questionary.text("Ancestor path: ").ask()
+        if ancestor_path:
+            ancestor_dvc_paths.append(ancestor_path)
+        else:
+            print()
+            break
+
+    dvc_path = add_to_warehouse(
+        warehouse_path=warehouse_path,
+        ancestor_dvc_paths=ancestor_dvc_paths,
+    )
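For orientation, and again not part of the upstream diff: the new get_from_warehouse_typer added above is an interactive wrapper around get_from_warehouse and is registered in main.py as the wget command. A minimal sketch of that wiring, assuming the installed CLI entry point is dh, as the in-code messages (dh wimport, dh gcp login) suggest:

    # Sketch of the registration shown in the main.py hunks; the standalone app
    # here is illustrative, not the package's real CLI module.
    import typer

    from dayhoff_tools.warehouse import get_from_warehouse_typer

    app = typer.Typer()
    app.command("wget")(get_from_warehouse_typer)

    if __name__ == "__main__":
        # Roughly what `dh wget` would do in an installed environment: prompt for
        # the warehouse path, output folder, and branch, then run `dvc get`.
        app()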
{dayhoff_tools-1.4.19.dist-info → dayhoff_tools-1.4.21.dist-info}/RECORD
CHANGED

@@ -4,9 +4,9 @@ dayhoff_tools/chemistry/utils.py,sha256=jt-7JgF-GeeVC421acX-bobKbLU_X94KNOW24p_P
 dayhoff_tools/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dayhoff_tools/cli/cloud_commands.py,sha256=33qcWLmq-FwEXMdL3F0OHm-5Stlh2r65CldyEZgQ1no,40904
 dayhoff_tools/cli/engine_commands.py,sha256=jXaUXbdpZKssFbMdwT6brvTuBmLkYdIHcjt3L5ZP6Ys,86313
-dayhoff_tools/cli/main.py,sha256=
+dayhoff_tools/cli/main.py,sha256=tRN7WCBHg6uyNp6rA54pKTCoVmBntta2i0Yas3bUpZ4,4853
 dayhoff_tools/cli/swarm_commands.py,sha256=5EyKj8yietvT5lfoz8Zx0iQvVaNgc3SJX1z2zQR6o6M,5614
-dayhoff_tools/cli/utility_commands.py,sha256=
+dayhoff_tools/cli/utility_commands.py,sha256=FRZTPrjsG_qmIIqoNxd1Q1vVkS_5w8aY33IrVYVNCLg,18131
 dayhoff_tools/deployment/base.py,sha256=mYp560l6hSDFtyY2H42VoM8k9VUzfwuiyh9Knqpgc28,17441
 dayhoff_tools/deployment/deploy_aws.py,sha256=GvZpE2YIFA5Dl9rkAljFjtUypmPDNbWgw8NicHYTP24,18265
 dayhoff_tools/deployment/deploy_gcp.py,sha256=xgaOVsUDmP6wSEMYNkm1yRNcVskfdz80qJtCulkBIAM,8860
@@ -26,8 +26,8 @@ dayhoff_tools/intake/structure.py,sha256=ufN3gAodQxhnt7psK1VTQeu9rKERmo_PhoxIbB4
 dayhoff_tools/intake/uniprot.py,sha256=BZYJQF63OtPcBBnQ7_P9gulxzJtqyorgyuDiPeOJqE4,16456
 dayhoff_tools/logs.py,sha256=DKdeP0k0kliRcilwvX0mUB2eipO5BdWUeHwh-VnsICs,838
 dayhoff_tools/sqlite.py,sha256=jV55ikF8VpTfeQqqlHSbY8OgfyfHj8zgHNpZjBLos_E,18672
-dayhoff_tools/warehouse.py,sha256=
-dayhoff_tools-1.4.
-dayhoff_tools-1.4.
-dayhoff_tools-1.4.
-dayhoff_tools-1.4.
+dayhoff_tools/warehouse.py,sha256=6HxM8JO8NlYXxXApjptkNfnGrRLMScVeQI_xdFu4His,23062
+dayhoff_tools-1.4.21.dist-info/METADATA,sha256=1ShjNFbdjRyyuam3mjiqNQ8WvOUC23nn9E0bgyBMLk4,2825
+dayhoff_tools-1.4.21.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+dayhoff_tools-1.4.21.dist-info/entry_points.txt,sha256=iAf4jteNqW3cJm6CO6czLxjW3vxYKsyGLZ8WGmxamSc,49
+dayhoff_tools-1.4.21.dist-info/RECORD,,

{dayhoff_tools-1.4.19.dist-info → dayhoff_tools-1.4.21.dist-info}/WHEEL
File without changes

{dayhoff_tools-1.4.19.dist-info → dayhoff_tools-1.4.21.dist-info}/entry_points.txt
File without changes