aind-data-transfer-service 1.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aind-data-transfer-service might be problematic. Click here for more details.
- aind_data_transfer_service/__init__.py +9 -0
- aind_data_transfer_service/configs/__init__.py +1 -0
- aind_data_transfer_service/configs/csv_handler.py +59 -0
- aind_data_transfer_service/configs/job_configs.py +545 -0
- aind_data_transfer_service/configs/job_upload_template.py +153 -0
- aind_data_transfer_service/hpc/__init__.py +1 -0
- aind_data_transfer_service/hpc/client.py +151 -0
- aind_data_transfer_service/hpc/models.py +492 -0
- aind_data_transfer_service/log_handler.py +58 -0
- aind_data_transfer_service/models/__init__.py +1 -0
- aind_data_transfer_service/models/core.py +300 -0
- aind_data_transfer_service/models/internal.py +277 -0
- aind_data_transfer_service/server.py +1125 -0
- aind_data_transfer_service/templates/index.html +245 -0
- aind_data_transfer_service/templates/job_params.html +194 -0
- aind_data_transfer_service/templates/job_status.html +323 -0
- aind_data_transfer_service/templates/job_tasks_table.html +146 -0
- aind_data_transfer_service/templates/task_logs.html +31 -0
- aind_data_transfer_service-1.12.0.dist-info/METADATA +49 -0
- aind_data_transfer_service-1.12.0.dist-info/RECORD +23 -0
- aind_data_transfer_service-1.12.0.dist-info/WHEEL +5 -0
- aind_data_transfer_service-1.12.0.dist-info/licenses/LICENSE +21 -0
- aind_data_transfer_service-1.12.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Module to configure and create xlsx job upload template"""
|
|
2
|
+
import datetime
|
|
3
|
+
from io import BytesIO
|
|
4
|
+
from typing import Any, Dict, List
|
|
5
|
+
|
|
6
|
+
from aind_data_schema_models.modalities import Modality
|
|
7
|
+
from aind_data_schema_models.platforms import Platform
|
|
8
|
+
from openpyxl import Workbook
|
|
9
|
+
from openpyxl.styles import Font
|
|
10
|
+
from openpyxl.utils import get_column_letter
|
|
11
|
+
from openpyxl.worksheet.datavalidation import DataValidation
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# TODO: convert to pydantic model
class JobUploadTemplate:
    """Class to configure and create xlsx job upload template"""

    # Download name for the generated workbook.
    FILE_NAME = "job_upload_template.xlsx"
    # Number of worksheet rows (including the header) covered by validation.
    NUM_TEMPLATE_ROWS = 20
    # Excel number format used to render acquisition datetimes.
    XLSX_DATETIME_FORMAT = "YYYY-MM-DDTHH:mm:ss"
    # Column headers, in worksheet order.
    HEADERS = [
        "project_name",
        "process_capsule_id",
        "input_data_mount",
        "platform",
        "acq_datetime",
        "subject_id",
        "metadata_dir",
        "modality0",
        "modality0.source",
        "modality1",
        "modality1.source",
    ]
    # Example rows pre-populated in the template to guide users.
    SAMPLE_JOBS = [
        [
            "Behavior Platform",
            "1f999652-00a0-4c4b-99b5-64c2985ad070",
            "data_mount",
            Platform.BEHAVIOR.abbreviation,
            datetime.datetime(2023, 10, 4, 4, 0, 0),
            "123456",
            "/allen/aind/stage/fake/metadata_dir",
            Modality.BEHAVIOR_VIDEOS.abbreviation,
            "/allen/aind/stage/fake/dir",
            Modality.BEHAVIOR.abbreviation,
            "/allen/aind/stage/fake/dir",
        ],
        [
            "Ophys Platform - SLAP2",
            None,
            None,
            Platform.SMARTSPIM.abbreviation,
            datetime.datetime(2023, 3, 4, 16, 30, 0),
            "654321",
            "/allen/aind/stage/fake/Config",
            Modality.SPIM.abbreviation,
            "/allen/aind/stage/fake/dir",
        ],
        [
            "Ephys Platform",
            None,
            None,
            Platform.ECEPHYS.abbreviation,
            datetime.datetime(2023, 1, 30, 19, 1, 0),
            "654321",
            None,
            Modality.ECEPHYS.abbreviation,
            "/allen/aind/stage/fake/dir",
            Modality.BEHAVIOR_VIDEOS.abbreviation,
            "/allen/aind/stage/fake/dir",
        ],
    ]

    @property
    def validators(self) -> List[Dict[str, Any]]:
        """
        Returns
        -------
        List[Dict[str, Any]]
          A list of validators for fields that require validation.

        """
        platform_rule = {
            "name": "platform",
            "type": "list",
            "options": list(Platform.abbreviation_map.keys()),
            "column_indexes": [self.HEADERS.index("platform")],
        }
        modality_rule = {
            "name": "modality",
            "type": "list",
            "options": list(Modality.abbreviation_map.keys()),
            "column_indexes": [
                self.HEADERS.index("modality0"),
                self.HEADERS.index("modality1"),
            ],
        }
        datetime_rule = {
            "name": "datetime",
            "type": "date",
            "column_indexes": [self.HEADERS.index("acq_datetime")],
        }
        return [platform_rule, modality_rule, datetime_rule]

    @property
    def excel_sheet_filestream(self) -> BytesIO:
        """Create job template as xlsx filestream"""
        stream = BytesIO()
        workbook = Workbook()
        workbook.iso_dates = True
        sheet = workbook.active
        # Header row first, then the sample jobs.
        sheet.append(self.HEADERS)
        for sample_row in self.SAMPLE_JOBS:
            sheet.append(sample_row)
        # Attach dropdown/date validation to the relevant columns.
        for rule in self.validators:
            rule_type = rule["type"]
            rule_name = rule["name"]
            dv_kwargs = {
                "type": rule_type,
                "promptTitle": rule_name,
                "error": f"Invalid {rule_name}.",
                "allow_blank": True,
                "showErrorMessage": True,
                "showInputMessage": True,
            }
            if rule_type == "list":
                # Excel list validation expects a quoted, comma-joined string.
                dv_kwargs["formula1"] = '"' + ",".join(rule["options"]) + '"'
                dv_kwargs["prompt"] = f"Select a {rule_name} from the dropdown"
            elif rule_type == "date":
                dv_kwargs["prompt"] = (
                    f"Provide a {rule_name} using {self.XLSX_DATETIME_FORMAT}"
                )
            data_validation = DataValidation(**dv_kwargs)
            for column_index in rule["column_indexes"]:
                column = get_column_letter(column_index + 1)
                cell_range = f"{column}2:{column}{self.NUM_TEMPLATE_ROWS}"
                data_validation.add(cell_range)
                if rule_type == "date":
                    # Render datetimes in ISO format within the template rows.
                    for (cell,) in sheet[cell_range]:
                        cell.number_format = self.XLSX_DATETIME_FORMAT
            sheet.add_data_validation(data_validation)
        # Bold the header row and auto-size its columns.
        header_font = Font(bold=True)
        for header_cell in sheet[1]:
            header_cell.font = header_font
            sheet.column_dimensions[header_cell.column_letter].auto_size = True
        # Serialize workbook into the in-memory stream.
        workbook.save(stream)
        workbook.close()
        return stream
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Client to manage connection with slurm cluster"""
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""Module to manage connection with hpc cluster"""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from typing import List, Optional, Union
|
|
5
|
+
|
|
6
|
+
import requests
|
|
7
|
+
from pydantic import Field, SecretStr, field_validator
|
|
8
|
+
from pydantic_settings import BaseSettings
|
|
9
|
+
from requests.models import Response
|
|
10
|
+
|
|
11
|
+
from aind_data_transfer_service.hpc.models import HpcJobSubmitSettings
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class HpcClientConfigs(BaseSettings):
    """Configs needed to connect to the hpc cluster"""

    hpc_host: str = Field(...)
    hpc_port: Optional[int] = Field(default=None)
    hpc_api_endpoint: Optional[str] = Field(default=None)
    hpc_username: str = Field(...)
    hpc_password: SecretStr = Field(...)
    hpc_token: SecretStr = Field(...)

    @field_validator("hpc_host", "hpc_api_endpoint", mode="before")
    def _strip_slash(cls, input_str: Optional[str]):
        """Strips trailing slash from domain."""
        if input_str is None:
            return None
        return input_str.strip("/")

    @property
    def hpc_url(self) -> str:
        """Construct base url from host, port, and api endpoint"""
        url_parts = [f"http://{self.hpc_host}"]
        if self.hpc_port is not None:
            url_parts.append(f":{self.hpc_port}")
        if self.hpc_api_endpoint:
            url_parts.append(f"/{self.hpc_api_endpoint}")
        return "".join(url_parts)
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class HpcClient:
    """Class to manage client api"""

    def __init__(self, configs: "HpcClientConfigs"):
        """Class constructor"""
        self.configs = configs

    @property
    def _job_submit_url(self):
        """Url for job submission"""
        return f"{self.configs.hpc_url}/job/submit"

    @property
    def _node_status_url(self):
        """Url to check status of nodes"""
        return f"{self.configs.hpc_url}/nodes"

    @property
    def _job_status_url(self):
        """Url to check status of job"""
        return f"{self.configs.hpc_url}/job"

    @property
    def _jobs_url(self):
        """Url to check statuses of all jobs"""
        return f"{self.configs.hpc_url}/jobs"

    @property
    def __headers(self):
        """Headers needed for rest api"""
        # Slurm REST API authenticates via these custom headers.
        return {
            "X-SLURM-USER-NAME": self.configs.hpc_username,
            "X-SLURM-USER-PASSWORD": (
                self.configs.hpc_password.get_secret_value()
            ),
            "X-SLURM-USER-TOKEN": self.configs.hpc_token.get_secret_value(),
        }

    def get_node_status(self) -> "Response":
        """Get status of nodes"""
        return requests.get(
            url=self._node_status_url, headers=self.__headers
        )

    def get_job_status(self, job_id: "Union[str, int]") -> "Response":
        """Get status of a single job by its id"""
        status_url = f"{self._job_status_url}/{job_id}"
        return requests.get(url=status_url, headers=self.__headers)

    def get_jobs(self) -> "Response":
        """Get statuses of all jobs"""
        return requests.get(url=self._jobs_url, headers=self.__headers)

    def submit_job(self, job_def: dict) -> "Response":
        """Submit a job defined by job def"""
        return requests.post(
            url=self._job_submit_url, json=job_def, headers=self.__headers
        )

    def submit_hpc_job(
        self,
        script: str,
        job: Optional["HpcJobSubmitSettings"] = None,
        jobs: Optional[List["HpcJobSubmitSettings"]] = None,
    ) -> "Response":
        """
        Submit a job following the v0.0.36 Slurm rest api job submission guide

        Parameters
        ----------
        script : str
          Executable script (full contents) to run in batch step
        job : Optional[HpcJobSubmitSettings]
          v0.0.36_job_properties (Default is None)
        jobs : Optional[List[HpcJobSubmitSettings]]
          List of properties of an HetJob (Default is None)

        Returns
        -------
        Response

        """
        # Exactly one of job/jobs must be supplied (not neither, not both).
        assert job is not None or jobs is not None
        assert job is None or jobs is None
        if job is not None:
            payload = {
                "job": json.loads(job.model_dump_json(exclude_none=True)),
                "script": script,
            }
        else:
            payload = {
                "jobs": [
                    json.loads(het_job.model_dump_json(exclude_none=True))
                    for het_job in jobs
                ],
                "script": script,
            }

        return requests.post(
            url=self._job_submit_url, json=payload, headers=self.__headers
        )