seer-pas-sdk 0.1.3__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- seer_pas_sdk/auth/auth.py +23 -1
- seer_pas_sdk/common/__init__.py +370 -72
- seer_pas_sdk/common/errors.py +5 -0
- seer_pas_sdk/common/groupanalysis.py +55 -0
- seer_pas_sdk/core/sdk.py +1858 -374
- seer_pas_sdk/core/unsupported.py +1634 -0
- seer_pas_sdk/objects/__init__.py +2 -0
- seer_pas_sdk/objects/groupanalysis.py +30 -0
- seer_pas_sdk/objects/platemap.py +67 -22
- seer_pas_sdk/objects/volcanoplot.py +290 -0
- seer_pas_sdk-0.3.0.dist-info/METADATA +231 -0
- seer_pas_sdk-0.3.0.dist-info/RECORD +18 -0
- {seer_pas_sdk-0.1.3.dist-info → seer_pas_sdk-0.3.0.dist-info}/WHEEL +1 -1
- {seer_pas_sdk-0.1.3.dist-info → seer_pas_sdk-0.3.0.dist-info}/top_level.txt +0 -1
- seer_pas_sdk-0.1.3.dist-info/METADATA +0 -50
- seer_pas_sdk-0.1.3.dist-info/RECORD +0 -19
- tests/__init__.py +0 -0
- tests/conftest.py +0 -17
- tests/test_auth.py +0 -48
- tests/test_common.py +0 -99
- tests/test_objects.py +0 -91
- tests/test_sdk.py +0 -11
- {seer_pas_sdk-0.1.3.dist-info → seer_pas_sdk-0.3.0.dist-info/licenses}/LICENSE.txt +0 -0
@@ -0,0 +1,1634 @@
"""
seer_pas_sdk.core.unsupported -- in development
"""

from tqdm import tqdm

import os
import jwt
import requests
import urllib.request
import ssl
import shutil

from typing import List as _List

from ..common import *
from ..auth import Auth
from ..objects import PlateMap

from .sdk import SeerSDK as _SeerSDK


class _UnsupportedSDK(_SeerSDK):
    """
    **************
    [UNEXPOSED MODULE]
    **************

    This module is currently not supported and should be considered unstable. Use at your own risk.
    """

    def _add_sample(self, sample_entry: dict):
        """
        ****************
        [UNEXPOSED METHOD CALL]
        ****************
        Add a sample given a sample entry containing, at minimum, the "plateID", "sampleID" and "sampleName" keys.

        Parameters
        ----------
        sample_entry: dict
            A dictionary containing all keys and values for the sample entry. These may or may not have been inferred from the sample description file.

        Returns
        -------
        dict
            The response from the backend.

        Examples
        -------
        >>> from core import SeerSDK
        >>> seer_sdk = SeerSDK()
        >>> seer_sdk._add_sample({"plateID": "YOUR_PLATE_ID_HERE", "sampleID": "YOUR_SAMPLE_ID_HERE", "sampleName": "YOUR_SAMPLE_NAME_HERE"})
        >>> {
                "id": "SAMPLE_ID_HERE",
                "tenant_id": "TENANT_ID_HERE",
                "plate_id": "PLATE_ID_HERE",
                "sample_name": "SAMPLE_NAME_HERE",
                "sample_type": "SAMPLE_TYPE_HERE",
                "species": "Human",
                "description": None,
                ...
                ...
            }
        """
        for key in ["plateID", "sampleID", "sampleName"]:
            if key not in sample_entry:
                raise ValueError(
                    f"{key} is missing. Please check your parameters again."
                )

        URL = f"{self._auth.url}api/v1/samples"

        with self._get_auth_session() as s:

            response = s.post(URL, json=sample_entry)

            if response.status_code != 200:
                raise ValueError(
                    "Invalid request. Please check your parameters."
                )

            return response.json()

    # Add samples in batch
    def _add_samples(self, sample_info: list):
        """
        ****************
        [UNEXPOSED METHOD CALL]
        ****************
        Add samples in batch given a list of sample entries.

        Parameters
        ----------
        sample_info: list
            A list of dictionaries containing all keys and values for the sample entries. These may or may not have been inferred from the sample description file.
            Required keys: ["plateID", "sampleID", "sampleName"]

        Returns
        -------
        dict
            The response from the backend.
        """
        # Validate keys in samples
        for sample in sample_info:
            if not all(
                key in sample for key in ["plateID", "sampleID", "sampleName"]
            ):
                raise ValueError(
                    f"Invalid sample entry for sample {sample}. Please check your parameters again."
                )

        URL = f"{self._auth.url}api/v1/samples/batch"

        with self._get_auth_session() as s:
            response = s.post(URL, json={"samples": sample_info})

            if response.status_code != 200:
                raise ValueError(
                    "Invalid request. Please check your parameters."
                )

            return response.json()

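Only the three keys checked in the validation loop are required; everything else passes through to the backend. A minimal batch call can be sketched as below, with placeholder IDs and the `from core import SeerSDK` import mirroring this module's own docstring convention (the method is unexposed, so treat the sketch as unstable):

```python
from core import SeerSDK

seer_sdk = SeerSDK()

# Hypothetical minimal payload: only plateID, sampleID and sampleName are
# validated client-side; any additional sample fields are forwarded as-is.
response = seer_sdk._add_samples(
    [
        {"plateID": "PLATE_UUID", "sampleID": "SAMPLE_1", "sampleName": "Sample 1"},
        {"plateID": "PLATE_UUID", "sampleID": "SAMPLE_2", "sampleName": "Sample 2"},
    ]
)
print(response)
```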
    def add_project(
        self,
        project_name: str,
        plate_ids: _List[str],
        description: str = None,
        notes: str = None,
        space: str = None,
    ):
        """
        Creates a new project with a given project_name and a list of plate_ids.

        Parameters
        ----------
        project_name : str
            Name of the project.
        plate_ids : list[str]
            List of plate ids to be added to the project.
        description : str, optional
            Description of the project.
        notes : str, optional
            Notes for the project.
        space : str, optional
            User group id of the project. Defaults to the user group id of the user who is creating the project (i.e. None).

        Returns
        -------
        res: dict
            A dictionary containing the status of the request if succeeded.

        Examples
        -------
        >>> from core import SeerSDK
        >>> seer_sdk = SeerSDK()
        >>> seer_sdk.add_project("YOUR_PROJECT_NAME_HERE", ["PLATE_ID_1", "PLATE_ID_2"])
        >>> {
                "status": "Project started with id = PROJECT_ID"
            }
        """

        if not project_name:
            raise ValueError("Project name cannot be empty.")

        all_plate_ids = set(
            [plate["id"] for plate in self.get_plate_metadata()]
        )

        for plate_id in plate_ids:
            if plate_id not in all_plate_ids:
                raise ValueError(
                    f"Plate ID '{plate_id}' is not valid. Please check your parameters again."
                )

        URL = f"{self._auth.url}api/v1/projects"

        with self._get_auth_session() as s:

            project = s.post(
                URL,
                json={
                    "projectName": project_name,
                    "plateIDs": plate_ids,
                    "notes": notes,
                    "description": description,
                    "projectUserGroup": space,
                },
            )

            if project.status_code != 200:
                raise ValueError(
                    "Invalid request. Please check your parameters."
                )

            res = {
                "status": f"Project started with id = {project.json()['id']}"
            }

            return res

    def add_samples_to_project(self, samples: _List[str], project_id: str):
        """
        Add samples to a project given a list of sample ids and a project id.

        Parameters
        ----------
        samples : list[str]
            List of sample ids to be added to the project.
        project_id : str
            ID of the project to which the samples are to be added.

        Returns
        -------
        res : dict
            A dictionary containing the status of the request if succeeded.

        Examples
        -------
        >>> from core import SeerSDK
        >>> seer_sdk = SeerSDK()
        >>> seer_sdk.add_samples_to_project(["SAMPLE_ID_1", "SAMPLE_ID_2"], "PROJECT_ID")
        >>> {
                "status": "Samples added to PROJECT_ID"
            }
        """
        if not project_id:
            raise ValueError("Project ID cannot be empty.")

        if not samples:
            raise ValueError("Samples cannot be empty.")

        URL = f"{self._auth.url}api/v1/addSamplesToProject/{project_id}"

        with self._get_auth_session() as s:

            response = s.put(
                URL,
                json={
                    "sampleIDs": samples,
                },
            )

            if response.status_code != 200:
                raise ValueError(
                    "Invalid request. Please check your parameters."
                )

            res = {"status": f"Samples added to {project_id}"}
            return res

    def add_plates_to_project(self, plates: _List[str], project_id: str):
        """
        Add plates to a project given a list of plate ids and a project id.

        Parameters
        ----------
        plates : list[str]
            List of plate ids to be added to the project.
        project_id : str
            ID of the project to which the plates are to be added.

        Returns
        -------
        res : dict
            A dictionary containing the status of the request if succeeded.
        """

        if not project_id:
            raise ValueError("Project ID cannot be empty.")

        if not plates:
            raise ValueError("Plates cannot be empty.")

        # get samples
        samples = (
            x["id"]
            for plate_id in plates
            for x in self.get_samples(plate_id=plate_id)
        )

        return self.add_samples_to_project(
            project_id=project_id, samples=list(samples)
        )

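This method carries no Examples section; since it resolves each plate to its samples and delegates to `add_samples_to_project`, a call can be sketched as follows (placeholder IDs, same docstring-style import as above):

```python
from core import SeerSDK

seer_sdk = SeerSDK()

# Every sample found on either plate is added to the project in one request.
result = seer_sdk.add_plates_to_project(
    plates=["PLATE_ID_1", "PLATE_ID_2"],
    project_id="PROJECT_ID",
)
print(result)  # {"status": "Samples added to PROJECT_ID"}
```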
    def add_plate(
        self,
        ms_data_files: _List[str],
        plate_map_file: str,
        plate_id: str,
        plate_name: str,
        sample_description_file: str = None,
        space: str = None,
    ):
        """
        Add a plate given a list of (existing or new) ms_data_files, plate_map_file, plate_id, plate_name, sample_description_file and space.

        Parameters
        ----------
        ms_data_files : list[str]
            List of ms_data_files.
        plate_map_file : str or `PlateMap` Object
            The plate map file.
        plate_id : str
            The plate ID. Must be unique.
        plate_name : str
            The plate name.
        sample_description_file : str, optional
            The sample description file. Defaults to None.
        space : str, optional
            The space or usergroup. Defaults to the user group id of the user who is creating the plate (i.e. None).

        Returns
        -------
        id_uuid : str
            The UUID of the plate.

        Examples
        -------
        >>> from core import SeerSDK
        >>> seer_sdk = SeerSDK()
        >>> seer_sdk.add_plate(["MS_DATA_FILE_1", "MS_DATA_FILE_2"], "PLATE_MAP_FILE", "PLATE_ID", "PLATE_NAME")
        "9d5b6ab0-5a8c-11ef-8110-dd5cb94025eb"
        """

        plate_ids = (
            set()
        )  # contains all the plate_ids fetched from self.get_plate_metadata()
        files = []  # to be uploaded to sync frontend
        samples = []  # list of all the sample responses from the backend
        id_uuid = ""  # uuid for the plate id
        raw_file_paths = {}  # list of all the AWS raw file paths
        s3_upload_path = None
        s3_bucket = ""
        dir_exists = (
            True  # flag to check if the generated_files directory exists
        )

        # Step 0: Check if the file paths are valid.
        for file in ms_data_files:
            if not os.path.exists(file):
                raise ValueError(
                    f"File path '{file}' is invalid. Please check your parameters."
                )

        if type(plate_map_file) == str and not os.path.exists(plate_map_file):
            raise ValueError(
                f"File path '{plate_map_file}' is invalid. Please check your parameters."
            )

        if sample_description_file and not os.path.exists(
            sample_description_file
        ):
            raise ValueError(
                f"File path '{sample_description_file}' is invalid. Please check your parameters."
            )

        # Validate plate id, plate name as entity names
        # Enforcing this on the SDK level to prevent the creation of empty records before the backend validation
        if not entity_name_ruler(plate_id):
            raise ValueError("Plate ID contains unsupported characters.")

        if not entity_name_ruler(plate_name):
            raise ValueError("Plate Name contains unsupported characters.")

        # Validate plate map
        if isinstance(plate_map_file, PlateMap):
            plate_map_data = plate_map_file.to_df()
        else:
            plate_map_data = pd.read_csv(plate_map_file)

        local_file_names = [os.path.basename(x) for x in ms_data_files]

        validate_plate_map(plate_map_data, local_file_names)

        # Step 1: Check for duplicates in the user-inputted plate id. Populates `plate_ids` set.
        with self._get_auth_session() as s:
            plate_response = s.get(f"{self._auth.url}api/v1/plateids")

            if plate_response.status_code != 200:
                raise ValueError(
                    "Failed to fetch plate ids from the server. Please check your connection and reauthenticate."
                )

            plate_ids = set(plate_response.json()["data"])

            if not plate_ids:
                raise ValueError(
                    "No plate ids returned from the server. Please reattempt."
                )

        # Step 2: Fetch the UUID that needs to be passed into the backend from `/api/v1/plates` to fetch the AWS upload config and raw file path. This will sync the plates backend with samples when the user uploads later. This UUID will also be void of duplicates since duplication is handled by the backend.

        with self._get_auth_session() as s:
            plate_response = s.post(
                f"{self._auth.url}api/v1/plates",
                json={
                    "plateId": plate_id,
                    "plateName": plate_name,
                    "plateUserGroup": space,
                },
            )

            if plate_response.status_code != 200:
                raise ValueError(
                    "Failed to connect to the server. Please check your connection and reauthenticate."
                )

            id_uuid = plate_response.json()["id"]

            if not id_uuid:
                raise ValueError(
                    "Failed to fetch a UUID from the server. Please check your connection and reauthenticate."
                )

        # Step 3: Fetch AWS upload config from the backend with the plateId we just generated. Populates `s3_upload_path` and `s3_bucket` global variables.
        with self._get_auth_session() as s:
            config_response = s.post(
                f"{self._auth.url}api/v1/msdatas/getuploadconfig",
                json={"plateId": id_uuid},
            )

            if (
                config_response.status_code != 200
                or not config_response.json()
            ):
                raise ValueError(
                    "Failed to fetch AWS upload config for the plate. Please check your connection and reauthenticate."
                )

            if "s3Bucket" not in config_response.json():
                raise ValueError(
                    "Failed to fetch the S3 bucket from AWS. Please check your connection and reauthenticate."
                )
            elif "s3UploadPath" not in config_response.json():
                raise ValueError(
                    "Failed to fetch the S3 upload path from AWS. Please check your connection and reauthenticate."
                )

            s3_bucket = config_response.json()["s3Bucket"]
            s3_upload_path = config_response.json()["s3UploadPath"]

        with self._get_auth_session() as s:
            config_response = s.get(
                f"{self._auth.url}auth/getawscredential",
            )

            if (
                config_response.status_code != 200
                or not config_response.json()
            ):
                raise ValueError(
                    "Failed to fetch credentials. Please check your connection and reauthenticate."
                )

            if "S3Bucket" not in config_response.json()["credentials"]:
                raise ValueError(
                    "Failed to fetch data from AWS. Please check your connection and reauthenticate."
                )

            credentials = config_response.json()["credentials"]

        # Step 4: Upload the platemap file to the S3 bucket.
        if isinstance(plate_map_file, PlateMap):
            plate_map_file_name = f"plateMap_{id_uuid}.csv"

            if not os.path.exists("generated_files"):
                dir_exists = False
                os.makedirs("generated_files")

            plate_map_file.to_csv(f"generated_files/{plate_map_file_name}")
            plate_map_file = f"generated_files/{plate_map_file_name}"

        else:
            plate_map_file_name = os.path.basename(plate_map_file)

        res = upload_file(
            plate_map_file,
            s3_bucket,
            credentials,
            f"{s3_upload_path}{plate_map_file_name}",
        )

        if not res:
            raise ValueError(
                "Failed to upload plate map to AWS. Please check your connection and reauthenticate."
            )

        with self._get_auth_session() as s:
            plate_map_response = s.post(
                f"{self._auth.url}api/v1/msdataindex/file",
                json={
                    "files": [
                        {
                            "filePath": f"{s3_upload_path}{plate_map_file_name}",
                            "fileSize": os.stat(plate_map_file).st_size,
                            "userGroupId": space,
                        }
                    ]
                },
            )

            if (
                plate_map_response.status_code != 200
                or not plate_map_response.json()
                or "created" not in plate_map_response.json()
            ):
                raise ValueError(
                    "Failed to upload raw files to PAS. Please check your connection and reauthenticate."
                )

        # Step 5: Populate `raw_file_paths` for sample upload.
        for file in ms_data_files:
            filename = os.path.basename(file)
            filesize = os.stat(file).st_size
            raw_file_paths[f"{filename}"] = (
                f"/{s3_bucket}/{s3_upload_path}{filename}"
            )

        sample_info = get_sample_info(
            id_uuid,
            plate_map_file,
            space,
            sample_description_file,
        )

        # Step 6: Get sample info from the plate map file and make a call to `/api/v1/samples` with the sample_info. This returns the plateId, sampleId and sampleName for each sample in the plate map file. Also validate and upload the sample_description_file if it exists.
        if sample_description_file:

            sdf_upload = upload_file(
                sample_description_file,
                s3_bucket,
                credentials,
                f"{s3_upload_path}{os.path.basename(sample_description_file)}",
            )

            if not sdf_upload:
                raise ValueError(
                    "Failed to upload sample description file to AWS. Please check your connection and reauthenticate."
                )

            with self._get_auth_session() as s:
                sdf_response = s.post(
                    f"{self._auth.url}api/v1/msdataindex/file",
                    json={
                        "files": [
                            {
                                "filePath": f"{s3_upload_path}{os.path.basename(sample_description_file)}",
                                "fileSize": os.stat(
                                    sample_description_file
                                ).st_size,
                                "userGroupId": space,
                            }
                        ]
                    },
                )

                if (
                    sdf_response.status_code != 200
                    or not sdf_response.json()
                    or "created" not in sdf_response.json()
                ):
                    raise ValueError(
                        "Failed to upload sample description file to PAS DB. Please check your connection and reauthenticate."
                    )

        samples = self._add_samples(sample_info)

        # Step 7: Parse the plate map file and convert the data into a form that can be POSTed to `/api/v1/msdatas`.
        plate_map_data = parse_plate_map_file(
            plate_map_file, samples, raw_file_paths, space
        )

        # Step 8: Make a request to `/api/v1/msdatas/batch` with the processed samples data.
        with self._get_auth_session() as s:
            ms_data_response = s.post(
                f"{self._auth.url}api/v1/msdatas/batch",
                json={"msdatas": plate_map_data},
            )
            if ms_data_response.status_code != 200:
                raise ValueError(
                    "Failed to create samples in PAS. Please check your connection and reauthenticate."
                )

        # Step 9: Upload each msdata file to the S3 bucket.
        with self._get_auth_session() as s:
            config_response = s.get(
                f"{self._auth.url}auth/getawscredential",
            )

            if (
                config_response.status_code != 200
                or not config_response.json()
            ):
                raise ValueError("Could not fetch config for user.")

            if "S3Bucket" not in config_response.json()["credentials"]:
                raise ValueError(
                    "Failed to connect to AWS. Please check your connection and reauthenticate."
                )

            credentials = config_response.json()["credentials"]

            os.environ["AWS_ACCESS_KEY_ID"] = credentials["AccessKeyId"]
            os.environ["AWS_SECRET_ACCESS_KEY"] = credentials[
                "SecretAccessKey"
            ]
            os.environ["AWS_SESSION_TOKEN"] = credentials["SessionToken"]

        for file in ms_data_files:
            filename = os.path.basename(file)
            filesize = os.stat(file).st_size
            res = upload_file(
                file, s3_bucket, credentials, f"{s3_upload_path}{filename}"
            )

            if not res:
                raise ValueError(
                    "Failed to upload MS data files to AWS. Please check your connection and reauthenticate."
                )

            files.append(
                {
                    "filePath": f"{s3_upload_path}{filename}",
                    "fileSize": filesize,
                    "userGroupId": space,
                }
            )

        # Step 10: Make a call to `api/v1/msdataindex/file` to sync with frontend. This should only be done after all files have finished uploading, simulating an async "promise"-like scenario in JavaScript.
        with self._get_auth_session() as s:
            file_response = s.post(
                f"{self._auth.url}api/v1/msdataindex/file",
                json={"files": files},
            )

            if (
                file_response.status_code != 200
                or not file_response.json()
                or "created" not in file_response.json()
            ):
                raise ValueError(
                    "Failed to update PAS MS Files view. Your data has been uploaded."
                )

        if os.path.exists("generated_files") and not dir_exists:
            shutil.rmtree("generated_files")

        print(f"Plate generated with id: '{id_uuid}'")
        return id_uuid

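Because `plate_map_file` may also be a `PlateMap` object (serialized via `to_csv()` into a temporary `generated_files/` directory, as Step 4 shows), the object-based call can be sketched as below. The import paths follow the docstring convention and the `PlateMap(...)` construction arguments are omitted, since they are not shown in this section:

```python
from core import SeerSDK
from objects import PlateMap  # import path assumed; mirrors `from ..objects import PlateMap` above

seer_sdk = SeerSDK()

# Hypothetical in-memory plate map; add_plate() writes it to
# generated_files/plateMap_<uuid>.csv before uploading (see Step 4).
plate_map = PlateMap(...)  # construction arguments omitted; see the PlateMap object docs

plate_uuid = seer_sdk.add_plate(
    ms_data_files=["/path/to/file1.raw", "/path/to/file2.raw"],
    plate_map_file=plate_map,
    plate_id="PLATE_ID",
    plate_name="PLATE_NAME",
)
print(plate_uuid)
```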
    def start_analysis(
        self,
        name: str,
        project_id: str,
        sample_ids: list = None,
        analysis_protocol_name: str = None,
        analysis_protocol_id: str = None,
        notes: str = "",
        description: str = "",
        space: str = None,
        filter: str = None,
    ):
        """
        Given a name, analysis_protocol_id, project_id, creates a new analysis for the authenticated user.

        Parameters
        ----------
        name : str
            Name of the analysis.

        project_id : str
            ID of the project to which the analysis belongs. Can be fetched using the get_project_metadata() function.

        sample_ids: list[str], optional
            List of sample IDs to be used for the analysis. Should be omitted if analysis is to be run with all samples.

        analysis_protocol_name : str, optional
            Name of the analysis protocol to be used for the analysis. Can be fetched using the get_analysis_protocols() function. Should be omitted if analysis_protocol_id is provided.

        analysis_protocol_id : str, optional
            ID of the analysis protocol to be used for the analysis. Can be fetched using the get_analysis_protocols() function. Should be omitted if analysis_protocol_name is provided.

        notes : str, optional
            Notes for the analysis, defaulted to an empty string.

        description : str, optional
            Description for the analysis, defaulted to an empty string.

        space : str, optional
            ID of the user group to which the analysis belongs, defaulted to None.

        filter : str, optional
            Filter to be applied to the samples, defaulted to None. Acceptable values are 'sample', 'control', or None.

        Returns
        -------
        dict
            Contains message whether the analysis was started or not.

        Examples
        -------
        >>> from core import SeerSDK
        >>> seer_sdk = SeerSDK()
        >>> seer_sdk.start_analysis("YOUR_ANALYSIS_NAME_HERE", "YOUR_PROJECT_ID_HERE", analysis_protocol_id="YOUR_ANALYSIS_PROTOCOL_ID_HERE")
        >>> { "message": "Analysis has been started successfully" }
        """

        if not name:
            raise ValueError("Analysis name cannot be empty.")

        if not project_id:
            raise ValueError("Project ID cannot be empty.")

        if not analysis_protocol_id and analysis_protocol_name:
            valid_analysis_protocol = self.get_analysis_protocols(
                analysis_protocol_name=analysis_protocol_name
            )

            if not valid_analysis_protocol:
                raise ValueError(
                    f"Analysis protocol not found with name {analysis_protocol_name}."
                )

            analysis_protocol_id = valid_analysis_protocol[0]["id"]

        if analysis_protocol_id and not analysis_protocol_name:
            valid_analysis_protocol = self.get_analysis_protocols(
                analysis_protocol_id=analysis_protocol_id
            )

            if not valid_analysis_protocol:
                raise ValueError(
                    f"Analysis protocol not found with ID {analysis_protocol_id}."
                )

        if not analysis_protocol_id and not analysis_protocol_name:
            raise ValueError(
                "You must specify either analysis protocol ID or analysis protocol name."
            )

        if sample_ids:
            valid_ids = [
                entry["id"]
                for entry in self.get_samples(project_id=project_id)
            ]

            for sample_id in sample_ids:
                if sample_id not in valid_ids:
                    raise ValueError(
                        f"Sample ID '{sample_id}' is either not valid or not associated with the project."
                    )
        if filter:
            sample_ids = self._filter_samples_metadata(
                project_id, filter, sample_ids
            )

        URL = f"{self._auth.url}api/v1/analyze"

        with self._get_auth_session() as s:
            req_payload = {
                "analysisName": name,
                "analysisProtocolId": analysis_protocol_id,
                "projectId": project_id,
                "notes": notes,
                "description": description,
                "userGroupId": space,
            }

            if sample_ids:
                sample_ids = ",".join(sample_ids)
                req_payload["selectedSampleIDs"] = sample_ids

            analysis = s.post(URL, json=req_payload)

            if analysis.status_code != 200:
                raise ValueError(
                    "Failed to start analysis. Please check your connection."
                )

            # Analysis id is not contained in response.
            return analysis.json()

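Since the method resolves a protocol name to its ID when only the name is given, a name-based call with the optional 'sample' filter can be sketched as follows (placeholder values, docstring-style import):

```python
from core import SeerSDK

seer_sdk = SeerSDK()

# Protocol given by name; the SDK looks up its ID via get_analysis_protocols().
# filter="sample" restricts the run to sample (non-control) wells.
result = seer_sdk.start_analysis(
    name="MY_ANALYSIS",
    project_id="PROJECT_ID",
    analysis_protocol_name="MY_PROTOCOL",
    filter="sample",
)
print(result)  # e.g. {"message": "Analysis has been started successfully"}
```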
    def upload_ms_data_files(
        self,
        ms_data_files: list,
        path: str,
        space: str = None,
        filenames=[],
    ):
        """
        Upload MS data files to the backend.

        Parameters
        ----------
        ms_data_files : List
            List of MS data files to be uploaded.
        path : str
            The name of the destination folder in PAS. Does not accept leading, trailing or consecutive forward slashes. Example: "path/to/pas/folder".
        space: str, optional
            ID of the user group to which the files belong, defaulted to None.
        filenames: list, optional
            List of preferred PAS filenames. This rename occurs on both the cloud and the user interface level. Indexes should be mapped to the correlating source file in ms_data_files. Folder paths are not accepted. Defaulted to [].

        Returns
        -------
        list
            The indexed file records; the tenant id prefix is stripped from each returned filePath.

        Examples
        -------
        >>> from core import SeerSDK
        >>> seer_sdk = SeerSDK()
        >>> seer_sdk.upload_ms_data_files(["/path/to/file1", "/path/to/file2"], path="path/to/pas/folder")
        [{'filePath': 'path/to/pas/folder/file1', 'fileSize': 1234, 'userGroupId': None}, {'filePath': 'path/to/pas/folder/file2', 'fileSize': 1234, 'userGroupId': None}]

        >>> seer_sdk.upload_ms_data_files(["/path/to/file1", "/path/to/file2"], path="path/to/pas/folder", space="user_group_id")
        [{'filePath': 'path/to/pas/folder/file1', 'fileSize': 1234, 'userGroupId': 'user_group_id'}, {'filePath': 'path/to/pas/folder/file2', 'fileSize': 1234, 'userGroupId': 'user_group_id'}]

        >>> seer_sdk.upload_ms_data_files(["/path/to/file1", "/path/to/file2"], path="path/to/pas/folder", space="user_group_id", filenames=["fileA", "fileB"])
        [{'filePath': 'path/to/pas/folder/fileA', 'fileSize': 1234, 'userGroupId': 'user_group_id'}, {'filePath': 'path/to/pas/folder/fileB', 'fileSize': 1234, 'userGroupId': 'user_group_id'}]

        """

        files = []
        tenant_id = self._auth.active_tenant_id
        s3_bucket = ""

        if not path:
            raise ValueError(
                "A folder path is required to upload files into PAS."
            )

        # Step 1: Check if paths and file extensions are valid.
        for file in ms_data_files:
            if not valid_ms_data_file(file):
                raise ValueError(
                    "Invalid file or file format. Please check your file."
                )

        # `.d.zip` is a two-part suffix that os.path.splitext() would truncate
        # to ".zip", so match on the full suffix instead.
        if filenames and any(
            file.lower().endswith(".d.zip") for file in ms_data_files
        ):
            raise ValueError(
                "Please leave the 'filenames' parameter empty when working with .d.zip files. SeerSDK.rename_d_zip_file() is available for this use case."
            )
        # Step 2: Use active tenant to fetch the tenant_id.
        tenant_id = self.get_active_tenant_id()

        # Step 3: Fetch the S3 bucket name by making a call to `/api/v1/auth/getawscredential`
        with self._get_auth_session() as s:
            config_response = s.get(
                f"{self._auth.url}auth/getawscredential",
            )

            if (
                config_response.status_code != 200
                or not config_response.json()
            ):
                raise ValueError("Could not fetch config for user.")

            if "S3Bucket" not in config_response.json()["credentials"]:
                raise ValueError(
                    "Could not fetch config for user - incomplete response."
                )

            s3_bucket = config_response.json()["credentials"]["S3Bucket"]

            credentials = config_response.json()["credentials"]

        # Step 4: Upload each msdata file to the S3 bucket.
        for i, file in enumerate(ms_data_files):
            filename = (
                filenames[i]
                if filenames
                else os.path.basename(file).replace("/", "")
            )
            filesize = os.stat(file).st_size
            s3_upload_path = (
                f"{tenant_id}" if not path else f"{tenant_id}/{path}"
            )

            res = upload_file(
                file, s3_bucket, credentials, f"{s3_upload_path}/{filename}"
            )

            if not res:
                raise ServerError(
                    f"Failed to upload to cloud storage. {filename}"
                )

            files.append(
                {
                    "filePath": f"{s3_upload_path}/{filename}",
                    "fileSize": filesize,
                    "userGroupId": space,
                }
            )

        # Step 5: Make a call to `/api/v1/msdataindex/file` to sync with frontend. This should only be done after all files have finished uploading, simulating an async "promise"-like scenario in JavaScript.
        result_files = None
        with self._get_auth_session() as s:
            file_response = s.post(
                f"{self._auth.url}api/v1/msdataindex/file",
                json={"files": files},
            )

            if (
                file_response.status_code != 200
                or not file_response.json()
                or "created" not in file_response.json()
            ):
                raise ServerError("Could not upload MS Files to PAS.")
            result_files = file_response.json()["files"]

        # omit tenant_id from return file path
        for result in result_files:
            result["filePath"] = "/".join(result["filePath"].split("/")[1:])

        print(
            f"Files uploaded successfully to {self.get_active_tenant_name()}."
        )

        return result_files

    def _move_ms_data_files(
        self,
        source_data_files: _List,
        target_data_files: _List,
        target_space: str = None,
    ):
        """
        Move MS data files from one location to another.

        Parameters
        ----------
        source_data_files : List
            List of MS data files to be moved.
        target_data_files : List
            List of target MS data files.
        target_space : str, optional
            Name of the user group to move the files to.
            If None is provided, the files will remain in the space they occupied prior to the move action.

        Returns
        -------
        list
            The list of files moved.

        Examples
        -------
        >>> from core import SeerSDK
        >>> seer_sdk = SeerSDK()
        >>> seer_sdk.move_ms_data_files(["/path/to/file1", "/path/to/file2"], ["/path/to/target_file1", "/path/to/target_file2"])
        ["/path/to/target_file1", "/path/to/target_file2"]
        """

        tenant_id = self._auth.active_tenant_id

        if not source_data_files:
            raise ValueError("Source data files cannot be empty.")

        if len(source_data_files) != len(target_data_files):
            raise ValueError(
                "Source and target files should have the same number of files."
            )

        folder_paths = list({os.path.dirname(x) for x in source_data_files})
        if len(folder_paths) > 1:
            raise ValueError(
                "Files can only be moved from one folder path at a time."
            )
        folder_path = f"{tenant_id}/{folder_paths[0]}"

        target_folder_paths = list(
            {os.path.dirname(x) for x in target_data_files}
        )
        if len(target_folder_paths) > 1:
            raise ValueError(
                "Files can only be moved to one folder path at a time."
            )

        available_spaces = self.get_spaces()
        target_space_id = None
        if target_space:
            target_spaces = [
                x["id"]
                for x in available_spaces
                if x["usergroup_name"].lower() == target_space.lower()
            ]
            if not target_spaces:
                raise ValueError(
                    f"Target space not found with name {target_space}. Please correct this value."
                )
            target_space_id = target_spaces[0]

        target_folder_path = f"{tenant_id}/{target_folder_paths[0]}"
        # Retrieve msdatafileindex metadata to determine source space
        base_space = None
        with self._get_auth_session() as s:
            URL = self._auth.url + "api/v1/msdataindex/getmetadata"
            params = {"folderKey": folder_path}
            r = s.get(URL, params=params)
            if r.status_code != 200:
                raise ValueError("Failed to locate source files in PAS.")
            data = r.json()["files"]
            found_files = [
                x
                for x in data
                if x["filename"]
                in [os.path.basename(x) for x in source_data_files]
            ]
            if len(found_files) != len(source_data_files):
                raise ValueError(
                    "Not all source files were found in the source folder."
                )
            spaces = list({x["userGroupId"] for x in found_files})
            if len(spaces) > 1:
                raise ValueError(
                    "Files are located in multiple spaces. Please separate these into multiple move requests."
                )
            base_space = spaces[0]

        if not target_space:
            target_space_id = base_space

        json = {
            "type": "file",
            "sourceFolder": folder_path,
            "targetFolder": target_folder_path,
            "sourceFiles": [os.path.basename(x) for x in source_data_files],
            "targetFiles": [os.path.basename(x) for x in target_data_files],
        }

        # we must specify base_space if not General because it's a criteria for finding source files.
        if base_space:
            json["sourceUserGroupId"] = base_space

        # If target space is General, we still omit it
        if target_space_id and base_space != target_space_id:
            json["targetUserGroupId"] = target_space_id

        with self._get_auth_session() as s:
            URL = self._auth.url + "api/v1/msdataindex/move"
            r = s.post(URL, json=json)
            if r.status_code != 200:
                raise ServerError("Failed to move files in PAS.")
            return target_data_files

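The request body assembled above maps onto `api/v1/msdataindex/move` as follows; a sketch of the payload for a two-file move between folders, with placeholder values:

```python
# Hypothetical payload for POST api/v1/msdataindex/move, mirroring the dict
# built in _move_ms_data_files; folder keys are prefixed with the tenant id.
payload = {
    "type": "file",
    "sourceFolder": "TENANT_ID/source/folder",
    "targetFolder": "TENANT_ID/target/folder",
    "sourceFiles": ["file1.raw", "file2.raw"],
    "targetFiles": ["file1.raw", "file2.raw"],
    # included only when the source files live outside the General space:
    "sourceUserGroupId": "SOURCE_SPACE_ID",
    # included only when the target space differs from the source space:
    "targetUserGroupId": "TARGET_SPACE_ID",
}
```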
    def change_ms_file_space(
        self, ms_data_files: _List, destination_space: str
    ):
        """
        Change the space of MS data files.

        Parameters
        ----------
        ms_data_files : List
            List of MS data files to be moved.
        destination_space : str
            Name of the desired user group.

        Returns
        -------
        List
            List of files that were converted to the new space.
        """
        return self._move_ms_data_files(
            ms_data_files, ms_data_files, destination_space
        )

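Because the wrapper passes the same list as both source and target, only the space changes while the file paths stay put; a sketch with placeholder values:

```python
from core import SeerSDK

seer_sdk = SeerSDK()

# Files stay at the same PAS paths; only their user group (space) changes.
moved = seer_sdk.change_ms_file_space(
    ["folder/file1.raw", "folder/file2.raw"],
    destination_space="MY_SPACE_NAME",
)
print(moved)
```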
    def move_ms_data_files(
        self, source_ms_data_files: _List, target_ms_data_files: _List
    ):
        """
        Move MS data files from one PAS file location to another. Space will be unchanged.

        Parameters
        ----------
        source_ms_data_files : List
            List of file paths of the MS data files to be moved.
        target_ms_data_files : List
            List of destination file paths. Should be indexed one to one with the source ms data files list.

        Returns
        -------
        List
            List of files that were moved.
        """
        return self._move_ms_data_files(
            source_ms_data_files, target_ms_data_files
        )

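A sketch of a same-space folder move, subject to the one-source-folder and one-target-folder constraint enforced in `_move_ms_data_files` (placeholder paths):

```python
from core import SeerSDK

seer_sdk = SeerSDK()

# All sources must share one folder and all targets another; the two lists
# are paired index by index.
moved = seer_sdk.move_ms_data_files(
    ["old/folder/file1.raw", "old/folder/file2.raw"],
    ["new/folder/file1.raw", "new/folder/file2.raw"],
)
```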
    def download_analysis_files(
        self, analysis_id: str, download_path: str = "", file_name: str = ""
    ):
        """
        Download a specific analysis file from the backend given an `analysis_id` to the specified `download_path`.

        If no `download_path` is specified, the file will be downloaded to the current working directory.

        If no `file_name` is specified, all files for the analysis will be downloaded.

        Parameters
        ----------
        analysis_id : str
            ID of the analysis to download.
        download_path : str, optional
            Path to download the analysis file to, defaulted to the current working directory.
        file_name : str, optional
            Name of the analysis file to download, defaulted to None.

        Returns
        -------
        dict
            Message containing whether the file was downloaded or not.

        Examples
        -------
        >>> from core import SeerSDK
        >>> sdk = SeerSDK()
        >>> sdk.download_analysis_files("analysis_id", "/path/to/download")
        >>> Downloading EXP22006_2022ms0031bX25_B_BA4_1_4768/diann.log
            Finished downloading EXP22006_2022ms0031bX25_B_BA4_1_4768/diann.log

            Downloading EXP20004_2020ms0007X11_A.mzML.quant
            Finished downloading EXP20004_2020ms0007X11_A.mzML.quant

            Downloading EXP20004_2020ms0007X11_A/0714-diann181-libfree-mbr.json
            Finished downloading EXP20004_2020ms0007X11_A/0714-diann181-libfree-mbr.json

            Downloading EXP20004_2020ms0007X11_A/diann.log
            Finished downloading EXP20004_2020ms0007X11_A/diann.log
        >>> { "message": "File downloaded successfully." }
        """

        def get_url(analysis_id, file_name, project_id):
            URL = f"{self._auth.url}api/v1/analysisResultFiles/getUrl"

            with self._get_auth_session() as s:

                download_url = s.post(
                    URL,
                    json={
                        "analysisId": analysis_id,
                        "filename": file_name,
                        "projectId": project_id,
                    },
                )

                if download_url.status_code != 200:
                    raise ValueError(
                        "Could not download file. Please check if the analysis ID is valid or the backend is running."
                    )

                return download_url.json()["url"]

        if not analysis_id:
            raise ValueError("Analysis ID cannot be empty.")

        try:
            valid_analysis = self.get_analyses(analysis_id)[0]
        except:
            raise ValueError(
                "Invalid analysis ID. Please check if the analysis ID is valid or the backend is running."
            )

        project_id = valid_analysis["project_id"]

        if not download_path:
            download_path = os.getcwd()
            print("\nDownload path not specified.\n")

        if not os.path.isdir(download_path):
            print(
                f'\nThe path "{download_path}" you specified does not exist, was either invalid or not absolute.\n'
            )
            download_path = os.getcwd()

        name = f"{download_path}/downloads/{analysis_id}"

        if not os.path.exists(name):
            os.makedirs(name)

        URL = f"{self._auth.url}api/v1/analysisResultFiles"

        with self._get_auth_session() as s:

            analysis_files = s.get(f"{URL}/{analysis_id}")

            if analysis_files.status_code != 200:
                raise ValueError(
                    "Invalid request. Please check if the analysis ID is valid or the backend is running."
                )

            res = analysis_files.json()

            if file_name:
                filenames = set([file["filename"] for file in res])

                if file_name not in filenames:
                    raise ValueError(
                        "Invalid file name. Please check if the file name is correct."
                    )

                res = [file for file in res if file["filename"] == file_name]

            print(f'Downloading files to "{name}"\n')

            for file in res:
                filename = file["filename"]
                url = get_url(analysis_id, filename, project_id)

                print(f"Downloading {filename}")

                for _ in range(2):
                    try:
                        with tqdm(
                            unit="B",
                            unit_scale=True,
                            unit_divisor=1024,
                            miniters=1,
                            desc="Progress",
                        ) as t:
                            ssl._create_default_https_context = (
                                ssl._create_unverified_context
                            )
                            urllib.request.urlretrieve(
                                url,
                                f"{name}/{filename}",
                                reporthook=download_hook(t),
                                data=None,
                            )
                            break
                    except:
                        # If the filename contains subfolders, create them
                        # locally and retry the download once.
                        filename = filename.split("/")
                        name += "/" + "/".join(
                            [filename[i] for i in range(len(filename) - 1)]
                        )
                        filename = filename[-1]
                        if not os.path.isdir(f"{name}/{filename}"):
                            os.makedirs(f"{name}/")

                else:
                    raise ValueError(
                        "Your download failed. Please check if the backend is still running."
                    )

                print(f"Finished downloading {filename}\n")

            return {
                "message": f"Files downloaded successfully to '{download_path}/downloads/{analysis_id}'"
            }

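Passing `file_name` narrows the download to a single result file, validated against the analysis's file list; a sketch with placeholder IDs (the result file name here is hypothetical):

```python
from core import SeerSDK

sdk = SeerSDK()

# Only the named result file is fetched; an unknown name raises ValueError.
sdk.download_analysis_files(
    analysis_id="ANALYSIS_ID",
    download_path="/path/to/download",
    file_name="report.tsv",  # hypothetical result file name
)
```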
|
|
1262
|
+
def link_plate(
|
|
1263
|
+
self,
|
|
1264
|
+
ms_data_files: _List[str],
|
|
1265
|
+
plate_map_file: str,
|
|
1266
|
+
plate_id: str,
|
|
1267
|
+
plate_name: str,
|
|
1268
|
+
sample_description_file: str = None,
|
|
1269
|
+
space: str = None,
|
|
1270
|
+
):
|
|
1271
|
+
"""
|
|
1272
|
+
Links existing MS data files to user uploaded files to create a new plate.
|
|
1273
|
+
|
|
1274
|
+
Parameters
|
|
1275
|
+
----------
|
|
1276
|
+
ms_data_files : list[str]
|
|
1277
|
+
Path to MS data files on the PAS backend or S3 bucket.
|
|
1278
|
+
plate_map_file : str
|
|
1279
|
+
Path to the plate map file to be linked.
|
|
1280
|
+
plate_id : str
|
|
1281
|
+
ID of the plate to be linked.
|
|
1282
|
+
plate_name : str
|
|
1283
|
+
Name of the plate to be linked.
|
|
1284
|
+
sample_description_file : str, optional
|
|
1285
|
+
Path to the sample description file to be linked, defaulted to None.
|
|
1286
|
+
space : str, optional
|
|
1287
|
+
ID of the user group to which the files belongs, defaulted to None.
|
|
1288
|
+
|
|
1289
|
+
Returns
|
|
1290
|
+
-------
|
|
1291
|
+
dict
|
|
1292
|
+
Contains the message whether the plate was created or not.
|
|
1293
|
+
|
|
1294
|
+
Examples
|
|
1295
|
+
-------
|
|
1296
|
+
>>> from core import SeerSDK
|
|
1297
|
+
>>> sdk = SeerSDK()
|
|
1298
|
+
>>> sdk.link_plate(["/path/to/file1", "/path/to/file2"], "/path/to/plate_map_file", "plate_id", "plate_name")
|
|
1299
|
+
>>> { "message": "Plate generated with id: 'plate_id'" }
|
|
1300
|
+
"""
|
|
1301
|
+
|
|
1302
|
+
plate_ids = (
|
|
1303
|
+
set()
|
|
1304
|
+
) # contains all the plate_ids fetched from self.get_plate_metadata()
|
|
1305
|
+
files = [] # to be uploaded to sync frontend
|
|
1306
|
+
samples = [] # list of all the sample responses from the backend
|
|
1307
|
+
id_uuid = "" # uuid for the plate id
|
|
1308
|
+
raw_file_paths = {} # list of all the AWS raw file paths
|
|
1309
|
+
s3_upload_path = None
|
|
1310
|
+
s3_bucket = ""
|
|
1311
|
+
ms_data_file_names = []
|
|
1312
|
+
dir_exists = (
|
|
1313
|
+
True # flag to check if the generated_files directory exists
|
|
1314
|
+
)
|
|
1315
|
+
|
|
1316
|
+
# Step 0: Check if the file paths exist in the S3 bucket.
|
|
1317
|
+
for file in ms_data_files:
|
|
1318
|
+
if not self.list_ms_data_files(file):
|
|
1319
|
+
raise ValueError(
|
|
1320
|
+
f"File '{file}' does not exist. Please check your parameters."
|
|
1321
|
+
)
|
|
1322
|
+
|
|
1323
|
+
if sample_description_file and not os.path.exists(
|
|
1324
|
+
sample_description_file
|
|
1325
|
+
):
|
|
1326
|
+
raise ValueError(
|
|
1327
|
+
f"File path '{sample_description_file}' is invalid. Please check your parameters."
|
|
1328
|
+
)
|
|
1329
|
+
|
|
1330
|
+
# Validate plate id, plate name as entity names
|
|
1331
|
+
# Enforcing this on the SDK level to prevent the creation of empty records before the backend validation
|
|
1332
|
+
if not entity_name_ruler(plate_id):
|
|
1333
|
+
raise ValueError("Plate ID contains unsupported characters.")
|
|
1334
|
+
|
|
1335
|
+
if not entity_name_ruler(plate_name):
|
|
1336
|
+
raise ValueError("Plate Name contains unsupported characters.")
|
|
1337
|
+
|
|
1338
|
+
# Validate plate map
|
|
1339
|
+
if isinstance(plate_map_file, PlateMap):
|
|
1340
|
+
plate_map_data = plate_map_file.to_df()
|
|
1341
|
+
else:
|
|
1342
|
+
plate_map_data = pd.read_csv(plate_map_file)
|
|
1343
|
+
|
|
1344
|
+
validate_plate_map(plate_map_data, ms_data_files)
|
|
1345
|
+
|
|
1346
|
+
# Step 1: Check for duplicates in the user-inputted plate id. Populates `plate_ids` set.
|
|
1347
|
+
with self._get_auth_session() as s:
|
|
1348
|
+
plate_response = s.get(f"{self._auth.url}api/v1/plateids")
|
|
1349
|
+
|
|
1350
|
+
if plate_response.status_code != 200:
|
|
1351
|
+
raise ServerError(
|
|
1352
|
+
"Failed to fetch plate ids from the server. Please check your connection and reauthenticate."
|
|
1353
|
+
)
|
|
1354
|
+
|
|
1355
|
+
plate_ids = set(plate_response.json()["data"])
|
|
1356
|
+
|
|
1357
|
+
if not plate_ids:
|
|
1358
|
+
raise ServerError(
|
|
1359
|
+
"No plate ids returned from the server. Please reattempt."
|
|
1360
|
+
)
|
|
1361
|
+
|
|
1362
|
+
# Step 2: Fetch the UUID that needs to be passed into the backend from `/api/v1/plates` to fetch the AWS upload config and raw file path. This will sync the plates backend with samples when the user uploads later. This UUID will also be void of duplicates since duplication is handled by the backend.
|
|
1363
|
+
|
|
1364
|
+
with self._get_auth_session() as s:
|
|
1365
|
+
plate_response = s.post(
|
|
1366
|
+
f"{self._auth.url}api/v1/plates",
|
|
1367
|
+
json={
|
|
1368
|
+
"plateId": plate_id,
|
|
1369
|
+
"plateName": plate_name,
|
|
1370
|
+
"plateUserGroup": space,
|
|
1371
|
+
},
|
|
1372
|
+
)
|
|
1373
|
+
|
|
1374
|
+
if plate_response.status_code != 200:
|
|
1375
|
+
raise ServerError(
|
|
1376
|
+
"Failed to connect to the server. Please check your connection and reauthenticate."
|
|
1377
|
+
)
|
|
1378
|
+
|
|
1379
|
+
id_uuid = plate_response.json()["id"]
|
|
1380
|
+
|
|
1381
|
+
if not id_uuid:
|
|
1382
|
+
raise ServerError(
|
|
1383
|
+
"Failed to fetch a UUID from the server. Please check your connection and reauthenticate."
|
|
1384
|
+
)
|
|
1385
|
+
|
|
1386
|
+
# Step 3: Fetch AWS upload config from the backend with the plateId we just generated. Populates `s3_upload_path` and `s3_bucket` global variables.
|
|
1387
|
+
with self._get_auth_session() as s:
|
|
1388
|
+
config_response = s.post(
|
|
1389
|
+
f"{self._auth.url}api/v1/msdatas/getuploadconfig",
|
|
1390
|
+
json={"plateId": id_uuid},
|
|
1391
|
+
)
|
|
1392
|
+
|
|
1393
|
+
if (
|
|
1394
|
+
config_response.status_code != 200
|
|
1395
|
+
or not config_response.json()
|
|
1396
|
+
):
|
|
1397
|
+
raise ServerError(
|
|
1398
|
+
"Failed to fetch AWS upload config for the plate. Please check your connection and reauthenticate."
|
|
1399
|
+
)
|
|
1400
|
+
|
|
1401
|
+
if "s3Bucket" not in config_response.json():
|
|
1402
|
+
raise ServerError(
|
|
1403
|
+
"Failed to fetch the S3 bucket from AWS. Please check your connection and reauthenticate."
|
|
1404
|
+
)
|
|
1405
|
+
elif "s3UploadPath" not in config_response.json():
|
|
1406
|
+
raise ServerError(
|
|
1407
|
+
"Failed to fetch the S3 upload path from AWS. Please check your connection and reauthenticate."
|
|
1408
|
+
)
|
|
1409
|
+
|
|
1410
|
+
s3_bucket = config_response.json()["s3Bucket"]
|
|
1411
|
+
s3_upload_path = config_response.json()["s3UploadPath"]
|
|
1412
|
+
|
|
1413
|
+
with self._get_auth_session() as s:
|
|
1414
|
+
config_response = s.get(
|
|
1415
|
+
f"{self._auth.url}auth/getawscredential",
|
|
1416
|
+
)
|
|
1417
|
+
|
|
1418
|
+
if (
|
|
1419
|
+
config_response.status_code != 200
|
|
1420
|
+
or not config_response.json()
|
|
1421
|
+
):
|
|
1422
|
+
raise ServerError(
|
|
1423
|
+
"Failed to fetch credentials. Please check your connection and reauthenticate."
|
|
1424
|
+
)
|
|
1425
|
+
|
|
1426
|
+
if "S3Bucket" not in config_response.json()["credentials"]:
|
|
1427
|
+
raise ServerError(
|
|
1428
|
+
"Failed to fetch data from AWS. Please check your connection and reauthenticate."
|
|
1429
|
+
)
|
|
1430
|
+
|
|
1431
|
+
credentials = config_response.json()["credentials"]
|
|
1432
|
+
        # Step 4: Upload the plate map file to the S3 bucket.
        if isinstance(plate_map_file, PlateMap):
            plate_map_file_name = f"plateMap_{id_uuid}.csv"

            if not os.path.exists("generated_files"):
                # `dir_exists` is presumably initialized earlier in this
                # method; it records that the scratch directory was created
                # here rather than pre-existing.
                dir_exists = False
                os.makedirs("generated_files")

            plate_map_file.to_csv(f"generated_files/{plate_map_file_name}")
            plate_map_file = f"generated_files/{plate_map_file_name}"

        else:
            plate_map_file_name = os.path.basename(plate_map_file)

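        # Upload the plate map CSV to the plate's S3 upload path using the
        # credentials fetched above; `upload_file` returns a falsy value on
        # failure.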
        res = upload_file(
            plate_map_file,
            s3_bucket,
            credentials,
            f"{s3_upload_path}{plate_map_file_name}",
        )

        if not res:
            raise ServerError(
                "Failed to upload plate map to AWS. Please check your connection and reauthenticate."
            )

        with self._get_auth_session() as s:
            plate_map_response = s.post(
                f"{self._auth.url}api/v1/msdataindex/file",
                json={
                    "files": [
                        {
                            "filePath": f"{s3_upload_path}{plate_map_file_name}",
                            "fileSize": os.stat(plate_map_file).st_size,
                            "userGroupId": space,
                        }
                    ]
                },
            )

            if (
                plate_map_response.status_code != 200
                or not plate_map_response.json()
                or "created" not in plate_map_response.json()
            ):
                raise ServerError(
                    "Failed to register the plate map file with PAS. Please check your connection and reauthenticate."
                )

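        # NOTE: `ms_data_files` is expected to hold the display paths of raw
        # MS files already present in PAS; `_get_msdataindex_path` (defined
        # below) resolves each display path to its raw cloud path.
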
        # Step 5: Populate `raw_file_paths` for sample upload.
        raw_file_paths = self._get_msdataindex_path(ms_data_files)

        # Step 5.5: Trim display paths to their basenames so they align with
        # the file names referenced in the plate map.
        raw_file_paths = {
            os.path.basename(k): v for k, v in raw_file_paths.items()
        }

        # Step 6: Get the sample info from the plate map file; each entry is
        # then POSTed to `/api/v1/samples` (via `_add_sample`), which returns
        # the plateId, sampleId and sampleName for each sample in the plate
        # map file. Also validate and upload the sample_description_file if
        # one was provided.
        sample_info = get_sample_info(
            id_uuid,
            plate_map_file,
            space,
            sample_description_file,
        )
        if sample_description_file:
            sdf_upload = upload_file(
                sample_description_file,
                s3_bucket,
                credentials,
                f"{s3_upload_path}{os.path.basename(sample_description_file)}",
            )

            if not sdf_upload:
                raise ValueError(
                    "Failed to upload sample description file to AWS. Please check your connection and reauthenticate."
                )

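            # Register the uploaded sample description file in the PAS file
            # index, mirroring the plate map registration above.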
            with self._get_auth_session() as s:
                sdf_response = s.post(
                    f"{self._auth.url}api/v1/msdataindex/file",
                    json={
                        "files": [
                            {
                                "filePath": f"{s3_upload_path}{os.path.basename(sample_description_file)}",
                                "fileSize": os.stat(
                                    sample_description_file
                                ).st_size,
                                "userGroupId": space,
                            }
                        ]
                    },
                )

                if (
                    sdf_response.status_code != 200
                    or not sdf_response.json()
                    or "created" not in sdf_response.json()
                ):
                    raise ServerError(
                        "Failed to upload sample description file to PAS DB. Please check your connection and reauthenticate."
                    )

        for entry in sample_info:
            sample = self._add_sample(entry)
            samples.append(sample)

        # Step 7: Parse the plate map file and convert the data into a form
        # that can be POSTed to `/api/v1/msdatas`.
        plate_map_data = parse_plate_map_file(
            plate_map_file, samples, raw_file_paths, space
        )

        # Step 8: Make a request to `/api/v1/msdatas/batch` with the
        # processed sample data.
        with self._get_auth_session() as s:
            ms_data_response = s.post(
                f"{self._auth.url}api/v1/msdatas/batch",
                json={"msdatas": plate_map_data},
            )
            if ms_data_response.status_code != 200:
                raise ServerError(
                    "Failed to add samples to plate in PAS. Please check your connection and reauthenticate."
                )

        print(f"Plate generated with id: '{id_uuid}'")
        return id_uuid

    def _get_msdataindex_metadata(self, folder=""):
        """
        Get file metadata from the MS data index for the caller's tenant,
        optionally restricted to a single folder.

        Raises:
            ServerError: Could not fetch metadata for the file(s).

        Returns:
            dict: A dictionary containing the metadata for the file(s).
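
        Examples
        -------
        >>> # Folder name and values are illustrative placeholders.
        >>> seer_sdk._get_msdataindex_metadata(folder="my_folder")
        >>> {
            "data": [
                {
                    "key": "TENANT_ID_HERE/my_folder/file1.raw",
                    "rawFilePath": "RAW_FILE_PATH_HERE",
                    ...
                }
            ]
        }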
        """
        URL = f"{self._auth.url}api/v2/msdataindex/getmetadata"
        with self._get_auth_session() as s:
            params = {"all": "true"}
            if folder:
                # Folder keys are tenant-scoped, so prefix the folder with
                # the tenant id decoded from the (unverified) JWT access
                # token.
                tenant_id = jwt.decode(
                    self._auth.get_token()[0],
                    options={"verify_signature": False},
                )["custom:tenantId"]
                params["folderKey"] = f"{tenant_id}/{folder}"
                print(params["folderKey"])

            metadata = s.get(URL, params=params)

            if metadata.status_code != 200:
                print(metadata.text)
                raise ServerError("Could not fetch metadata for file.")

            return metadata.json()

    def _get_msdataindex_path(self, display_path: list):
        """
        Get the underlying cloud file path for each display path.

        Args:
            display_path (list): A list of file paths as displayed on PAS.

        Returns:
            dict: A dictionary mapping each display path to its raw file path.
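
        Examples
        -------
        >>> # The display path is an illustrative placeholder.
        >>> seer_sdk._get_msdataindex_path(["my_folder/file1.raw"])
        >>> {
            "my_folder/file1.raw": "RAW_FILE_PATH_HERE"
        }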
        """

        tenant_id = jwt.decode(
            self._auth.get_token()[0], options={"verify_signature": False}
        )["custom:tenantId"]
        result = {}
        # Partition the display paths by folder path.
        folder_partitions = {os.path.dirname(x): [] for x in display_path}
        for path in display_path:
            folder_partitions[os.path.dirname(path)].append(path)

        success = True
        missing_data_files = []
        # For every unique folder in the set of MS files, fetch the metadata.
        for folder_path in folder_partitions:
            try:
                metadata = {
                    x["key"]: x["rawFilePath"]
                    for x in self._get_msdataindex_metadata(
                        folder=folder_path
                    )["data"]
                }
            except Exception:
                # If the metadata fetch fails, skip the folder.
                continue

            for path in folder_partitions[folder_path]:
                key = f"{tenant_id}/{path}"
                if key not in metadata:
                    # Record the missing file and continue; a single
                    # ValueError listing all misses is raised below.
                    success = False
                    missing_data_files.append(path)
                    continue
                result[path] = metadata[key]

        if not success:
            raise ValueError(
                f"Could not fetch metadata for the following files: {missing_data_files}"
            )

        return result