seer-pas-sdk 0.1.3__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1634 @@
+ """
+ seer_pas_sdk.core.unsupported -- in development
+ """
+ 
+ from tqdm import tqdm
+ 
+ import os
+ import jwt
+ import requests
+ import urllib.request
+ import ssl
+ import shutil
+ 
+ import pandas as pd  # needed for pd.read_csv below (may also be re-exported by ..common)
+ 
+ from typing import List as _List
+ 
+ from ..common import *
+ from ..auth import Auth
+ from ..objects import PlateMap
+ 
+ from .sdk import SeerSDK as _SeerSDK
+ 
+ 
+ class _UnsupportedSDK(_SeerSDK):
+     """
+     **************
+     [UNEXPOSED MODULE]
+     **************
+ 
+     This module is currently not supported and should be considered unstable. Use at your own risk.
+     """
+ 
+     def _add_sample(self, sample_entry: dict):
+         """
+         ****************
+         [UNEXPOSED METHOD CALL]
+         ****************
+         Add a sample given a sample entry containing a plate ID, sample ID, sample name and, optionally, a space.
+ 
+         Parameters
+         ----------
+         sample_entry: dict
+             A dictionary containing all keys and values for the sample entry. These may or may not have been inferred from the sample description file.
+ 
+         Returns
+         -------
+         dict
+             The response from the backend.
+ 
+         Examples
+         --------
+         >>> from core import SeerSDK
+         >>> seer_sdk = SeerSDK()
+         >>> seer_sdk._add_sample({"plateID": "YOUR_PLATE_ID_HERE", "sampleID": "YOUR_SAMPLE_ID_HERE", "sampleName": "YOUR_SAMPLE_NAME_HERE"})
+         >>> {
+                 "id": "SAMPLE_ID_HERE",
+                 "tenant_id": "TENANT_ID_HERE",
+                 "plate_id": "PLATE_ID_HERE",
+                 "sample_name": "SAMPLE_NAME_HERE",
+                 "sample_type": "SAMPLE_TYPE_HERE",
+                 "species": "Human",
+                 "description": None,
+                 ...
+             }
+         """
+         for key in ["plateID", "sampleID", "sampleName"]:
+             if key not in sample_entry:
+                 raise ValueError(
+                     f"{key} is missing. Please check your parameters again."
+                 )
+ 
+         URL = f"{self._auth.url}api/v1/samples"
+ 
+         with self._get_auth_session() as s:
+ 
+             response = s.post(URL, json=sample_entry)
+ 
+             if response.status_code != 200:
+                 raise ValueError(
+                     "Invalid request. Please check your parameters."
+                 )
+ 
+             return response.json()
+ 
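For orientation, a minimal sketch of the dictionary `_add_sample` expects; the required keys come from the validation above, while the ID values and the `seer_sdk` handle are placeholders:

    entry = {
        "plateID": "PLATE_UUID_HERE",  # backend UUID of an existing plate (placeholder)
        "sampleID": "S1",              # placeholder sample ID
        "sampleName": "Sample 1",
    }
    created = seer_sdk._add_sample(entry)  # returns the backend's JSON record
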
+     # Add samples in batch
+     def _add_samples(self, sample_info: list):
+         """
+         ****************
+         [UNEXPOSED METHOD CALL]
+         ****************
+         Add samples in batch given a list of sample entries.
+ 
+         Parameters
+         ----------
+         sample_info: list
+             A list of dictionaries containing all keys and values for the sample entries. These may or may not have been inferred from the sample description file.
+             Required keys: ["plateID", "sampleID", "sampleName"]
+ 
+         Returns
+         -------
+         dict
+             The response from the backend.
+         """
+         # Validate keys in samples
+         for sample in sample_info:
+             if not all(
+                 key in sample for key in ["plateID", "sampleID", "sampleName"]
+             ):
+                 raise ValueError(
+                     f"Invalid sample entry for sample {sample}. Please check your parameters again."
+                 )
+ 
+         URL = f"{self._auth.url}api/v1/samples/batch"
+ 
+         with self._get_auth_session() as s:
+             response = s.post(URL, json={"samples": sample_info})
+ 
+             if response.status_code != 200:
+                 raise ValueError(
+                     "Invalid request. Please check your parameters."
+                 )
+ 
+             return response.json()
+ 
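And the batch equivalent, assuming the same placeholder plate UUID; this issues a single POST to api/v1/samples/batch instead of one request per sample:

    entries = [
        {"plateID": "PLATE_UUID_HERE", "sampleID": f"S{i}", "sampleName": f"Sample {i}"}
        for i in range(1, 4)
    ]
    created = seer_sdk._add_samples(entries)
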
+     def add_project(
+         self,
+         project_name: str,
+         plate_ids: _List[str],
+         description: str = None,
+         notes: str = None,
+         space: str = None,
+     ):
+         """
+         Creates a new project with a given project_name and a plate_ids list.
+ 
+         Parameters
+         ----------
+         project_name : str
+             Name of the project.
+         plate_ids : list[str]
+             List of plate ids to be added to the project.
+         description : str, optional
+             Description of the project.
+         notes : str, optional
+             Notes for the project.
+         space : str, optional
+             User group id of the project. Defaults to the user group id of the user who is creating the project (i.e. None).
+ 
+         Returns
+         -------
+         res: dict
+             A dictionary containing the status of the request if succeeded.
+ 
+         Examples
+         --------
+         >>> from core import SeerSDK
+         >>> seer_sdk = SeerSDK()
+         >>> seer_sdk.add_project("YOUR_PROJECT_NAME_HERE", ["PLATE_ID_1", "PLATE_ID_2"])
+         >>> {
+                 "status": "Project started with id = PROJECT_ID"
+             }
+         """
+ 
+         if not project_name:
+             raise ValueError("Project name cannot be empty.")
+ 
+         all_plate_ids = set(
+             [plate["id"] for plate in self.get_plate_metadata()]
+         )
+ 
+         for plate_id in plate_ids:
+             if plate_id not in all_plate_ids:
+                 raise ValueError(
+                     f"Plate ID '{plate_id}' is not valid. Please check your parameters again."
+                 )
+ 
+         URL = f"{self._auth.url}api/v1/projects"
+ 
+         with self._get_auth_session() as s:
+ 
+             project = s.post(
+                 URL,
+                 json={
+                     "projectName": project_name,
+                     "plateIDs": plate_ids,
+                     "notes": notes,
+                     "description": description,
+                     "projectUserGroup": space,
+                 },
+             )
+ 
+             if project.status_code != 200:
+                 raise ValueError(
+                     "Invalid request. Please check your parameters."
+                 )
+ 
+             res = {
+                 "status": f"Project started with id = {project.json()['id']}"
+             }
+ 
+             return res
+ 
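A sketch of the call with the optional fields spelled out; the plate IDs are placeholders and must already exist, since the method validates them against get_plate_metadata() first:

    res = seer_sdk.add_project(
        project_name="Plasma pilot",             # placeholder name
        plate_ids=["PLATE_ID_1", "PLATE_ID_2"],
        description="Pilot cohort",
        notes=None,
        space=None,                              # None keeps the creator's default user group
    )
    print(res["status"])                         # "Project started with id = ..."
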
+     def add_samples_to_project(self, samples: _List[str], project_id: str):
+         """
+         Add samples to a project given a list of sample ids and a project id.
+ 
+         Parameters
+         ----------
+         samples : list[str]
+             List of sample ids to be added to the project.
+         project_id : str
+             ID of the project to which the samples are to be added.
+ 
+         Returns
+         -------
+         res : dict
+             A dictionary containing the status of the request if succeeded.
+ 
+         Examples
+         --------
+         >>> from core import SeerSDK
+         >>> seer_sdk = SeerSDK()
+         >>> seer_sdk.add_samples_to_project(["SAMPLE_ID_1", "SAMPLE_ID_2"], "PROJECT_ID")
+         >>> {
+                 "status": "Samples added to PROJECT_ID"
+             }
+         """
+         if not project_id:
+             raise ValueError("Project ID cannot be empty.")
+ 
+         if not samples:
+             raise ValueError("Samples cannot be empty.")
+ 
+         URL = f"{self._auth.url}api/v1/addSamplesToProject/{project_id}"
+ 
+         with self._get_auth_session() as s:
+ 
+             response = s.put(
+                 URL,
+                 json={
+                     "sampleIDs": samples,
+                 },
+             )
+ 
+             if response.status_code != 200:
+                 raise ValueError(
+                     "Invalid request. Please check your parameters."
+                 )
+ 
+             res = {"status": f"Samples added to {project_id}"}
+             return res
+ 
+     def add_plates_to_project(self, plates: _List[str], project_id: str):
+         """
+         Add plates to a project given a list of plate ids and a project id.
+ 
+         Parameters
+         ----------
+         plates : list[str]
+             List of plate ids to be added to the project.
+         project_id : str
+             ID of the project to which the plates are to be added.
+ 
+         Returns
+         -------
+         res : dict
+             A dictionary containing the status of the request if succeeded.
+         """
+ 
+         if not project_id:
+             raise ValueError("Project ID cannot be empty.")
+ 
+         if not plates:
+             raise ValueError("Plates cannot be empty.")
+ 
+         # Gather the sample ids for every plate, then delegate to add_samples_to_project.
+         samples = (
+             x["id"]
+             for plate_id in plates
+             for x in self.get_samples_metadata(plate_id=plate_id)
+         )
+ 
+         return self.add_samples_to_project(
+             project_id=project_id, samples=list(samples)
+         )
+ 
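Because this wrapper just flattens plate membership into sample IDs, a usage sketch is a single call (placeholder IDs):

    seer_sdk.add_plates_to_project(["PLATE_ID_1", "PLATE_ID_2"], "PROJECT_ID")
    # equivalent to collecting each plate's sample ids via get_samples_metadata()
    # and passing them to add_samples_to_project()
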
+     def add_plate(
+         self,
+         ms_data_files: _List[str],
+         plate_map_file: str,
+         plate_id: str,
+         plate_name: str,
+         sample_description_file: str = None,
+         space: str = None,
+     ):
+         """
+         Add a plate given a list of (existing or new) ms_data_files, a plate_map_file, a plate_id, a plate_name and, optionally, a sample_description_file and a space.
+ 
+         Parameters
+         ----------
+         ms_data_files : list[str]
+             List of ms_data_files.
+         plate_map_file : str or `PlateMap` object
+             The plate map file.
+         plate_id : str
+             The plate ID. Must be unique.
+         plate_name : str
+             The plate name.
+         sample_description_file : str, optional
+             The sample description file. Defaults to None.
+         space : str, optional
+             The space or user group. Defaults to the user group of the user who is creating the plate (i.e. None).
+ 
+         Returns
+         -------
+         id_uuid : str
+             The UUID of the plate.
+ 
+         Examples
+         --------
+         >>> from core import SeerSDK
+         >>> seer_sdk = SeerSDK()
+         >>> seer_sdk.add_plate(["MS_DATA_FILE_1", "MS_DATA_FILE_2"], "PLATE_MAP_FILE", "PLATE_ID", "PLATE_NAME")
+         "9d5b6ab0-5a8c-11ef-8110-dd5cb94025eb"
+         """
+ 
+         plate_ids = (
+             set()
+         )  # contains all the plate ids fetched from the backend
+         files = []  # to be uploaded to sync the frontend
+         samples = []  # list of all the sample responses from the backend
+         id_uuid = ""  # uuid for the plate id
+         raw_file_paths = {}  # map of all the AWS raw file paths
+         s3_upload_path = None
+         s3_bucket = ""
+         dir_exists = (
+             True  # flag to check if the generated_files directory exists
+         )
+ 
+         # Step 0: Check if the file paths are valid.
+         for file in ms_data_files:
+             if not os.path.exists(file):
+                 raise ValueError(
+                     f"File path '{file}' is invalid. Please check your parameters."
+                 )
+ 
+         if isinstance(plate_map_file, str) and not os.path.exists(
+             plate_map_file
+         ):
+             raise ValueError(
+                 f"File path '{plate_map_file}' is invalid. Please check your parameters."
+             )
+ 
+         if sample_description_file and not os.path.exists(
+             sample_description_file
+         ):
+             raise ValueError(
+                 f"File path '{sample_description_file}' is invalid. Please check your parameters."
+             )
+ 
+         # Validate plate id and plate name as entity names.
+         # Enforcing this on the SDK level to prevent the creation of empty records before the backend validation.
+         if not entity_name_ruler(plate_id):
+             raise ValueError("Plate ID contains unsupported characters.")
+ 
+         if not entity_name_ruler(plate_name):
+             raise ValueError("Plate Name contains unsupported characters.")
+ 
+         # Validate the plate map.
+         if isinstance(plate_map_file, PlateMap):
+             plate_map_data = plate_map_file.to_df()
+         else:
+             plate_map_data = pd.read_csv(plate_map_file)
+ 
+         local_file_names = [os.path.basename(x) for x in ms_data_files]
+ 
+         validate_plate_map(plate_map_data, local_file_names)
+ 
+         # Step 1: Check for duplicates in the user-inputted plate id. Populates the `plate_ids` set.
+         with self._get_auth_session() as s:
+             plate_response = s.get(f"{self._auth.url}api/v1/plateids")
+ 
+             if plate_response.status_code != 200:
+                 raise ValueError(
+                     "Failed to fetch plate ids from the server. Please check your connection and reauthenticate."
+                 )
+ 
+             plate_ids = set(plate_response.json()["data"])
+ 
+             if not plate_ids:
+                 raise ValueError(
+                     "No plate ids returned from the server. Please reattempt."
+                 )
+ 
+         # Step 2: Fetch the UUID that needs to be passed into the backend from `/api/v1/plates` to fetch the AWS upload config and raw file path. This will sync the plates backend with samples when the user uploads later. This UUID will also be void of duplicates since duplication is handled by the backend.
+ 
+         with self._get_auth_session() as s:
+             plate_response = s.post(
+                 f"{self._auth.url}api/v1/plates",
+                 json={
+                     "plateId": plate_id,
+                     "plateName": plate_name,
+                     "plateUserGroup": space,
+                 },
+             )
+ 
+             if plate_response.status_code != 200:
+                 raise ValueError(
+                     "Failed to connect to the server. Please check your connection and reauthenticate."
+                 )
+ 
+             id_uuid = plate_response.json()["id"]
+ 
+             if not id_uuid:
+                 raise ValueError(
+                     "Failed to fetch a UUID from the server. Please check your connection and reauthenticate."
+                 )
+ 
+         # Step 3: Fetch the AWS upload config from the backend with the plateId we just generated. Populates the `s3_upload_path` and `s3_bucket` variables.
+         with self._get_auth_session() as s:
+             config_response = s.post(
+                 f"{self._auth.url}api/v1/msdatas/getuploadconfig",
+                 json={"plateId": id_uuid},
+             )
+ 
+             if (
+                 config_response.status_code != 200
+                 or not config_response.json()
+             ):
+                 raise ValueError(
+                     "Failed to fetch AWS upload config for the plate. Please check your connection and reauthenticate."
+                 )
+ 
+             if "s3Bucket" not in config_response.json():
+                 raise ValueError(
+                     "Failed to fetch the S3 bucket from AWS. Please check your connection and reauthenticate."
+                 )
+             elif "s3UploadPath" not in config_response.json():
+                 raise ValueError(
+                     "Failed to fetch the S3 upload path from AWS. Please check your connection and reauthenticate."
+                 )
+ 
+             s3_bucket = config_response.json()["s3Bucket"]
+             s3_upload_path = config_response.json()["s3UploadPath"]
+ 
+         with self._get_auth_session() as s:
+             config_response = s.get(
+                 f"{self._auth.url}auth/getawscredential",
+             )
+ 
+             if (
+                 config_response.status_code != 200
+                 or not config_response.json()
+             ):
+                 raise ValueError(
+                     "Failed to fetch credentials. Please check your connection and reauthenticate."
+                 )
+ 
+             if "S3Bucket" not in config_response.json()["credentials"]:
+                 raise ValueError(
+                     "Failed to fetch data from AWS. Please check your connection and reauthenticate."
+                 )
+ 
+             credentials = config_response.json()["credentials"]
+ 
+         # Step 4: Upload the plate map file to the S3 bucket.
+         if isinstance(plate_map_file, PlateMap):
+             plate_map_file_name = f"plateMap_{id_uuid}.csv"
+ 
+             if not os.path.exists("generated_files"):
+                 dir_exists = False
+                 os.makedirs("generated_files")
+ 
+             plate_map_file.to_csv(f"generated_files/{plate_map_file_name}")
+             plate_map_file = f"generated_files/{plate_map_file_name}"
+ 
+         else:
+             plate_map_file_name = os.path.basename(plate_map_file)
+ 
+         res = upload_file(
+             plate_map_file,
+             s3_bucket,
+             credentials,
+             f"{s3_upload_path}{plate_map_file_name}",
+         )
+ 
+         if not res:
+             raise ValueError(
+                 "Failed to upload plate map to AWS. Please check your connection and reauthenticate."
+             )
+ 
+         with self._get_auth_session() as s:
+             plate_map_response = s.post(
+                 f"{self._auth.url}api/v1/msdataindex/file",
+                 json={
+                     "files": [
+                         {
+                             "filePath": f"{s3_upload_path}{plate_map_file_name}",
+                             "fileSize": os.stat(plate_map_file).st_size,
+                             "userGroupId": space,
+                         }
+                     ]
+                 },
+             )
+ 
+             if (
+                 plate_map_response.status_code != 200
+                 or not plate_map_response.json()
+                 or "created" not in plate_map_response.json()
+             ):
+                 raise ValueError(
+                     "Failed to upload raw files to PAS. Please check your connection and reauthenticate."
+                 )
+ 
+         # Step 5: Populate `raw_file_paths` for the sample upload.
+         for file in ms_data_files:
+             filename = os.path.basename(file)
+             raw_file_paths[filename] = (
+                 f"/{s3_bucket}/{s3_upload_path}{filename}"
+             )
+ 
+         sample_info = get_sample_info(
+             id_uuid,
+             plate_map_file,
+             space,
+             sample_description_file,
+         )
+ 
+         # Step 6: Get sample info from the plate map file and make a call to `/api/v1/samples` with the sample_info. This returns the plateId, sampleId and sampleName for each sample in the plate map file. Also validate and upload the sample_description_file if it exists.
+         if sample_description_file:
+ 
+             sdf_upload = upload_file(
+                 sample_description_file,
+                 s3_bucket,
+                 credentials,
+                 f"{s3_upload_path}{os.path.basename(sample_description_file)}",
+             )
+ 
+             if not sdf_upload:
+                 raise ValueError(
+                     "Failed to upload sample description file to AWS. Please check your connection and reauthenticate."
+                 )
+ 
+             with self._get_auth_session() as s:
+                 sdf_response = s.post(
+                     f"{self._auth.url}api/v1/msdataindex/file",
+                     json={
+                         "files": [
+                             {
+                                 "filePath": f"{s3_upload_path}{os.path.basename(sample_description_file)}",
+                                 "fileSize": os.stat(
+                                     sample_description_file
+                                 ).st_size,
+                                 "userGroupId": space,
+                             }
+                         ]
+                     },
+                 )
+ 
+                 if (
+                     sdf_response.status_code != 200
+                     or not sdf_response.json()
+                     or "created" not in sdf_response.json()
+                 ):
+                     raise ValueError(
+                         "Failed to upload sample description file to PAS DB. Please check your connection and reauthenticate."
+                     )
+ 
+         samples = self._add_samples(sample_info)
+ 
+         # Step 7: Parse the plate map file and convert the data into a form that can be POSTed to `/api/v1/msdatas`.
+         plate_map_data = parse_plate_map_file(
+             plate_map_file, samples, raw_file_paths, space
+         )
+ 
+         # Step 8: Make a request to `/api/v1/msdatas/batch` with the processed samples data.
+         with self._get_auth_session() as s:
+             ms_data_response = s.post(
+                 f"{self._auth.url}api/v1/msdatas/batch",
+                 json={"msdatas": plate_map_data},
+             )
+             if ms_data_response.status_code != 200:
+                 raise ValueError(
+                     "Failed to create samples in PAS. Please check your connection and reauthenticate."
+                 )
+ 
+         # Step 9: Upload each msdata file to the S3 bucket.
+         with self._get_auth_session() as s:
+             config_response = s.get(
+                 f"{self._auth.url}auth/getawscredential",
+             )
+ 
+             if (
+                 config_response.status_code != 200
+                 or not config_response.json()
+             ):
+                 raise ValueError("Could not fetch config for user.")
+ 
+             if "S3Bucket" not in config_response.json()["credentials"]:
+                 raise ValueError(
+                     "Failed to connect to AWS. Please check your connection and reauthenticate."
+                 )
+ 
+             credentials = config_response.json()["credentials"]
+ 
+             os.environ["AWS_ACCESS_KEY_ID"] = credentials["AccessKeyId"]
+             os.environ["AWS_SECRET_ACCESS_KEY"] = credentials[
+                 "SecretAccessKey"
+             ]
+             os.environ["AWS_SESSION_TOKEN"] = credentials["SessionToken"]
+ 
+         for file in ms_data_files:
+             filename = os.path.basename(file)
+             filesize = os.stat(file).st_size
+             res = upload_file(
+                 file, s3_bucket, credentials, f"{s3_upload_path}{filename}"
+             )
+ 
+             if not res:
+                 raise ValueError(
+                     "Failed to upload MS data files to AWS. Please check your connection and reauthenticate."
+                 )
+ 
+             files.append(
+                 {
+                     "filePath": f"{s3_upload_path}{filename}",
+                     "fileSize": filesize,
+                     "userGroupId": space,
+                 }
+             )
+ 
+         # Step 10: Make a call to `api/v1/msdataindex/file` to sync with the frontend. This should only be done after all files have finished uploading, simulating an async "promise"-like scenario in JavaScript.
+         with self._get_auth_session() as s:
+             file_response = s.post(
+                 f"{self._auth.url}api/v1/msdataindex/file",
+                 json={"files": files},
+             )
+ 
+             if (
+                 file_response.status_code != 200
+                 or not file_response.json()
+                 or "created" not in file_response.json()
+             ):
+                 raise ValueError(
+                     "Failed to update PAS MS Files view. Your data has been uploaded."
+                 )
+ 
+         if os.path.exists("generated_files") and not dir_exists:
+             shutil.rmtree("generated_files")
+ 
+         print(f"Plate generated with id: '{id_uuid}'")
+         return id_uuid
+ 
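A sketch of the full flow with a PlateMap object instead of a CSV path; the construction of the PlateMap is elided because its signature lives in seer_pas_sdk.objects, and the file names and IDs below are placeholders:

    pm = ...  # a seer_pas_sdk.objects.PlateMap describing the raw files below
    plate_uuid = seer_sdk.add_plate(
        ms_data_files=["run01.raw", "run02.raw"],  # local paths, checked in Step 0
        plate_map_file=pm,                         # a CSV path works here too
        plate_id="PLT-001",                        # placeholder; must be unique
        plate_name="Pilot plate",
    )
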
+     def start_analysis(
+         self,
+         name: str,
+         project_id: str,
+         sample_ids: list = None,
+         analysis_protocol_name: str = None,
+         analysis_protocol_id: str = None,
+         notes: str = "",
+         description: str = "",
+         space: str = None,
+         filter: str = None,
+     ):
+         """
+         Given a name, an analysis protocol and a project_id, creates a new analysis for the authenticated user.
+ 
+         Parameters
+         ----------
+         name : str
+             Name of the analysis.
+ 
+         project_id : str
+             ID of the project to which the analysis belongs. Can be fetched using the get_project_metadata() function.
+ 
+         sample_ids : list[str], optional
+             List of sample IDs to be used for the analysis. Should be omitted if the analysis is to be run with all samples.
+ 
+         analysis_protocol_name : str, optional
+             Name of the analysis protocol to be used for the analysis. Can be fetched using the get_analysis_protocols() function. Should be omitted if analysis_protocol_id is provided.
+ 
+         analysis_protocol_id : str, optional
+             ID of the analysis protocol to be used for the analysis. Can be fetched using the get_analysis_protocols() function. Should be omitted if analysis_protocol_name is provided.
+ 
+         notes : str, optional
+             Notes for the analysis, defaulted to an empty string.
+ 
+         description : str, optional
+             Description for the analysis, defaulted to an empty string.
+ 
+         space : str, optional
+             ID of the user group to which the analysis belongs, defaulted to None.
+ 
+         filter : str, optional
+             Filter to be applied to the samples, defaulted to None. Acceptable values are 'sample', 'control', or None.
+ 
+         Returns
+         -------
+         dict
+             Contains a message stating whether the analysis was started or not.
+ 
+         Examples
+         --------
+         >>> from core import SeerSDK
+         >>> seer_sdk = SeerSDK()
+         >>> seer_sdk.start_analysis("YOUR_ANALYSIS_NAME_HERE", "YOUR_PROJECT_ID_HERE", analysis_protocol_id="YOUR_ANALYSIS_PROTOCOL_ID_HERE")
+         >>> { "message": "Analysis has been started successfully" }
+         """
+ 
+         if not name:
+             raise ValueError("Analysis name cannot be empty.")
+ 
+         if not project_id:
+             raise ValueError("Project ID cannot be empty.")
+ 
+         if not analysis_protocol_id and analysis_protocol_name:
+             valid_analysis_protocol = self.get_analysis_protocols(
+                 analysis_protocol_name=analysis_protocol_name
+             )
+ 
+             if not valid_analysis_protocol:
+                 raise ValueError(
+                     f"Analysis protocol not found with name {analysis_protocol_name}."
+                 )
+ 
+             analysis_protocol_id = valid_analysis_protocol[0]["id"]
+ 
+         if analysis_protocol_id and not analysis_protocol_name:
+             valid_analysis_protocol = self.get_analysis_protocols(
+                 analysis_protocol_id=analysis_protocol_id
+             )
+ 
+             if not valid_analysis_protocol:
+                 raise ValueError(
+                     f"Analysis protocol not found with ID {analysis_protocol_id}."
+                 )
+ 
+         if not analysis_protocol_id and not analysis_protocol_name:
+             raise ValueError(
+                 "You must specify either analysis protocol ID or analysis protocol name."
+             )
+ 
+         if sample_ids:
+             valid_ids = [
+                 entry["id"]
+                 for entry in self.get_samples_metadata(project_id=project_id)
+             ]
+ 
+             for sample_id in sample_ids:
+                 if sample_id not in valid_ids:
+                     raise ValueError(
+                         f"Sample ID '{sample_id}' is either not valid or not associated with the project."
+                     )
+         if filter:
+             sample_ids = self._filter_samples_metadata(
+                 project_id, filter, sample_ids
+             )
+ 
+         URL = f"{self._auth.url}api/v1/analyze"
+ 
+         with self._get_auth_session() as s:
+             req_payload = {
+                 "analysisName": name,
+                 "analysisProtocolId": analysis_protocol_id,
+                 "projectId": project_id,
+                 "notes": notes,
+                 "description": description,
+                 "userGroupId": space,
+             }
+ 
+             if sample_ids:
+                 sample_ids = ",".join(sample_ids)
+                 req_payload["selectedSampleIDs"] = sample_ids
+ 
+             analysis = s.post(URL, json=req_payload)
+ 
+             if analysis.status_code != 200:
+                 raise ValueError(
+                     "Failed to start analysis. Please check your connection."
+                 )
+ 
+             # The analysis id is not contained in the response.
+             return analysis.json()
+ 
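A hedged sketch that resolves the protocol by name and restricts the run to control samples via `filter`; all IDs and names here are placeholders:

    seer_sdk.start_analysis(
        name="DIA reprocessing",
        project_id="PROJECT_ID",
        analysis_protocol_name="PROTOCOL_NAME",  # or pass analysis_protocol_id instead
        filter="control",                        # 'sample', 'control', or None
    )
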
+     def upload_ms_data_files(
+         self,
+         ms_data_files: list,
+         path: str,
+         space: str = None,
+         filenames: list = None,
+     ):
+         """
+         Upload MS data files to the backend.
+ 
+         Parameters
+         ----------
+         ms_data_files : list
+             List of MS data files to be uploaded.
+         path : str
+             The name of the destination folder in PAS. Does not accept leading, trailing or consecutive forward slashes. Example: "path/to/pas/folder".
+         space : str, optional
+             ID of the user group to which the files belong, defaulted to None.
+         filenames : list, optional
+             List of preferred PAS filenames. This rename occurs on both the cloud and the user interface level. Each index should map to the corresponding source file in ms_data_files. Folder paths are not accepted. Defaults to None (no renaming).
+ 
+         Returns
+         -------
+         dict
+             Contains a message stating whether the files were uploaded or not.
+ 
+         Examples
+         --------
+         >>> from core import SeerSDK
+         >>> seer_sdk = SeerSDK()
+         >>> seer_sdk.upload_ms_data_files(["/path/to/file1", "/path/to/file2"], path="path/to/pas/folder")
+         [{'filePath': 'path/to/pas/folder/file1', 'fileSize': 1234, 'userGroupId': None}, {'filePath': 'path/to/pas/folder/file2', 'fileSize': 1234, 'userGroupId': None}]
+ 
+         >>> seer_sdk.upload_ms_data_files(["/path/to/file1", "/path/to/file2"], path="path/to/pas/folder", space="user_group_id")
+         [{'filePath': 'path/to/pas/folder/file1', 'fileSize': 1234, 'userGroupId': 'user_group_id'}, {'filePath': 'path/to/pas/folder/file2', 'fileSize': 1234, 'userGroupId': 'user_group_id'}]
+ 
+         >>> seer_sdk.upload_ms_data_files(["/path/to/file1", "/path/to/file2"], path="path/to/pas/folder", space="user_group_id", filenames=["fileA", "fileB"])
+         [{'filePath': 'path/to/pas/folder/fileA', 'fileSize': 1234, 'userGroupId': 'user_group_id'}, {'filePath': 'path/to/pas/folder/fileB', 'fileSize': 1234, 'userGroupId': 'user_group_id'}]
+         """
+ 
+         files = []
+         tenant_id = self._auth.active_tenant_id
+         s3_bucket = ""
+         filenames = filenames or []  # avoid a mutable default argument
+ 
+         if not path:
+             raise ValueError(
+                 "A folder path is required to upload files into PAS."
+             )
+ 
+         # Step 1: Check if paths and file extensions are valid.
+         for file in ms_data_files:
+             if not valid_ms_data_file(file):
+                 raise ValueError(
+                     "Invalid file or file format. Please check your file."
+                 )
+ 
+         extensions = set(
+             [os.path.splitext(file.lower())[1] for file in ms_data_files]
+         )
+ 
+         if filenames and ".d.zip" in extensions:
+             raise ValueError(
+                 "Please leave the 'filenames' parameter empty when working with .d.zip files. SeerSDK.rename_d_zip_file() is available for this use case."
+             )
+         # Step 2: Use the active tenant to fetch the tenant_id.
+         tenant_id = self.get_active_tenant_id()
+ 
+         # Step 3: Fetch the S3 bucket name by making a call to `/api/v1/auth/getawscredential`.
+         with self._get_auth_session() as s:
+             config_response = s.get(
+                 f"{self._auth.url}auth/getawscredential",
+             )
+ 
+             if (
+                 config_response.status_code != 200
+                 or not config_response.json()
+             ):
+                 raise ValueError("Could not fetch config for user.")
+ 
+             if "S3Bucket" not in config_response.json()["credentials"]:
+                 raise ValueError(
+                     "Could not fetch config for user - incomplete response."
+                 )
+ 
+             s3_bucket = config_response.json()["credentials"]["S3Bucket"]
+ 
+             credentials = config_response.json()["credentials"]
+ 
+         # Step 4: Upload each msdata file to the S3 bucket.
+         for i, file in enumerate(ms_data_files):
+             filename = (
+                 filenames[i]
+                 if filenames
+                 else os.path.basename(file).replace("/", "")
+             )
+             filesize = os.stat(file).st_size
+             s3_upload_path = (
+                 f"{tenant_id}" if not path else f"{tenant_id}/{path}"
+             )
+ 
+             res = upload_file(
+                 file, s3_bucket, credentials, f"{s3_upload_path}/{filename}"
+             )
+ 
+             if not res:
+                 raise ServerError(
+                     f"Failed to upload to cloud storage. {filename}"
+                 )
+ 
+             files.append(
+                 {
+                     "filePath": f"{s3_upload_path}/{filename}",
+                     "fileSize": filesize,
+                     "userGroupId": space,
+                 }
+             )
+ 
+         # Step 5: Make a call to `/api/v1/msdataindex/file` to sync with the frontend. This should only be done after all files have finished uploading, simulating an async "promise"-like scenario in JavaScript.
+         result_files = None
+         with self._get_auth_session() as s:
+             file_response = s.post(
+                 f"{self._auth.url}api/v1/msdataindex/file",
+                 json={"files": files},
+             )
+ 
+             if (
+                 file_response.status_code != 200
+                 or not file_response.json()
+                 or "created" not in file_response.json()
+             ):
+                 raise ServerError("Could not upload MS Files to PAS.")
+             result_files = file_response.json()["files"]
+ 
+         # Omit tenant_id from the returned file paths.
+         for result in result_files:
+             result["filePath"] = "/".join(result["filePath"].split("/")[1:])
+ 
+         print(
+             f"Files uploaded successfully to {self.get_active_tenant_name()}."
+         )
+ 
+         return result_files
+ 
+     def _move_ms_data_files(
+         self,
+         source_data_files: _List,
+         target_data_files: _List,
+         target_space: str = None,
+     ):
+         """
+         Move MS data files from one location to another.
+ 
+         Parameters
+         ----------
+         source_data_files : List
+             List of MS data files to be moved.
+         target_data_files : List
+             List of target MS data files.
+         target_space : str, optional
+             Name of the user group to move the files to.
+             If None is provided, the files will remain in the space they occupied prior to the move.
+ 
+         Returns
+         -------
+         list
+             The list of files moved.
+ 
+         Examples
+         --------
+         >>> from core import SeerSDK
+         >>> seer_sdk = SeerSDK()
+         >>> seer_sdk.move_ms_data_files(["/path/to/file1", "/path/to/file2"], ["/path/to/target_file1", "/path/to/target_file2"])
+         ["/path/to/target_file1", "/path/to/target_file2"]
+         """
+ 
+         tenant_id = self._auth.active_tenant_id
+ 
+         if not source_data_files:
+             raise ValueError("Source data files cannot be empty.")
+ 
+         if len(source_data_files) != len(target_data_files):
+             raise ValueError(
+                 "Source and target files should have the same number of files."
+             )
+ 
+         folder_paths = list({os.path.dirname(x) for x in source_data_files})
+         if len(folder_paths) > 1:
+             raise ValueError(
+                 "Files can only be moved from one folder path at a time."
+             )
+         folder_path = f"{tenant_id}/{folder_paths[0]}"
+ 
+         target_folder_paths = list(
+             {os.path.dirname(x) for x in target_data_files}
+         )
+         if len(target_folder_paths) > 1:
+             raise ValueError(
+                 "Files can only be moved to one folder path at a time."
+             )
+ 
+         available_spaces = self.get_spaces()
+         target_space_id = None
+         if target_space:
+             target_spaces = [
+                 x["id"]
+                 for x in available_spaces
+                 if x["usergroup_name"].lower() == target_space.lower()
+             ]
+             if not target_spaces:
+                 raise ValueError(
+                     f"Target space not found with name {target_space}. Please correct this value."
+                 )
+             target_space_id = target_spaces[0]
+ 
+         target_folder_path = f"{tenant_id}/{target_folder_paths[0]}"
+         # Retrieve msdatafileindex metadata to determine the source space.
+         base_space = None
+         with self._get_auth_session() as s:
+             URL = self._auth.url + "api/v1/msdataindex/getmetadata"
+             params = {"folderKey": folder_path}
+             r = s.get(URL, params=params)
+             if r.status_code != 200:
+                 raise ValueError("Failed to locate source files in PAS.")
+             data = r.json()["files"]
+             source_basenames = [
+                 os.path.basename(f) for f in source_data_files
+             ]
+             found_files = [
+                 x for x in data if x["filename"] in source_basenames
+             ]
+             if len(found_files) != len(source_data_files):
+                 raise ValueError(
+                     "Not all source files were found in the source folder."
+                 )
+             spaces = list({x["userGroupId"] for x in found_files})
+             if len(spaces) > 1:
+                 raise ValueError(
+                     "Files are located in multiple spaces. Please separate these into multiple move requests."
+                 )
+             base_space = spaces[0]
+ 
+         if not target_space:
+             target_space_id = base_space
+ 
+         payload = {
+             "type": "file",
+             "sourceFolder": folder_path,
+             "targetFolder": target_folder_path,
+             "sourceFiles": [os.path.basename(x) for x in source_data_files],
+             "targetFiles": [os.path.basename(x) for x in target_data_files],
+         }
+ 
+         # We must specify base_space if it is not General because it is a criterion for finding the source files.
+         if base_space:
+             payload["sourceUserGroupId"] = base_space
+ 
+         # If the target space is General, we still omit it.
+         if target_space_id and base_space != target_space_id:
+             payload["targetUserGroupId"] = target_space_id
+ 
+         with self._get_auth_session() as s:
+             URL = self._auth.url + "api/v1/msdataindex/move"
+             r = s.post(URL, json=payload)
+             if r.status_code != 200:
+                 raise ServerError("Failed to move files in PAS.")
+             return target_data_files
+ 
+     def change_ms_file_space(
+         self, ms_data_files: _List, destination_space: str
+     ):
+         """
+         Change the space of MS data files.
+ 
+         Parameters
+         ----------
+         ms_data_files : List
+             List of MS data files to be moved.
+         destination_space : str
+             Name of the desired user group.
+ 
+         Returns
+         -------
+         List
+             List of files that were converted to the new space.
+         """
+         return self._move_ms_data_files(
+             ms_data_files, ms_data_files, destination_space
+         )
+ 
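A one-call sketch; the space name is a placeholder and is matched case-insensitively against the names returned by get_spaces():

    seer_sdk.change_ms_file_space(["folder/run01.raw"], "Proteomics Team")
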
+     def move_ms_data_files(
+         self, source_ms_data_files: _List, target_ms_data_files: _List
+     ):
+         """
+         Move MS data files from one PAS file location to another. The space will be unchanged.
+ 
+         Parameters
+         ----------
+         source_ms_data_files : List
+             List of file paths of the MS data files to be moved.
+         target_ms_data_files : List
+             List of destination file paths. Should be indexed one to one with the source MS data files list.
+ 
+         Returns
+         -------
+         List
+             List of files that were moved.
+         """
+         return self._move_ms_data_files(
+             source_ms_data_files, target_ms_data_files
+         )
+ 
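And a move sketch; both lists must share a single source folder and a single target folder, per the checks in _move_ms_data_files (paths are placeholders):

    seer_sdk.move_ms_data_files(
        ["old_folder/run01.raw"],
        ["new_folder/run01.raw"],  # changing the basename renames the file as well
    )
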
+     def download_analysis_files(
+         self, analysis_id: str, download_path: str = "", file_name: str = ""
+     ):
+         """
+         Download a specific analysis file from the backend given an `analysis_id` to the specified `download_path`.
+ 
+         If no `download_path` is specified, the file will be downloaded to the current working directory.
+ 
+         If no `file_name` is specified, all files for the analysis will be downloaded.
+ 
+         Parameters
+         ----------
+         analysis_id : str
+             ID of the analysis to download.
+         download_path : str, optional
+             Path to download the analysis file to, defaulted to the current working directory.
+         file_name : str, optional
+             Name of the analysis file to download, defaulted to None.
+ 
+         Returns
+         -------
+         dict
+             Message stating whether the file was downloaded or not.
+ 
+         Examples
+         --------
+         >>> from core import SeerSDK
+         >>> sdk = SeerSDK()
+         >>> sdk.download_analysis_files("analysis_id", "/path/to/download")
+         >>> Downloading EXP22006_2022ms0031bX25_B_BA4_1_4768/diann.log
+             Finished downloading EXP22006_2022ms0031bX25_B_BA4_1_4768/diann.log
+ 
+             Downloading EXP20004_2020ms0007X11_A.mzML.quant
+             Finished downloading EXP20004_2020ms0007X11_A.mzML.quant
+ 
+             Downloading EXP20004_2020ms0007X11_A/0714-diann181-libfree-mbr.json
+             Finished downloading EXP20004_2020ms0007X11_A/0714-diann181-libfree-mbr.json
+ 
+             Downloading EXP20004_2020ms0007X11_A/diann.log
+             Finished downloading EXP20004_2020ms0007X11_A/diann.log
+         >>> { "message": "File downloaded successfully." }
+         """
+ 
+         def get_url(analysis_id, file_name, project_id):
+             URL = f"{self._auth.url}api/v1/analysisResultFiles/getUrl"
+ 
+             with self._get_auth_session() as s:
+ 
+                 download_url = s.post(
+                     URL,
+                     json={
+                         "analysisId": analysis_id,
+                         "filename": file_name,
+                         "projectId": project_id,
+                     },
+                 )
+ 
+                 if download_url.status_code != 200:
+                     raise ValueError(
+                         "Could not download file. Please check if the analysis ID is valid or the backend is running."
+                     )
+ 
+                 return download_url.json()["url"]
+ 
+         if not analysis_id:
+             raise ValueError("Analysis ID cannot be empty.")
+ 
+         try:
+             valid_analysis = self.get_analysis(analysis_id)[0]
+         except Exception:
+             raise ValueError(
+                 "Invalid analysis ID. Please check if the analysis ID is valid or the backend is running."
+             )
+ 
+         project_id = valid_analysis["project_id"]
+ 
+         if not download_path:
+             download_path = os.getcwd()
+             print("\nDownload path not specified.\n")
+ 
+         if not os.path.isdir(download_path):
+             print(
+                 f'\nThe path "{download_path}" you specified does not exist, is invalid, or is not absolute.\n'
+             )
+             download_path = os.getcwd()
+ 
+         name = f"{download_path}/downloads/{analysis_id}"
+ 
+         if not os.path.exists(name):
+             os.makedirs(name)
+ 
+         URL = f"{self._auth.url}api/v1/analysisResultFiles"
+ 
+         with self._get_auth_session() as s:
+ 
+             analysis_files = s.get(f"{URL}/{analysis_id}")
+ 
+             if analysis_files.status_code != 200:
+                 raise ValueError(
+                     "Invalid request. Please check if the analysis ID is valid or the backend is running."
+                 )
+ 
+             res = analysis_files.json()
+ 
+             if file_name:
+                 filenames = set([file["filename"] for file in res])
+ 
+                 if file_name not in filenames:
+                     raise ValueError(
+                         "Invalid file name. Please check if the file name is correct."
+                     )
+ 
+                 res = [file for file in res if file["filename"] == file_name]
+ 
+             print(f'Downloading files to "{name}"\n')
+ 
+             for file in res:
+                 filename = file["filename"]
+                 url = get_url(analysis_id, filename, project_id)
+ 
+                 print(f"Downloading {filename}")
+ 
+                 # Use a per-file target directory so subfolders created for one
+                 # result file do not leak into the paths of subsequent files.
+                 target_dir = name
+ 
+                 for _ in range(2):
+                     try:
+                         with tqdm(
+                             unit="B",
+                             unit_scale=True,
+                             unit_divisor=1024,
+                             miniters=1,
+                             desc="Progress",
+                         ) as t:
+                             ssl._create_default_https_context = (
+                                 ssl._create_unverified_context
+                             )
+                             urllib.request.urlretrieve(
+                                 url,
+                                 f"{target_dir}/{filename}",
+                                 reporthook=download_hook(t),
+                                 data=None,
+                             )
+                         break
+                     except Exception:
+                         # The result file may live in a subfolder that does not
+                         # exist locally yet; create it and retry once.
+                         parts = filename.split("/")
+                         target_dir += "/" + "/".join(parts[:-1])
+                         filename = parts[-1]
+                         if not os.path.isdir(target_dir):
+                             os.makedirs(target_dir)
+ 
+                         else:
+                             raise ValueError(
+                                 "Your download failed. Please check if the backend is still running."
+                             )
+ 
+                 print(f"Finished downloading {filename}\n")
+ 
+             return {
+                 "message": f"Files downloaded successfully to '{download_path}/downloads/{analysis_id}'"
+             }
+ 
+     def link_plate(
+         self,
+         ms_data_files: _List[str],
+         plate_map_file: str,
+         plate_id: str,
+         plate_name: str,
+         sample_description_file: str = None,
+         space: str = None,
+     ):
+         """
+         Links MS data files that already exist in PAS with a user-uploaded plate map to create a new plate.
+ 
+         Parameters
+         ----------
+         ms_data_files : list[str]
+             Paths to MS data files on the PAS backend or S3 bucket.
+         plate_map_file : str
+             Path to the plate map file to be linked.
+         plate_id : str
+             ID of the plate to be linked.
+         plate_name : str
+             Name of the plate to be linked.
+         sample_description_file : str, optional
+             Path to the sample description file to be linked, defaulted to None.
+         space : str, optional
+             ID of the user group to which the files belong, defaulted to None.
+ 
+         Returns
+         -------
+         id_uuid : str
+             The UUID of the plate.
+ 
+         Examples
+         --------
+         >>> from core import SeerSDK
+         >>> sdk = SeerSDK()
+         >>> sdk.link_plate(["/path/to/file1", "/path/to/file2"], "/path/to/plate_map_file", "plate_id", "plate_name")
+         "9d5b6ab0-5a8c-11ef-8110-dd5cb94025eb"
+         """
+ 
+         plate_ids = (
+             set()
+         )  # contains all the plate ids fetched from the backend
+         files = []  # to be uploaded to sync the frontend
+         samples = []  # list of all the sample responses from the backend
+         id_uuid = ""  # uuid for the plate id
+         raw_file_paths = {}  # map of all the AWS raw file paths
+         s3_upload_path = None
+         s3_bucket = ""
+         ms_data_file_names = []
+         dir_exists = (
+             True  # flag to check if the generated_files directory exists
+         )
+ 
+         # Step 0: Check if the file paths exist in the S3 bucket.
+         for file in ms_data_files:
+             if not self.list_ms_data_files(file):
+                 raise ValueError(
+                     f"File '{file}' does not exist. Please check your parameters."
+                 )
+ 
+         if sample_description_file and not os.path.exists(
+             sample_description_file
+         ):
+             raise ValueError(
+                 f"File path '{sample_description_file}' is invalid. Please check your parameters."
+             )
+ 
+         # Validate plate id and plate name as entity names.
+         # Enforcing this on the SDK level to prevent the creation of empty records before the backend validation.
+         if not entity_name_ruler(plate_id):
+             raise ValueError("Plate ID contains unsupported characters.")
+ 
+         if not entity_name_ruler(plate_name):
+             raise ValueError("Plate Name contains unsupported characters.")
+ 
+         # Validate the plate map.
+         if isinstance(plate_map_file, PlateMap):
+             plate_map_data = plate_map_file.to_df()
+         else:
+             plate_map_data = pd.read_csv(plate_map_file)
+ 
+         validate_plate_map(plate_map_data, ms_data_files)
+ 
+         # Step 1: Check for duplicates in the user-inputted plate id. Populates the `plate_ids` set.
+         with self._get_auth_session() as s:
+             plate_response = s.get(f"{self._auth.url}api/v1/plateids")
+ 
+             if plate_response.status_code != 200:
+                 raise ServerError(
+                     "Failed to fetch plate ids from the server. Please check your connection and reauthenticate."
+                 )
+ 
+             plate_ids = set(plate_response.json()["data"])
+ 
+             if not plate_ids:
+                 raise ServerError(
+                     "No plate ids returned from the server. Please reattempt."
+                 )
+ 
+         # Step 2: Fetch the UUID that needs to be passed into the backend from `/api/v1/plates` to fetch the AWS upload config and raw file path. This will sync the plates backend with samples when the user uploads later. This UUID will also be void of duplicates since duplication is handled by the backend.
+ 
+         with self._get_auth_session() as s:
+             plate_response = s.post(
+                 f"{self._auth.url}api/v1/plates",
+                 json={
+                     "plateId": plate_id,
+                     "plateName": plate_name,
+                     "plateUserGroup": space,
+                 },
+             )
+ 
+             if plate_response.status_code != 200:
+                 raise ServerError(
+                     "Failed to connect to the server. Please check your connection and reauthenticate."
+                 )
+ 
+             id_uuid = plate_response.json()["id"]
+ 
+             if not id_uuid:
+                 raise ServerError(
+                     "Failed to fetch a UUID from the server. Please check your connection and reauthenticate."
+                 )
+ 
+         # Step 3: Fetch the AWS upload config from the backend with the plateId we just generated. Populates the `s3_upload_path` and `s3_bucket` variables.
+         with self._get_auth_session() as s:
+             config_response = s.post(
+                 f"{self._auth.url}api/v1/msdatas/getuploadconfig",
+                 json={"plateId": id_uuid},
+             )
+ 
+             if (
+                 config_response.status_code != 200
+                 or not config_response.json()
+             ):
+                 raise ServerError(
+                     "Failed to fetch AWS upload config for the plate. Please check your connection and reauthenticate."
+                 )
+ 
+             if "s3Bucket" not in config_response.json():
+                 raise ServerError(
+                     "Failed to fetch the S3 bucket from AWS. Please check your connection and reauthenticate."
+                 )
+             elif "s3UploadPath" not in config_response.json():
+                 raise ServerError(
+                     "Failed to fetch the S3 upload path from AWS. Please check your connection and reauthenticate."
+                 )
+ 
+             s3_bucket = config_response.json()["s3Bucket"]
+             s3_upload_path = config_response.json()["s3UploadPath"]
+ 
+         with self._get_auth_session() as s:
+             config_response = s.get(
+                 f"{self._auth.url}auth/getawscredential",
+             )
+ 
+             if (
+                 config_response.status_code != 200
+                 or not config_response.json()
+             ):
+                 raise ServerError(
+                     "Failed to fetch credentials. Please check your connection and reauthenticate."
+                 )
+ 
+             if "S3Bucket" not in config_response.json()["credentials"]:
+                 raise ServerError(
+                     "Failed to fetch data from AWS. Please check your connection and reauthenticate."
+                 )
+ 
+             credentials = config_response.json()["credentials"]
+ 
+         # Step 4: Upload the plate map file to the S3 bucket.
+         if isinstance(plate_map_file, PlateMap):
+             plate_map_file_name = f"plateMap_{id_uuid}.csv"
+ 
+             if not os.path.exists("generated_files"):
+                 dir_exists = False
+                 os.makedirs("generated_files")
+ 
+             plate_map_file.to_csv(f"generated_files/{plate_map_file_name}")
+             plate_map_file = f"generated_files/{plate_map_file_name}"
+ 
+         else:
+             plate_map_file_name = os.path.basename(plate_map_file)
+ 
+         res = upload_file(
+             plate_map_file,
+             s3_bucket,
+             credentials,
+             f"{s3_upload_path}{plate_map_file_name}",
+         )
+ 
+         if not res:
+             raise ServerError(
+                 "Failed to upload plate map to AWS. Please check your connection and reauthenticate."
+             )
+ 
+         with self._get_auth_session() as s:
+             plate_map_response = s.post(
+                 f"{self._auth.url}api/v1/msdataindex/file",
+                 json={
+                     "files": [
+                         {
+                             "filePath": f"{s3_upload_path}{plate_map_file_name}",
+                             "fileSize": os.stat(plate_map_file).st_size,
+                             "userGroupId": space,
+                         }
+                     ]
+                 },
+             )
+ 
+             if (
+                 plate_map_response.status_code != 200
+                 or not plate_map_response.json()
+                 or "created" not in plate_map_response.json()
+             ):
+                 raise ServerError(
+                     "Failed to upload raw files to PAS. Please check your connection and reauthenticate."
+                 )
+ 
+         # Step 5: Populate `raw_file_paths` for the sample upload.
+         raw_file_paths = self._get_msdataindex_path(ms_data_files)
+ 
+         # Step 5.5: Trim display paths to their basenames to align with the plate map.
+         raw_file_paths = {
+             os.path.basename(k): v for k, v in raw_file_paths.items()
+         }
+ 
+         # Step 6: Get sample info from the plate map file and make a call to `/api/v1/samples` with the sample_info. This returns the plateId, sampleId and sampleName for each sample in the plate map file. Also validate and upload the sample_description_file if it exists.
+         sample_info = get_sample_info(
+             id_uuid,
+             plate_map_file,
+             space,
+             sample_description_file,
+         )
+         if sample_description_file:
+             sdf_upload = upload_file(
+                 sample_description_file,
+                 s3_bucket,
+                 credentials,
+                 f"{s3_upload_path}{os.path.basename(sample_description_file)}",
+             )
+ 
+             if not sdf_upload:
+                 raise ValueError(
+                     "Failed to upload sample description file to AWS. Please check your connection and reauthenticate."
+                 )
+ 
+             with self._get_auth_session() as s:
+                 sdf_response = s.post(
+                     f"{self._auth.url}api/v1/msdataindex/file",
+                     json={
+                         "files": [
+                             {
+                                 "filePath": f"{s3_upload_path}{os.path.basename(sample_description_file)}",
+                                 "fileSize": os.stat(
+                                     sample_description_file
+                                 ).st_size,
+                                 "userGroupId": space,
+                             }
+                         ]
+                     },
+                 )
+ 
+                 if (
+                     sdf_response.status_code != 200
+                     or not sdf_response.json()
+                     or "created" not in sdf_response.json()
+                 ):
+                     raise ServerError(
+                         "Failed to upload sample description file to PAS DB. Please check your connection and reauthenticate."
+                     )
+ 
+         for entry in sample_info:
+             sample = self._add_sample(entry)
+             samples.append(sample)
+ 
+         # Step 7: Parse the plate map file and convert the data into a form that can be POSTed to `/api/v1/msdatas`.
+         plate_map_data = parse_plate_map_file(
+             plate_map_file, samples, raw_file_paths, space
+         )
+ 
+         # Step 8: Make a request to `/api/v1/msdatas/batch` with the processed samples data.
+         with self._get_auth_session() as s:
+             ms_data_response = s.post(
+                 f"{self._auth.url}api/v1/msdatas/batch",
+                 json={"msdatas": plate_map_data},
+             )
+             if ms_data_response.status_code != 200:
+                 raise ServerError(
+                     "Failed to add samples to plate in PAS. Please check your connection and reauthenticate."
+                 )
+ 
+         print(f"Plate generated with id: '{id_uuid}'")
+         return id_uuid
+ 
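A sketch for linking files that already exist in PAS; the first argument takes display paths as shown in the MS Files view, and every value here is a placeholder:

    plate_uuid = sdk.link_plate(
        ms_data_files=["pas_folder/run01.raw", "pas_folder/run02.raw"],
        plate_map_file="local/plate_map.csv",
        plate_id="PLT-002",
        plate_name="Linked plate",
    )
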
+     def _get_msdataindex_metadata(self, folder=""):
+         """
+         Get metadata for the files under a given folder path.
+ 
+         Args:
+             folder (str): Folder path relative to the tenant root, optional.
+ 
+         Raises:
+             ServerError: Could not fetch metadata for the folder.
+ 
+         Returns:
+             dict: A dictionary containing the metadata for the files.
+         """
+         URL = f"{self._auth.url}api/v2/msdataindex/getmetadata"
+         with self._get_auth_session() as s:
+             params = {"all": "true"}
+             if folder:
+                 tenant_id = jwt.decode(
+                     self._auth.get_token()[0],
+                     options={"verify_signature": False},
+                 )["custom:tenantId"]
+                 params["folderKey"] = f"{tenant_id}/{folder}"
+ 
+             metadata = s.get(URL, params=params)
+ 
+             if metadata.status_code != 200:
+                 print(metadata.text)
+                 raise ServerError("Could not fetch metadata for file.")
+ 
+             return metadata.json()
+ 
+     def _get_msdataindex_path(self, display_path: list):
+         """
+         Get the underlying cloud file path from the display path.
+ 
+         Args:
+             display_path (list): A list of file paths as displayed on PAS.
+ 
+         Returns:
+             dict: A dictionary mapping the display path to the raw file path.
+         """
+ 
+         tenant_id = jwt.decode(
+             self._auth.get_token()[0], options={"verify_signature": False}
+         )["custom:tenantId"]
+         result = {}
+         # Partition the requested paths by their folder.
+         folder_partitions = {os.path.dirname(x): [] for x in display_path}
+         for path in display_path:
+             folder_partitions[os.path.dirname(path)].append(path)
+ 
+         success = True
+         missing_data_files = []
+         # For every unique folder in the set of MS files, fetch the metadata.
+         for folder_path in folder_partitions:
+             try:
+                 metadata = {
+                     x["key"]: x["rawFilePath"]
+                     for x in self._get_msdataindex_metadata(
+                         folder=folder_path
+                     )["data"]
+                 }
+             except Exception:
+                 # If the metadata fetch fails, mark every file in the folder as missing.
+                 success = False
+                 missing_data_files.extend(folder_partitions[folder_path])
+                 continue
+ 
+             for path in folder_partitions[folder_path]:
+                 if f"{tenant_id}/{path}" not in metadata:
+                     success = False
+                     missing_data_files.append(path)
+                     continue
+                 result[path] = metadata[f"{tenant_id}/{path}"]
+ 
+         if not success:
+             raise ValueError(
+                 f"Could not fetch metadata for the following files: {missing_data_files}"
+             )
+ 
+         return result
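
Finally, a sketch of this private helper's contract (unexposed, so subject to change; the path and the shape of the returned raw path are illustrative):

    mapping = sdk._get_msdataindex_path(["pas_folder/run01.raw"])
    # e.g. {"pas_folder/run01.raw": "/bucket/tenant-id/pas_folder/run01.raw"}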