seer-pas-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1229 @@
1
+ from tqdm import tqdm
2
+
3
+ import os
4
+ import jwt
5
+ import requests
6
+ import urllib.request
7
+ import ssl
8
+ import shutil
9
+
10
+ from typing import List as _List
11
+
12
+ from ..common import *
13
+ from ..auth import Auth
14
+ from ..objects import PlateMap
15
+
16
+
17
+ class SeerSDK:
18
+ """
19
+ Object exposing SDK methods. Requires a username and password; the optional `instance` param denotes the instance of PAS (defaults to "US").
20
+
21
+ Examples
22
+ -------
23
+ >>> from core import SeerSDK
24
+ >>> USERNAME = "test"
25
+ >>> PASSWORD = "test-password"
26
+ >>> INSTANCE = "EU"
27
+ >>> seer_sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE)
28
+ """
29
+
30
def __init__(self, username, password, instance="US"):
    """
    Log in to PAS and keep the resulting `Auth` object on the instance.

    Parameters
    ----------
    username : str
        Username passed through to `Auth`.
    password : str
        Password passed through to `Auth`.
    instance : str, optional
        PAS instance passed through to `Auth`, defaults to "US".

    Raises
    ------
    ValueError
        If the `Auth` object cannot be built or a token cannot be fetched.
        The underlying error is attached as `__cause__`.
    """
    try:
        self._auth = Auth(username, password, instance)
        # Request a token right away so a bad login fails here instead of
        # on the first API call.
        self._auth.get_token()
    except Exception as err:
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still
        # propagate untouched.
        raise ValueError(
            "Could not log in.\nPlease check your credentials and/or instance."
        ) from err

    print(f"User '{username}' logged in.\n")
42
+
43
def get_spaces(self):
    """
    Fetch the spaces (user groups) of the authenticated user.

    Issues a GET against `api/v1/usergroups` using the tokens returned by
    `self._auth.get_token()`.

    Returns
    -------
    spaces : list
        The decoded JSON body of the response.

    Raises
    ------
    ValueError
        If the response status code is not 200.

    Examples
    -------
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk.get_spaces()
    [{ "usergroup_name": ... }, { "usergroup_name": ... }, ...]
    """
    id_token, access_token = self._auth.get_token()
    endpoint = f"{self._auth.url}api/v1/usergroups"

    with requests.Session() as session:
        session.headers.update(
            {
                "Authorization": f"{id_token}",
                "access-token": f"{access_token}",
            }
        )
        response = session.get(endpoint)

        if response.status_code == 200:
            return response.json()

        raise ValueError("Invalid request. Please check your parameters.")
81
+
82
def get_plate_metadata(self, plate_id: str = None, df: bool = False):
    """
    Fetch plate metadata for the authenticated user.

    Without `plate_id`, every plate in the response's "data" field is
    returned. With `plate_id`, the single plate object returned by
    `api/v1/plates/<plate_id>` is wrapped in a one-element list.

    Parameters
    ----------
    plate_id : str, optional
        ID of the plate to be fetched, defaulted to None.
    df : bool
        When True the result is passed through `dict_to_df` before being
        returned; otherwise the list of dicts is returned as is.

    Returns
    -------
    plates : list or DataFrame
        Plate objects with the "tenant_id" key removed.

    Raises
    ------
    ValueError
        If the response status code is not 200.

    Examples
    -------
    >>> seer_sdk.get_plate_metadata()
    [{ "id": ... }, { "id": ... }, ...]
    >>> seer_sdk.get_plate_metadata(plate_id="YOUR_PLATE_ID_HERE")
    [{ "id": ... }]
    >>> seer_sdk.get_plate_metadata(df=True)  # same rows as a DataFrame
    """
    id_token, access_token = self._auth.get_token()
    headers = {
        "Authorization": f"{id_token}",
        "access-token": f"{access_token}",
    }
    url = f"{self._auth.url}api/v1/plates"

    with requests.Session() as session:
        session.headers.update(headers)

        response = session.get(
            f"{url}/{plate_id}" if plate_id else url,
            params={"all": "true"},
        )
        if response.status_code != 200:
            raise ValueError("Invalid request. Please check your parameters.")

        payload = response.json()
        res = [payload] if plate_id else payload["data"]

    for entry in res:
        # pop() instead of del: an entry without "tenant_id" must not raise
        # KeyError (matches the guarded removal in the sibling methods).
        entry.pop("tenant_id", None)

    return dict_to_df(res) if df else res
154
+
155
def get_project_metadata(self, project_id: str = None, df: bool = False):
    """
    Fetch project metadata for the authenticated user.

    Without `project_id`, every project in the response's "data" field is
    returned. With `project_id`, the single object returned by
    `api/v1/projects/<project_id>` is wrapped in a one-element list.

    Parameters
    ----------
    project_id : str, optional
        Project ID of the project to be fetched, defaulted to None.
    df : bool
        When True the result is passed through `dict_to_df` before being
        returned; otherwise the list of dicts is returned as is.

    Returns
    -------
    projects : list or DataFrame
        Project objects with "tenant_id" removed and, where present,
        "raw_file_path" cut down to the part after its third '/'.

    Raises
    ------
    ValueError
        If the response status code is not 200.

    Examples
    -------
    >>> seer_sdk.get_project_metadata()
    [{ "project_name": ... }, { "project_name": ... }, ...]
    >>> seer_sdk.get_project_metadata(project_id="YOUR_PROJECT_ID_HERE")
    [{ "project_name": ... }]
    """
    id_token, access_token = self._auth.get_token()
    auth_headers = {
        "Authorization": f"{id_token}",
        "access-token": f"{access_token}",
    }
    if project_id:
        endpoint = f"{self._auth.url}api/v1/projects/{project_id}"
    else:
        endpoint = f"{self._auth.url}api/v1/projects"

    found = []
    with requests.Session() as session:
        session.headers.update(auth_headers)

        response = session.get(endpoint, params={"all": "true"})
        if response.status_code != 200:
            raise ValueError("Invalid request. Please check your parameters.")

        if project_id:
            found = [response.json()]
        else:
            found = response.json()["data"]

    for project in found:
        if "tenant_id" in project:
            del project["tenant_id"]

        if "raw_file_path" in project:
            full_path = project["raw_file_path"]
            # Offset just past the third '/' in the path: everything that
            # split("/", 3) leaves in its last chunk is what we keep.
            cut = len(full_path) - len(full_path.split("/", 3)[-1])
            project["raw_file_path"] = full_path[cut:]

    if df:
        return dict_to_df(found)
    return found
237
+
238
+ def _get_samples_metadata(
239
+ self, plate_id: str = None, project_id: str = None, df: bool = False
240
+ ):
241
+ """
242
+ ****************
243
+ [UNEXPOSED METHOD CALL]
244
+ ****************
245
+
246
+ Fetches a list of samples for the authenticated user, filtered by `plate_id`. Returns all samples for the plate with the given `plate_id`, provided it exists.
247
+
248
+ If both `plate_id` and `project_id` are passed in, only the `plate_id` is validated first.
249
+
250
+ Parameters
251
+ ----------
252
+ plate_id : str, optional
253
+ ID of the plate for which samples are to be fetched, defaulted to None.
254
+ project_id : str, optional
255
+ ID of the project for which samples are to be fetched, defaulted to None.
256
+ df: bool
257
+ Boolean denoting whether the user wants the response back in JSON or a DataFrame object
258
+
259
+ Returns
260
+ -------
261
+ samples: list or DataFrame
262
+ List/DataFrame of samples for the authenticated user.
263
+
264
+ Examples
265
+ -------
266
+ >>> from core import SeerSDK
267
+ >>> seer_sdk = SeerSDK()
268
+
269
+ >>> seer_sdk.get_samples_metadata(plate_id="7ec8cad0-15e0-11ee-bdf1-bbaa73585acf")
270
+ >>> [
271
+ { "id": ... },
272
+ { "id": ... },
273
+ ...
274
+ ]
275
+
276
+ >>> seer_sdk.get_samples_metadata(df=True)
277
+ >>> id ... control
278
+ 0 812139c0-15e0-11ee-bdf1-bbaa73585acf ...
279
+ 1 803e05b0-15e0-11ee-bdf1-bbaa73585acf ... MPE Control
280
+ 2 a9b26a40-15da-11ee-bdf1-bbaa73585acf ...
281
+ 3 a8fc87c0-15da-11ee-bdf1-bbaa73585acf ... MPE Control
282
+ 4 8e322990-15da-11ee-bdf1-bbaa73585acf ...
283
+ ... ... ... ...
284
+ 3624 907e1f40-6621-11ea-96e3-d5a4dab4ebf6 ... C132
285
+ 3625 53e59450-6621-11ea-96e3-d5a4dab4ebf6 ... C132
286
+ 3626 5d11b030-6618-11ea-96e3-d5a4dab4ebf6 ... C132
287
+ 3627 5bdf9270-6610-11ea-96e3-d5a4dab4ebf6 ... C132
288
+ 3628 dd607ef0-654c-11ea-8eb2-25a1cfd1163c ... C132
289
+ """
290
+
291
+ if not plate_id and not project_id:
292
+ raise ValueError("You must pass in plate ID or project ID.")
293
+
294
+ res = []
295
+ ID_TOKEN, ACCESS_TOKEN = self._auth.get_token()
296
+ HEADERS = {
297
+ "Authorization": f"{ID_TOKEN}",
298
+ "access-token": f"{ACCESS_TOKEN}",
299
+ }
300
+ URL = f"{self._auth.url}api/v1/samples"
301
+ sample_params = {"all": "true"}
302
+
303
+ with requests.Session() as s:
304
+ s.headers.update(HEADERS)
305
+
306
+ if plate_id:
307
+ try:
308
+ self.get_plate_metadata(plate_id)
309
+ except:
310
+ raise ValueError(
311
+ "Plate ID is invalid. Please check your parameters and see if the backend is running."
312
+ )
313
+ sample_params["plateId"] = plate_id
314
+
315
+ elif project_id:
316
+ try:
317
+ self.get_project_metadata(project_id)
318
+ except:
319
+ raise ValueError(
320
+ "Project ID is invalid. Please check your parameters and see if the backend is running."
321
+ )
322
+
323
+ sample_params["projectId"] = project_id
324
+
325
+ samples = s.get(URL, params=sample_params)
326
+ if samples.status_code != 200:
327
+ raise ValueError(
328
+ "Invalid request. Please check if your plate ID has any samples associated with it."
329
+ )
330
+ res = samples.json()["data"]
331
+
332
+ for entry in res:
333
+ del entry["tenant_id"]
334
+
335
+ return res if not df else dict_to_df(res)
336
+
337
def get_msdata(self, sample_ids: list, df: bool = False):
    """
    Fetch one MS data file record per sample ID.

    For every ID in `sample_ids` a POST is sent to `api/v1/msdatas/items`
    and the first element of the response's "data" list is collected.

    Parameters
    ----------
    sample_ids : list
        List of unique sample IDs.
    df : bool
        When True the result is passed through `dict_to_df` before being
        returned; otherwise the list of dicts is returned as is.

    Returns
    -------
    res : list or DataFrame
        MS data file objects with "tenant_id" removed and, where present,
        "raw_file_path" cut down to the part after its third '/'.

    Raises
    ------
    ValueError
        If a request does not return status 200 or returns no data.

    Examples
    -------
    >>> sample_ids = ["812139c0-15e0-11ee-bdf1-bbaa73585acf", "803e05b0-15e0-11ee-bdf1-bbaa73585acf"]
    >>> seer_sdk.get_msdata(sample_ids)
    [{"id": "SAMPLE_ID_1_HERE" ... }, {"id": "SAMPLE_ID_2_HERE" ... }]
    >>> seer_sdk.get_msdata(sample_ids, df=True)  # same rows as a DataFrame
    """
    res = []
    url = f"{self._auth.url}api/v1/msdatas/items"

    # One session for the whole batch so the connection is reused.
    with requests.Session() as session:
        for sample_id in sample_ids:
            # Tokens are still requested per sample, as before, so a long
            # batch never runs on a stale token.
            id_token, access_token = self._auth.get_token()
            session.headers.update(
                {
                    "Authorization": f"{id_token}",
                    "access-token": f"{access_token}",
                }
            )

            response = session.post(url, json={"sampleId": sample_id})
            # Parse the body once; skip parsing entirely on an error status.
            data = (
                response.json()["data"]
                if response.status_code == 200
                else None
            )
            if not data:
                raise ValueError(
                    f"Failed to fetch MS data for sample ID '{sample_id}'."
                )

            res.append(data[0])

    for entry in res:
        entry.pop("tenant_id", None)

        if "raw_file_path" in entry:
            # Keep only what follows the third '/' of the stored path.
            entry["raw_file_path"] = entry["raw_file_path"].split("/", 3)[-1]

    return dict_to_df(res) if df else res
407
+
408
def get_plate(self, plate_id: str, df: bool = False):
    """
    Fetch the MS data files of every sample on a plate.

    Looks up the plate's samples, collects their IDs and hands them to
    `get_msdata`.

    Parameters
    ----------
    plate_id : str
        ID of the plate whose samples' MS data files are to be fetched.
    df : bool
        Forwarded to `get_msdata`; selects DataFrame vs. list output.

    Returns
    -------
    res : list or DataFrame
        Whatever `get_msdata` returns for the plate's sample IDs.

    Examples
    -------
    >>> plate_id = "7ec8cad0-15e0-11ee-bdf1-bbaa73585acf"
    >>> seer_sdk.get_plate(plate_id)
    [{"id": "PLATE_ID_1_HERE" ... }, {"id": "PLATE_ID_2_HERE" ... }]
    >>> seer_sdk.get_plate(plate_id, df=True)  # same rows as a DataFrame
    """
    # NOTE(review): the visible part of this class only defines
    # `_get_samples_metadata`; confirm a public `get_samples_metadata`
    # exists elsewhere, otherwise this call raises AttributeError.
    sample_ids = []
    for sample in self.get_samples_metadata(plate_id=plate_id):
        sample_ids.append(sample["id"])

    return self.get_msdata(sample_ids, df)
448
+
449
def get_project(
    self, project_id: str, msdata: bool = False, df: bool = False
):
    """
    Fetch the samples of a project, optionally with their MS data files.

    Parameters
    ----------
    project_id : str
        ID of the project for which samples are to be fetched.
    msdata : bool, optional
        When True, the records returned by `get_msdata` are attached to
        their sample (matched on the record's "sample_id") under the
        "ms_data_files" key.
    df : bool, optional
        When True the samples are passed through `dict_to_df`; any attached
        "ms_data_files" list is converted with `dict_to_df` first.

    Returns
    -------
    res : list or DataFrame
        The project's samples.

    Raises
    ------
    ValueError
        If `project_id` is empty.

    Examples
    -------
    >>> project_id = "7e48e150-8a47-11ed-b382-bf440acece26"
    >>> seer_sdk.get_project(project_id=project_id)
    [{"id": "SAMPLE_ID_1_HERE", "sample_type": "Plasma", ...}, ...]
    >>> seer_sdk.get_project(project_id=project_id, msdata=True)
    [{"id": "SAMPLE_ID_1_HERE", ..., "ms_data_files": [{"id": ...}, ...]}, ...]
    >>> seer_sdk.get_project(project_id=project_id, msdata=True, df=True)
    """
    if not project_id:
        # Raised, not returned: callers must not receive an exception
        # object as if it were data.
        raise ValueError("No project ID specified.")

    # NOTE(review): the visible part of this class only defines
    # `_get_samples_metadata`; confirm a public `get_samples_metadata`
    # exists elsewhere, otherwise this call raises AttributeError.
    project_samples = self.get_samples_metadata(
        project_id=project_id, df=False
    )

    if msdata:
        sample_ids = [sample["id"] for sample in project_samples]
        ms_data_files = self.get_msdata(sample_ids=sample_ids, df=False)

        # Group the files by owning sample once instead of rescanning every
        # sample for every file.
        files_by_sample = {}
        for ms_data_file in ms_data_files:
            files_by_sample.setdefault(
                ms_data_file["sample_id"], []
            ).append(ms_data_file)

        for sample in project_samples:
            files = files_by_sample.get(sample["id"])
            if files:
                # Extend rather than overwrite so several files of the same
                # sample all survive.
                sample.setdefault("ms_data_files", []).extend(files)

    if df:
        for sample in project_samples:
            if "ms_data_files" in sample:
                sample["ms_data_files"] = dict_to_df(sample["ms_data_files"])

        project_samples = dict_to_df(project_samples)

    return project_samples
589
+
590
def get_analysis_protocols(
    self,
    analysis_protocol_name: str = None,
    analysis_protocol_id: str = None,
):
    """
    Fetch analysis protocols for the authenticated user.

    - Neither argument: every protocol in the response's "data" field.
    - `analysis_protocol_id`: the single object returned by
      `api/v1/analysisProtocols/<id>`, wrapped in a list. The ID wins when
      a name is passed as well.
    - `analysis_protocol_name` only: the protocols whose
      "analysis_protocol_name" equals the given name.

    Parameters
    ----------
    analysis_protocol_name : str, optional
        Name of the analysis protocol to be fetched, defaulted to None.
    analysis_protocol_id : str, optional
        ID of the analysis protocol to be fetched, defaulted to None.

    Returns
    -------
    protocols : list
        Protocol objects with "tenant_id" removed and, where present,
        "parameter_file_path" cut down to the part after its third '/'.

    Raises
    ------
    ValueError
        If the response status code is not 200.

    Examples
    -------
    >>> seer_sdk.get_analysis_protocols()
    [{ "id": ..., "analysis_protocol_name": ... }, ...]
    >>> seer_sdk.get_analysis_protocols(analysis_protocol_name="YOUR_ANALYSIS_PROTOCOL_NAME_HERE")
    [{ "id": ..., "analysis_protocol_name": ... }]
    >>> seer_sdk.get_analysis_protocols(analysis_protocol_id="YOUR_ANALYSIS_PROTOCOL_ID_HERE")
    [{ "id": ..., "analysis_protocol_name": ... }]
    """
    id_token, access_token = self._auth.get_token()
    headers = {
        "Authorization": f"{id_token}",
        "access-token": f"{access_token}",
    }
    url = f"{self._auth.url}api/v1/analysisProtocols"
    if analysis_protocol_id:
        url = f"{url}/{analysis_protocol_id}"

    with requests.Session() as session:
        session.headers.update(headers)

        response = session.get(url, params={"all": "true"})
        if response.status_code != 200:
            raise ValueError("Invalid request. Please check your parameters.")
        payload = response.json()

    if analysis_protocol_id:
        # The by-ID endpoint was queried, so the payload is the protocol
        # itself; a name passed alongside the ID is ignored.
        res = [payload]
    elif analysis_protocol_name:
        res = [
            protocol
            for protocol in payload["data"]
            if protocol["analysis_protocol_name"] == analysis_protocol_name
        ]
    else:
        res = payload["data"]

    for entry in res:
        entry.pop("tenant_id", None)

        if "parameter_file_path" in entry:
            # Keep only what follows the third '/' of the stored path.
            entry["parameter_file_path"] = entry["parameter_file_path"].split(
                "/", 3
            )[-1]

    return res
680
+
681
def get_analysis(self, analysis_id: str = None):
    """
    Fetch analyses for the authenticated user.

    Without `analysis_id`, every analysis in the response's "data" field is
    returned. With `analysis_id`, the "analysis" field of the response from
    `api/v1/analyses/<analysis_id>` is wrapped in a one-element list.

    Parameters
    ----------
    analysis_id : str, optional
        ID of the analysis to be fetched, defaulted to None.

    Returns
    -------
    analyses : list
        Analysis objects with "tenant_id" removed and, where present,
        "parameter_file_path" cut down to the part after its third '/'.

    Raises
    ------
    ValueError
        If the response status code is not 200.

    Examples
    -------
    >>> seer_sdk.get_analysis()
    [{id: "YOUR_ANALYSIS_ID_HERE", ...}, {id: "YOUR_ANALYSIS_ID_HERE", ...}]
    >>> seer_sdk.get_analysis("YOUR_ANALYSIS_ID_HERE")
    [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
    """
    id_token, access_token = self._auth.get_token()
    auth_headers = {
        "Authorization": f"{id_token}",
        "access-token": f"{access_token}",
    }
    base_url = f"{self._auth.url}api/v1/analyses"
    analyses = []

    with requests.Session() as session:
        session.headers.update(auth_headers)

        if analysis_id:
            target = f"{base_url}/{analysis_id}"
        else:
            target = base_url
        response = session.get(target, params={"all": "true"})

        if response.status_code != 200:
            raise ValueError("Invalid request. Please check your parameters.")

        if analysis_id:
            analyses = [response.json()["analysis"]]
        else:
            analyses = response.json()["data"]

    for analysis in analyses:
        if "tenant_id" in analysis:
            del analysis["tenant_id"]

        if "parameter_file_path" in analysis:
            stored_path = analysis["parameter_file_path"]
            # Offset just past the third '/' in the path: everything that
            # split("/", 3) leaves in its last chunk is what we keep.
            cut = len(stored_path) - len(stored_path.split("/", 3)[-1])
            analysis["parameter_file_path"] = stored_path[cut:]

    return analyses
747
+
748
def get_analysis_result(self, analysis_id: str, download_path: str = ""):
    """
    Load the protein and peptide result tables of an analysis.

    The tables are read with `url_to_df` from the "npLink" and "panelLink"
    URLs of the `api/v1/data/protein` and `api/v1/data/peptide` responses.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.
    download_path : str
        Existing directory under which the tables are written, in
        `<download_path>/downloads/<analysis_id>/`, as tab-separated files
        with a `.csv` extension. Defaults to an empty string (no files
        written).

    Returns
    -------
    links : dict
        Without `download_path`: the four tables keyed "peptide_np",
        "peptide_panel", "protein_np" and "protein_panel".
        With `download_path`: `{"status": "Download complete."}`.

    Raises
    ------
    ValueError
        If `analysis_id` is empty, `download_path` does not exist, the
        analysis status is "FAILED" or None, or either data request does
        not return status 200.

    Examples
    -------
    >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE")
    {"peptide_np": <DataFrame>, "peptide_panel": <DataFrame>, "protein_np": <DataFrame>, "protein_panel": <DataFrame>}
    >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE", download_path="/Users/Downloads")
    { "status": "Download complete." }
    """
    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    if download_path and not os.path.exists(download_path):
        raise ValueError("The download path you entered is invalid.")

    analysis_status = self.get_analysis(analysis_id)[0]["status"]
    if analysis_status in ["FAILED", None]:
        raise ValueError("Cannot generate links for failed or null analyses.")

    id_token, access_token = self._auth.get_token()
    auth_headers = {
        "Authorization": f"{id_token}",
        "access-token": f"{access_token}",
    }
    base_url = f"{self._auth.url}api/v1/data"

    with requests.Session() as session:
        session.headers.update(auth_headers)

        # Protein is requested before peptide, as the endpoint order matters
        # for which error surfaces first.
        payloads = {}
        for feature in ("protein", "peptide"):
            response = session.get(
                f"{base_url}/{feature}?analysisId={analysis_id}&retry=false"
            )
            if response.status_code != 200:
                raise ValueError(
                    f"Invalid request. Could not fetch {feature} data. Please check your parameters."
                )
            payloads[feature] = response.json()

        # Tables are loaded peptide-first, np before panel.
        links = {}
        for feature in ("peptide", "protein"):
            for suffix, link_key in (("np", "npLink"), ("panel", "panelLink")):
                links[f"{feature}_{suffix}"] = url_to_df(
                    payloads[feature][link_key]["url"]
                )

        if not download_path:
            return links

        target_dir = f"{download_path}/downloads/{analysis_id}"
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        for table_name in (
            "peptide_np",
            "peptide_panel",
            "protein_np",
            "protein_panel",
        ):
            links[table_name].to_csv(f"{target_dir}/{table_name}.csv", sep="\t")

        return {"status": "Download complete."}
848
+
849
def analysis_complete(self, analysis_id: str):
    """
    Return the status of the analysis with the given id.

    Parameters
    ----------
    analysis_id : str
        The analysis id.

    Returns
    -------
    res : dict
        `{"status": <status>}` where the status is taken from the first
        entry returned by `get_analysis(analysis_id)`.

    Raises
    ------
    ValueError
        If `analysis_id` is empty or `get_analysis` fails with ValueError.

    Examples
    -------
    >>> seer_sdk.analysis_complete("YOUR_ANALYSIS_ID_HERE")
    {"status": "SUCCEEDED"}
    """
    if not analysis_id:
        raise ValueError("Analysis id cannot be empty.")

    try:
        res = self.get_analysis(analysis_id)
    except ValueError as err:
        # Raised, not returned: callers must not receive an exception
        # object in place of the status dict.
        raise ValueError(
            "Analysis not found. Your ID could be incorrect"
        ) from err

    return {"status": res[0]["status"]}
882
+
883
def list_ms_data_files(self, folder="", space=None):
    """
    List the MS data files in a folder.

    Issues a GET against `api/v1/msdataindex/filesinfolder` with `folder`
    (and `userGroupId` when `space` is given) as query parameters.

    Parameters
    ----------
    folder : str, optional
        Folder path to list the files from. Defaults to an empty string.
    space : str, optional
        ID of the user group to which the files belong, defaulted to None.

    Returns
    -------
    list
        The "filesList" field of the response.

    Raises
    ------
    ValueError
        If the response status code is not 200.

    Examples
    -------
    >>> sdk.list_ms_data_files("test-may-2/")
    [
        "test-may-2/EXP20028/EXP20028_2020ms0096X10_A.raw",
        "test-may-2/agilent/05_C2_19ug-r001.d.zip",
        ...
    ]
    """
    id_token, access_token = self._auth.get_token()
    headers = {
        "Authorization": f"{id_token}",
        "access-token": f"{access_token}",
    }
    url = f"{self._auth.url}api/v1/msdataindex/filesinfolder"

    # Passed as params so requests percent-encodes the values; a folder
    # name containing '&', '#' or '?' would otherwise corrupt the query.
    params = {"folder": folder}
    if space:
        params["userGroupId"] = space

    with requests.Session() as session:
        session.headers.update(headers)

        response = session.get(url, params=params)

        if response.status_code != 200:
            raise ValueError("Invalid request. Please check your parameters.")
        return response.json()["filesList"]
939
+
940
def download_ms_data_files(
    self, paths: _List[str], download_path: str, space: str = None
):
    """
    Download the MS data files at `paths` into `download_path`.

    A download URL is requested for every path from
    `api/v1/msdataindex/download/getUrl`, then each file is fetched with
    `urllib.request.urlretrieve` and stored under its base name. Each file
    gets two attempts.

    Parameters
    ----------
    paths : list[str]
        List of paths to download.
    download_path : str
        Directory to download the files to. An empty value falls back to
        the current working directory; a value that is not an existing
        directory falls back to `<cwd>/downloads`.
    space : str, optional
        ID of the user group to which the files belong, defaulted to None.

    Returns
    -------
    message : dict
        `{"message": "Files downloaded successfully to '<dir>'"}`.

    Raises
    ------
    ValueError
        If a download URL request does not return status 200, or a file
        still fails to download on the second attempt.
    """
    if not download_path:
        download_path = os.getcwd()
        print(f"\nDownload path not specified.\n")

    if not os.path.isdir(download_path):
        print(
            f'\nThe path "{download_path}" you specified does not exist, was either invalid or not absolute.\n'
        )
        download_path = f"{os.getcwd()}/downloads"

    # Drop a single trailing slash so joined paths do not contain "//".
    name = (
        download_path if download_path[-1] != "/" else download_path[:-1]
    )
    os.makedirs(name, exist_ok=True)

    print(f'Downloading files to "{name}"\n')

    id_token, access_token = self._auth.get_token()
    headers = {
        "Authorization": f"{id_token}",
        "access-token": f"{access_token}",
    }
    get_url_endpoint = f"{self._auth.url}api/v1/msdataindex/download/getUrl"
    # The token is only read here (for its tenant id claim), hence no
    # signature verification.
    tenant_id = jwt.decode(id_token, options={"verify_signature": False})[
        "custom:tenantId"
    ]

    # Resolve every download URL first; one session serves all requests.
    urls = []
    with requests.Session() as session:
        session.headers.update(headers)

        for path in paths:
            response = session.post(
                get_url_endpoint,
                json={
                    "filepath": f"{tenant_id}/{path}",
                    "userGroupId": space,
                },
            )
            if response.status_code != 200:
                raise ValueError(
                    "Could not download file. Please check if the backend is running."
                )
            urls.append(response.text)

    for path, file_url in zip(paths, urls):
        filename = path.split("/")[-1]
        target = f"{name}/{filename}"

        print(f"Downloading {filename}")

        last_error = None
        for _ in range(2):
            try:
                with tqdm(
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    miniters=1,
                    desc=f"Progress",
                ) as t:
                    # NOTE(review): this disables HTTPS certificate
                    # verification for the whole process, not just this
                    # call. Kept for compatibility; confirm it is required.
                    ssl._create_default_https_context = (
                        ssl._create_unverified_context
                    )
                    urllib.request.urlretrieve(
                        file_url,
                        target,
                        reporthook=download_hook(t),
                        data=None,
                    )
                break
            except Exception as err:
                last_error = err
                # The target directory may have disappeared; recreate it
                # before the second attempt.
                os.makedirs(name, exist_ok=True)
        else:
            raise ValueError(
                "Your download failed. Please check if the backend is still running."
            ) from last_error

        print(f"Finished downloading {filename}\n")

    return {"message": f"Files downloaded successfully to '{name}'"}
1052
+
1053
def group_analysis_results(self, analysis_id: str, box_plot: dict = None):
    """
    Returns the group analysis data for the given analysis id, provided it exists.

    Parameters
    ----------
    analysis_id : str
        The analysis id.

    box_plot : dict, optional
        The box plot configuration needed for the analysis, defaulted to None. Contains `feature_type` ("protein" or "peptide") and `feature_ids` (a list of feature IDs, or a single comma separated string of feature IDs) keys. The dictionary passed in is not modified.

    Returns
    -------
    res : dict
        A dictionary containing the group analysis data. The "box_plot" key is only present when `box_plot` was provided.

    Raises
    ------
    ValueError
        If `analysis_id` is empty, if `box_plot` is provided without a usable `feature_type` / `feature_ids`, or if any of the backend requests does not return status code 200.

    Examples
    -------
    >>> from core import SeerSDK
    >>> seer_sdk = SeerSDK()
    >>> seer_sdk.group_analysis_results("YOUR_ANALYSIS_ID_HERE")
    >>> {
            "pre": {
                "protein": [],
                "peptide": [],
            },
            "post": {
                "protein": {},
                "protein_url": {
                    "protein_processed_file_url": "",
                    "protein_processed_long_form_file_url": "",
                },
                "peptide": {},
                "peptide_url": {
                    "peptide_processed_file_url": "",
                    "peptide_processed_long_form_file_url": "",
                },
            },
            "box_plot": []
        }
    """

    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    # Validate the box plot configuration up front so that a malformed
    # configuration fails before any request is sent.
    box_plot_payload = None
    if box_plot:
        feature_type = box_plot.get("feature_type")
        feature_ids = box_plot.get("feature_ids")

        if not isinstance(feature_type, str) or not feature_type:
            raise ValueError(
                "Invalid 'box_plot' parameter: 'feature_type' needs to be either 'protein' or 'peptide'."
            )
        if not feature_ids:
            raise ValueError(
                "Invalid 'box_plot' parameter: 'feature_ids' cannot be empty."
            )

        # A plain string is treated as an already comma separated list;
        # joining it would insert a comma between every character.
        if not isinstance(feature_ids, str):
            feature_ids = ",".join(str(fid) for fid in feature_ids)

        box_plot_payload = {
            "analysisId": analysis_id,
            "featureIds": feature_ids,
            "featureType": f"{feature_type.lower()}group",
        }

    ID_TOKEN, ACCESS_TOKEN = self._auth.get_token()
    HEADERS = {
        "Authorization": f"{ID_TOKEN}",
        "access-token": f"{ACCESS_TOKEN}",
    }
    URL = f"{self._auth.url}"

    res = {
        "pre": {
            "protein": [],
            "peptide": [],
        },
        "post": {
            "protein": {},
            "protein_url": {
                "protein_processed_file_url": "",
                "protein_processed_long_form_file_url": "",
            },
            "peptide": {},
            "peptide_url": {
                "peptide_processed_file_url": "",
                "peptide_processed_long_form_file_url": "",
            },
        },
    }

    # Response key -> key under res["post"].
    result_keys = (
        ("pgResult", "protein"),
        ("peptideResult", "peptide"),
    )
    # Response key -> (section under res["post"], key inside that section).
    url_keys = (
        ("pgProcessedFileUrl", "protein_url", "protein_processed_file_url"),
        (
            "pgProcessedLongFormFileUrl",
            "protein_url",
            "protein_processed_long_form_file_url",
        ),
        (
            "peptideProcessedFileUrl",
            "peptide_url",
            "peptide_processed_file_url",
        ),
        (
            "peptideProcessedLongFormFileUrl",
            "peptide_url",
            "peptide_processed_long_form_file_url",
        ),
    )

    with requests.Session() as s:
        s.headers.update(HEADERS)

        # Pre-GA data calls
        for feature in ("protein", "peptide"):
            pre_data = s.post(
                url=f"{URL}api/v2/groupanalysis/{feature}",
                json={"analysisId": analysis_id, "grouping": "condition"},
            )
            if pre_data.status_code != 200:
                raise ValueError(
                    f"Invalid request. Could not fetch group analysis {feature} pre data. Please check your parameters."
                )
            res["pre"][feature] = pre_data.json()

        # Post-GA data call; `params` lets requests URL-encode the id.
        get_saved_result = s.get(
            f"{URL}api/v1/groupanalysis/getSavedResults",
            params={"analysisid": analysis_id},
        )
        if get_saved_result.status_code != 200:
            raise ValueError(
                "Invalid request. Could not fetch group analysis post data. Please check your parameters."
            )
        get_saved_result = get_saved_result.json()

        # Only keys present in the response overwrite the empty defaults.
        for source_key, target_key in result_keys:
            if source_key in get_saved_result:
                res["post"][target_key] = get_saved_result[source_key]

        for source_key, section, target_key in url_keys:
            if source_key in get_saved_result:
                res["post"][section][target_key] = get_saved_result[
                    source_key
                ]

        # Box plot data call
        if box_plot_payload is None:
            return res

        box_plot_data = s.post(
            url=f"{URL}api/v1/groupanalysis/rawdata",
            json=box_plot_payload,
        )
        if box_plot_data.status_code != 200:
            raise ValueError(
                "Invalid request, could not fetch box plot data. Please verify your 'box_plot' parameters, including 'feature_ids' (comma-separated list of feature IDs) and 'feature_type' (needs to be a either 'protein' or 'peptide')."
            )

        res["box_plot"] = box_plot_data.json()

    return res