seer-pas-sdk 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1250 @@
1
+ from tqdm import tqdm
2
+
3
+ import os
4
+ import jwt
5
+ import requests
6
+ import urllib.request
7
+ import ssl
8
+ import shutil
9
+
10
+ from typing import List as _List
11
+
12
+ from ..common import *
13
+ from ..auth import Auth
14
+ from ..objects import PlateMap
15
+
16
+
17
+ class SeerSDK:
18
+ """
19
+ Object exposing SDK methods. Requires a username and password; the optional `instance` param denotes the instance of PAS (defaults to "US").
20
+
21
+ Examples
22
+ -------
23
+ >>> from seer_pas_sdk import SeerSDK
24
+ >>> USERNAME = "test"
25
+ >>> PASSWORD = "test-password"
26
+ >>> INSTANCE = "EU"
27
+ >>> seer_sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE)
28
+ """
29
+
30
def __init__(self, username, password, instance="US"):
    """
    Log in to PAS and keep the resulting `Auth` handle on the instance.

    Parameters
    ----------
    username : str
        Username forwarded to `Auth`.
    password : str
        Password forwarded to `Auth`.
    instance : str, optional
        Instance identifier forwarded to `Auth`, defaulted to "US".

    Raises
    ------
    ValueError
        If the `Auth` object cannot be created or the initial token request fails.
    """
    try:
        self._auth = Auth(username, password, instance)

        # Request a token immediately so that bad credentials surface here
        # instead of on the first API call.
        self._auth.get_token()

    except Exception as err:
        # `Exception` (not a bare `except`) so that Ctrl-C / SystemExit are not
        # rewritten into a misleading login error; the cause stays chained.
        raise ValueError(
            "Could not log in.\nPlease check your credentials and/or instance."
        ) from err

    print(f"User '{username}' logged in.\n")
42
+
43
def _get_auth_headers(self):
    """
    Build the HTTP headers that carry the current tokens.

    Returns
    -------
    headers : dict
        `{"Authorization": <id token>, "access-token": <access token>}`, where the
        two tokens are the pair returned by `self._auth.get_token()`.
    """
    tokens = self._auth.get_token()
    id_token, access_token = tokens

    headers = {}
    headers["Authorization"] = id_token
    headers["access-token"] = access_token
    return headers
49
+
50
def _get_auth_session(self):
    """
    Create a new `requests.Session` whose default headers include the auth headers.

    Returns
    -------
    session : requests.Session
        A fresh session updated with the result of `self._get_auth_headers()`.
    """
    session = requests.Session()
    auth_headers = self._get_auth_headers()
    session.headers.update(auth_headers)
    return session
56
+
57
def get_spaces(self):
    """
    Fetches a list of spaces (user groups) for the authenticated user.

    Returns
    -------
    spaces: list
        Decoded JSON body of the `api/v1/usergroups` endpoint.

    Raises
    ------
    ValueError
        If the endpoint answers with a status code other than 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk.get_spaces()
    >>> [
            { "usergroup_name": ... },
            { "usergroup_name": ... },
            ...
        ]
    """
    endpoint = f"{self._auth.url}api/v1/usergroups"

    with self._get_auth_session() as session:
        response = session.get(endpoint)

        if response.status_code == 200:
            return response.json()

        raise ValueError("Invalid request. Please check your parameters.")
88
+
89
def get_plate_metadata(self, plate_id: str = None, df: bool = False):
    """
    Fetches plate metadata for the authenticated user. If no `plate_id` is provided, returns all plates. If `plate_id` is provided, returns the plate with the given `plate_id`, provided it exists.

    Parameters
    ----------
    plate_id : str, optional
        ID of the plate to be fetched, defaulted to None.
    df: bool
        Boolean denoting whether the user wants the response back as a list of dicts (False) or as the object produced by `dict_to_df` (True).

    Returns
    -------
    plates: list or DataFrame
        List/DataFrame of plate objects, with the `tenant_id` field removed.

    Raises
    ------
    ValueError
        If the endpoint answers with a status code other than 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk.get_plate_metadata()
    >>> [
            { "id": ... },
            { "id": ... },
            ...
        ]

    >>> seer_sdk.get_plate_metadata(plate_id="YOUR_PLATE_ID_HERE")
    >>> [{ "id": ... }]
    """
    URL = f"{self._auth.url}api/v1/plates"

    with self._get_auth_session() as s:
        plates = s.get(
            f"{URL}/{plate_id}" if plate_id else URL,
            params={"all": "true"},
        )
        if plates.status_code != 200:
            raise ValueError("Invalid request. Please check your parameters.")

        payload = plates.json()
        # A single-plate lookup returns the bare object; the listing wraps
        # its rows in a "data" key.
        res = [payload] if plate_id else payload["data"]

    for entry in res:
        # pop() instead of del: an entry without `tenant_id` must not raise
        # KeyError (same tolerance as get_project_metadata).
        entry.pop("tenant_id", None)

    return dict_to_df(res) if df else res
155
+
156
def get_project_metadata(self, project_id: str = None, df: bool = False):
    """
    Fetches project metadata for the authenticated user. Without a `project_id` every project is returned; with a `project_id` only that project is returned, provided it exists.

    Parameters
    ----------
    project_id: str, optional
        Project ID of the project to be fetched, defaulted to None.
    df: bool
        Boolean denoting whether the user wants the response back as a list of dicts (False) or as the object produced by `dict_to_df` (True).

    Returns
    -------
    projects: list or DataFrame
        Project objects with `tenant_id` removed (when present) and `raw_file_path` (when present) reduced to the part after its third '/'.

    Raises
    ------
    ValueError
        If the endpoint answers with a status code other than 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk.get_project_metadata()
    >>> [
            { "project_name": ... },
            { "project_name": ... },
            ...
        ]

    >>> seer_sdk.get_project_metadata(project_id="YOUR_PROJECT_ID_HERE")
    >>> [{ "project_name": ... }]
    """
    endpoint = f"{self._auth.url}api/v1/projects"
    if project_id:
        endpoint = f"{self._auth.url}api/v1/projects/{project_id}"

    with self._get_auth_session() as session:
        response = session.get(endpoint, params={"all": "true"})
        if response.status_code != 200:
            raise ValueError("Invalid request. Please check your parameters.")

        if project_id:
            projects = [response.json()]
        else:
            projects = response.json()["data"]

        for project in projects:
            project.pop("tenant_id", None)

            if "raw_file_path" in project:
                # Keep what follows the third '/'; splitting at most three
                # times leaves exactly that remainder as the last piece.
                project["raw_file_path"] = project["raw_file_path"].split(
                    "/", 3
                )[-1]

    if df:
        return dict_to_df(projects)
    return projects
232
+
233
def _get_samples_metadata(
    self, plate_id: str = None, project_id: str = None, df: bool = False
):
    """
    ****************
    [UNEXPOSED METHOD CALL]
    ****************

    Fetches the samples that belong to a plate or to a project.

    If both `plate_id` and `project_id` are passed in, only the `plate_id` is validated and used as the filter.

    Parameters
    ----------
    plate_id : str, optional
        ID of the plate for which samples are to be fetched, defaulted to None.
    project_id : str, optional
        ID of the project for which samples are to be fetched, defaulted to None.
    df: bool
        Boolean denoting whether the user wants the response back as a list of dicts (False) or as a DataFrame (True).

    Returns
    -------
    samples: list or DataFrame
        Samples with `tenant_id` removed and with every `custom_*` column dropped unless it is listed by `get_sample_custom_fields()`.

    Raises
    ------
    ValueError
        If neither ID is given, if the ID fails validation, or if the samples endpoint answers with a status code other than 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)

    >>> seer_sdk._get_samples_metadata(plate_id="7ec8cad0-15e0-11ee-bdf1-bbaa73585acf")
    >>> [
            { "id": ... },
            { "id": ... },
            ...
        ]
    """
    if not plate_id and not project_id:
        raise ValueError("You must pass in plate ID or project ID.")

    URL = f"{self._auth.url}api/v1/samples"
    sample_params = {"all": "true"}

    # Validate the ID by looking it up first. `except Exception` (not a bare
    # `except`) keeps KeyboardInterrupt/SystemExit intact while still
    # covering both "not found" and connection failures.
    if plate_id:
        try:
            self.get_plate_metadata(plate_id)
        except Exception as err:
            raise ValueError(
                "Plate ID is invalid. Please check your parameters and see if the backend is running."
            ) from err
        sample_params["plateId"] = plate_id
    else:
        try:
            self.get_project_metadata(project_id)
        except Exception as err:
            raise ValueError(
                "Project ID is invalid. Please check your parameters and see if the backend is running."
            ) from err
        sample_params["projectId"] = project_id

    with self._get_auth_session() as s:
        samples = s.get(URL, params=sample_params)
        if samples.status_code != 200:
            raise ValueError(
                "Invalid request. Please check if your plate ID has any samples associated with it."
            )
        res = samples.json()["data"]

    for entry in res:
        # pop() instead of del so a row without `tenant_id` does not raise.
        entry.pop("tenant_id", None)

    # Exclude custom fields that don't belong to the tenant
    res_df = dict_to_df(res)
    custom_columns = {
        x["field_name"] for x in self.get_sample_custom_fields()
    }
    res_df = res_df[
        [
            x
            for x in res_df.columns
            if not x.startswith("custom_") or x in custom_columns
        ]
    ]

    return res_df if df else res_df.to_dict(orient="records")
338
+
339
def get_sample_custom_fields(self):
    """
    Fetches a list of custom fields defined for the authenticated user.

    Returns
    -------
    fields: list
        Decoded JSON entries of the `api/v1/samplefields` endpoint, each with its `tenant_id` key removed.

    Raises
    ------
    ValueError
        If the endpoint answers with a status code other than 200.
    """
    URL = f"{self._auth.url}api/v1/samplefields"

    with self._get_auth_session() as s:
        fields = s.get(URL)

        if fields.status_code != 200:
            raise ValueError(
                "Failed to fetch custom columns. Please check your connection."
            )

        res = fields.json()

    for entry in res:
        # pop() instead of del so an entry without `tenant_id` does not raise.
        entry.pop("tenant_id", None)
    return res
358
+
359
def get_msdata(self, sample_ids: list, df: bool = False):
    """
    Fetches one MS data file record per entry of `sample_ids` for an authenticated user.

    For every sample ID the `api/v1/msdatas/items` endpoint is queried and the first element of the returned "data" list is kept.

    Parameters
    ----------
    sample_ids : list
        List of unique sample IDs.
    df: bool
        Boolean denoting whether the user wants the response back as a list of dicts (False) or as the object produced by `dict_to_df` (True).

    Returns
    -------
    res: list or DataFrame
        MS data file records with `tenant_id` removed (when present) and `raw_file_path` (when present) reduced to the part after its third '/'.

    Raises
    ------
    ValueError
        If a request does not return status 200 or returns an empty "data" list.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> sample_ids = ["812139c0-15e0-11ee-bdf1-bbaa73585acf", "803e05b0-15e0-11ee-bdf1-bbaa73585acf"]

    >>> seer_sdk.get_msdata(sample_ids)
    >>> [
        {"id": "SAMPLE_ID_1_HERE" ... },
        {"id": "SAMPLE_ID_2_HERE" ... }
    ]
    """
    endpoint = f"{self._auth.url}api/v1/msdatas/items"

    records = []
    for sample_id in sample_ids:
        with self._get_auth_session() as session:
            response = session.post(endpoint, json={"sampleId": sample_id})

            # The body is only decoded for a 200 answer.
            rows = None
            if response.status_code == 200:
                rows = response.json()["data"]

            if not rows:
                raise ValueError("Failed to fetch MS data for your plate ID.")

            records.append(rows[0])

    for record in records:
        record.pop("tenant_id", None)

        if "raw_file_path" in record:
            # Keep what follows the third '/' of the stored path.
            record["raw_file_path"] = record["raw_file_path"].split("/", 3)[-1]

    if df:
        return dict_to_df(records)
    return records
425
+
426
def get_plate(self, plate_id: str, df: bool = False):
    """
    Fetches MS data files for the samples of a plate.

    The samples of `plate_id` are looked up with `_get_samples_metadata`, and their IDs are handed to `get_msdata`.

    Parameters
    ----------
    plate_id : str
        ID of the plate for which MS data files are to be fetched.
    df: bool
        Forwarded to `get_msdata`; denotes whether the response should come back as a list of dicts or as a DataFrame.

    Returns
    -------
    res: list or DataFrame
        Whatever `get_msdata` returns for the plate's sample IDs.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> plate_id = "7ec8cad0-15e0-11ee-bdf1-bbaa73585acf"

    >>> seer_sdk.get_plate(plate_id)
    >>> [
        {"id": "MS_DATA_FILE_ID_1_HERE" ... },
        {"id": "MS_DATA_FILE_ID_2_HERE" ... }
    ]
    """
    samples = self._get_samples_metadata(plate_id=plate_id)

    ids = []
    for sample in samples:
        ids.append(sample["id"])

    return self.get_msdata(ids, df)
466
+
467
def get_project(
    self, project_id: str, msdata: bool = False, df: bool = False
):
    """
    Fetches samples (and optionally MS data files) for a `project_id` for an authenticated user.

    Parameters
    ----------
    project_id : str
        ID of the project for which samples are to be fetched.
    msdata: bool, optional
        When True, every sample that has MS data files gets an extra `"ms_data_files"` key holding the list of those file records.
    df: bool, optional
        When True, the samples are converted with `dict_to_df`; with `msdata` also True, each sample's `"ms_data_files"` list is converted first, giving a nested result.

    Returns
    -------
    res: list or DataFrame
        The project's samples, as a list of dicts or as a DataFrame.

    Raises
    ------
    ValueError
        If `project_id` is empty.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> project_id = "7e48e150-8a47-11ed-b382-bf440acece26"

    >>> seer_sdk.get_project(project_id=project_id, msdata=False, df=False)
    >>> [
        {"id": "SAMPLE_ID_1_HERE", "sample_type": "Plasma", ...},
        {"id": "SAMPLE_ID_2_HERE", "sample_type": "Plasma", ...}
    ]

    >>> seer_sdk.get_project(project_id=project_id, msdata=True, df=False)
    >>> [
        {
            "id": "SAMPLE_ID_1_HERE",
            "sample_type": "Plasma",
            ...
            "ms_data_files": [
                {"id": MS_DATA_FILE_ID_1_HERE, ...},
                ...
            ]
        },
        ...
    ]
    """
    if not project_id:
        # Raised, not returned: returning the exception object would hand
        # callers a truthy value that looks like a result.
        raise ValueError("No project ID specified.")

    project_samples = self._get_samples_metadata(
        project_id=project_id, df=False
    )

    if msdata:
        sample_ids = [
            sample["id"] for sample in project_samples
        ]  # will always contain unique values
        ms_data_files = self.get_msdata(sample_ids=sample_ids, df=False)

        # Index the samples once instead of rescanning the whole list for
        # every file record.
        samples_by_id = {sample["id"]: sample for sample in project_samples}
        for ms_data_file in ms_data_files:
            sample = samples_by_id.get(ms_data_file["sample_id"])
            if sample is not None:
                # setdefault + append accumulates every file of a sample; a
                # check against the wrong key would overwrite the list.
                sample.setdefault("ms_data_files", []).append(ms_data_file)

    if df:
        for sample in project_samples:
            if "ms_data_files" in sample:
                sample["ms_data_files"] = dict_to_df(sample["ms_data_files"])

        project_samples = dict_to_df(project_samples)

    return project_samples
607
+
608
def get_analysis_protocols(
    self,
    analysis_protocol_name: str = None,
    analysis_protocol_id: str = None,
):
    """
    Fetches analysis protocols for the authenticated user. With no arguments all protocols are returned. With `analysis_protocol_id` the protocol with that ID is returned. With only `analysis_protocol_name` the protocols whose name matches are returned. If both are given, the ID supersedes the name.

    Parameters
    ----------
    analysis_protocol_name : str, optional
        Name of the analysis protocol to be fetched, defaulted to None.

    analysis_protocol_id : str, optional
        ID of the analysis protocol to be fetched, defaulted to None.

    Returns
    -------
    protocols: list
        Analysis protocol objects with `tenant_id` removed (when present) and `parameter_file_path` (when present) reduced to the part after its third '/'.

    Raises
    ------
    ValueError
        If the endpoint answers with a status code other than 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk.get_analysis_protocols()
    >>> [
            { "id": ..., "analysis_protocol_name": ... },
            { "id": ..., "analysis_protocol_name": ... },
            ...
        ]

    >>> seer_sdk.get_analysis_protocols(analysis_protocol_name="YOUR_ANALYSIS_PROTOCOL_NAME_HERE")
    >>> [{ "id": ..., "analysis_protocol_name": ... }]

    >>> seer_sdk.get_analysis_protocols(analysis_protocol_id="YOUR_ANALYSIS_PROTOCOL_ID_HERE")
    >>> [{ "id": ..., "analysis_protocol_name": ... }]
    """
    URL = (
        f"{self._auth.url}api/v1/analysisProtocols"
        if not analysis_protocol_id
        else f"{self._auth.url}api/v1/analysisProtocols/{analysis_protocol_id}"
    )

    with self._get_auth_session() as s:
        protocols = s.get(URL, params={"all": "true"})
        if protocols.status_code != 200:
            raise ValueError("Invalid request. Please check your parameters.")
        payload = protocols.json()

    if analysis_protocol_id:
        # The ID wins even when a name is also supplied; a by-ID request
        # returns the bare protocol object, not a "data" listing.
        res = [payload]
    elif analysis_protocol_name:
        res = [
            protocol
            for protocol in payload["data"]
            if protocol["analysis_protocol_name"] == analysis_protocol_name
        ]
    else:
        res = payload["data"]

    for entry in res:
        entry.pop("tenant_id", None)

        if "parameter_file_path" in entry:
            # Keep what follows the third '/' of the stored path.
            entry["parameter_file_path"] = entry["parameter_file_path"].split(
                "/", 3
            )[-1]

    return res
692
+
693
def get_analysis(
    self,
    analysis_id: str = None,
    folder_id: str = None,
    show_folders=True,
    analysis_only=True,
):
    """
    Returns a list of analyses objects for the authenticated user. If no id is provided, returns all analyses for the authenticated user.

    Parameters
    ----------
    analysis_id : str, optional
        ID of the analysis to be fetched, defaulted to None.

    folder_id : str, optional
        ID of the folder to be fetched, defaulted to None.

    show_folders : bool, optional
        Mark True if folder contents are to be returned in the response, defaulted to True.
        Will be disabled if an analysis id is provided.

    analysis_only : bool, optional
        Mark True if only analyses objects are to be returned in the response, defaulted to True.
        If marked false, folder objects (including nested ones) will also be included in the response.

    Returns
    -------
    analyses: list
        Analysis (and optionally folder) objects with `tenant_id` removed (when present) and `parameter_file_path` (when present) reduced to the part after its third '/'.

    Raises
    ------
    ValueError
        If the endpoint answers with a status code other than 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk.get_analysis()
    >>> [
            {id: "YOUR_ANALYSIS_ID_HERE", ...},
            {id: "YOUR_ANALYSIS_ID_HERE", ...},
            {id: "YOUR_ANALYSIS_ID_HERE", ...}
        ]

    >>> seer_sdk.get_analysis("YOUR_ANALYSIS_ID_HERE")
    >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
    """
    URL = f"{self._auth.url}api/v1/analyses"

    params = {"all": "true"}
    if folder_id:
        params["folder"] = folder_id

    with self._get_auth_session() as s:
        analyses = s.get(
            f"{URL}/{analysis_id}" if analysis_id else URL, params=params
        )

        if analyses.status_code != 200:
            raise ValueError("Invalid request. Please check your parameters.")

        payload = analyses.json()
        # A by-ID lookup wraps the object in "analysis"; a listing uses "data".
        res = [payload["analysis"]] if analysis_id else payload["data"]

    folders = []
    for entry in res:
        entry.pop("tenant_id", None)

        if "parameter_file_path" in entry:
            # Keep what follows the third '/' of the stored path.
            entry["parameter_file_path"] = entry["parameter_file_path"].split(
                "/", 3
            )[-1]

        if show_folders and not analysis_id and entry["is_folder"]:
            folders.append(entry["id"])

    # Recurse into each folder. `analysis_only` is forwarded so that nested
    # folder objects survive when the caller asked to keep folders.
    for folder in folders:
        res += self.get_analysis(folder_id=folder, analysis_only=analysis_only)

    if analysis_only:
        res = [analysis for analysis in res if not analysis["is_folder"]]
    return res
792
+
793
def get_analysis_result(self, analysis_id: str, download_path: str = ""):
    """
    Given an `analysis_id`, loads the protein and peptide result tables of that analysis and either returns them or writes them to disk.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    download_path : str
        Existing directory under which the files are written (into `downloads/<analysis_id>`). Defaults to an empty string, in which case nothing is written and the tables are returned.

    Returns
    -------
    links: dict
        Without `download_path`: the keys "peptide_np", "peptide_panel", "protein_np" and "protein_panel", each mapped to the object `url_to_df` produced for the corresponding link.
        With `download_path`: `{"status": "Download complete."}`.

    Raises
    ------
    ValueError
        If `analysis_id` is empty, `download_path` does not exist, the analysis status is "FAILED" or None, or a data request does not return status 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)

    >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE")
    >>> {
        "peptide_np": <peptide_np dataframe object>,
        "peptide_panel": <peptide_panel dataframe object>,
        "protein_np": <protein_np dataframe object>,
        "protein_panel": <protein_panel dataframe object>
    }

    >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE", download_path="/Users/Downloads")
    >>> { "status": "Download complete." }
    """
    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    if download_path and not os.path.exists(download_path):
        raise ValueError("The download path you entered is invalid.")

    analysis_status = self.get_analysis(analysis_id)[0]["status"]
    if analysis_status in ["FAILED", None]:
        raise ValueError("Cannot generate links for failed or null analyses.")

    endpoint = f"{self._auth.url}api/v1/data"

    with self._get_auth_session() as session:
        protein_response = session.get(
            f"{endpoint}/protein?analysisId={analysis_id}&retry=false"
        )
        if protein_response.status_code != 200:
            raise ValueError(
                "Invalid request. Could not fetch protein data. Please check your parameters."
            )
        protein_data = protein_response.json()

        peptide_response = session.get(
            f"{endpoint}/peptide?analysisId={analysis_id}&retry=false"
        )
        if peptide_response.status_code != 200:
            raise ValueError(
                "Invalid request. Could not fetch peptide data. Please check your parameters."
            )
        peptide_data = peptide_response.json()

        # Peptide tables first, then protein tables; "np" before "panel".
        frames = {
            f"{kind}_{table}": url_to_df(payload[f"{table}Link"]["url"])
            for kind, payload in (
                ("peptide", peptide_data),
                ("protein", protein_data),
            )
            for table in ("np", "panel")
        }

        if not download_path:
            return frames

        target_dir = f"{download_path}/downloads/{analysis_id}"
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        # Files carry a .csv suffix but are written tab-separated.
        for key, frame in frames.items():
            frame.to_csv(f"{target_dir}/{key}.csv", sep="\t")

        return {"status": "Download complete."}
887
+
888
def analysis_complete(self, analysis_id: str):
    """
    Returns the status of the analysis with the given id.

    Parameters
    ----------
    analysis_id : str
        The analysis id.

    Returns
    -------
    res : dict
        A dictionary containing the status of the analysis.

    Raises
    ------
    ValueError
        If `analysis_id` is empty or the analysis lookup fails.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk.analysis_complete("YOUR_ANALYSIS_ID_HERE")
    >>> {
            "status": "SUCCEEDED"
        }
    """
    if not analysis_id:
        raise ValueError("Analysis id cannot be empty.")

    try:
        res = self.get_analysis(analysis_id)
    except ValueError as err:
        # Raised, not returned: a returned exception object would look like
        # a normal result to the caller.
        raise ValueError(
            "Analysis not found. Your ID could be incorrect"
        ) from err

    return {"status": res[0]["status"]}
921
+
922
def list_ms_data_files(self, folder="", space=None):
    """
    Lists all the MS data files in the given folder as long as the folder path passed in the params is valid.

    Parameters
    ----------
    folder : str, optional
        Folder path to list the files from. Defaults to an empty string.
    space : str, optional
        ID of the user group to which the files belong, defaulted to None. Only sent when set.

    Returns
    -------
    list
        The "filesList" entry of the endpoint's JSON response.

    Raises
    ------
    ValueError
        If the endpoint answers with a status code other than 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> sdk = SeerSDK(USERNAME, PASSWORD)
    >>> folder_path = "test-may-2/"
    >>> sdk.list_ms_data_files(folder_path)
    >>> [
        "test-may-2/EXP20028/EXP20028_2020ms0096X10_A.raw",
        "test-may-2/agilent/05_C2_19ug-r001.d.zip",
        ...
    ]
    """
    URL = f"{self._auth.url}api/v1/msdataindex/filesinfolder"

    # Hand the query to the HTTP client as `params` so it is URL-encoded;
    # interpolating it into the URL breaks on '&' or '#' in a folder name.
    params = {"folder": folder}
    if space:
        params["userGroupId"] = space

    with self._get_auth_session() as s:
        files = s.get(URL, params=params)

        if files.status_code != 200:
            raise ValueError("Invalid request. Please check your parameters.")
        return files.json()["filesList"]
972
+
973
def download_ms_data_files(
    self, paths: _List[str], download_path: str, space: str = None
):
    """
    Downloads all MS data files for paths passed in the params to the specified download path.

    Parameters
    ----------
    paths : list[str]
        List of paths to download.
    download_path : str
        Directory to download the files to. An empty value falls back to the current working directory; a value that is not an existing directory falls back to `<cwd>/downloads`.
    space : str, optional
        ID of the user group to which the files belongs, defaulted to None.

    Returns
    -------
    message: dict
        `{"message": ...}` naming the directory the files were written to.

    Raises
    ------
    ValueError
        If a download URL cannot be obtained (status other than 200) or a file still fails to download after two attempts.
    """
    if not download_path:
        download_path = os.getcwd()
        print("\nDownload path not specified.\n")

    if not os.path.isdir(download_path):
        print(
            f'\nThe path "{download_path}" you specified does not exist, was either invalid or not absolute.\n'
        )
        download_path = f"{os.getcwd()}/downloads"

    # Drop a single trailing '/', but never reduce the root path to "".
    name = (
        download_path[:-1]
        if download_path.endswith("/") and len(download_path) > 1
        else download_path
    )
    os.makedirs(name, exist_ok=True)

    print(f'Downloading files to "{name}"\n')

    URL = f"{self._auth.url}api/v1/msdataindex/download/getUrl"

    # The ID token is the first element of get_token(), the same pair that
    # _get_auth_headers unpacks. The signature is deliberately not verified:
    # the token is only read here to learn the tenant id.
    id_token, _ = self._auth.get_token()
    tenant_id = jwt.decode(id_token, options={"verify_signature": False})[
        "custom:tenantId"
    ]

    urls = []
    for path in paths:
        with self._get_auth_session() as s:
            download_url = s.post(
                URL,
                json={
                    "filepath": f"{tenant_id}/{path}",
                    "userGroupId": space,
                },
            )

            if download_url.status_code != 200:
                raise ValueError(
                    "Could not download file. Please check if the backend is running."
                )
            urls.append(download_url.text)

    for path, url in zip(paths, urls):
        filename = path.split("/")[-1]
        destination = f"{name}/{filename}"

        print(f"Downloading {filename}")

        # NOTE(review): this switches off TLS certificate verification for
        # every later urllib HTTPS request in the process. Kept because
        # existing deployments may rely on it - confirm whether it is still
        # needed and remove it if not.
        ssl._create_default_https_context = ssl._create_unverified_context

        last_error = None
        for _ in range(2):
            try:
                with tqdm(
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    miniters=1,
                    desc="Progress",
                ) as t:
                    urllib.request.urlretrieve(
                        url,
                        destination,
                        reporthook=download_hook(t),
                        data=None,
                    )
                break
            except Exception as err:
                # Make sure the target directory exists, then retry once.
                # exist_ok avoids raising here when it already does.
                last_error = err
                os.makedirs(name, exist_ok=True)
        else:
            raise ValueError(
                "Your download failed. Please check if the backend is still running."
            ) from last_error

        print(f"Finished downloading {filename}\n")

    return {"message": f"Files downloaded successfully to '{name}'"}
1079
+
1080
def group_analysis_results(self, analysis_id: str, box_plot: dict = None):
    """
    Returns the group analysis data for the given analysis id, provided it exists.

    Parameters
    ----------
    analysis_id : str
        The analysis id.

    box_plot : dict, optional
        The box plot configuration needed for the analysis, defaulted to None. Contains `feature_type` ("protein" or "peptide") and `feature_ids` (a list of feature IDs, or a single comma separated string of feature IDs) keys. The dictionary is not modified.

    Returns
    -------
    res : dict
        A dictionary containing the group analysis data. The "box_plot" key is only present when `box_plot` was passed.

    Raises
    ------
    ValueError
        If `analysis_id` is empty, if `box_plot["feature_ids"]` is empty, or if any of the requests does not return status code 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK()
    >>> seer_sdk.group_analysis_results("YOUR_ANALYSIS_ID_HERE")
    >>> {
        "pre": {
            "protein": [],
            "peptide": [],
        },
        "post": {
            "protein": {},
            "protein_url": {
                "protein_processed_file_url": "",
                "protein_processed_long_form_file_url": "",
            },
            "peptide": {},
            "peptide_url": {
                "peptide_processed_file_url": "",
                "peptide_processed_long_form_file_url": "",
            },
        },
        "box_plot": []
    }
    """

    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    URL = f"{self._auth.url}"

    res = {
        "pre": {
            "protein": [],
            "peptide": [],
        },
        "post": {
            "protein": {},
            "protein_url": {
                "protein_processed_file_url": "",
                "protein_processed_long_form_file_url": "",
            },
            "peptide": {},
            "peptide_url": {
                "peptide_processed_file_url": "",
                "peptide_processed_long_form_file_url": "",
            },
        },
        "box_plot": [],
    }

    # Every request goes through one authenticated session.
    with self._get_auth_session() as s:

        # Pre-GA data calls
        for feature in ("protein", "peptide"):
            pre_data = s.post(
                url=f"{URL}api/v2/groupanalysis/{feature}",
                json={"analysisId": analysis_id, "grouping": "condition"},
            )
            if pre_data.status_code != 200:
                raise ValueError(
                    f"Invalid request. Could not fetch group analysis {feature} pre data. Please check your parameters."
                )
            res["pre"][feature] = pre_data.json()

        # Post-GA data call
        get_saved_result = s.get(
            f"{URL}api/v1/groupanalysis/getSavedResults?analysisid={analysis_id}"
        )
        if get_saved_result.status_code != 200:
            raise ValueError(
                "Invalid request. Could not fetch group analysis post data. Please check your parameters."
            )
        get_saved_result = get_saved_result.json()

        # Copy each field that is present in the response into its slot in
        # `res`; absent fields keep their empty defaults.
        post = res["post"]
        for response_key, target, field in (
            ("pgResult", post, "protein"),
            ("peptideResult", post, "peptide"),
            (
                "pgProcessedFileUrl",
                post["protein_url"],
                "protein_processed_file_url",
            ),
            (
                "pgProcessedLongFormFileUrl",
                post["protein_url"],
                "protein_processed_long_form_file_url",
            ),
            (
                "peptideProcessedFileUrl",
                post["peptide_url"],
                "peptide_processed_file_url",
            ),
            (
                "peptideProcessedLongFormFileUrl",
                post["peptide_url"],
                "peptide_processed_long_form_file_url",
            ),
        ):
            if response_key in get_saved_result:
                target[field] = get_saved_result[response_key]

        # Box plot data call
        if not box_plot:
            del res["box_plot"]
            return res

        # Work on local values so the caller's dictionary is left untouched.
        feature_type = box_plot["feature_type"].lower()
        feature_ids = box_plot["feature_ids"]
        if not isinstance(feature_ids, str):
            # A string is taken to be already comma separated; anything else is joined.
            feature_ids = ",".join(feature_ids)
        if not feature_ids:
            raise ValueError(
                "Invalid request, 'feature_ids' in 'box_plot' cannot be empty."
            )

        box_plot_data = s.post(
            url=f"{URL}api/v1/groupanalysis/rawdata",
            json={
                "analysisId": analysis_id,
                "featureIds": feature_ids,
                "featureType": f"{feature_type}group",
            },
        )
        if box_plot_data.status_code != 200:
            raise ValueError(
                "Invalid request, could not fetch box plot data. Please verify your 'box_plot' parameters, including 'feature_ids' (comma-separated list of feature IDs) and 'feature_type' (needs to be a either 'protein' or 'peptide')."
            )

        res["box_plot"] = box_plot_data.json()

    return res