seer-pas-sdk 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1307 +1 @@
1
- from tqdm import tqdm
2
-
3
- import os
4
- import jwt
5
- import requests
6
- import urllib.request
7
- import ssl
8
- import shutil
9
-
10
- from typing import List as _List
11
-
12
- from ..common import *
13
- from ..auth import Auth
14
- from ..objects import PlateMap
15
-
16
-
17
- class SeerSDK:
18
- """
19
- Object exposing SDK methods. Requires a username and password; the optional `instance` param denotes the instance of PAS (defaults to "US").
20
-
21
- Examples
22
- -------
23
- >>> from seer_pas_sdk import SeerSDK
24
- >>> USERNAME = "test"
25
- >>> PASSWORD = "test-password"
26
- >>> INSTANCE = "EU"
27
- >>> seer_sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE)
28
- """
29
-
30
def __init__(self, username, password, instance="US"):
    """
    Log in to PAS and keep the resulting `Auth` handle on the instance.

    Parameters
    ----------
    username : str
        Username used to log in.
    password : str
        Password used to log in.
    instance : str, optional
        Instance of PAS to connect to, defaulted to "US".

    Raises
    ------
    ValueError
        If creating the `Auth` object or fetching a token fails.
    """
    try:
        self._auth = Auth(username, password, instance)

        # Request a token right away so that a failed login surfaces here
        # rather than on the first API call.
        self._auth.get_token()

        print(f"User '{username}' logged in.\n")

    except Exception as err:
        # `except Exception` lets KeyboardInterrupt / SystemExit propagate,
        # and chaining keeps the underlying failure visible in tracebacks.
        raise ValueError(
            "Could not log in.\nPlease check your credentials and/or instance."
        ) from err
42
-
43
def get_spaces(self):
    """
    Retrieve the spaces (user groups) of the authenticated user.

    Returns
    -------
    spaces: list
        Decoded JSON body of the `api/v1/usergroups` endpoint.

    Raises
    ------
    ValueError
        If the endpoint answers with a status code other than 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk.get_spaces()
    >>> [
            { "usergroup_name": ... },
            { "usergroup_name": ... },
            ...
        ]
    """
    id_token, access_token = self._auth.get_token()
    endpoint = f"{self._auth.url}api/v1/usergroups"

    with requests.Session() as session:
        session.headers.update(
            {
                "Authorization": f"{id_token}",
                "access-token": f"{access_token}",
            }
        )

        response = session.get(endpoint)

        # Happy path first; anything but 200 is reported as a bad request.
        if response.status_code == 200:
            return response.json()

        raise ValueError("Invalid request. Please check your parameters.")
81
-
82
def get_plate_metadata(self, plate_id: str = None, df: bool = False):
    """
    Fetches plate metadata for the authenticated user. Without `plate_id`, all plates are returned; with `plate_id`, only that plate is returned (wrapped in a one-element list).

    The `tenant_id` field is stripped from every returned entry when present.

    Parameters
    ----------
    plate_id : str, optional
        ID of the plate to be fetched, defaulted to None.
    df: bool
        Boolean denoting whether the user wants the response back as a list of dicts (False) or as a DataFrame object (True).

    Returns
    -------
    plates: list or DataFrame
        List/DataFrame of plate objects for the authenticated user.

    Raises
    ------
    ValueError
        If the request does not return status code 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk.get_plate_metadata()
    >>> [
            { "id": ... },
            { "id": ... },
            ...
        ]

    >>> seer_sdk.get_plate_metadata(df=True)
    >>>                                        id  ... user_group
        0    a7c12190-15da-11ee-bdf1-bbaa73585acf  ...       None
        1    8c3b1480-15da-11ee-bdf1-bbaa73585acf  ...       None
        ..                                    ...  ...        ...

    >>> seer_sdk.get_plate_metadata(plate_id="YOUR_PLATE_ID_HERE")
    >>> [{ "id": ... }]
    """

    ID_TOKEN, ACCESS_TOKEN = self._auth.get_token()
    HEADERS = {
        "Authorization": f"{ID_TOKEN}",
        "access-token": f"{ACCESS_TOKEN}",
    }
    URL = f"{self._auth.url}api/v1/plates"

    with requests.Session() as s:
        s.headers.update(HEADERS)

        plates = s.get(
            f"{URL}/{plate_id}" if plate_id else URL,
            params={"all": "true"},
        )
        if plates.status_code != 200:
            raise ValueError(
                "Invalid request. Please check your parameters."
            )

        # The single-plate endpoint returns the object itself, while the
        # listing endpoint wraps the plates in a "data" field.
        res = [plates.json()] if plate_id else plates.json()["data"]

    for entry in res:
        # pop() with a default tolerates entries without a tenant_id,
        # matching the guarded removal used by get_project_metadata.
        entry.pop("tenant_id", None)

    return res if not df else dict_to_df(res)
154
-
155
def get_project_metadata(self, project_id: str = None, df: bool = False):
    """
    Fetches project metadata for the authenticated user. Without `project_id`, all projects are returned; with `project_id`, only that project is returned (wrapped in a one-element list).

    For every entry, `tenant_id` is removed when present and `raw_file_path` (when present) is trimmed to the part after its third "/".

    Parameters
    ----------
    project_id: str, optional
        Project ID of the project to be fetched, defaulted to None.
    df: bool
        Boolean denoting whether the user wants the response back as a list of dicts (False) or as a DataFrame object (True).

    Returns
    -------
    projects: list or DataFrame
        DataFrame or list of project objects for the authenticated user.

    Raises
    ------
    ValueError
        If the request does not return status code 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk.get_project_metadata()
    >>> [
            { "project_name": ... },
            { "project_name": ... },
            ...
        ]

    >>> seer_sdk.get_project_metadata(df=True)
    >>>                                        id  ... user_group
        0    a7c12190-15da-11ee-bdf1-bbaa73585acf  ...       None
        1    8c3b1480-15da-11ee-bdf1-bbaa73585acf  ...       None
        ..                                    ...  ...        ...

    >>> seer_sdk.get_project_metadata(project_id="YOUR_PROJECT_ID_HERE")
    >>> [{ "project_name": ... }]
    """

    id_token, access_token = self._auth.get_token()
    auth_headers = {
        "Authorization": f"{id_token}",
        "access-token": f"{access_token}",
    }
    endpoint = (
        f"{self._auth.url}api/v1/projects/{project_id}"
        if project_id
        else f"{self._auth.url}api/v1/projects"
    )

    with requests.Session() as session:
        session.headers.update(auth_headers)

        response = session.get(endpoint, params={"all": "true"})
        if response.status_code != 200:
            raise ValueError(
                "Invalid request. Please check your parameters."
            )

        if project_id:
            records = [response.json()]
        else:
            records = response.json()["data"]

    for record in records:
        if "tenant_id" in record:
            del record["tenant_id"]

        if "raw_file_path" in record:
            path = record["raw_file_path"]
            # Length of the prefix up to and including the third "/"
            # (or the last "/" when the path has fewer than three).
            prefix_length = len(path) - len(path.split("/", 3)[-1])
            record["raw_file_path"] = path[prefix_length:]

    if df:
        return dict_to_df(records)
    return records
237
-
238
- def _get_samples_metadata(
239
- self, plate_id: str = None, project_id: str = None, df: bool = False
240
- ):
241
- """
242
- ****************
243
- [UNEXPOSED METHOD CALL]
244
- ****************
245
-
246
- Fetches a list of samples for the authenticated user, filtered by `plate_id`. Returns all samples for the plate with the given `plate_id`, provided it exists.
247
-
248
- If both `plate_id` and `project_id` are passed in, only the `plate_id` is validated first.
249
-
250
- Parameters
251
- ----------
252
- plate_id : str, optional
253
- ID of the plate for which samples are to be fetched, defaulted to None.
254
- project_id : str, optional
255
- ID of the project for which samples are to be fetched, defaulted to None.
256
- df: bool
257
- Boolean denoting whether the user wants the response back in JSON or a DataFrame object
258
-
259
- Returns
260
- -------
261
- samples: list or DataFrame
262
- List/DataFrame of samples for the authenticated user.
263
-
264
- Examples
265
- -------
266
- >>> from seer_pas_sdk import SeerSDK
267
- >>> seer_sdk = SeerSDK()
268
-
269
- >>> seer_sdk._get_samples_metadata(plate_id="7ec8cad0-15e0-11ee-bdf1-bbaa73585acf")
270
- >>> [
271
- { "id": ... },
272
- { "id": ... },
273
- ...
274
- ]
275
-
276
- >>> seer_sdk._get_samples_metadata(df=True)
277
- >>> id ... control
278
- 0 812139c0-15e0-11ee-bdf1-bbaa73585acf ...
279
- 1 803e05b0-15e0-11ee-bdf1-bbaa73585acf ... MPE Control
280
- 2 a9b26a40-15da-11ee-bdf1-bbaa73585acf ...
281
- 3 a8fc87c0-15da-11ee-bdf1-bbaa73585acf ... MPE Control
282
- 4 8e322990-15da-11ee-bdf1-bbaa73585acf ...
283
- ... ... ... ...
284
- 3624 907e1f40-6621-11ea-96e3-d5a4dab4ebf6 ... C132
285
- 3625 53e59450-6621-11ea-96e3-d5a4dab4ebf6 ... C132
286
- 3626 5d11b030-6618-11ea-96e3-d5a4dab4ebf6 ... C132
287
- 3627 5bdf9270-6610-11ea-96e3-d5a4dab4ebf6 ... C132
288
- 3628 dd607ef0-654c-11ea-8eb2-25a1cfd1163c ... C132
289
- """
290
-
291
- if not plate_id and not project_id:
292
- raise ValueError("You must pass in plate ID or project ID.")
293
-
294
- res = []
295
- ID_TOKEN, ACCESS_TOKEN = self._auth.get_token()
296
- HEADERS = {
297
- "Authorization": f"{ID_TOKEN}",
298
- "access-token": f"{ACCESS_TOKEN}",
299
- }
300
- URL = f"{self._auth.url}api/v1/samples"
301
- sample_params = {"all": "true"}
302
-
303
- with requests.Session() as s:
304
- s.headers.update(HEADERS)
305
-
306
- if plate_id:
307
- try:
308
- self.get_plate_metadata(plate_id)
309
- except:
310
- raise ValueError(
311
- "Plate ID is invalid. Please check your parameters and see if the backend is running."
312
- )
313
- sample_params["plateId"] = plate_id
314
-
315
- elif project_id:
316
- try:
317
- self.get_project_metadata(project_id)
318
- except:
319
- raise ValueError(
320
- "Project ID is invalid. Please check your parameters and see if the backend is running."
321
- )
322
-
323
- sample_params["projectId"] = project_id
324
-
325
- samples = s.get(URL, params=sample_params)
326
- if samples.status_code != 200:
327
- raise ValueError(
328
- "Invalid request. Please check if your plate ID has any samples associated with it."
329
- )
330
- res = samples.json()["data"]
331
-
332
- for entry in res:
333
- del entry["tenant_id"]
334
-
335
- # Exclude custom fields that don't belong to the tenant
336
- res_df = dict_to_df(res)
337
- custom_columns = [
338
- x["field_name"] for x in self.get_sample_custom_fields()
339
- ]
340
- res_df = res_df[
341
- [
342
- x
343
- for x in res_df.columns
344
- if not x.startswith("custom_") or x in custom_columns
345
- ]
346
- ]
347
-
348
- return res_df.to_dict(orient="records") if not df else res_df
349
-
350
def get_sample_custom_fields(self):
    """
    Fetches a list of custom fields defined for the authenticated user.

    The `tenant_id` field is stripped from every returned entry when present.

    Returns
    -------
    fields: list
        Decoded JSON body of the `api/v1/samplefields` endpoint.

    Raises
    ------
    ValueError
        If the request does not return status code 200.
    """
    ID_TOKEN, ACCESS_TOKEN = self._auth.get_token()
    HEADERS = {
        "Authorization": f"{ID_TOKEN}",
        "access-token": f"{ACCESS_TOKEN}",
    }
    URL = f"{self._auth.url}api/v1/samplefields"

    with requests.Session() as s:
        s.headers.update(HEADERS)

        fields = s.get(URL)

        if fields.status_code != 200:
            raise ValueError(
                "Failed to fetch custom columns. Please check your connection."
            )

        res = fields.json()

    for entry in res:
        # pop() with a default avoids a KeyError on entries that carry no
        # tenant_id, in line with the guarded removal used elsewhere.
        entry.pop("tenant_id", None)
    return res
375
-
376
def get_msdata(self, sample_ids: list, df: bool = False):
    """
    Fetches one MS data file record per entry of `sample_ids` for an authenticated user.

    For every sample ID a separate request is posted to `api/v1/msdatas/items`; only the first element of the returned "data" list is kept. `tenant_id` is removed from each record when present and `raw_file_path` (when present) is trimmed to the part after its third "/".

    Parameters
    ----------
    sample_ids : list
        List of unique sample IDs.
    df: bool
        Boolean denoting whether the user wants the response back as a list of dicts (False) or as a DataFrame object (True).

    Returns
    -------
    res: list or DataFrame
        List/DataFrame of MS data file records, in the order of `sample_ids`.

    Raises
    ------
    ValueError
        If a request does not return status code 200 or returns no data for a sample.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> sample_ids = ["812139c0-15e0-11ee-bdf1-bbaa73585acf", "803e05b0-15e0-11ee-bdf1-bbaa73585acf"]

    >>> seer_sdk.get_msdata(sample_ids)
    >>> [
        {"id": "SAMPLE_ID_1_HERE" ... },
        {"id": "SAMPLE_ID_2_HERE" ... }
    ]

    >>> seer_sdk.get_msdata(sample_ids, df=True)
    >>>                                      id  ... gradient
    0  81c6a180-15e0-11ee-bdf1-bbaa73585acf  ...     None
    1  816a9ed0-15e0-11ee-bdf1-bbaa73585acf  ...     None

    [2 rows x 26 columns]
    """
    collected = []

    for sample_id in sample_ids:
        # Token and session are obtained anew for every sample.
        id_token, access_token = self._auth.get_token()
        auth_headers = {
            "Authorization": f"{id_token}",
            "access-token": f"{access_token}",
        }
        endpoint = f"{self._auth.url}api/v1/msdatas/items"

        with requests.Session() as session:
            session.headers.update(auth_headers)

            response = session.post(endpoint, json={"sampleId": sample_id})

            if response.status_code != 200:
                raise ValueError(
                    "Failed to fetch MS data for your plate ID."
                )

            items = response.json()["data"]
            if not items:
                raise ValueError(
                    "Failed to fetch MS data for your plate ID."
                )

            collected.append(items[0])

    for record in collected:
        if "tenant_id" in record:
            del record["tenant_id"]

        if "raw_file_path" in record:
            path = record["raw_file_path"]
            # Length of the prefix up to and including the third "/"
            # (or the last "/" when the path has fewer than three).
            prefix_length = len(path) - len(path.split("/", 3)[-1])
            record["raw_file_path"] = path[prefix_length:]

    if df:
        return dict_to_df(collected)
    return collected
446
-
447
def get_plate(self, plate_id: str, df: bool = False):
    """
    Fetches the MS data file records for every sample on the plate `plate_id`.

    The samples of the plate are looked up with `_get_samples_metadata` and their IDs are forwarded to `get_msdata`.

    Parameters
    ----------
    plate_id : str
        ID of the plate for which MS data files are to be fetched.
    df: bool
        Boolean denoting whether the user wants the response back as a list of dicts (False) or as a DataFrame object (True).

    Returns
    -------
    res: list or DataFrame
        List/DataFrame of MS data file objects for the authenticated user.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> plate_id = "7ec8cad0-15e0-11ee-bdf1-bbaa73585acf"

    >>> seer_sdk.get_plate(plate_id)
    >>> [
        {"id": "PLATE_ID_1_HERE" ... },
        {"id": "PLATE_ID_2_HERE" ... }
    ]

    >>> seer_sdk.get_plate(plate_id, df=True)
    >>>                                      id  ... volume
    0  PLATE_ID_1_HERE  ...     None
    1  PLATE_ID_2_HERE  ...     None

    [2 rows x 26 columns]
    """
    sample_ids = []
    for sample in self._get_samples_metadata(plate_id=plate_id):
        sample_ids.append(sample["id"])

    return self.get_msdata(sample_ids, df)
487
-
488
def get_project(
    self, project_id: str, msdata: bool = False, df: bool = False
):
    """
    Fetches samples (and optionally their MS data files) for a `project_id` for an authenticated user.

    With `msdata=True`, every sample that has matching MS data file records gets them attached as a list under the key "ms_data_files"; samples without a match get no such key. With `df=True`, the result is converted to a DataFrame (and each attached "ms_data_files" list to a nested DataFrame).

    Parameters
    ----------
    project_id : str
        ID of the project for which samples are to be fetched.
    msdata: bool, optional
        Boolean flag denoting whether the user wants relevant MS data files associated with the samples.
    df: bool, optional
        Boolean denoting whether the user wants the response back as a list of dicts (False) or as a DataFrame object (True).

    Returns
    -------
    res: list or DataFrame
        List/DataFrame of sample objects for the project.

    Raises
    ------
    ValueError
        If `project_id` is empty.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> project_id = "7e48e150-8a47-11ed-b382-bf440acece26"

    >>> seer_sdk.get_project(project_id=project_id, msdata=False, df=False)
    >>> [
        {
            "id": "SAMPLE_ID_1_HERE",
            "sample_type": "Plasma",
            ...
        },
        {
            "id": "SAMPLE_ID_2_HERE",
            "sample_type": "Plasma",
            ...
        }
    ]

    >>> seer_sdk.get_project(project_id=project_id, msdata=True, df=False)
    >>> [
        {
            "id": "SAMPLE_ID_1_HERE",
            "sample_type": "Plasma",
            ...
            "ms_data_files": [
                {
                    "id": MS_DATA_FILE_ID_1_HERE,
                    ...
                }
            ]
        },
        ...
    ]

    >>> seer_sdk.get_project(project_id=project_id, msdata=True, df=True)
    >>>                                     id  ...                                      ms_data_files
    0  829509f0-8a47-11ed-b382-bf440acece26  ...                                       id  ... g...
    1  828d41c0-8a47-11ed-b382-bf440acece26  ...                                       id  ... g...

    [4 rows x 60 columns]
    """
    if not project_id:
        # Raise (not return) the error so callers cannot mistake the
        # exception object for a result.
        raise ValueError("No project ID specified.")

    project_samples = self._get_samples_metadata(
        project_id=project_id, df=False
    )

    if msdata:
        sample_ids = [sample["id"] for sample in project_samples]
        ms_data_files = self.get_msdata(sample_ids=sample_ids, df=False)

        # Group the files by the sample they belong to in a single pass,
        # instead of scanning every sample for every file.
        files_by_sample = {}
        for ms_data_file in ms_data_files:
            files_by_sample.setdefault(
                ms_data_file["sample_id"], []
            ).append(ms_data_file)

        for sample in project_samples:
            matched = files_by_sample.get(sample["id"])
            if matched:
                # Each sample gets its own list, holding all of its files.
                sample["ms_data_files"] = list(matched)

    if df:
        for sample in project_samples:
            if "ms_data_files" in sample:
                sample["ms_data_files"] = dict_to_df(sample["ms_data_files"])

        project_samples = dict_to_df(project_samples)

    return project_samples
628
-
629
def get_analysis_protocols(
    self,
    analysis_protocol_name: str = None,
    analysis_protocol_id: str = None,
):
    """
    Fetches a list of analysis protocols for the authenticated user. If neither argument is provided, returns all analysis protocols. If `analysis_protocol_id` is provided, returns the protocol with that ID (the ID supersedes any name passed alongside it). If only `analysis_protocol_name` is provided, returns the protocols whose name matches exactly.

    For every entry, `tenant_id` is removed when present and `parameter_file_path` (when present) is trimmed to the part after its third "/".

    Parameters
    ----------
    analysis_protocol_name : str, optional
        Name of the analysis protocol to be fetched, defaulted to None.

    analysis_protocol_id : str, optional
        ID of the analysis protocol to be fetched, defaulted to None.

    Returns
    -------
    protocols: list
        List of analysis protocol objects for the authenticated user.

    Raises
    ------
    ValueError
        If the request does not return status code 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk.get_analysis_protocols()
    >>> [
            { "id": ..., "analysis_protocol_name": ... },
            { "id": ..., "analysis_protocol_name": ... },
            ...
        ]

    >>> seer_sdk.get_analysis_protocols(analysis_protocol_name="YOUR_ANALYSIS_PROTOCOL_NAME_HERE")
    >>> [{ "id": ..., "analysis_protocol_name": ... }]

    >>> seer_sdk.get_analysis_protocols(analysis_protocol_id="YOUR_ANALYSIS_PROTOCOL_ID_HERE")
    >>> [{ "id": ..., "analysis_protocol_name": ... }]

    >>> seer_sdk.get_analysis_protocols(analysis_protocol_id="YOUR_ANALYSIS_PROTOCOL_ID_HERE", analysis_protocol_name="YOUR_ANALYSIS_PROTOCOL_NAME_HERE")
    >>> [{ "id": ..., "analysis_protocol_name": ... }] # in this case the id would supersede the inputted name.
    """

    ID_TOKEN, ACCESS_TOKEN = self._auth.get_token()
    HEADERS = {
        "Authorization": f"{ID_TOKEN}",
        "access-token": f"{ACCESS_TOKEN}",
    }
    URL = (
        f"{self._auth.url}api/v1/analysisProtocols"
        if not analysis_protocol_id
        else f"{self._auth.url}api/v1/analysisProtocols/{analysis_protocol_id}"
    )

    with requests.Session() as s:
        s.headers.update(HEADERS)

        protocols = s.get(URL, params={"all": "true"})
        if protocols.status_code != 200:
            raise ValueError(
                "Invalid request. Please check your parameters."
            )

        if analysis_protocol_id:
            # The ID wins even when a name is passed too; previously that
            # combination matched no branch and produced an empty list.
            res = [protocols.json()]
        elif analysis_protocol_name:
            res = [
                protocol
                for protocol in protocols.json()["data"]
                if protocol["analysis_protocol_name"]
                == analysis_protocol_name
            ]
        else:
            res = protocols.json()["data"]

    for entry in res:
        entry.pop("tenant_id", None)

        if "parameter_file_path" in entry:
            path = entry["parameter_file_path"]
            # Length of the prefix up to and including the third "/"
            # (or the last "/" when the path has fewer than three).
            prefix_length = len(path) - len(path.split("/", 3)[-1])
            entry["parameter_file_path"] = path[prefix_length:]

    return res
719
-
720
def get_analysis(
    self,
    analysis_id: str = None,
    folder_id: str = None,
    show_folders=True,
    analysis_only=True,
):
    """
    Returns a list of analysis objects for the authenticated user. If no `analysis_id` is provided, returns all analyses (optionally restricted to `folder_id`).

    For every entry, `tenant_id` is removed when present and `parameter_file_path` (when present) is trimmed to the part after its third "/".

    Parameters
    ----------
    analysis_id : str, optional
        ID of the analysis to be fetched, defaulted to None.

    folder_id : str, optional
        ID of the folder to be fetched, defaulted to None.

    show_folders : bool, optional
        Mark True if folder contents are to be returned in the response, defaulted to True.
        Will be disabled if an analysis id is provided.

    analysis_only : bool, optional
        Mark True if only analyses objects are to be returned in the response, defaulted to True.
        If marked false, folder objects will also be included in the response.

    Returns
    -------
    analyses: list
        List of analysis objects for the authenticated user.

    Raises
    ------
    ValueError
        If the request does not return status code 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk.get_analysis()
    >>> [
            {id: "YOUR_ANALYSIS_ID_HERE", ...},
            {id: "YOUR_ANALYSIS_ID_HERE", ...},
            {id: "YOUR_ANALYSIS_ID_HERE", ...}
        ]

    >>> seer_sdk.get_analysis("YOUR_ANALYSIS_ID_HERE")
    >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
    """

    id_token, access_token = self._auth.get_token()
    auth_headers = {
        "Authorization": f"{id_token}",
        "access-token": f"{access_token}",
    }
    endpoint = f"{self._auth.url}api/v1/analyses"

    with requests.Session() as session:
        session.headers.update(auth_headers)

        query = {"all": "true"}
        if folder_id:
            query["folder"] = folder_id

        target = f"{endpoint}/{analysis_id}" if analysis_id else endpoint
        response = session.get(target, params=query)

        if response.status_code != 200:
            raise ValueError(
                "Invalid request. Please check your parameters."
            )

        # A single analysis arrives under "analysis"; listings under "data".
        if analysis_id:
            records = [response.json()["analysis"]]
        else:
            records = response.json()["data"]

    folder_ids = []
    for record in records:
        if "tenant_id" in record:
            del record["tenant_id"]

        if "parameter_file_path" in record:
            path = record["parameter_file_path"]
            # Length of the prefix up to and including the third "/"
            # (or the last "/" when the path has fewer than three).
            prefix_length = len(path) - len(path.split("/", 3)[-1])
            record["parameter_file_path"] = path[prefix_length:]

        if show_folders and not analysis_id and record["is_folder"]:
            folder_ids.append(record["id"])

    # Descend into each folder. The recursive call uses the default flags.
    # NOTE(review): with analysis_only=False, nested folder objects are
    # therefore filtered out of the sub-results — confirm this is intended.
    for folder in folder_ids:
        records.extend(self.get_analysis(folder_id=folder))

    if analysis_only:
        records = [record for record in records if not record["is_folder"]]
    return records
825
-
826
def get_analysis_result(self, analysis_id: str, download_path: str = ""):
    """
    Given an `analysis_id`, loads the peptide and protein result tables of the analysis and either returns them or writes them to disk.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    download_path : str
        Existing directory under which the files are to be written (into `downloads/ANALYSIS_ID`). Defaults to an empty string, in which case nothing is written and the tables are returned instead.

    Returns
    -------
    links: dict
        Without `download_path`: the tables under the keys "peptide_np", "peptide_panel", "protein_np" and "protein_panel". With `download_path`: `{"status": "Download complete."}`.

    Raises
    ------
    ValueError
        If `analysis_id` is empty, `download_path` does not exist, the analysis status is "FAILED" or None, or a data request does not return status code 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)

    >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE")
    >>> {
        "peptide_np": ...,
        "peptide_panel": ...,
        "protein_np": ...,
        "protein_panel": ...
    }

    >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE", download_path="/Users/Downloads")
    >>> { "status": "Download complete." }
    """

    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    if download_path and not os.path.exists(download_path):
        raise ValueError("The download path you entered is invalid.")

    status = self.get_analysis(analysis_id)[0]["status"]
    if status in ["FAILED", None]:
        raise ValueError(
            "Cannot generate links for failed or null analyses."
        )

    id_token, access_token = self._auth.get_token()
    auth_headers = {
        "Authorization": f"{id_token}",
        "access-token": f"{access_token}",
    }
    base_url = f"{self._auth.url}api/v1/data"

    with requests.Session() as session:
        session.headers.update(auth_headers)

        protein_response = session.get(
            f"{base_url}/protein?analysisId={analysis_id}&retry=false"
        )
        if protein_response.status_code != 200:
            raise ValueError(
                "Invalid request. Could not fetch protein data. Please check your parameters."
            )
        protein_data = protein_response.json()

        peptide_response = session.get(
            f"{base_url}/peptide?analysisId={analysis_id}&retry=false"
        )
        if peptide_response.status_code != 200:
            raise ValueError(
                "Invalid request. Could not fetch peptide data. Please check your parameters."
            )
        peptide_data = peptide_response.json()

    # Each link is loaded into a table via url_to_df.
    tables = {
        "peptide_np": url_to_df(peptide_data["npLink"]["url"]),
        "peptide_panel": url_to_df(peptide_data["panelLink"]["url"]),
        "protein_np": url_to_df(protein_data["npLink"]["url"]),
        "protein_panel": url_to_df(protein_data["panelLink"]["url"]),
    }

    if not download_path:
        return tables

    target_dir = f"{download_path}/downloads/{analysis_id}"
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # Files carry a .csv extension but are written tab-separated.
    for key in ("peptide_np", "peptide_panel", "protein_np", "protein_panel"):
        tables[key].to_csv(f"{target_dir}/{key}.csv", sep="\t")

    return {"status": "Download complete."}
926
-
927
def analysis_complete(self, analysis_id: str):
    """
    Returns the status of the analysis with the given id.

    Parameters
    ----------
    analysis_id : str
        The analysis id.

    Returns
    -------
    res : dict
        A dictionary containing the status of the analysis.

    Raises
    ------
    ValueError
        If `analysis_id` is empty or the analysis lookup fails.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk.analysis_complete("YOUR_ANALYSIS_ID_HERE")
    >>> {
            "status": "SUCCEEDED"
        }
    """

    if not analysis_id:
        raise ValueError("Analysis id cannot be empty.")

    try:
        res = self.get_analysis(analysis_id)
    except ValueError as err:
        # Raise (not return) the error so callers cannot mistake the
        # exception object for a status dictionary.
        raise ValueError(
            "Analysis not found. Your ID could be incorrect"
        ) from err

    return {"status": res[0]["status"]}
960
-
961
def list_ms_data_files(self, folder="", space=None):
    """
    Lists all the MS data files in the given folder as long as the folder path passed in the params is valid.

    Parameters
    ----------
    folder : str, optional
        Folder path to list the files from. Defaults to an empty string and displays all files for the user.
    space : str, optional
        ID of the user group to which the files belong, defaulted to None.

    Returns
    -------
    list
        Contains the list of files in the folder.

    Raises
    ------
    ValueError
        If the request does not return status code 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> sdk = SeerSDK(USERNAME, PASSWORD)
    >>> folder_path = "test-may-2/"
    >>> sdk.list_ms_data_files(folder_path)
    >>> [
        "test-may-2/EXP20028/EXP20028_2020ms0096X10_A.raw",
        "test-may-2/agilent/05_C2_19ug-r001.d.zip",
        "test-may-2/agilent/08_BC_24ug-r001.d.zip",
        "test-may-2/DIA/EXP20002_2020ms0142X10_A.wiff",
        "test-may-2/DIA/EXP20002_2020ms0142X10_A.wiff.scan"
    ]
    """

    ID_TOKEN, ACCESS_TOKEN = self._auth.get_token()
    HEADERS = {
        "Authorization": f"{ID_TOKEN}",
        "access-token": f"{ACCESS_TOKEN}",
    }
    URL = f"{self._auth.url}api/v1/msdataindex/filesinfolder"

    # Pass the query through `params` so requests URL-encodes the values;
    # a folder name containing "&", "#" or "?" would otherwise corrupt
    # the query string.
    query = {"folder": folder}
    if space:
        query["userGroupId"] = space

    with requests.Session() as s:
        s.headers.update(HEADERS)

        files = s.get(URL, params=query)

        if files.status_code != 200:
            raise ValueError(
                "Invalid request. Please check your parameters."
            )
        return files.json()["filesList"]
1017
-
1018
def download_ms_data_files(
    self, paths: _List[str], download_path: str, space: str = None
):
    """
    Downloads all MS data files for paths passed in the params to the specified download path.

    Parameters
    ----------
    paths : list[str]
        List of paths to download.
    download_path : str
        Path to download the files to. Falls back to the current working directory when empty, and to "<cwd>/downloads" when the given path is not an existing directory.
    space : str, optional
        ID of the user group to which the files belong, defaulted to None.

    Returns
    -------
    message: dict
        Contains the message whether the files were downloaded or not.

    Raises
    ------
    ValueError
        If a download URL cannot be obtained for one of the paths, or if a file still fails to download after one retry.
    """

    if not download_path:
        download_path = os.getcwd()
        print("\nDownload path not specified.\n")

    if not os.path.isdir(download_path):
        print(
            f'\nThe path "{download_path}" you specified does not exist, was either invalid or not absolute.\n'
        )
        download_path = f"{os.getcwd()}/downloads"

    # Drop trailing slashes, but keep a bare "/" intact instead of
    # collapsing it to an empty string.
    name = download_path.rstrip("/") or download_path

    os.makedirs(name, exist_ok=True)

    print(f'Downloading files to "{name}"\n')

    ID_TOKEN, ACCESS_TOKEN = self._auth.get_token()
    HEADERS = {
        "Authorization": f"{ID_TOKEN}",
        "access-token": f"{ACCESS_TOKEN}",
    }
    URL = f"{self._auth.url}api/v1/msdataindex/download/getUrl"

    # The token is only decoded to read the tenant id claim, which is used
    # as the prefix of every file path sent to the backend.
    tenant_id = jwt.decode(ID_TOKEN, options={"verify_signature": False})[
        "custom:tenantId"
    ]

    # Resolve every download URL before transferring anything, so that an
    # invalid path fails before any file is written.
    urls = []
    with requests.Session() as s:
        s.headers.update(HEADERS)

        for path in paths:
            download_url = s.post(
                URL,
                json={
                    "filepath": f"{tenant_id}/{path}",
                    "userGroupId": space,
                },
            )

            if download_url.status_code != 200:
                raise ValueError(
                    "Could not download file. Please check if the backend is running."
                )
            urls.append(download_url.text)

    for path, url in zip(paths, urls):
        filename = path.split("/")[-1]
        destination = os.path.join(name, filename)

        print(f"Downloading {filename}")

        last_error = None
        for _ in range(2):
            try:
                with tqdm(
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    miniters=1,
                    desc="Progress",
                ) as t:
                    # NOTE(review): this disables TLS certificate
                    # verification for every later urllib HTTPS request in
                    # the process, not just this download. Kept to preserve
                    # existing behavior; consider removing it.
                    ssl._create_default_https_context = (
                        ssl._create_unverified_context
                    )
                    urllib.request.urlretrieve(
                        url,
                        destination,
                        reporthook=download_hook(t),
                        data=None,
                    )
                break
            except Exception as err:
                # Remember the failure and make sure the target directory
                # (still) exists before the single retry.
                last_error = err
                os.makedirs(name, exist_ok=True)
        else:
            # Both attempts failed.
            raise ValueError(
                "Your download failed. Please check if the backend is still running."
            ) from last_error

        print(f"Finished downloading {filename}\n")

    return {"message": f"Files downloaded successfully to '{name}'"}
1130
-
1131
def group_analysis_results(self, analysis_id: str, box_plot: dict = None):
    """
    Returns the group analysis data for the given analysis id, provided it exists.

    Parameters
    ----------
    analysis_id : str
        The analysis id.

    box_plot : dict, optional
        The box plot configuration needed for the analysis, defaulted to None. Contains `feature_type` ("protein" or "peptide") and `feature_ids` (a list of feature IDs, or a single comma separated string of feature IDs) keys. The dictionary is not modified.

    Returns
    -------
    res : dict
        A dictionary containing the group analysis data. The "box_plot" key is only present when `box_plot` was given.

    Raises
    ------
    ValueError
        If `analysis_id` is empty, if `box_plot` is given without both required keys or with empty `feature_ids`, or if any of the backend requests returns a status code other than 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK()
    >>> seer_sdk.group_analysis_results("YOUR_ANALYSIS_ID_HERE")
    >>> {
        "pre": {
            "protein": [],
            "peptide": [],
        },
        "post": {
            "protein": {},
            "protein_url": {
                "protein_processed_file_url": "",
                "protein_processed_long_form_file_url": "",
            },
            "peptide": {},
            "peptide_url": {
                "peptide_processed_file_url": "",
                "peptide_processed_long_form_file_url": "",
            },
        },
        "box_plot": []
    }
    """

    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    # Validate and build the box plot request up front, so a malformed
    # configuration fails before any network round-trip, and without
    # writing back into the caller's dictionary.
    box_plot_payload = None
    if box_plot:
        missing = [
            key
            for key in ("feature_type", "feature_ids")
            if key not in box_plot
        ]
        if missing:
            raise ValueError(
                f"Invalid 'box_plot' parameter, missing key(s): {', '.join(missing)}."
            )

        feature_ids = box_plot["feature_ids"]
        if not feature_ids:
            raise ValueError(
                "Invalid 'box_plot' parameter, 'feature_ids' cannot be empty."
            )
        # A string is taken as already comma separated; joining it would
        # put a comma between every character.
        if not isinstance(feature_ids, str):
            feature_ids = ",".join(str(fid) for fid in feature_ids)

        box_plot_payload = {
            "analysisId": analysis_id,
            "featureIds": feature_ids,
            "featureType": f"{box_plot['feature_type'].lower()}group",
        }

    ID_TOKEN, ACCESS_TOKEN = self._auth.get_token()
    HEADERS = {
        "Authorization": f"{ID_TOKEN}",
        "access-token": f"{ACCESS_TOKEN}",
    }
    URL = f"{self._auth.url}"

    res = {
        "pre": {
            "protein": [],
            "peptide": [],
        },
        "post": {
            "protein": {},
            "protein_url": {
                "protein_processed_file_url": "",
                "protein_processed_long_form_file_url": "",
            },
            "peptide": {},
            "peptide_url": {
                "peptide_processed_file_url": "",
                "peptide_processed_long_form_file_url": "",
            },
        },
    }

    # One session serves all requests; they share the same auth headers.
    with requests.Session() as s:
        s.headers.update(HEADERS)

        # Pre-GA data calls
        for feature in ("protein", "peptide"):
            pre_data = s.post(
                url=f"{URL}api/v2/groupanalysis/{feature}",
                json={"analysisId": analysis_id, "grouping": "condition"},
            )
            if pre_data.status_code != 200:
                raise ValueError(
                    f"Invalid request. Could not fetch group analysis {feature} pre data. Please check your parameters."
                )
            res["pre"][feature] = pre_data.json()

        # Post-GA data call
        get_saved_result = s.get(
            f"{URL}api/v1/groupanalysis/getSavedResults?analysisid={analysis_id}"
        )
        if get_saved_result.status_code != 200:
            raise ValueError(
                "Invalid request. Could not fetch group analysis post data. Please check your parameters."
            )
        get_saved_result = get_saved_result.json()

        # Protein and peptide data; keys absent from the response keep
        # their empty defaults.
        for response_key, feature in (
            ("pgResult", "protein"),
            ("peptideResult", "peptide"),
        ):
            if response_key in get_saved_result:
                res["post"][feature] = get_saved_result[response_key]

        # Protein and peptide URLs
        for response_key, group, field in (
            ("pgProcessedFileUrl", "protein_url", "protein_processed_file_url"),
            (
                "pgProcessedLongFormFileUrl",
                "protein_url",
                "protein_processed_long_form_file_url",
            ),
            (
                "peptideProcessedFileUrl",
                "peptide_url",
                "peptide_processed_file_url",
            ),
            (
                "peptideProcessedLongFormFileUrl",
                "peptide_url",
                "peptide_processed_long_form_file_url",
            ),
        ):
            if response_key in get_saved_result:
                res["post"][group][field] = get_saved_result[response_key]

        # Box plot data call, only when a configuration was supplied
        if box_plot_payload is None:
            return res

        box_plot_data = s.post(
            url=f"{URL}api/v1/groupanalysis/rawdata",
            json=box_plot_payload,
        )
        if box_plot_data.status_code != 200:
            raise ValueError(
                "Invalid request, could not fetch box plot data. Please verify your 'box_plot' parameters, including 'feature_ids' (comma-separated list of feature IDs) and 'feature_type' (needs to be a either 'protein' or 'peptide')."
            )

        res["box_plot"] = box_plot_data.json()

    return res
1
+ from .sdk import SeerSDK