seer_pas_sdk-0.1.0-py3-none-any.whl

@@ -0,0 +1,29 @@
+ """
+ `seer-pas-sdk`
+ 
+ Exports:
+ 
+ - `SeerSDK`
+ - `PlateMap`
+ """
+ 
+ # Initialize the package.
+ try:
+     from importlib.metadata import version, PackageNotFoundError
+ 
+     try:
+         __version__ = version("seer-pas-sdk")
+     except PackageNotFoundError:
+         pass
+ 
+ except ImportError:
+     from pkg_resources import get_distribution, DistributionNotFound
+ 
+     try:
+         __version__ = get_distribution("seer-pas-sdk").version
+     except DistributionNotFound:
+         pass
+ 
+ # Export public functions and classes.
+ from .core import SeerSDK
+ from .objects import PlateMap
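For orientation, a minimal import sketch. The module name `seer_pas_sdk` is inferred from the wheel name, and the version shown assumes this 0.1.0 wheel is installed; `SeerSDK` construction details live in `.core`, which is not shown in this diff:

>>> import seer_pas_sdk
>>> seer_pas_sdk.__version__
'0.1.0'
>>> from seer_pas_sdk import SeerSDK, PlateMap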
@@ -0,0 +1,77 @@
+ import requests
+ 
+ 
+ class Auth:
+     _instances = {
+         "US": "https://api.pas.seer.software/",
+         "EU": "https://api.pas-eu.seer.bio/",
+     }
+ 
+     def __init__(self, username, password, instance="US"):
+         """
+         Constructor for the Auth class. Uses the username, password, and instance name to instantiate the class.
+ 
+         Parameters
+         ----------
+         username: str
+             The username of the account associated with the PAS instance.
+         password: str
+             The password of the account associated with the PAS instance.
+         instance: str
+             The instance name of the PAS instance (`US | EU`). Defaults to `US`.
+         """
+ 
+         self.username = username
+         self.__password = password
+ 
+         if instance not in Auth._instances:
+             if instance.startswith("https://"):
+                 # Support an arbitrary endpoint for testing.
+                 self.url = instance
+             else:
+                 raise ValueError("Invalid PAS instance.")
+         else:
+             self.url = Auth._instances[instance]
+ 
+         self.instance = instance
+ 
+     def login(self):
+         """
+         Logs into the PAS instance using the mapped URL and the login credentials (username and password) provided in the constructor.
+ 
+         Returns
+         -------
+         dict
+             A dictionary containing the login response from the PAS instance.
+         """
+         response = requests.post(
+             f"{self.url}auth/login",
+             json={"username": self.username, "password": self.__password},
+         )
+ 
+         if response.status_code != 200:
+             raise ValueError(
+                 "Login failed; check that the credentials are correct and that the backend is running."
+             )
+ 
+         return response.json()
+ 
+     def get_token(self):
+         """
+         Gets the ID and access tokens from the login response.
+ 
+         Returns
+         -------
+         tuple
+             The `(id_token, access_token)` pair from the login response.
+         """
+ 
+         res = self.login()
+ 
+         if "id_token" not in res or "access_token" not in res:
+             raise ValueError(
+                 "Login response did not contain the expected tokens; check that the credentials are correct and that the backend is running."
+             )
+ 
+         return res["id_token"], res["access_token"]
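A minimal usage sketch of the Auth class above (credentials are placeholders; note that `get_token()` performs a fresh login on every call):

>>> auth = Auth("user@example.com", "example-password", instance="US")
>>> id_token, access_token = auth.get_token()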
@@ -0,0 +1,424 @@
+ from dotenv import load_dotenv
+ from botocore.config import Config
+ from botocore.exceptions import ClientError
+ from re import sub
+ 
+ import pandas as pd
+ import os
+ import io
+ import requests
+ import boto3
+ import json
+ 
+ from ..auth import Auth
+ 
+ load_dotenv()
+ 
+ 
+ def upload_file(file_name, bucket, object_name=None):
+     """
+     Upload a file to an S3 bucket.
+ 
+     Parameters
+     ----------
+     file_name : str
+         The name of the file being uploaded.
+     bucket : str
+         The name of the bucket to upload to.
+     object_name : str
+         The name of the object in the bucket. Defaults to the basename of `file_name`.
+ 
+     Returns
+     -------
+     bool
+         True if the file was uploaded, else False.
+ 
+     Examples
+     --------
+     >>> upload_file("someFileNameHere.raw", "someBucketName")
+     True
+     """
+     # If an S3 object_name was not specified, use the basename of file_name.
+     if object_name is None:
+         object_name = os.path.basename(file_name)
+ 
+     # Upload the file; boto3's upload_file returns None on success and
+     # raises ClientError on failure.
+     s3_client = boto3.client("s3")
+     try:
+         s3_client.upload_file(file_name, bucket, object_name)
+     except ClientError:
+         return False
+     return True
+ 
+ 
+ def dict_to_df(data):
+     """
+     Returns a Pandas DataFrame from a dictionary.
+ 
+     Parameters
+     ----------
+     data : dict
+         The dictionary to convert to a Pandas DataFrame.
+ 
+     Returns
+     -------
+     pandas.core.frame.DataFrame
+         A Pandas DataFrame.
+ 
+     Examples
+     --------
+     >>> data = {
+     ...     "Sample ID": [1, 2, 3, 4, 5, 6],
+     ...     "Sample name": ["SampleName1", "SampleName2", "SampleName3", "SampleName4", "SampleName5", "SampleName6"],
+     ...     "Well location": [1, 2, 3, 4, 5, 6],
+     ... }
+     >>> df = dict_to_df(data)
+     >>> print(df)
+        Sample ID  Sample name  Well location
+     0          1  SampleName1              1
+     1          2  SampleName2              2
+     2          3  SampleName3              3
+     ...      ...          ...            ...
+     """
+     df = pd.DataFrame.from_dict(data)
+     return df
+ 
+ 
+ def url_to_df(url):
+     """
+     Returns a Pandas DataFrame fetched from a URL. Note that the fetched
+     content is parsed as tab-separated values.
+ 
+     Parameters
+     ----------
+     url : str
+         The URL of the tab-separated file.
+ 
+     Returns
+     -------
+     pandas.core.frame.DataFrame
+         A Pandas DataFrame.
+ 
+     Examples
+     --------
+     >>> csv = url_to_df("link_to_csv_file")
+     >>> print(csv)
+        Sample ID  Sample name  Well location  MS file name
+     0          1  SampleName1              1  SDKTest1.raw
+     1          2  SampleName2              2  SDKTest2.raw
+     2          3  SampleName3              3  SDKTest3.raw
+     3          4  SampleName4              4  SDKTest4.raw
+     4          5  SampleName5              5  SDKTest5.raw
+     5          6  SampleName6              6  SDKTest6.raw
+     """
+ 
+     url_content = io.StringIO(requests.get(url).content.decode("utf-8"))
+     csv = pd.read_csv(url_content, sep="\t")
+     return csv
+ 
+ 
+ def get_sample_info(
+     plate_id,
+     ms_data_files,
+     plate_map_file,
+     space,
+     sample_description_file=None,
+ ):
+     """
+     Returns all `sample_id` and `sample_name` values for a plate_map_file and checks that the ms_data_files are contained within the plate_map_file.
+ 
+     Parameters
+     ----------
+     plate_id : str
+         The plate ID.
+     ms_data_files : list
+         A list of MS data file paths.
+     plate_map_file : str
+         Path to the plate map CSV file.
+     space : str
+         The user group (space) to assign the samples to.
+     sample_description_file : str
+         Path to the sample description file.
+ 
+     Returns
+     -------
+     list
+         A list of dictionaries containing the `plateID`, `sampleID`, `sampleName`, and `sampleUserGroup` values.
+ 
+     Examples
+     --------
+     >>> get_sample_info("sdkTestPlateId1", ["AgamSDKTest1.raw", "AgamSDKTest2.raw"], "AgamSDKPlateMapATest.csv", "SDKPlate")
+     [
+         {
+             "plateID": "YOUR_PLATE_ID",
+             "sampleID": "YOUR_SAMPLE_ID",
+             "sampleName": "YOUR_SAMPLE_NAME",
+             "sampleUserGroup": "YOUR_SAMPLE_USER_GROUP"
+         }
+     ]
+     """
+ 
+     df = pd.read_csv(plate_map_file, on_bad_lines="skip")
+     files = df["MS file name"]  # all filenames in the plate map CSV
+     local_file_names = set(
+         [os.path.basename(file) for file in ms_data_files]
+     )  # all filenames in the local directory
+     res = []
+ 
+     # Step 1: Check that the ms_data_files are contained within the plate_map_file.
+     if len(files) != len(local_file_names):
+         raise ValueError("Plate map file is invalid.")
+ 
+     for file in files:
+         if file not in local_file_names:
+             raise ValueError(
+                 "Plate map file does not contain the attached MS data files."
+             )
+ 
+     # Step 2: Validate and map the contents of the sample description file.
+     if sample_description_file:
+         sdf = pd.read_csv(sample_description_file, on_bad_lines="skip")
+         sdf.rename(columns={"Sample Name": "Sample name"}, inplace=True)
+ 
+     # Step 3: Build one record per plate map row.
+     number_of_rows = df.shape[0]  # for the plate map CSV
+ 
+     for i in range(number_of_rows):
+         row = df.iloc[i]
+         sample_id = row["Sample ID"]
+         sample_name = row["Sample name"]
+         sample_info = {
+             "plateID": plate_id,
+             "sampleID": sample_id,
+             "sampleName": sample_name,
+             "sampleUserGroup": space,
+         }
+ 
+         if sample_description_file:
+             sdf_row = dict(sdf.iloc[i])
+             row_names = list(sdf_row.keys())
+ 
+             if sdf_row["Sample name"] == sample_name:
+                 for row_name in row_names:
+                     value = sdf_row[row_name]
+                     sample_info[camel_case(row_name)] = (
+                         value if pd.notna(value) else ""
+                     )
+ 
+         res.append(sample_info)
+ 
+     return res
+ 
+ 
+ def parse_plate_map_file(plate_map_file, samples, raw_file_paths, space=None):
+     """
+     Parses the plate map CSV file and returns a list of parameters for each sample.
+ 
+     Parameters
+     ----------
+     plate_map_file : str
+         Path to the plate map CSV file.
+     samples : list
+         A list of sample records; each must contain `id`, `sample_id`, and
+         `sample_name` keys matching the plate map rows.
+     raw_file_paths : dict
+         A dictionary mapping MS file names to raw file paths.
+     space : str
+         The space or user group.
+ 
+     Returns
+     -------
+     list
+         A list of dictionaries containing all the parameters for each sample.
+ 
+     Examples
+     --------
+     >>> raw_file_paths = { "SDKTest1.raw": "FILE_PATH_1", "SDKTest2.raw": "FILE_PATH_2" }
+     >>> samples = [
+     ...     {
+     ...         "id": "SAMPLE_ID_HERE",
+     ...         "sample_id": "SAMPLE_ID_TRACKING_HERE",
+     ...         "sample_name": "SAMPLE_NAME_HERE",
+     ...         "tenant_id": "TENANT_ID_HERE",
+     ...         "plate_id": "PLATE_ID_HERE",
+     ...     },
+     ... ]
+     >>> parse_plate_map_file("AgamSDKPlateMapATest.csv", samples, raw_file_paths, "SDKPlate")
+     [
+         {
+             "sampleId": "YOUR_SAMPLE_ID",
+             "sample_id_tracking": "YOUR_SAMPLE_ID_TRACKING",
+             "wellLocation": "YOUR_WELL_LOCATION",
+             ...
+         }
+     ]
+     """
+ 
+     df = pd.read_csv(plate_map_file, on_bad_lines="skip")
+     number_of_rows = df.shape[0]
+     res = []
+ 
+     for row_index in range(number_of_rows):
+         row = df.iloc[row_index]
+         path = None
+         sample_id = None
+ 
+         # Match the backend sample record against the plate map row.
+         if (
+             samples[row_index]["sample_id"] == row["Sample ID"]
+             and samples[row_index]["sample_name"] == row["Sample name"]
+         ):
+             sample_id = samples[row_index]["id"]
+ 
+         for filename in raw_file_paths:
+             if filename == row["MS file name"]:
+                 path = raw_file_paths[filename]
+ 
+         if not path or not sample_id:
+             raise ValueError("Plate map file is invalid.")
+ 
+         def cell(column):
+             # Stringify a plate map cell, mapping missing values to "".
+             return str(row[column]) if pd.notna(row[column]) else ""
+ 
+         res.append(
+             {
+                 "sampleId": str(sample_id),
+                 "sample_id_tracking": str(row["Sample ID"]),
+                 "wellLocation": cell("Well location"),
+                 "nanoparticle": cell("Nanoparticle"),
+                 "nanoparticleID": cell("Nanoparticle ID"),
+                 "control": cell("Control"),
+                 "controlID": cell("Control ID"),
+                 "instrumentName": cell("Instrument name"),
+                 "dateSamplePrep": cell("Date sample preparation"),
+                 "sampleVolume": cell("Sample volume"),
+                 "peptideConcentration": cell("Peptide concentration"),
+                 "peptideMassSample": cell("Peptide mass sample"),
+                 "dilutionFactor": cell("Dilution factor"),
+                 "msdataUserGroup": space,
+                 "rawFilePath": path,
+             }
+         )
+ 
+     return res
+ 
+ 
+ def valid_ms_data_file(path):
+     """
+     Checks if an MS data file exists and if its extension is valid for upload.
+ 
+     Parameters
+     ----------
+     path : str
+         The path to the MS data file.
+ 
+     Returns
+     -------
+     bool
+         True if the file exists and its extension is valid, False otherwise.
+     """
+ 
+     if not os.path.exists(path):
+         return False
+ 
+     # Split the basename on dots so that compound extensions such as
+     # ".d.zip" and ".wiff.scan" are detected.
+     full_filename = os.path.basename(path).split(".")
+ 
+     if len(full_filename) >= 3:
+         extension = f'.{".".join(full_filename[-2:])}'
+     else:
+         extension = f".{full_filename[-1]}"
+ 
+     return extension.lower() in [
+         ".d",
+         ".d.zip",
+         ".mzml",
+         ".raw",
+         ".wiff",
+         ".wiff.scan",
+     ]
+ 
+ 
+ def download_hook(t):
+     """
+     Wraps a tqdm instance as a urllib-style report hook.
+ 
+     Example
+     -------
+     >>> with tqdm(...) as t:
+     ...     reporthook = download_hook(t)
+     ...     urllib.request.urlretrieve(..., reporthook=reporthook)
+     """
+     last_b = [0]
+ 
+     def update_to(b=1, bsize=1, tsize=None):
+         """
+         b : int, optional
+             Number of blocks transferred so far [default: 1].
+         bsize : int, optional
+             Size of each block (in tqdm units) [default: 1].
+         tsize : int, optional
+             Total size (in tqdm units). If None, the total remains unchanged [default: None].
+         """
+         if tsize is not None:
+             t.total = tsize
+         t.update((b - last_b[0]) * bsize)
+         last_b[0] = b
+ 
+     return update_to
+ 
+ 
+ def camel_case(s):
+     """
+     Converts a snake_case, kebab-case, or space-separated string to camelCase.
+     """
+     # Replace underscores and hyphens with spaces, title-case the string
+     # (capitalize the first letter of each word), and remove the spaces.
+     s = sub(r"(_|-)+", " ", s).title().replace(" ", "")
+ 
+     # Rejoin the string, ensuring the first letter is lowercase.
+     return "".join([s[0].lower(), s[1:]])