seer-pas-sdk 0.1.3__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- seer_pas_sdk/auth/auth.py +23 -1
- seer_pas_sdk/common/__init__.py +370 -72
- seer_pas_sdk/common/errors.py +5 -0
- seer_pas_sdk/common/groupanalysis.py +55 -0
- seer_pas_sdk/core/sdk.py +1855 -371
- seer_pas_sdk/core/unsupported.py +1634 -0
- seer_pas_sdk/objects/__init__.py +2 -0
- seer_pas_sdk/objects/groupanalysis.py +30 -0
- seer_pas_sdk/objects/platemap.py +67 -22
- seer_pas_sdk/objects/volcanoplot.py +290 -0
- seer_pas_sdk-3.0.0.dist-info/METADATA +231 -0
- seer_pas_sdk-3.0.0.dist-info/RECORD +18 -0
- {seer_pas_sdk-0.1.3.dist-info → seer_pas_sdk-3.0.0.dist-info}/WHEEL +1 -1
- {seer_pas_sdk-0.1.3.dist-info → seer_pas_sdk-3.0.0.dist-info}/top_level.txt +0 -1
- seer_pas_sdk-0.1.3.dist-info/METADATA +0 -50
- seer_pas_sdk-0.1.3.dist-info/RECORD +0 -19
- tests/__init__.py +0 -0
- tests/conftest.py +0 -17
- tests/test_auth.py +0 -48
- tests/test_common.py +0 -99
- tests/test_objects.py +0 -91
- tests/test_sdk.py +0 -11
- {seer_pas_sdk-0.1.3.dist-info → seer_pas_sdk-3.0.0.dist-info/licenses}/LICENSE.txt +0 -0
seer_pas_sdk/auth/auth.py
CHANGED
@@ -1,4 +1,5 @@
 import requests
+import jwt
 
 
 class Auth:
@@ -25,7 +26,9 @@ class Auth:
         self.__password = password
 
         if instance not in Auth._instances:
-            if instance.startswith("https://"):
+            if instance.startswith("https://") or instance.startswith(
+                "http://"
+            ):
                 # Support arbitrary endpoint for testing
                 self.url = instance
             else:
@@ -35,6 +38,14 @@ class Auth:
 
         self.instance = instance
 
+        # Null initialize multi tenant attributes
+        (
+            self.base_tenant_id,
+            self.active_tenant_id,
+            self.base_role,
+            self.active_role,
+        ) = [None] * 4
+
     def login(self):
         """
         Logs into the PAS instance using the mapped URL and the login credentials (username and password) provided in the constructor.
@@ -73,5 +84,16 @@ class Auth:
             raise ValueError(
                 "Check if the credentials are correct or if the backend is running or not."
            )
+        decoded_token = jwt.decode(
+            res["id_token"], options={"verify_signature": False}
+        )
+        self.base_tenant_id = decoded_token["custom:tenantId"]
+        self.base_role = decoded_token["custom:role"]
+
+        if not self.active_tenant_id:
+            self.active_tenant_id = self.base_tenant_id
+
+        if not self.active_role:
+            self.active_role = self.base_role
 
         return res["id_token"], res["access_token"]
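For context on the multi-tenant additions above: after login, the SDK now reads the `custom:tenantId` and `custom:role` claims straight out of the id token with PyJWT, skipping signature verification. A minimal, self-contained sketch of that decode pattern; the token here is generated locally for illustration and is not a real PAS token:

    import jwt

    # Stand-in for the id_token PAS returns after login (illustrative only).
    token = jwt.encode(
        {"custom:tenantId": "tenant-123", "custom:role": "admin"},
        "not-a-real-secret",
        algorithm="HS256",
    )

    # Same pattern as Auth.login(): read the claims without verifying the signature.
    claims = jwt.decode(token, options={"verify_signature": False})
    print(claims["custom:tenantId"], claims["custom:role"])  # tenant-123 admin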
seer_pas_sdk/common/__init__.py
CHANGED
@@ -1,7 +1,8 @@
 from dotenv import load_dotenv
 from botocore.config import Config
 from botocore.exceptions import ClientError
-
+import re
+from re import match, sub, UNICODE
 
 import pandas as pd
 import os
@@ -9,13 +10,19 @@ import io
 import requests
 import boto3
 import json
+import zipfile
+import tempfile
 
-from ..auth import Auth
+from ..auth.auth import Auth
+
+from .groupanalysis import *
+
+from .errors import *
 
 load_dotenv()
 
 
-def upload_file(file_name, bucket, object_name=None):
+def upload_file(file_name, bucket, credentials, object_name=None):
     """
     Upload a file to an S3 bucket.
 
@@ -43,7 +50,12 @@ def upload_file(file_name, bucket, object_name=None):
         object_name = os.path.basename(file_name)
 
     # Upload the file
-    s3_client = boto3.client(
+    s3_client = boto3.client(
+        "s3",
+        aws_access_key_id=credentials["AccessKeyId"],
+        aws_secret_access_key=credentials["SecretAccessKey"],
+        aws_session_token=credentials["SessionToken"],
+    )
    try:
        response = s3_client.upload_file(file_name, bucket, object_name)
    except ClientError as e:
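The new `credentials` argument to `upload_file` is expected to be a dict shaped like AWS temporary credentials (`AccessKeyId`, `SecretAccessKey`, `SessionToken`). A hedged usage sketch; the bucket name, file name, and credential values below are placeholders, and in practice the SDK supplies scoped temporary credentials:

    from seer_pas_sdk.common import upload_file

    credentials = {
        "AccessKeyId": "ASIA...",        # placeholder
        "SecretAccessKey": "...",        # placeholder
        "SessionToken": "...",           # placeholder
    }
    upload_file(
        "SDKTest1.raw",
        "example-bucket",
        credentials,
        object_name="uploads/SDKTest1.raw",
    )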
@@ -85,19 +97,28 @@ def dict_to_df(data):
     return df
 
 
-def url_to_df(url):
+# Most cases appear to be a .tsv file.
+def url_to_df(url, is_tsv=True):
     """
-
+    Fetches a TSV/CSV file from a URL and returns as a Pandas DataFrame.
 
     Parameters
     ----------
     url : str
-        The URL of the CSV file.
+        The URL of the TSV/CSV file.
+
+    is_tsv : bool
+        True if the file is a TSV file, False if it is a CSV file.
 
     Returns
     -------
     pandas.core.frame.DataFrame
-
+        The data from the TSV/CSV file as a Pandas DataFrame
+
+    Raises
+    ------
+    ValueError
+        Error response from AWS S3
 
     Examples
     --------
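The new `is_tsv` flag only changes the separator passed to `pandas.read_csv`, with an early return of an empty DataFrame for a falsy URL. A self-contained sketch of the same branching on in-memory data (no network call); `_to_df` is a hypothetical stand-in, not the SDK function itself:

    import io
    import pandas as pd

    def _to_df(text, is_tsv=True):
        # Mirror url_to_df's separator choice.
        buf = io.StringIO(text)
        return pd.read_csv(buf, sep="\t") if is_tsv else pd.read_csv(buf)

    tsv = "Sample ID\tMS file name\n1\tSDKTest1.raw\n"
    csv = "Sample ID,MS file name\n1,SDKTest1.raw\n"
    print(_to_df(tsv))                 # parsed with the tab separator
    print(_to_df(csv, is_tsv=False))   # parsed with the default comma separator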
@@ -112,14 +133,18 @@ def url_to_df(url):
     5  6  SampleName6  6  SDKTest6.raw
     """
 
+    if not url:
+        return pd.DataFrame()
     url_content = io.StringIO(requests.get(url).content.decode("utf-8"))
-    csv = pd.read_csv(url_content)
+    if is_tsv:
+        csv = pd.read_csv(url_content, sep="\t")
+    else:
+        csv = pd.read_csv(url_content)
     return csv
 
 
 def get_sample_info(
     plate_id,
-    ms_data_files,
     plate_map_file,
     space,
     sample_description_file=None,
@@ -131,8 +156,6 @@ def get_sample_info(
     ----------
     plate_id : str
         The plate ID.
-    ms_data_files : list
-        A list of MS data files.
     plate_map_file : str
         The plate map file.
     space : str
@@ -145,7 +168,7 @@ def get_sample_info(
     list
         A list of dictionaries containing the `plateID`, `sampleID`, `sampleName`, and `sampleUserGroup` values.
 
-    >>> get_sample_info("plate_id",
+    >>> get_sample_info("plate_id", "AgamSDKPlateMapATest.csv", "sdkTestPlateId1", "SDKPlate", "Generated from SDK")
     >>> [
         {
             "plateID": "YOUR_PLATE_ID",
@@ -157,23 +180,9 @@ def get_sample_info(
     """
 
     df = pd.read_csv(plate_map_file, on_bad_lines="skip")
-
-    files = data["MS file name"]  # all filenames in the platemap csv
-    local_file_names = set(
-        [os.path.basename(file) for file in ms_data_files]
-    )  # all filenames in the local directory
+    # all filenames in the local directory
     res = []
 
-    # Step 1: Check if ms_data_files are contained within the plate_map_file.
-    if len(files) != len(local_file_names):
-        raise ValueError("Plate map file is invalid.")
-
-    for file in files:
-        if file not in local_file_names:
-            raise ValueError(
-                "Plate map file does not contain the attached MS data files."
-            )
-
     # Step 2: Validating and mapping the contents of the sample description file.
     if sample_description_file:
         sdf = pd.read_csv(sample_description_file, on_bad_lines="skip")
@@ -208,9 +217,164 @@ def get_sample_info(
 
         res.append(sample_info)
 
+    # Step 4: drop duplicates on sampleID
+    df = pd.DataFrame(res).drop_duplicates(subset=["sampleID"])
+    res = df.to_dict(orient="records")
     return res
 
 
+def _validate_rawfile_extensions(rawfile):
+    valid_extensions = [".d", ".d.zip", ".mzml", ".raw", ".wiff", ".wiff.scan"]
+    if not rawfile.lower().endswith(tuple(valid_extensions)):
+        return False
+    return True
+
+
+def entity_name_ruler(entity_name):
+    if pd.isna(entity_name):
+        return False
+    pattern = r"^[\w ._+()!@-]+$"
+    if match(pattern, entity_name, UNICODE):
+        return True
+    else:
+        return False
+
+
+def validate_plate_map(df, local_file_names):
+    """
+    Validates the plate map contents
+
+    Parameters
+    ----------
+    plate_map: pd.Dataframe
+        The plate map data as a dataframe
+
+    local_file_names: file names that were passed to the top level function.
+
+    Returns
+    -------
+    pd.DataFrame : the cleaned data as a dataframe
+
+    Examples
+    --------
+    >>> df = validate_plate_map_file("AgamSDKPlateMapATest.csv")
+    """
+
+    required_cols = [
+        "MS file name",
+        "Sample name",
+        "Sample ID",
+        "Well location",
+        "Plate ID",
+        "Plate Name",
+    ]
+
+    # We use the presence of the "Method set ID" column as a heuristic to determine if the plate map is Biscayne+
+    if "Method set ID" not in df.columns:
+        required_cols.append("Control")
+
+    # Catch case variations of Plate Name due to change between XT and Biscayne
+    pattern = re.compile(r"(?i)(Plate Name)")
+    matches = [s for s in df.columns if pattern.match(s)]
+    if matches:
+        df.rename(columns={matches[0]: "Plate Name"}, inplace=True)
+
+    if not all(col in df.columns for col in required_cols):
+        err_headers = [
+            "'" + col + "'" for col in required_cols if col not in df.columns
+        ]
+        raise ValueError(
+            f"The following column headers are required: {', '.join(err_headers)}"
+        )
+
+    # Check entity name requirement
+    invalid_plate_ids = df[~df["Plate ID"].apply(entity_name_ruler)]
+
+    invalid_plate_names = df[~df["Plate Name"].apply(entity_name_ruler)]
+
+    if not invalid_plate_ids.empty or not invalid_plate_names.empty:
+        error_msg = ""
+        if not invalid_plate_ids.empty:
+            error_msg += f"Invalid plate ID(s): {', '.join(invalid_plate_ids['Plate ID'].tolist())}"
+        if not invalid_plate_names.empty:
+            error_msg += f"Invalid plate name(s): {', '.join(invalid_plate_names['Plate Name'].tolist())}"
+        raise ValueError(error_msg)
+
+    # Check numeric columns
+    numeric_cols = [
+        "Sample volume",
+        "Peptide concentration",
+        "Peptide mass sample",
+        "Recon volume",
+        "Reconstituted peptide concentration",
+        "Recovered peptide mass",
+        "Reconstitution volume",
+    ]
+
+    invalid_cols = []
+    for col in numeric_cols:
+        if col in df.columns:
+            try:
+                df[col] = pd.to_numeric(df[col], errors="raise")
+            except Exception as e:
+                invalid_cols.append(col)
+
+    if invalid_cols:
+        raise ValueError(
+            f"The following column(s) must be numeric: {', '.join(invalid_cols)}"
+        )
+
+    files = [os.path.basename(x) for x in df["MS file name"].tolist()]
+
+    # Check if ms_data_files are contained within the plate_map_file.
+    if len(files) != len(local_file_names):
+        raise ValueError(
+            f"User provided {len(local_file_names)} MS files, however the plate map lists {len(files)} MS files. \
+                Please check your inputs."
+        )
+    missing_files = []
+    for file in local_file_names:
+        if os.path.basename(file) not in files:
+            missing_files.append(file)
+
+    # Found file mismatch between function argument and plate map
+    if missing_files:
+        msg = ""
+        try:
+            msg = f"The following file names were not found in the plate map: {', '.join(missing_files)}. Please revise the plate map file."
+        except:
+            raise ValueError(
+                "Rawfile names must be type string. Float or None type detected."
+            )
+        raise ValueError(msg)
+
+    # Check rawfiles end with valid extensions
+    invalid_rawfile_extensions = df[
+        ~df["MS file name"].apply(_validate_rawfile_extensions)
+    ]
+    if not invalid_rawfile_extensions.empty:
+        raise ValueError(
+            f"Invalid raw file extensions: {', '.join(invalid_rawfile_extensions['MS file name'].tolist())}"
+        )
+
+    # Check sample IDs are one to one with plate ID, plate name
+    sample_ids = df["Sample ID"].unique()
+    for sample in sample_ids:
+        queryset = df[df["Sample ID"] == sample]
+        plate_names = queryset["Plate ID"].unique()
+        plate_ids = queryset["Plate ID"].unique()
+        if len(plate_names) > 1:
+            raise ValueError(
+                f"Sample ID {sample} is associated with multiple plates: {', '.join(plate_names)}"
+            )
+        if len(plate_ids) > 1:
+            raise ValueError(
+                f"Sample ID {sample} is associated with multiple plates: {', '.join(plate_ids)}"
+            )
+
+    return df
+
+
 def parse_plate_map_file(plate_map_file, samples, raw_file_paths, space=None):
     """
     Parses the plate map CSV file and returns a list of parameters for each sample.
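The `entity_name_ruler` check added above boils down to a single regex over plate IDs and plate names: word characters, spaces, and the punctuation `._+()!@-` are accepted, and anything else rejects the row. A quick standalone illustration of that rule (the sample names are made up):

    from re import match, UNICODE

    pattern = r"^[\w ._+()!@-]+$"
    print(bool(match(pattern, "SDK Plate_01 (test)", UNICODE)))  # True
    print(bool(match(pattern, "Plate#01", UNICODE)))             # False: '#' is not allowed
    print(bool(match(pattern, "", UNICODE)))                     # False: empty name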
@@ -221,8 +385,8 @@ def parse_plate_map_file(plate_map_file, samples, raw_file_paths, space=None):
         The plate map file.
     samples : list
         A list of samples.
-    raw_file_paths
-        A dictionary
+    raw_file_paths: dict
+        A dictionary mapping the plate map MS file paths with the cloud upload path.
     space : str
         The space or usergroup.
 
@@ -246,7 +410,7 @@ def parse_plate_map_file(plate_map_file, samples, raw_file_paths, space=None):
             "plate_id": "PLATE_ID_HERE",
         }
     ]
-    >>> parse_plate_map_file("AgamSDKPlateMapATest.csv", samples,
+    >>> parse_plate_map_file("AgamSDKPlateMapATest.csv", samples, "SDKPlate")
     >>> [
         {
             "sampleId": "YOUR_SAMPLE_ID",
@@ -267,23 +431,35 @@ def parse_plate_map_file(plate_map_file, samples, raw_file_paths, space=None):
     number_of_rows = df.shape[0]
     res = []
 
+    # reformat samples to be a dictionary with sample_id as the key
+    samples = {sample["sample_id"]: sample for sample in samples}
+
+    # Catch case variations of Plate Name due to change between XT and Biscayne
+    pattern = re.compile(r"(?i)(Plate Name)")
+    matches = [s for s in df.columns if pattern.match(s)]
+    if matches:
+        df.rename(columns={matches[0]: "Plate Name"}, inplace=True)
+
     for rowIndex in range(number_of_rows):
         row = df.iloc[rowIndex]
-        path = None
         sample_id = None
+        path = None
 
-
-
-
-
-
+        # Validate that the sample ID exists in the samples list
+        if samples.get(row["Sample ID"], None):
+            sample_id = samples[row["Sample ID"]]["id"]
+        else:
+            raise ValueError(
+                f'Error fetching id for sample ID {row["Sample ID"]}'
+            )
 
-
-
-        path = raw_file_paths[filename]
+        # Map display file path to its underlying file path
+        path = raw_file_paths.get(os.path.basename(row["MS file name"]), None)
 
-        if not path
-            raise ValueError(
+        if not path:
+            raise ValueError(
+                f"Row {rowIndex} is missing a value in MS file name."
+            )
 
         res.append(
             {
@@ -296,50 +472,113 @@ def parse_plate_map_file(plate_map_file, samples, raw_file_paths, space=None):
                 ),
                 "nanoparticle": (
                     str(row["Nanoparticle"])
-                    if pd.notna(row
-                    else
+                    if pd.notna(row.get("Nanoparticle", None))
+                    else (
+                        str(row["Nanoparticle set"])
+                        if pd.notna(row.get("Nanoparticle set", None))
+                        else ""
+                    )
                 ),
                 "nanoparticleID": (
                     str(row["Nanoparticle ID"])
-                    if pd.notna(row
-                    else
+                    if pd.notna(row.get("Nanoparticle ID", None))
+                    else (
+                        str(row["Nanoparticle set ID"])
+                        if pd.notna(row.get("Nanoparticle set ID", None))
+                        else ""
+                    )
                 ),
                 "control": (
-                    str(row["Control"])
+                    str(row["Control"])
+                    if pd.notna(row.get("Control", None))
+                    else ""
                 ),
                 "controlID": (
                     str(row["Control ID"])
-                    if pd.notna(row
+                    if pd.notna(row.get("Control ID", None))
                     else ""
                 ),
                 "instrumentName": (
                     str(row["Instrument name"])
-                    if pd.notna(row
-                    else
+                    if pd.notna(row.get("Instrument name", None))
+                    else (
+                        str(row["Instrument ID"])
+                        if pd.notna(row.get("Instrument ID", None))
+                        else ""
+                    )
                 ),
                 "dateSamplePrep": (
                     str(row["Date sample preparation"])
-                    if pd.notna(row
-                    else
+                    if pd.notna(row.get("Date sample preparation", None))
+                    else (
+                        str(row["Date assay initiated"])
+                        if pd.notna(row.get("Date assay initiated", None))
+                        else ""
+                    )
                 ),
                 "sampleVolume": (
                     str(row["Sample volume"])
-                    if pd.notna(row
+                    if pd.notna(row.get("Sample volume", None))
                     else ""
                 ),
                 "peptideConcentration": (
                     str(row["Peptide concentration"])
-                    if pd.notna(row
-                    else
+                    if pd.notna(row.get("Peptide concentration", None))
+                    else (
+                        str(row["Reconstituted peptide concentration"])
+                        if pd.notna(
+                            row.get(
+                                "Reconstituted peptide concentration", None
+                            )
+                        )
+                        else ""
+                    )
                 ),
                 "peptideMassSample": (
                     str(row["Peptide mass sample"])
-                    if pd.notna(row
-                    else
+                    if pd.notna(row.get("Peptide mass sample", None))
+                    else (
+                        str(row["Recovered peptide mass"])
+                        if pd.notna(row.get("Recovered peptide mass", None))
+                        else ""
+                    )
+                ),
+                "reconVolume": (
+                    str(row["Recon volume"])
+                    if pd.notna(row.get("Recon volume", None))
+                    else (
+                        str(row["Reconstitution volume"])
+                        if pd.notna(row.get("Reconstitution volume", None))
+                        else ""
+                    )
                 ),
                 "dilutionFactor": (
                     str(row["Dilution factor"])
-                    if pd.notna(row
+                    if pd.notna(row.get("Dilution factor", None))
+                    else ""
+                ),
+                "sampleTubeId": (
+                    str(row["Sample tube ID"])
+                    if pd.notna(row.get("Sample tube ID", None))
+                    else ""
+                ),
+                "assayProduct": (
+                    str(row["Assay"])
+                    if pd.notna(row.get("Assay", None))
+                    else (
+                        str(row["Assay product"])
+                        if pd.notna(row.get("Assay product", None))
+                        else ""
+                    )
+                ),
+                "methodSetId": (
+                    str(row["Method set ID"])
+                    if pd.notna(row.get("Method set ID", None))
+                    else ""
+                ),
+                "assayMethodId": (
+                    str(row["Assay method ID"])
+                    if pd.notna(row.get("Assay method ID", None))
                     else ""
                 ),
                 "msdataUserGroup": space,
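Most of the new dictionary entries above follow one pattern: prefer the XT-era column, fall back to the Biscayne-era column, and default to an empty string when neither is present. A small standalone illustration of that fallback chain on a pandas row; the column names mirror the plate map, but the row itself is made up:

    import pandas as pd

    row = pd.Series({"Nanoparticle set": "NP-A"})  # Biscayne-style column only

    nanoparticle = (
        str(row["Nanoparticle"])
        if pd.notna(row.get("Nanoparticle", None))
        else (
            str(row["Nanoparticle set"])
            if pd.notna(row.get("Nanoparticle set", None))
            else ""
        )
    )
    print(nanoparticle)  # NP-A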
@@ -368,22 +607,34 @@ def valid_ms_data_file(path):
     if not os.path.exists(path):
         return False
 
-
+    return _validate_rawfile_extensions(path)
+
+
+def valid_pas_folder_path(path):
+    """
+    Checks if a PAS folder path is valid.
 
-
-
+    Parameters
+    ----------
+    path : str
+        The path to the PAS folder.
+
+    Returns
+    -------
+    bool
+        True if the path is valid, False otherwise.
+    """
+
+    #
+    # Invalidate the following patterns:
+    # 1. Leading forward slash
+    # 2. Trailing forward slash
+    # 3. Double forward slashes
+    #
+    if not all(path.split("/")):
+        return False
     else:
-
-
-        return extension.lower() in [
-            ".d",
-            ".d.zip",
-            ".mzml",
-            ".raw",
-            ".mzml",
-            ".wiff",
-            ".wiff.scan",
-        ]
+        return True
 
 
 def download_hook(t):
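The folder-path check relies on the fact that a leading slash, trailing slash, or doubled slash produces an empty segment when splitting on "/", which `all()` then rejects. A quick illustration of the same expression; `_valid` is a hypothetical stand-in for `valid_pas_folder_path`:

    def _valid(path):
        # Every "/"-separated segment must be non-empty.
        return all(path.split("/"))

    print(_valid("projects/plate1"))    # True
    print(_valid("/projects/plate1"))   # False: leading slash
    print(_valid("projects/plate1/"))   # False: trailing slash
    print(_valid("projects//plate1"))   # False: double slash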
@@ -422,3 +673,50 @@ def camel_case(s):
 
     # Join the string, ensuring the first letter is lowercase
     return "".join([s[0].lower(), s[1:]])
+
+
+def rename_d_zip_file(source, destination):
+    """
+    Renames a .d.zip file. The function extracts the contents of the source zip file, renames the inner .d folder, and rezips the contents into the destination zip file.
+
+    Parameters
+    ----------
+    file : str
+        The name of the zip file.
+    new_name : str
+        The new name of the zip file.
+
+    Returns
+    -------
+    None
+
+    Examples
+    --------
+    >>> rename_zip_file("old_name.zip", "new_name.zip")
+    Renamed old_name.zip to new_name.zip
+
+    """
+    if not source.lower().endswith(".d.zip"):
+        raise ValueError("Invalid zip file extension")
+
+    if not destination.lower().endswith(".d.zip"):
+        raise ValueError("Invalid zip file extension")
+
+    # Remove the .zip extension from the destination file
+    d_destination = destination[:-4]
+
+    # Create a temporary directory to extract the contents
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Unzip the source file
+        with zipfile.ZipFile(source, "r") as zip_ref:
+            zip_ref.extractall(temp_dir)
+
+        # Rezip the contents into the destination file
+        with zipfile.ZipFile(destination, "w") as zip_ref:
+            for foldername, subfolders, filenames in os.walk(temp_dir):
+                for filename in filenames:
+                    file_path = os.path.join(foldername, filename)
+                    arcname = f"{d_destination}/{os.path.relpath(file_path, temp_dir)}"  # substitute the original .d name with the new .d name
+                    zip_ref.write(file_path, arcname)
+
+    print(f"Renamed {source} to {destination}")
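The rename works by re-archiving every extracted file under the new .d prefix rather than editing zip entries in place. A self-contained sketch of that arcname rewrite under the same idea; the file names and archive contents are made up, and this mirrors the approach of `rename_d_zip_file` rather than calling it:

    import os
    import tempfile
    import zipfile

    with tempfile.TemporaryDirectory() as work:
        # Tiny stand-in for an instrument .d.zip archive.
        source = os.path.join(work, "old_name.d.zip")
        with zipfile.ZipFile(source, "w") as zf:
            zf.writestr("analysis.tdf", "fake data")

        # Re-zip every extracted file under the new .d folder name.
        destination = os.path.join(work, "new_name.d.zip")
        with tempfile.TemporaryDirectory() as extracted:
            with zipfile.ZipFile(source, "r") as zf:
                zf.extractall(extracted)
            with zipfile.ZipFile(destination, "w") as out:
                for folder, _, files in os.walk(extracted):
                    for name in files:
                        path = os.path.join(folder, name)
                        out.write(path, f"new_name.d/{os.path.relpath(path, extracted)}")

        with zipfile.ZipFile(destination) as check:
            print(check.namelist())  # ['new_name.d/analysis.tdf']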