seer-pas-sdk 1.1.1__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
seer_pas_sdk/core/sdk.py CHANGED
@@ -7,7 +7,6 @@ import requests
7
7
  import urllib.request
8
8
  import ssl
9
9
 
10
-
11
10
  from typing import List as _List, Tuple as _Tuple
12
11
 
13
12
  from ..common import *
@@ -15,8 +14,6 @@ from ..auth import Auth
15
14
  from ..objects.volcanoplot import VolcanoPlotBuilder
16
15
  from ..objects.headers import *
17
16
 
18
- import warnings
19
-
20
17
 
21
18
  class SeerSDK:
22
19
  """
@@ -31,31 +28,57 @@ class SeerSDK:
31
28
  >>> seer_sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE)
32
29
  """
33
30
 
34
- def __init__(self, username, password, instance="US", tenant=None):
31
+ def __init__(
32
+ self,
33
+ username: str,
34
+ password: str,
35
+ instance: str = "US",
36
+ tenant: str = None,
37
+ tenant_id: str = None,
38
+ ):
35
39
  try:
36
40
  self._auth = Auth(username, password, instance)
37
41
 
38
42
  self._auth._login()
39
43
  print(f"User '{username}' logged in.\n")
44
+ except Exception as e:
45
+ raise ValueError(
46
+ f"Could not log in.\nPlease check your credentials and/or instance: {e}."
47
+ )
40
48
 
41
- if not tenant:
42
- tenant = self._auth.active_tenant_id
43
- try:
44
- self.switch_tenant(tenant)
45
- except Exception as e:
49
+ # direct logged in user to the specified tenant
50
+ tenant_data = pd.DataFrame(
51
+ self.get_user_tenant(index=False),
52
+ columns=["institution", "tenantId"],
53
+ ).rename(
54
+ columns={"institution": "Tenant name", "tenantId": "Tenant ID"}
55
+ )
56
+ tenant_names = tenant_data["Tenant name"].tolist()
57
+ tenant_ids = tenant_data["Tenant ID"].tolist()
58
+
59
+ # precondition: None is not a valid tenant_name or tenant_id.
60
+ if tenant_id is None and tenant is None:
61
+ self.logout()
62
+ if None in tenant_names:
46
63
  print(
47
- f"Encountered an error directing you to tenant {tenant}: {e}."
64
+ "Warning: You have access to a tenant with no name. Please either provide a tenant name in the PAS website or specify a tenant_id to access that tenant."
48
65
  )
49
- print("Logging into home tenant...")
50
- # If an error occurs while directing the user to a tenant, default to home tenant.
51
- print(f"You are now active in {self.get_active_tenant_name()}")
52
- except ServerError as e:
53
- raise e
54
- except Exception as e:
55
66
  raise ValueError(
56
- f"Could not log in.\nPlease check your credentials and/or instance: {e}."
67
+ f"Either tenant or tenant_id must be provided. Please indicate one of the following tenants: \n{tenant_data.to_string(index=False)}"
57
68
  )
58
69
 
70
+ if tenant_id not in tenant_ids:
71
+ if tenant in tenant_names:
72
+ # if multiple tenants exist for the same institution name, fall back on multiple tenant error in switch_tenant.
73
+ self.switch_tenant(tenant)
74
+ else:
75
+ self.logout()
76
+ raise ValueError(
77
+ f"Invalid tenant or tenant_id provided. Please indicate one of the following tenants: \n{tenant_data.to_string(index=False)}"
78
+ )
79
+ else:
80
+ self.switch_tenant(tenant_id)
81
+
59
82
  def logout(self):
60
83
  """
61
84
  Perform a logout operation for the current user of the SDK instance.
@@ -115,7 +138,7 @@ class SeerSDK:
115
138
  response = s.get(f"{self._auth.url}api/v1/usertenants")
116
139
 
117
140
  if response.status_code != 200:
118
- raise ValueError(
141
+ raise ServerError(
119
142
  "Invalid request. Please check your parameters."
120
143
  )
121
144
 
@@ -1202,8 +1225,8 @@ class SeerSDK:
1202
1225
 
1203
1226
  >>> seer_sdk.get_msruns(sample_ids)
1204
1227
  >>> [
1205
- {"id": "SAMPLE_ID_1_HERE" ... },
1206
- {"id": "SAMPLE_ID_2_HERE" ... }
1228
+ {"id": "MSRUN_ID_1_HERE" ... },
1229
+ {"id": "MSRUN_ID_2_HERE" ... }
1207
1230
  ]
1208
1231
 
1209
1232
  >>> seer_sdk.get_msruns(sample_ids, as_df=True)
@@ -1269,8 +1292,8 @@ class SeerSDK:
1269
1292
 
1270
1293
  >>> seer_sdk.find_msruns(sample_ids)
1271
1294
  >>> [
1272
- {"id": "SAMPLE_ID_1_HERE" ... },
1273
- {"id": "SAMPLE_ID_2_HERE" ... }
1295
+ {"id": "MSRUN_ID_1_HERE" ... },
1296
+ {"id": "MSRUN_ID_2_HERE" ... }
1274
1297
  ]
1275
1298
 
1276
1299
  >>> seer_sdk.find_msruns(sample_ids, as_df=True)
@@ -1284,25 +1307,34 @@ class SeerSDK:
1284
1307
  URL = f"{self._auth.url}api/v1/msdatas/items"
1285
1308
 
1286
1309
  res = []
1287
- for sample_id in sample_ids:
1288
1310
 
1289
- with self._get_auth_session("findmsdatas") as s:
1311
+ params = {"all": "true"}
1290
1312
 
1291
- msdatas = s.post(URL, json={"sampleId": sample_id})
1313
+ with self._get_auth_session("findmsdatas") as s:
1292
1314
 
1293
- if msdatas.status_code != 200 or not msdatas.json()["data"]:
1294
- raise ValueError(
1295
- f"Failed to fetch MS data for sample ID={sample_id}."
1296
- )
1315
+ msdatas = s.post(
1316
+ URL, json={"sampleId": ",".join(sample_ids)}, params=params
1317
+ )
1297
1318
 
1298
- res += [x for x in msdatas.json()["data"]]
1319
+ if msdatas.status_code != 200 or not msdatas.json()["data"]:
1320
+ raise ValueError(
1321
+ f"Failed to fetch MS data for sample IDs={sample_ids}."
1322
+ )
1323
+
1324
+ res += [x for x in msdatas.json()["data"]]
1299
1325
 
1300
1326
  spaces = {x["id"]: x["usergroup_name"] for x in self.get_spaces()}
1327
+
1328
+ def filepath_to_msrunid(filepath):
1329
+ return os.path.basename(filepath).split(".")[0]
1330
+
1301
1331
  for entry in res:
1302
1332
  if "tenant_id" in entry:
1303
1333
  del entry["tenant_id"]
1304
1334
 
1305
1335
  if "raw_file_path" in entry:
1336
+ # Provide a human-readable MS run id
1337
+ entry["Run"] = filepath_to_msrunid(entry["raw_file_path"])
1306
1338
  # Simple lambda function to find the third occurrence of '/' in the raw file path
1307
1339
  location = lambda s: len(s) - len(s.split("/", 3)[-1])
1308
1340
  # Slicing the string from the location
@@ -1313,6 +1345,13 @@ class SeerSDK:
1313
1345
  entry["space"] = spaces.get(entry["user_group"], "General")
1314
1346
  del entry["user_group"]
1315
1347
 
1348
+ # Rename the key sample_id to sample_uuid
1349
+ if "sample_id" in entry:
1350
+ entry["sample_uuid"] = entry.pop("sample_id")
1351
+ # Rename the key sample_id_tracking to sample_id
1352
+ if "sample_id_tracking" in entry:
1353
+ entry["sample_id"] = entry.pop("sample_id_tracking")
1354
+
1316
1355
  if not res and as_df:
1317
1356
  return pd.DataFrame(columns=MSRUN_COLUMNS)
1318
1357
  return res if not as_df else dict_to_df(res)
@@ -1471,7 +1510,7 @@ class SeerSDK:
1471
1510
  analysis_protocol_engine=res["analysis_engine"],
1472
1511
  )
1473
1512
  )
1474
- except:
1513
+ except Exception:
1475
1514
  res["fasta"] = ""
1476
1515
  return res
1477
1516
  else:
@@ -1590,10 +1629,7 @@ class SeerSDK:
1590
1629
  try:
1591
1630
  res[entry]["fasta"] = ",".join(
1592
1631
  self._get_analysis_protocol_fasta_filenames(
1593
- analysis_protocol_id=res[entry]["id"],
1594
- analysis_protocol_engine=res[entry].get(
1595
- "analysis_engine", None
1596
- ),
1632
+ analysis_protocol_id=res[entry]["id"]
1597
1633
  )
1598
1634
  )
1599
1635
  except:
@@ -1821,22 +1857,16 @@ class SeerSDK:
1821
1857
  if not res.get("is_folder") and res.get(
1822
1858
  "analysis_protocol_id"
1823
1859
  ):
1824
- analysis_protocol = self.get_analysis_protocol(
1825
- analysis_protocol_id=res.get("analysis_protocol_id")
1826
- )
1827
1860
  try:
1828
1861
  res["fasta"] = ",".join(
1829
1862
  self._get_analysis_protocol_fasta_filenames(
1830
1863
  analysis_protocol_id=res.get(
1831
1864
  "analysis_protocol_id"
1832
- ),
1833
- analysis_protocol_engine=analysis_protocol.get(
1834
- "analysis_engine", None
1835
- ),
1865
+ )
1836
1866
  )
1837
1867
  )
1838
1868
  except Exception as e:
1839
- print("Warning: Could not fetch fasta files.")
1869
+ print("Error: Could not fetch fasta files.")
1840
1870
  res["fasta"] = None
1841
1871
  else:
1842
1872
  res["fasta"] = None
@@ -1854,49 +1884,60 @@ class SeerSDK:
1854
1884
  else:
1855
1885
  return res[0]
1856
1886
 
1887
+ def _lookup_analysis_folders(self):
1888
+ """
1889
+ Helper function to fetch the analysis folder records that callers use to map analysis folder ids to names.
1890
+ """
1891
+ with self._get_auth_session("getanalysisfolders") as s:
1892
+ URL = f"{self._auth.url}api/v1/analyses"
1893
+ params = {"all": "true", "folderonly": "true"}
1894
+ folders = s.get(URL, params=params)
1895
+ if folders.status_code != 200:
1896
+ raise ValueError(
1897
+ "Failed to fetch analysis folders. Please check your connection."
1898
+ )
1899
+ res = folders.json()["data"]
1900
+ return res
1901
+
1857
1902
  def find_analyses(
1858
1903
  self,
1859
1904
  analysis_id: str = None,
1905
+ analysis_name: str = None,
1860
1906
  folder_id: str = None,
1861
- show_folders: bool = True,
1862
- analysis_only: bool = True,
1907
+ folder_name: str = None,
1863
1908
  project_id: str = None,
1909
+ project_name: str = None,
1864
1910
  plate_name: str = None,
1865
1911
  as_df=False,
1866
- **kwargs,
1867
1912
  ):
1868
1913
  """
1869
- Returns a list of analyses objects for the authenticated user. If no id is provided, returns all analyses for the authenticated user.
1870
- Search parameters may be passed in as keyword arguments to filter the results. Acceptable values are 'analysis_name', 'folder_name', 'description', 'notes', or 'number_msdatafile'.
1871
- Only search on a single field is supported.
1914
+ Returns a list of analyses objects for the authenticated user. If None is provided for all query arguments, returns all analyses for the authenticated user.
1872
1915
 
1873
1916
  Parameters
1874
1917
  ----------
1875
1918
  analysis_id : str, optional
1876
1919
  ID of the analysis to be fetched, defaulted to None.
1877
1920
 
1921
+ analysis_name : str, optional
1922
+ Name of the analysis to be fetched, defaulted to None. Results are matched on a substring basis.
1923
+
1878
1924
  folder_id : str, optional
1879
- ID of the folder to be fetched, defaulted to None.
1925
+ Unique ID of an analysis folder to filter results, defaulted to None.
1880
1926
 
1881
- show_folders : bool, optional
1882
- Mark True if folder contents are to be returned in the response, i.e. recursive search, defaulted to True.
1883
- Will be disabled if an analysis id is provided.
1884
-
1885
- analysis_only : bool, optional
1886
- Mark True if only analyses objects are to be returned in the response, defaulted to True.
1887
- If marked false, folder objects will also be included in the response.
1927
+ folder_name : str, optional
1928
+ Name of an analysis folder to filter results, defaulted to None.
1888
1929
 
1889
1930
  project_id : str, optional
1890
- ID of the project to be fetched, defaulted to None.
1931
+ Unique ID of a project to filter results, defaulted to None.
1932
+
1933
+ project_name : str, optional
1934
+ Name of a project to filter results, defaulted to None.
1891
1935
 
1892
1936
  plate_name : str, optional
1893
- Name of the plate to be fetched, defaulted to None.
1937
+ Name of a plate to filter results, defaulted to None.
1894
1938
 
1895
1939
  as_df : bool, optional
1896
- whether the result should be converted to a DataFrame, defaulted to False.
1897
-
1898
- **kwargs : dict, optional
1899
- Search keyword parameters to be passed in. Acceptable values are 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'.
1940
+ Whether the result should be converted to a DataFrame, defaulted to False.
1900
1941
 
1901
1942
  Returns
1902
1943
  -------
@@ -1930,51 +1971,44 @@ class SeerSDK:
1930
1971
  URL = f"{self._auth.url}api/v1/analyses"
1931
1972
  res = []
1932
1973
 
1933
- search_field = None
1934
- search_item = None
1935
- if kwargs:
1936
- if len(kwargs.keys()) > 1:
1937
- raise ValueError("Please include only one search parameter.")
1938
- search_field = list(kwargs.keys())[0]
1939
- search_item = kwargs[search_field]
1940
-
1941
- if not search_item:
1942
- raise ValueError(
1943
- f"Please provide a non null value for {search_field}"
1944
- )
1945
-
1946
- if search_field and search_field not in [
1947
- "analysis_name",
1948
- "folder_name",
1949
- "analysis_protocol_name",
1950
- "description",
1951
- "notes",
1952
- "number_msdatafile",
1953
- ]:
1954
- raise ValueError(
1955
- "Invalid search field. Please choose between 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'."
1956
- )
1957
-
1958
1974
  if analysis_id:
1959
1975
  try:
1960
1976
  return [self.get_analysis(analysis_id=analysis_id)]
1961
- except:
1977
+ except Exception:
1962
1978
  return []
1963
1979
 
1980
+ analysis_folders = self._lookup_analysis_folders()
1981
+ analysis_folder_id_to_name = {
1982
+ x["id"]: x["analysis_name"] for x in analysis_folders
1983
+ }
1984
+ analysis_folder_name_to_id = {
1985
+ v: k for k, v in analysis_folder_id_to_name.items()
1986
+ }
1987
+
1988
+ if folder_name and not folder_id:
1989
+ folder_id = analysis_folder_name_to_id.get(folder_name, None)
1990
+ if not folder_id:
1991
+ raise ValueError(f"No folder found with name '{folder_name}'.")
1992
+
1993
+ if project_name and not project_id:
1994
+ project = self.get_project(project_name=project_name)
1995
+ if not project:
1996
+ raise ValueError(
1997
+ f"No project found with name '{project_name}'."
1998
+ )
1999
+ project_id = project["id"]
2000
+
1964
2001
  with self._get_auth_session("findanalyses") as s:
1965
2002
 
1966
- params = {"all": "true"}
2003
+ params = {"all": "true", "analysisonly": "true"}
1967
2004
  if folder_id:
1968
2005
  params["folder"] = folder_id
1969
2006
 
1970
- if search_field:
1971
- params["searchFields"] = search_field
1972
- params["searchItem"] = search_item
2007
+ if analysis_name:
2008
+ params["searchFields"] = "analysis_name"
2009
+ params["searchItem"] = analysis_name
1973
2010
  del params["all"]
1974
2011
 
1975
- if search_field == "folder_name":
1976
- params["searchFields"] = "analysis_name"
1977
-
1978
2012
  if project_id:
1979
2013
  params["projectId"] = project_id
1980
2014
 
@@ -1989,9 +2023,8 @@ class SeerSDK:
1989
2023
  )
1990
2024
  res = analyses.json()["data"]
1991
2025
 
1992
- folders = []
1993
2026
  spaces = {x["id"]: x["usergroup_name"] for x in self.get_spaces()}
1994
- protocol_to_engine_map = dict()
2027
+ protocol_to_fasta = {}
1995
2028
  for entry in range(len(res)):
1996
2029
  if "tenant_id" in res[entry]:
1997
2030
  del res[entry]["tenant_id"]
@@ -2005,11 +2038,14 @@ class SeerSDK:
2005
2038
  ][location(res[entry]["parameter_file_path"]) :]
2006
2039
 
2007
2040
  if (
2008
- show_folders
2009
- and not analysis_id
2010
- and res[entry]["is_folder"]
2041
+ "folder_id" in res[entry]
2042
+ and res[entry]["folder_id"] is not None
2011
2043
  ):
2012
- folders.append(res[entry]["id"])
2044
+ res[entry]["folder_name"] = analysis_folder_id_to_name.get(
2045
+ res[entry]["folder_id"], None
2046
+ )
2047
+ res[entry]["folder_uuid"] = res[entry]["folder_id"]
2048
+ del res[entry]["folder_id"]
2013
2049
 
2014
2050
  if "user_group" in res[entry]:
2015
2051
  res[entry]["space"] = spaces.get(
@@ -2020,51 +2056,34 @@ class SeerSDK:
2020
2056
  if (not res[entry].get("is_folder")) and res[entry].get(
2021
2057
  "analysis_protocol_id"
2022
2058
  ):
2023
- if (
2024
- res[entry]["analysis_protocol_id"]
2025
- in protocol_to_engine_map
2026
- ):
2027
- analysis_protocol_engine = protocol_to_engine_map[
2028
- res[entry]["analysis_protocol_id"]
2059
+ # analysis_protocol_id for this result row
2060
+ local_analysis_protocol_id = res[entry].get(
2061
+ "analysis_protocol_id"
2062
+ )
2063
+ if local_analysis_protocol_id in protocol_to_fasta:
2064
+ res[entry]["fasta"] = protocol_to_fasta[
2065
+ local_analysis_protocol_id
2029
2066
  ]
2030
2067
  else:
2031
2068
  try:
2032
- analysis_protocol = self.get_analysis_protocol(
2033
- analysis_protocol_id=res[entry].get(
2034
- "analysis_protocol_id"
2069
+ res[entry]["fasta"] = ",".join(
2070
+ self._get_analysis_protocol_fasta_filenames(
2071
+ local_analysis_protocol_id,
2072
+ analysis_protocol_engine=res[entry].get(
2073
+ "analysis_engine"
2074
+ ),
2035
2075
  )
2036
2076
  )
2037
- analysis_protocol_engine = analysis_protocol.get(
2038
- "analysis_engine", None
2077
+ protocol_to_fasta[local_analysis_protocol_id] = (
2078
+ res[entry]["fasta"]
2039
2079
  )
2040
- protocol_to_engine_map[
2041
- res[entry]["analysis_protocol_id"]
2042
- ] = analysis_protocol_engine
2043
2080
  except:
2044
- analysis_protocol_engine = None
2045
- try:
2046
- res[entry]["fasta"] = ",".join(
2047
- self._get_analysis_protocol_fasta_filenames(
2048
- res[entry]["analysis_protocol_id"],
2049
- analysis_protocol_engine=analysis_protocol_engine,
2081
+ print(
2082
+ f"Error: Could not fetch fasta files for analysis {res[entry].get('analysis_name')}."
2050
2083
  )
2051
- )
2052
- except:
2053
- print(
2054
- f"Warning: Could not fetch fasta files for analysis {res[entry].get('analysis_name')}."
2055
- )
2056
- res[entry]["fasta"] = None
2057
2084
  else:
2058
2085
  res[entry]["fasta"] = None
2059
2086
 
2060
- # recursive solution to get analyses in folders
2061
- for folder in folders:
2062
- res += self.find_analyses(folder_id=folder)
2063
-
2064
- if analysis_only:
2065
- res = [
2066
- analysis for analysis in res if not analysis["is_folder"]
2067
- ]
2068
2087
  if not res and as_df:
2069
2088
  return pd.DataFrame(columns=ANALYSIS_COLUMNS)
2070
2089
  return res if not as_df else dict_to_df(res)
@@ -2376,7 +2395,11 @@ class SeerSDK:
2376
2395
  return files
2377
2396
 
2378
2397
  def get_search_result(
2379
- self, analysis_id: str, analyte_type: str, rollup: str
2398
+ self,
2399
+ analysis_id: str,
2400
+ analyte_type: str,
2401
+ rollup: str,
2402
+ columns: _List[str] = None,
2380
2403
  ):
2381
2404
  """
2382
2405
  Load one of the files available via the "Download result files" button on the PAS UI.
@@ -2417,6 +2440,7 @@ class SeerSDK:
2417
2440
  "npLink"
2418
2441
  ]["url"],
2419
2442
  dtype=dtype,
2443
+ usecols=columns,
2420
2444
  )
2421
2445
  elif rollup == "panel":
2422
2446
  return download_df(
@@ -2424,6 +2448,7 @@ class SeerSDK:
2424
2448
  "panelLink"
2425
2449
  ]["url"],
2426
2450
  dtype=dtype,
2451
+ usecols=columns,
2427
2452
  )
2428
2453
  elif analyte_type == "peptide":
2429
2454
  if rollup == "np":
@@ -2432,6 +2457,7 @@ class SeerSDK:
2432
2457
  "npLink"
2433
2458
  ]["url"],
2434
2459
  dtype=dtype,
2460
+ usecols=columns,
2435
2461
  )
2436
2462
  elif rollup == "panel":
2437
2463
  return download_df(
@@ -2439,12 +2465,14 @@ class SeerSDK:
2439
2465
  "panelLink"
2440
2466
  ]["url"],
2441
2467
  dtype=dtype,
2468
+ usecols=columns,
2442
2469
  )
2443
2470
  else:
2444
2471
  return download_df(
2445
2472
  self.get_search_result_file_url(
2446
2473
  analysis_id, filename="report.tsv"
2447
- )["url"]
2474
+ )["url"],
2475
+ usecols=columns,
2448
2476
  )
2449
2477
 
2450
2478
  def download_search_output_file(
@@ -4059,7 +4087,7 @@ class SeerSDK:
4059
4087
  print(f"Downloaded file to {download_path}/{file}")
4060
4088
 
4061
4089
  def _get_analysis_protocol_fasta_filenames(
4062
- self, analysis_protocol_id: str, analysis_protocol_engine: str
4090
+ self, analysis_protocol_id: str, analysis_protocol_engine: str = None
4063
4091
  ):
4064
4092
  """
4065
4093
  Helper function - Get the fasta file name(s) associated with a given analysis protocol and engine.
@@ -4069,6 +4097,14 @@ class SeerSDK:
4069
4097
  Returns:
4070
4098
  list[str]: A list of fasta file names associated with the analysis protocol.
4071
4099
  """
4100
+ if not analysis_protocol_engine:
4101
+ analysis_protocol_engine = self.get_analysis_protocol(
4102
+ analysis_protocol_id=analysis_protocol_id
4103
+ ).get("analysis_engine")
4104
+ if not analysis_protocol_engine:
4105
+ raise ValueError(
4106
+ f"Could not retrieve analysis protocol engine for analysis protocol {analysis_protocol_id}."
4107
+ )
4072
4108
  analysis_protocol_engine = analysis_protocol_engine.lower()
4073
4109
  if analysis_protocol_engine == "diann":
4074
4110
  URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/diann/{analysis_protocol_id}"
@@ -4108,8 +4144,35 @@ class SeerSDK:
4108
4144
  raise ServerError("No fasta file name returned from server.")
4109
4145
  return fasta_filenames
4110
4146
 
4111
- def get_analysis_protocol_fasta_link(
4112
- self, analysis_protocol_id=None, analysis_id=None, analysis_name=None
4147
+ def _get_analysis_protocol_fasta_url(
4148
+ self, analysis_protocol_fasta_name: str
4149
+ ):
4150
+ """
4151
+ Helper function - Get the download link for a given fasta file name.
4152
+
4153
+ Args:
4154
+ analysis_protocol_fasta_name (str): Name of the fasta file.
4155
+
4156
+ Returns:
4157
+ str: The URL to download the fasta file.
4158
+ """
4159
+ URL = f"{self._auth.url}api/v1/analysisProtocolFiles/getUrl"
4160
+ with self._get_auth_session("getanalysisprotocolfilesurl") as s:
4161
+ response = s.post(
4162
+ URL, json={"filepath": analysis_protocol_fasta_name}
4163
+ )
4164
+ if response.status_code != 200:
4165
+ raise ServerError(
4166
+ f"Could not retrieve download link for {analysis_protocol_fasta_name}."
4167
+ )
4168
+ url = response.json()["url"]
4169
+ return url
4170
+
4171
+ def get_analysis_protocol_fasta_urls(
4172
+ self,
4173
+ analysis_protocol_id=None,
4174
+ analysis_id=None,
4175
+ analysis_name=None,
4113
4176
  ):
4114
4177
  """Get the download link(s) for the fasta file(s) associated with a given analysis protocol.
4115
4178
  Args:
@@ -4120,52 +4183,31 @@ class SeerSDK:
4120
4183
  Returns:
4121
4184
  dict[str, str]: A dictionary mapping each fasta 'filename' to the 'url' to download that fasta file.
4122
4185
  """
4123
- if analysis_name and (not analysis_id):
4124
- analyses = self.find_analyses(analysis_name=analysis_name)
4125
- if len(analyses) > 1:
4126
- raise ValueError(
4127
- f"Multiple analyses found with name {analysis_name}. Please provide an analysis ID instead."
4128
- )
4129
- elif len(analyses) == 0:
4130
- raise ValueError(
4131
- f"No analyses found with name {analysis_name}."
4132
- )
4133
- else:
4134
- analysis_id = analyses[0]["id"]
4135
-
4136
- if not (bool(analysis_protocol_id) ^ bool(analysis_id)):
4137
- raise ValueError(
4138
- "Please provide either an analysis ID or an analysis protocol ID."
4139
- )
4140
-
4141
4186
  if not analysis_protocol_id:
4142
- try:
4143
- analysis_protocol_id = self.get_analysis(
4144
- analysis_id=analysis_id
4145
- )["analysis_protocol_id"]
4146
- except KeyError:
4147
- raise ValueError(f"Could not parse server response.")
4187
+ analysis = self.get_analysis(
4188
+ analysis_id=analysis_id,
4189
+ analysis_name=analysis_name,
4190
+ )
4191
+ analysis_protocol_id = analysis.get("analysis_protocol_id")
4148
4192
 
4149
- engine = self.get_analysis_protocol(
4150
- analysis_protocol_id=analysis_protocol_id
4151
- ).get("analysis_engine", None)
4152
4193
  fasta_filenames = self._get_analysis_protocol_fasta_filenames(
4153
- analysis_protocol_id=analysis_protocol_id,
4154
- analysis_protocol_engine=engine,
4194
+ analysis_protocol_id=analysis_protocol_id
4155
4195
  )
4156
- URL = f"{self._auth.url}api/v1/analysisProtocolFiles/getUrl"
4157
- links = []
4158
- for file in fasta_filenames:
4159
- with self._get_auth_session("getanalysisprotocolfilesurl") as s:
4160
- filename = os.path.basename(file)
4161
- response = s.post(URL, json={"filepath": file})
4162
- if response.status_code != 200:
4163
- print(
4164
- f"ERROR: Could not retrieve download link for {filename}."
4165
- )
4166
- continue
4167
- url = response.json()["url"]
4168
- links.append({"filename": filename, "url": url})
4196
+
4197
+ links = {}
4198
+ for filepath in fasta_filenames:
4199
+ filename = os.path.basename(filepath)
4200
+ try:
4201
+ url = self._get_analysis_protocol_fasta_url(
4202
+ analysis_protocol_fasta_name=filepath
4203
+ )
4204
+ except ServerError:
4205
+ print(
4206
+ f"ERROR: Could not retrieve download link for {filename}."
4207
+ )
4208
+ continue
4209
+
4210
+ links[filename] = url
4169
4211
  return links
4170
4212
 
4171
4213
  def download_analysis_protocol_fasta(
@@ -4186,20 +4228,28 @@ class SeerSDK:
4186
4228
  Returns:
4187
4229
  list[str] : The path to the downloaded fasta file(s).
4188
4230
  """
4189
-
4190
- links = [
4191
- (x["filename"], x["url"])
4192
- for x in self.get_analysis_protocol_fasta_link(
4193
- analysis_protocol_id=analysis_protocol_id,
4194
- analysis_id=analysis_id,
4195
- analysis_name=analysis_name,
4231
+ if not analysis_protocol_id:
4232
+ analysis = self.get_analysis(
4233
+ analysis_id=analysis_id, analysis_name=analysis_name
4196
4234
  )
4197
- ]
4235
+ analysis_protocol_id = analysis.get("analysis_protocol_id")
4236
+
4237
+ filepaths = self._get_analysis_protocol_fasta_filenames(
4238
+ analysis_protocol_id=analysis_protocol_id
4239
+ )
4198
4240
  if not download_path:
4199
4241
  download_path = os.getcwd()
4200
4242
 
4201
4243
  downloads = []
4202
- for filename, url in links:
4244
+ for filepath in filepaths:
4245
+ # run sequentially to avoid signed url expiration
4246
+ url = self._get_analysis_protocol_fasta_url(
4247
+ analysis_protocol_fasta_name=filepath
4248
+ )
4249
+ filename = os.path.basename(filepath)
4250
+
4251
+ # local path of the file after download (download_path joined with the file name)
4252
+ local_filename = f"{download_path}/{filename}"
4203
4253
  print(f"Downloading {filename}")
4204
4254
  for _ in range(2):
4205
4255
  try:
@@ -4215,7 +4265,7 @@ class SeerSDK:
4215
4265
  )
4216
4266
  urllib.request.urlretrieve(
4217
4267
  url,
4218
- f"{download_path}/{filename}",
4268
+ local_filename,
4219
4269
  reporthook=download_hook(t),
4220
4270
  data=None,
4221
4271
  )
@@ -4224,5 +4274,5 @@ class SeerSDK:
4224
4274
  if not os.path.isdir(f"{download_path}"):
4225
4275
  os.makedirs(f"{download_path}")
4226
4276
 
4227
- downloads.append(f"{download_path}/{filename}")
4277
+ downloads.append(local_filename)
4228
4278
  return downloads