nmdc-runtime 1.6.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nmdc-runtime might be problematic. Click here for more details.

@@ -0,0 +1,433 @@
1
+ import os
2
+ import datetime
3
+ import xml.etree.ElementTree as ET
4
+ import xml.dom.minidom
5
+
6
+ from typing import Any
7
+ from urllib.parse import urlparse
8
+ from nmdc_runtime.site.export.ncbi_xml_utils import (
9
+ handle_controlled_identified_term_value,
10
+ handle_controlled_term_value,
11
+ handle_geolocation_value,
12
+ handle_quantity_value,
13
+ handle_text_value,
14
+ handle_timestamp_value,
15
+ handle_float_value,
16
+ handle_string_value,
17
+ load_mappings,
18
+ validate_xml,
19
+ )
20
+
21
+
22
class NCBISubmissionXML:
    """Builds an NCBI Submission Portal XML document (<Submission>) from NMDC
    study/biosample metadata dictionaries."""

    def __init__(self, nmdc_study: Any, ncbi_submission_metadata: dict):
        """Capture study-level fields and submission settings.

        :param nmdc_study: NMDC study document (dict-like); `id`, `title`,
            `description`, `insdc_bioproject_identifiers`, and
            `principal_investigator` are read if present.
        :param ncbi_submission_metadata: configuration dict carrying the
            attribute-mapping TSV URL plus nested `ncbi_submission_metadata`
            and `ncbi_biosample_metadata` dicts.
        """
        self.root = ET.Element("Submission")

        self.nmdc_study_id = nmdc_study.get("id")
        self.nmdc_study_title = nmdc_study.get("title")
        self.nmdc_study_description = nmdc_study.get("description")
        self.ncbi_bioproject_id = nmdc_study.get("insdc_bioproject_identifiers")
        self.nmdc_pi_email = nmdc_study.get("principal_investigator", {}).get("email")
        # Guard against a missing PI name: `.get("name")` can return None,
        # which previously raised AttributeError on `.split()`, and an empty
        # name previously raised IndexError on `[0]`.
        nmdc_study_pi_name = (
            nmdc_study.get("principal_investigator", {}).get("name") or ""
        ).split()
        self.first_name = nmdc_study_pi_name[0] if nmdc_study_pi_name else None
        self.last_name = nmdc_study_pi_name[1] if len(nmdc_study_pi_name) > 1 else None

        self.nmdc_ncbi_attribute_mapping_file_url = ncbi_submission_metadata.get(
            "nmdc_ncbi_attribute_mapping_file_url"
        )
        self.ncbi_submission_metadata = ncbi_submission_metadata.get(
            "ncbi_submission_metadata", {}
        )
        self.ncbi_biosample_metadata = ncbi_submission_metadata.get(
            "ncbi_biosample_metadata", {}
        )

        # Dispatcher dictionary mapping NMDC slot-range names to handlers that
        # flatten structured NMDC values into NCBI flat Attribute strings.
        self.type_handlers = {
            "QuantityValue": handle_quantity_value,
            "TextValue": handle_text_value,
            "TimestampValue": handle_timestamp_value,
            "ControlledTermValue": handle_controlled_term_value,
            "ControlledIdentifiedTermValue": handle_controlled_identified_term_value,
            "GeolocationValue": handle_geolocation_value,
            "float": handle_float_value,
            "string": handle_string_value,
        }
59
+
60
+ def set_element(self, tag, text="", attrib=None, children=None):
61
+ attrib = attrib or {}
62
+ children = children or []
63
+ element = ET.Element(tag, attrib=attrib)
64
+ element.text = text
65
+ for child in children:
66
+ element.append(child)
67
+ return element
68
+
69
    def set_description(self, email, user, first, last, org, date=None):
        """Append the top-level <Description> block to the <Submission> root.

        The block carries a free-text comment naming the NMDC study, the
        submission-portal account, the owning organization with its contact
        person, and a <Hold> element with the release date.

        :param email: contact email placed on the Organization's <Contact>
        :param user: submission-portal account name (<Submitter user_name=...>)
        :param first: contact first name
        :param last: contact last name
        :param org: organization name
        :param date: release date (YYYY-MM-DD); defaults to today's date
        """
        # Hold/release date defaults to the day the XML is generated.
        date = date or datetime.datetime.now().strftime("%Y-%m-%d")
        description = self.set_element(
            "Description",
            children=[
                self.set_element(
                    "Comment", f"NMDC Submission for {self.nmdc_study_id}"
                ),
                self.set_element("Submitter", attrib={"user_name": user}),
                self.set_element(
                    "Organization",
                    attrib={"role": "owner", "type": "center"},
                    children=[
                        self.set_element("Name", org),
                        self.set_element(
                            "Contact",
                            attrib={"email": email},
                            children=[
                                self.set_element(
                                    "Name",
                                    children=[
                                        self.set_element("First", first),
                                        self.set_element("Last", last),
                                    ],
                                )
                            ],
                        ),
                    ],
                ),
                self.set_element("Hold", attrib={"release_date": date}),
            ],
        )
        self.root.append(description)
102
+
103
+ def set_descriptor(self, title, description):
104
+ descriptor_elements = []
105
+ descriptor_elements.append(self.set_element("Title", title))
106
+ descriptor_elements.append(
107
+ self.set_element(
108
+ "Description", children=[self.set_element("p", description)]
109
+ )
110
+ )
111
+
112
+ return descriptor_elements
113
+
114
    def set_bioproject(self, title, project_id, description, data_type, org):
        """Append an <Action>/<AddData target_db="BioProject"> block that
        registers a new BioProject with the given SPUID.

        :param title: BioProject title
        :param project_id: SPUID text used for both the ProjectID and the
            Action-level Identifier
        :param description: BioProject description text
        :param data_type: intended data type (e.g. "Metagenome")
        :param org: SPUID namespace (submitting organization)
        """
        action = self.set_element("Action")
        add_data = self.set_element("AddData", attrib={"target_db": "BioProject"})

        data_element = self.set_element("Data", attrib={"content_type": "XML"})
        xml_content = self.set_element("XmlContent")
        project = self.set_element("Project", attrib={"schema_version": "2.0"})

        project_id_element = self.set_element("ProjectID")
        spuid = self.set_element("SPUID", project_id, {"spuid_namespace": org})
        project_id_element.append(spuid)

        descriptor = self.set_descriptor(title, description)
        project_type = self.set_element("ProjectType")
        # "sample_scope" is an enumeration field. Docs: https://www.ncbi.nlm.nih.gov/data_specs/schema/other/bioproject/Core.xsd
        # Scope is "eEnvironment" when the content of species in a sample is
        # not known, i.e. microbiome/metagenome samples.
        project_type_submission = self.set_element(
            "ProjectTypeSubmission", attrib={"sample_scope": "eEnvironment"}
        )
        intended_data_type_set = self.set_element("IntendedDataTypeSet")
        data_type_element = self.set_element("DataType", data_type)

        intended_data_type_set.append(data_type_element)
        project_type_submission.append(intended_data_type_set)
        project_type.append(project_type_submission)

        # Element order inside <Project> follows the BioProject XSD:
        # ProjectID, then descriptor (Title + Description), then ProjectType.
        project.extend([project_id_element] + descriptor + [project_type])

        xml_content.append(project)
        data_element.append(xml_content)
        add_data.append(data_element)

        # The SPUID is repeated as the Action-level <Identifier> so NCBI
        # processing reports can be tied back to this submission object.
        identifier = self.set_element("Identifier")
        spuid_identifier = self.set_element(
            "SPUID", project_id, {"spuid_namespace": org}
        )
        identifier.append(spuid_identifier)
        add_data.append(identifier)

        action.append(add_data)
        self.root.append(action)
155
+
156
    def set_biosample(
        self,
        organism_name,
        org,
        bioproject_id,
        nmdc_biosamples,
        nmdc_omics_processing,
    ):
        """Append one <Action>/<AddData target_db="BioSample"> block per NMDC
        biosample.

        Scalar biosample slots are translated to flat NCBI Attributes using
        the mapping TSV fetched from
        ``self.nmdc_ncbi_attribute_mapping_file_url``; list-valued slots are
        skipped entirely.

        :param organism_name: organism name used in <Organism> and the title
        :param org: SPUID namespace (submitting organization)
        :param bioproject_id: PrimaryId of the BioProject each sample links to
        :param nmdc_biosamples: list of NMDC biosample dicts
        :param nmdc_omics_processing: currently unread by this method —
            NOTE(review): confirm whether it is still needed in the signature
        """
        attribute_mappings, slot_range_mappings = load_mappings(
            self.nmdc_ncbi_attribute_mapping_file_url
        )

        for biosample in nmdc_biosamples:
            attributes = {}
            sample_id_value = None
            env_package = None

            for json_key, value in biosample.items():
                if isinstance(value, list):
                    continue  # Skip processing for list values

                # env_package becomes the NCBI <Package> name, wrapped in the
                # MIMS.me.<package>.6.0 template.
                if json_key == "env_package":
                    env_package = f"MIMS.me.{handle_text_value(value)}.6.0"

                # Special handling for NMDC Biosample "id": used as the SPUID,
                # not as an attribute.
                if json_key == "id":
                    sample_id_value = value
                    continue

                # Only slots present in the mapping TSV are exported.
                if json_key not in attribute_mappings:
                    continue

                xml_key = attribute_mappings[json_key]
                value_type = slot_range_mappings.get(json_key, "string")
                # Fall back to plain string coercion for unknown slot ranges.
                handler = self.type_handlers.get(value_type, handle_string_value)

                formatted_value = handler(value)
                attributes[xml_key] = formatted_value

            # Children of <BioSample>, in schema order.
            biosample_elements = [
                self.set_element(
                    "SampleId",
                    children=[
                        self.set_element(
                            "SPUID", sample_id_value, {"spuid_namespace": org}
                        )
                    ],
                ),
                self.set_element(
                    "Descriptor",
                    children=[
                        self.set_element(
                            "Title",
                            f"NMDC Biosample {sample_id_value} from {organism_name} part of {self.nmdc_study_id} study",
                        ),
                    ],
                ),
                self.set_element(
                    "Organism",
                    children=[self.set_element("OrganismName", organism_name)],
                ),
                self.set_element(
                    "BioProject",
                    children=[
                        self.set_element(
                            "PrimaryId", bioproject_id, {"db": "BioProject"}
                        )
                    ],
                ),
                self.set_element("Package", env_package),
                self.set_element(
                    "Attributes",
                    # Sorted for deterministic output across runs.
                    children=[
                        self.set_element(
                            "Attribute", attributes[key], {"attribute_name": key}
                        )
                        for key in sorted(attributes)
                    ],
                ),
            ]

            action = self.set_element(
                "Action",
                children=[
                    self.set_element(
                        "AddData",
                        attrib={"target_db": "BioSample"},
                        children=[
                            self.set_element(
                                "Data",
                                attrib={"content_type": "XML"},
                                children=[
                                    self.set_element(
                                        "XmlContent",
                                        children=[
                                            self.set_element(
                                                "BioSample",
                                                attrib={"schema_version": "2.0"},
                                                children=biosample_elements,
                                            ),
                                        ],
                                    ),
                                ],
                            ),
                            # Action-level Identifier repeats the sample SPUID.
                            self.set_element(
                                "Identifier",
                                children=[
                                    self.set_element(
                                        "SPUID",
                                        sample_id_value,
                                        {"spuid_namespace": org},
                                    ),
                                ],
                            ),
                        ],
                    ),
                ],
            )
            self.root.append(action)
275
+
276
    def set_fastq(
        self,
        biosample_data_objects: list,
        bioproject_id: str,
        org: str,
    ):
        """Append one <Action>/<AddFiles target_db="SRA"> block per entry in
        *biosample_data_objects*, referencing the FASTQ files by path plus the
        owning BioProject and BioSample SPUIDs.

        :param biosample_data_objects: list of dicts mapping a biosample id to
            its data-object documents (each may carry a "url")
        :param bioproject_id: SPUID of the BioProject the files belong to
        :param org: SPUID namespace (submitting organization)
        """
        for entry in biosample_data_objects:
            fastq_files = []
            biosample_ids = []

            for biosample_id, data_objects in entry.items():
                biosample_ids.append(biosample_id)
                for data_object in data_objects:
                    if "url" in data_object:
                        url = urlparse(data_object["url"])
                        # Keep only "<parent dir>/<filename>" from the URL path;
                        # NCBI expects paths relative to the upload root.
                        file_path = os.path.join(
                            os.path.basename(os.path.dirname(url.path)),
                            os.path.basename(url.path),
                        )
                        fastq_files.append(file_path)

            # Entries with no file URLs produce no <Action> at all.
            if fastq_files:
                files_elements = [
                    self.set_element(
                        "File",
                        "",
                        {"file_path": f},
                        [self.set_element("DataType", "generic-data")],
                    )
                    for f in fastq_files
                ]

                # Link the files to their BioProject...
                attribute_elements = [
                    self.set_element(
                        "AttributeRefId",
                        attrib={"name": "BioProject"},
                        children=[
                            self.set_element(
                                "RefId",
                                children=[
                                    self.set_element(
                                        "SPUID",
                                        bioproject_id,
                                        {"spuid_namespace": org},
                                    )
                                ],
                            )
                        ],
                    )
                ]

                # ...and to every contributing BioSample.
                for biosample_id in biosample_ids:
                    attribute_elements.append(
                        self.set_element(
                            "AttributeRefId",
                            attrib={"name": "BioSample"},
                            children=[
                                self.set_element(
                                    "RefId",
                                    children=[
                                        self.set_element(
                                            "SPUID",
                                            biosample_id,
                                            {"spuid_namespace": org},
                                        )
                                    ],
                                )
                            ],
                        )
                    )

                identifier_element = self.set_element(
                    "Identifier",
                    children=[
                        self.set_element(
                            "SPUID", bioproject_id, {"spuid_namespace": org}
                        )
                    ],
                )

                action = self.set_element(
                    "Action",
                    children=[
                        self.set_element(
                            "AddFiles",
                            attrib={"target_db": "SRA"},
                            children=files_elements
                            + attribute_elements
                            + [identifier_element],
                        ),
                    ],
                )

                self.root.append(action)
370
+
371
    def get_submission_xml(
        self,
        biosamples_list: list,
        biosample_omics_processing_list: list,
        biosample_data_objects_list: list,
    ):
        """Assemble the full submission document and return it as a
        pretty-printed XML string.

        Scans the omics-processing records for a data type and an existing
        NCBI project name, then emits the Description, (optionally) a new
        BioProject, the BioSamples, and the SRA file actions.

        :param biosamples_list: NMDC biosample dicts
        :param biosample_omics_processing_list: dicts mapping biosample ids to
            their omics-processing records
        :param biosample_data_objects_list: dicts mapping biosample ids to
            their data-object records
        :return: pretty-printed submission XML string
        """
        data_type = None
        ncbi_project_id = None
        # Last record seen wins for both data_type and ncbi_project_id.
        for bsm_omprc in biosample_omics_processing_list:
            for _, omprc_list in bsm_omprc.items():
                for omprc in omprc_list:
                    if "omics_type" in omprc:
                        data_type = handle_text_value(omprc["omics_type"]).capitalize()

                    if "ncbi_project_name" in omprc:
                        ncbi_project_id = omprc["ncbi_project_name"]

        self.set_description(
            email=self.nmdc_pi_email,
            user="National Microbiome Data Collaborative (NMDC)",
            first=self.first_name,
            last=self.last_name,
            org=self.ncbi_submission_metadata.get("organization", ""),
        )

        # Only create a new BioProject when no pre-existing NCBI project name
        # was found in the omics-processing records.
        # NOTE(review): in this branch ncbi_project_id is falsy, so the new
        # BioProject's SPUID text is None/empty — confirm whether the NMDC
        # study id was intended as the SPUID here.
        if not ncbi_project_id:
            self.set_bioproject(
                title=self.nmdc_study_title,
                project_id=ncbi_project_id,
                description=self.nmdc_study_description,
                data_type=data_type,
                org=self.ncbi_submission_metadata.get("organization", ""),
            )

        self.set_biosample(
            organism_name=self.ncbi_biosample_metadata.get("organism_name", ""),
            org=self.ncbi_submission_metadata.get("organization", ""),
            bioproject_id=ncbi_project_id,
            nmdc_biosamples=biosamples_list,
            nmdc_omics_processing=biosample_omics_processing_list,
        )

        self.set_fastq(
            biosample_data_objects=biosample_data_objects_list,
            bioproject_id=ncbi_project_id,
            org=self.ncbi_submission_metadata.get("organization", ""),
        )

        # Round-trip through minidom purely to pretty-print the output.
        rough_string = ET.tostring(self.root, "unicode")
        reparsed = xml.dom.minidom.parseString(rough_string)
        submission_xml = reparsed.toprettyxml(indent="    ", newl="\n")

        # ============= Uncomment the following code to validate the XML against NCBI XSDs ============ #
        # submission_xsd_url = "https://www.ncbi.nlm.nih.gov/viewvc/v1/trunk/submit/public-docs/common/submission.xsd?view=co"
        # validate_xml(submission_xml, submission_xsd_url)

        # bioproject_xsd_url = "https://www.ncbi.nlm.nih.gov/viewvc/v1/trunk/submit/public-docs/bioproject/bioproject.xsd?view=co"
        # validate_xml(submission_xml, bioproject_xsd_url)

        # biosample_xsd_url = "https://www.ncbi.nlm.nih.gov/viewvc/v1/trunk/submit/public-docs/biosample/biosample.xsd?view=co"
        # validate_xml(submission_xml, biosample_xsd_url)

        return submission_xml
@@ -0,0 +1,206 @@
1
+ from io import BytesIO, StringIO
2
+ from nmdc_runtime.minter.config import typecodes
3
+ from lxml import etree
4
+
5
+ import csv
6
+ import requests
7
+
8
+
9
+ def _build_class_map(class_map_data):
10
+ return {
11
+ entry["name"]: entry["schema_class"].split(":")[1] for entry in class_map_data
12
+ }
13
+
14
+
15
def get_classname_from_typecode(doc_id):
    """Resolve an NMDC identifier (e.g. "nmdc:bsm-11-abc123") to its schema
    class name via the minter typecode registry, or None if unknown."""
    class_map = _build_class_map(typecodes())
    # The typecode is the token between the prefix colon and the first dash.
    typecode = doc_id.split(":")[1].split("-")[0]
    return class_map.get(typecode)
21
+
22
+
23
def fetch_data_objects_from_biosamples(all_docs_collection, biosamples_list):
    """Walk each biosample's has_input/has_output chain and collect every
    DataObject document reachable downstream.

    :param all_docs_collection: Mongo-like collection exposing ``find_one``
    :param biosamples_list: NMDC biosample dicts (each with an "id")
    :return: list of one-entry dicts {biosample_id: [data_object_doc, ...]};
        biosamples with no reachable DataObjects are omitted
    """
    results = []

    for biosample in biosamples_list:
        frontier = [biosample["id"]]
        data_objects = []

        # Breadth-first traversal: ids that are not DataObjects are treated
        # as intermediate products and expanded on the next pass.
        while frontier:
            next_frontier = []
            for node_id in frontier:
                document = all_docs_collection.find_one({"has_input": node_id})
                if not document:
                    continue

                outputs = document.get("has_output")
                if not outputs:
                    continue

                for output_id in outputs:
                    if get_classname_from_typecode(output_id) == "DataObject":
                        data_object_doc = all_docs_collection.find_one(
                            {"id": output_id}
                        )
                        if data_object_doc:
                            data_objects.append(data_object_doc)
                    else:
                        next_frontier.append(output_id)

            frontier = next_frontier

        if data_objects:
            results.append({biosample["id"]: data_objects})

    return results
59
+
60
+
61
def fetch_omics_processing_from_biosamples(all_docs_collection, biosamples_list):
    """Walk each biosample's has_input/has_output chain and collect the
    process documents whose outputs are DataObjects.

    Note: unlike fetch_data_objects_from_biosamples, this re-fetches and
    collects the *producing* document (by ``document["id"]``), not the
    DataObject itself.

    :param all_docs_collection: Mongo-like collection exposing ``find_one``
    :param biosamples_list: NMDC biosample dicts (each with an "id")
    :return: list of one-entry dicts {biosample_id: [process_doc, ...]};
        biosamples with no matches are omitted
    """
    results = []

    for biosample in biosamples_list:
        frontier = [biosample["id"]]
        process_docs = []

        # Breadth-first traversal mirroring fetch_data_objects_from_biosamples.
        while frontier:
            next_frontier = []
            for node_id in frontier:
                document = all_docs_collection.find_one({"has_input": node_id})
                if not document:
                    continue

                outputs = document.get("has_output")
                if not outputs:
                    continue

                for output_id in outputs:
                    if get_classname_from_typecode(output_id) == "DataObject":
                        producing_doc = all_docs_collection.find_one(
                            {"id": document["id"]}
                        )
                        if producing_doc:
                            process_docs.append(producing_doc)
                    else:
                        next_frontier.append(output_id)

            frontier = next_frontier

        if process_docs:
            results.append({biosample["id"]: process_docs})

    return results
97
+
98
+
99
def handle_quantity_value(slot_value):
    """Render an NMDC QuantityValue dict as a display string.

    Preference order: exact numeric value + unit, then the width of a
    min/max range + unit, then the raw value; otherwise "Unknown format".
    """
    if "has_numeric_value" in slot_value and "has_unit" in slot_value:
        return f"{slot_value['has_numeric_value']} {slot_value['has_unit']}"

    has_range = (
        "has_maximum_numeric_value" in slot_value
        and "has_minimum_numeric_value" in slot_value
        and "has_unit" in slot_value
    )
    if has_range:
        width = (
            slot_value["has_maximum_numeric_value"]
            - slot_value["has_minimum_numeric_value"]
        )
        return f"{width} {slot_value['has_unit']}"

    if "has_raw_value" in slot_value:
        return slot_value["has_raw_value"]
    return "Unknown format"
115
+
116
+
117
def handle_text_value(slot_value):
    """Return the raw value of an NMDC TextValue, or "Unknown format"."""
    if "has_raw_value" in slot_value:
        return slot_value["has_raw_value"]
    return "Unknown format"
119
+
120
+
121
def handle_timestamp_value(slot_value):
    """Return the raw value of an NMDC TimestampValue, or "Unknown format"."""
    if "has_raw_value" in slot_value:
        return slot_value["has_raw_value"]
    return "Unknown format"
123
+
124
+
125
def handle_controlled_term_value(slot_value):
    """Format a ControlledTermValue as "name [id]", the id, the name, or the
    raw value; otherwise "Unknown format".

    A "term" key takes precedence over "has_raw_value", even if the term
    carries neither a name nor an id.
    """
    if "term" in slot_value:
        term = slot_value["term"]
        has_name = "name" in term
        has_id = "id" in term
        if has_name and has_id:
            return f"{term['name']} [{term['id']}]"
        if has_id:
            return term["id"]
        if has_name:
            return term["name"]
        return "Unknown format"
    if "has_raw_value" in slot_value:
        return slot_value["has_raw_value"]
    return "Unknown format"
137
+
138
+
139
def handle_controlled_identified_term_value(slot_value):
    """Format a ControlledIdentifiedTermValue as "name [id]", the id, or the
    raw value; otherwise "Unknown format".

    Unlike handle_controlled_term_value, a term with only a name (no id)
    yields "Unknown format". A "term" key takes precedence over
    "has_raw_value".
    """
    if "term" in slot_value:
        term = slot_value["term"]
        if "name" in term and "id" in term:
            return f"{term['name']} [{term['id']}]"
        if "id" in term:
            return term["id"]
        return "Unknown format"
    if "has_raw_value" in slot_value:
        return slot_value["has_raw_value"]
    return "Unknown format"
149
+
150
+
151
def handle_geolocation_value(slot_value):
    """Format a GeolocationValue as "<latitude> <longitude>", falling back to
    the raw value or "Unknown format"."""
    has_lat = "latitude" in slot_value
    has_lon = "longitude" in slot_value
    if has_lat and has_lon:
        return f"{slot_value['latitude']} {slot_value['longitude']}"
    if "has_raw_value" in slot_value:
        return slot_value["has_raw_value"]
    return "Unknown format"
157
+
158
+
159
def handle_float_value(slot_value):
    """Format a numeric value with exactly two decimal places."""
    return format(slot_value, ".2f")
161
+
162
+
163
def handle_string_value(slot_value):
    """Coerce any value to its default string form."""
    return "{}".format(slot_value)
165
+
166
+
167
def load_mappings(url):
    """Download the NMDC→NCBI attribute mapping TSV and build two lookups.

    :param url: URL of a tab-separated file with columns "ignore",
        "nmdc_schema_slot", "ncbi_biosample_attribute_name", and
        "nmdc_schema_slot_range"
    :return: (attribute_mappings, slot_range_mappings) — NMDC slot name to
        NCBI attribute name (the slot name itself when no mapping is given),
        and NMDC slot name to slot range ("default" when blank)
    :raises requests.HTTPError: if the download fails
    """
    response = requests.get(url)
    response.raise_for_status()

    attribute_mappings = {}
    slot_range_mappings = {}
    for row in csv.DictReader(StringIO(response.text), delimiter="\t"):
        # Rows with anything in the "ignore" column are skipped outright.
        if row["ignore"].strip():
            continue

        slot = row["nmdc_schema_slot"]
        attribute_mappings[slot] = row["ncbi_biosample_attribute_name"] or slot
        slot_range_mappings[slot] = row["nmdc_schema_slot_range"] or "default"

    return attribute_mappings, slot_range_mappings
191
+
192
+
193
def validate_xml(xml, xsd_url):
    """Validate an XML string against a remote XSD.

    :param xml: the XML document as a string
    :param xsd_url: URL of the XSD schema to validate against
    :return: True when the document validates
    :raises requests.HTTPError: if the schema download fails
    :raises ValueError: if the document does not validate
    """
    response = requests.get(xsd_url)
    response.raise_for_status()

    schema_doc = etree.parse(BytesIO(response.text.encode("utf-8")))
    schema = etree.XMLSchema(schema_doc)

    document = etree.parse(BytesIO(xml.encode("utf-8")))
    if not schema.validate(document):
        raise ValueError(f"There were errors while validating against: {xsd_url}")

    return True
@@ -5,7 +5,6 @@ Get NMDC study-associated metadata from search api
5
5
  import csv
6
6
  from io import StringIO
7
7
 
8
- import requests
9
8
  from dagster import (
10
9
  op,
11
10
  get_dagster_logger,
@@ -26,13 +25,27 @@ def get_all_docs(client, collection, filter_):
26
25
  per_page = 200
27
26
  url_base = f"/{collection}?filter={filter_}&per_page={per_page}"
28
27
  results = []
29
- rv = client.request("GET", url_base).json()
28
+ response = client.request("GET", url_base)
29
+ if response.status_code != 200:
30
+ raise Exception(
31
+ f"Runtime API request failed with status {response.status_code}."
32
+ f" Check URL: {url_base}"
33
+ )
34
+ rv = response.json()
30
35
  results.extend(rv.get("results", []))
31
36
  page, count = rv["meta"]["page"], rv["meta"]["count"]
32
37
  assert count <= 10_000
33
38
  while page * per_page < count:
34
- rv = requests.get(url_base + f"&page={page + 1}").json()
35
- results.extend(rv["results"])
39
+ page += 1
40
+ url = f"{url_base}&page={page}"
41
+ response = client.request("GET", url)
42
+ if response.status_code != 200:
43
+ raise Exception(
44
+ f"Runtime API request failed with status {response.status_code}."
45
+ f" Check URL: {url}"
46
+ )
47
+ rv = response.json()
48
+ results.extend(rv.get("results", []))
36
49
  return results
37
50
 
38
51
 
@@ -115,3 +128,10 @@ def export_study_biosamples_as_csv(context: OpExecutionContext, study_export_inf
115
128
  def export_study_biosamples_metadata():
116
129
  outputs = export_study_biosamples_as_csv(get_study_biosamples_metadata())
117
130
  add_output_run_event(outputs)
131
+
132
+
133
@op(required_resource_keys={"runtime_api_site_client"})
def get_biosamples_by_study_id(context: OpExecutionContext, nmdc_study: dict):
    """Dagster op: fetch all biosample documents linked (part_of) to the given
    NMDC study via the Runtime API site client.

    :param context: Dagster op context providing the runtime_api_site_client
        resource
    :param nmdc_study: NMDC study document; only its "id" is read
    :return: list of biosample documents returned by get_all_docs
    """
    client: RuntimeApiSiteClient = context.resources.runtime_api_site_client
    biosamples = get_all_docs(client, "biosamples", f"part_of:{nmdc_study['id']}")
    return biosamples