nmdc-runtime 2.4.0__py3-none-any.whl → 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nmdc-runtime might be problematic. Click here for more details.

nmdc_runtime/site/ops.py CHANGED
@@ -1100,7 +1100,12 @@ def materialize_alldocs(context) -> int:
1100
1100
  write_operations = []
1101
1101
  documents_processed_counter = 0
1102
1102
  for doc in mdb[coll_name].find():
1103
- doc_type = doc["type"][5:] # lop off "nmdc:" prefix
1103
+ try:
1104
+ doc_type = doc["type"][5:] # lop off "nmdc:" prefix
1105
+ except KeyError:
1106
+ raise Exception(
1107
+ f"doc {doc['id']} in collection {coll_name} has no 'type'!"
1108
+ )
1104
1109
  slots_to_include = ["id", "type"] + document_reference_ranged_slots[
1105
1110
  doc_type
1106
1111
  ]
@@ -199,8 +199,20 @@ class DatabaseUpdater:
199
199
  if gbs.get("biosampleGoldId") not in nmdc_gold_ids
200
200
  ]
201
201
 
202
+ # use the GOLD study id to fetch all sequencing project records associated with the study
203
+ gold_sequencing_projects_for_study = (
204
+ self.gold_api_client.fetch_projects_by_study(gold_study_id)
205
+ )
206
+
207
+ # use the GOLD study id to fetch all analysis project records associated with the study
208
+ gold_analysis_projects_for_study = (
209
+ self.gold_api_client.fetch_analysis_projects_by_study(gold_study_id)
210
+ )
211
+
202
212
  gold_study_translator = GoldStudyTranslator(
203
213
  biosamples=missing_gold_biosamples,
214
+ projects=gold_sequencing_projects_for_study,
215
+ analysis_projects=gold_analysis_projects_for_study,
204
216
  gold_nmdc_instrument_map_df=self.gold_nmdc_instrument_map_df,
205
217
  )
206
218
 
@@ -744,7 +744,7 @@ def biosample_submission_ingest():
744
744
  "config": {
745
745
  "benthic_data_product": {
746
746
  "product_id": "DP1.20279.001",
747
- "product_tables": "mms_benthicMetagenomeSequencing, mms_benthicMetagenomeDnaExtraction, mms_benthicRawDataFiles, amb_fieldParent",
747
+ "product_tables": "mms_benthicMetagenomeSequencing, mms_benthicMetagenomeDnaExtraction, mms_benthicRawDataFiles, amb_fieldParent, mms_mms_benthicRawDataFiles",
748
748
  }
749
749
  }
750
750
  },
@@ -771,7 +771,7 @@ def biosample_submission_ingest():
771
771
  "config": {
772
772
  "benthic_data_product": {
773
773
  "product_id": "DP1.20279.001",
774
- "product_tables": "mms_benthicMetagenomeSequencing, mms_benthicMetagenomeDnaExtraction, mms_benthicRawDataFiles, amb_fieldParent",
774
+ "product_tables": "mms_benthicMetagenomeSequencing, mms_benthicMetagenomeDnaExtraction, mms_benthicRawDataFiles, amb_fieldParent, mms_mms_benthicRawDataFiles",
775
775
  }
776
776
  }
777
777
  },
@@ -1,6 +1,6 @@
1
1
  import re
2
2
  import sqlite3
3
- from typing import Union
3
+ from typing import Optional, Union
4
4
 
5
5
  import pandas as pd
6
6
  import requests_cache
@@ -61,6 +61,7 @@ class NeonBenthicDataTranslator(Translator):
61
61
  "mms_benthicMetagenomeSequencing",
62
62
  "mms_benthicMetagenomeDnaExtraction",
63
63
  "amb_fieldParent",
64
+ "mms_benthicRawDataFiles", # <--- ensure this is present
64
65
  )
65
66
 
66
67
  if all(k in benthic_data for k in neon_amb_data_tables):
@@ -79,6 +80,12 @@ class NeonBenthicDataTranslator(Translator):
79
80
  benthic_data["amb_fieldParent"].to_sql(
80
81
  "amb_fieldParent", self.conn, if_exists="replace", index=False
81
82
  )
83
+ benthic_data["mms_benthicRawDataFiles"].to_sql(
84
+ "mms_benthicRawDataFiles",
85
+ self.conn,
86
+ if_exists="replace",
87
+ index=False,
88
+ )
82
89
  else:
83
90
  raise ValueError(
84
91
  f"You are missing one of the aquatic benthic microbiome tables: {neon_amb_data_tables}"
@@ -88,14 +95,19 @@ class NeonBenthicDataTranslator(Translator):
88
95
  "neonEnvoTerms", self.conn, if_exists="replace", index=False
89
96
  )
90
97
 
91
- self.neon_raw_data_file_mappings_df = neon_raw_data_file_mappings_file
92
- self.neon_raw_data_file_mappings_df.to_sql(
93
- "neonRawDataFile", self.conn, if_exists="replace", index=False
94
- )
98
+ self.neon_raw_data_file_mappings_df = benthic_data["mms_benthicRawDataFiles"]
95
99
 
96
100
  self.site_code_mapping = site_code_mapping
101
+
97
102
  self.neon_nmdc_instrument_map_df = neon_nmdc_instrument_map_df
98
103
 
104
+ def _translate_manifest(self, manifest_id: str) -> nmdc.Manifest:
105
+ return nmdc.Manifest(
106
+ id=manifest_id,
107
+ manifest_category=nmdc.ManifestCategoryEnum.poolable_replicates,
108
+ type="nmdc:Manifest",
109
+ )
110
+
99
111
  def _translate_biosample(
100
112
  self, neon_id: str, nmdc_id: str, biosample_row: pd.DataFrame
101
113
  ) -> nmdc.Biosample:
@@ -313,7 +325,7 @@ class NeonBenthicDataTranslator(Translator):
313
325
  )
314
326
 
315
327
  def _translate_data_object(
316
- self, do_id: str, url: str, do_type: str, checksum: str
328
+ self, do_id: str, url: str, do_type: str, manifest_id: str
317
329
  ) -> nmdc.DataObject:
318
330
  """Create nmdc DataObject which is the output of a NucleotideSequencing process. This
319
331
  object mainly contains information about the sequencing file that was generated as
@@ -324,7 +336,6 @@ class NeonBenthicDataTranslator(Translator):
324
336
  :param url: URL of zipped FASTQ file on NEON file server. Retrieved from file provided
325
337
  by Hugh Cross at NEON.
326
338
  :param do_type: Indicate whether it is FASTQ for Read 1 or Read 2 (paired end sequencing).
327
- :param checksum: Checksum value for FASTQ in zip file, once again provided by Hugh Cross
328
339
  at NEON.
329
340
  :return: DataObject with all the sequencing file metadata.
330
341
  """
@@ -337,14 +348,14 @@ class NeonBenthicDataTranslator(Translator):
337
348
  url=url,
338
349
  description=f"sequencing results for {basename}",
339
350
  type="nmdc:DataObject",
340
- md5_checksum=checksum,
341
351
  data_object_type=do_type,
352
+ in_manifest=manifest_id,
342
353
  )
343
354
 
344
- def get_database(self):
355
+ def get_database(self) -> nmdc.Database:
345
356
  database = nmdc.Database()
346
357
 
347
- query = """
358
+ join_query = """
348
359
  SELECT
349
360
  merged.laboratoryName,
350
361
  merged.sequencingFacilityID,
@@ -372,202 +383,190 @@ class NeonBenthicDataTranslator(Translator):
372
383
  afp.siteID,
373
384
  afp.sampleID,
374
385
  afp.collectDate
375
- FROM
376
- (
377
- SELECT
378
- bs.collectDate,
379
- bs.laboratoryName,
380
- bs.sequencingFacilityID,
381
- bs.processedDate,
382
- bs.dnaSampleID,
383
- bs.dnaSampleCode,
384
- bs.internalLabID,
385
- bs.instrument_model,
386
- bs.sequencingMethod,
387
- bs.investigation_type,
388
- bs.qaqcStatus,
389
- bs.ncbiProjectID,
390
- bd.genomicsSampleID,
391
- bd.sequenceAnalysisType,
392
- bd.sampleMass,
393
- bd.nucleicAcidConcentration
394
- FROM
395
- mms_benthicMetagenomeSequencing AS bs
396
- JOIN
397
- mms_benthicMetagenomeDnaExtraction AS bd
398
- ON
399
- bs.dnaSampleID = bd.dnaSampleID
400
- ) AS merged
386
+ FROM (
387
+ SELECT
388
+ bs.collectDate,
389
+ bs.laboratoryName,
390
+ bs.sequencingFacilityID,
391
+ bs.processedDate,
392
+ bs.dnaSampleID,
393
+ bs.dnaSampleCode,
394
+ bs.internalLabID,
395
+ bs.instrument_model,
396
+ bs.sequencingMethod,
397
+ bs.investigation_type,
398
+ bs.qaqcStatus,
399
+ bs.ncbiProjectID,
400
+ bd.genomicsSampleID,
401
+ bd.sequenceAnalysisType,
402
+ bd.sampleMass,
403
+ bd.nucleicAcidConcentration
404
+ FROM mms_benthicMetagenomeSequencing AS bs
405
+ JOIN mms_benthicMetagenomeDnaExtraction AS bd
406
+ ON bs.dnaSampleID = bd.dnaSampleID
407
+ ) AS merged
401
408
  LEFT JOIN amb_fieldParent AS afp
402
- ON
403
- merged.genomicsSampleID = afp.geneticSampleID
409
+ ON merged.genomicsSampleID = afp.geneticSampleID
404
410
  """
405
- benthic_samples = pd.read_sql_query(query, self.conn)
411
+ benthic_samples = pd.read_sql_query(join_query, self.conn)
406
412
  benthic_samples.to_sql(
407
413
  "benthicSamples", self.conn, if_exists="replace", index=False
408
414
  )
409
415
 
410
- neon_biosample_ids = benthic_samples["sampleID"]
411
- nmdc_biosample_ids = self._id_minter("nmdc:Biosample", len(neon_biosample_ids))
412
- neon_to_nmdc_biosample_ids = dict(zip(neon_biosample_ids, nmdc_biosample_ids))
416
+ sample_ids = benthic_samples["sampleID"]
417
+ nmdc_biosample_ids = self._id_minter("nmdc:Biosample", len(sample_ids))
418
+ neon_to_nmdc_biosample_ids = dict(zip(sample_ids, nmdc_biosample_ids))
413
419
 
414
- neon_extraction_ids = benthic_samples["sampleID"]
415
- nmdc_extraction_ids = self._id_minter(
416
- "nmdc:Extraction", len(neon_extraction_ids)
417
- )
418
- neon_to_nmdc_extraction_ids = dict(
419
- zip(neon_extraction_ids, nmdc_extraction_ids)
420
- )
420
+ nmdc_extraction_ids = self._id_minter("nmdc:Extraction", len(sample_ids))
421
+ neon_to_nmdc_extraction_ids = dict(zip(sample_ids, nmdc_extraction_ids))
421
422
 
422
- neon_extraction_processed_ids = benthic_samples["sampleID"]
423
423
  nmdc_extraction_processed_ids = self._id_minter(
424
- "nmdc:ProcessedSample", len(neon_extraction_processed_ids)
424
+ "nmdc:ProcessedSample", len(sample_ids)
425
425
  )
426
426
  neon_to_nmdc_extraction_processed_ids = dict(
427
- zip(neon_extraction_processed_ids, nmdc_extraction_processed_ids)
427
+ zip(sample_ids, nmdc_extraction_processed_ids)
428
428
  )
429
429
 
430
- neon_lib_prep_ids = benthic_samples["sampleID"]
431
- nmdc_lib_prep_ids = self._id_minter(
432
- "nmdc:LibraryPreparation", len(neon_lib_prep_ids)
433
- )
434
- neon_to_nmdc_lib_prep_ids = dict(zip(neon_lib_prep_ids, nmdc_lib_prep_ids))
430
+ nmdc_libprep_ids = self._id_minter("nmdc:LibraryPreparation", len(sample_ids))
431
+ neon_to_nmdc_libprep_ids = dict(zip(sample_ids, nmdc_libprep_ids))
435
432
 
436
- neon_lib_prep_processed_ids = benthic_samples["sampleID"]
437
- nmdc_lib_prep_processed_ids = self._id_minter(
438
- "nmdc:ProcessedSample", len(neon_lib_prep_processed_ids)
433
+ nmdc_libprep_processed_ids = self._id_minter(
434
+ "nmdc:ProcessedSample", len(sample_ids)
439
435
  )
440
- neon_to_nmdc_lib_prep_processed_ids = dict(
441
- zip(neon_lib_prep_processed_ids, nmdc_lib_prep_processed_ids)
436
+ neon_to_nmdc_libprep_processed_ids = dict(
437
+ zip(sample_ids, nmdc_libprep_processed_ids)
442
438
  )
443
439
 
444
- neon_omprc_ids = benthic_samples["sampleID"]
445
- nmdc_omprc_ids = self._id_minter(
446
- "nmdc:NucleotideSequencing", len(neon_omprc_ids)
447
- )
448
- neon_to_nmdc_omprc_ids = dict(zip(neon_omprc_ids, nmdc_omprc_ids))
440
+ nmdc_ntseq_ids = self._id_minter("nmdc:NucleotideSequencing", len(sample_ids))
441
+ neon_to_nmdc_ntseq_ids = dict(zip(sample_ids, nmdc_ntseq_ids))
449
442
 
450
- neon_raw_data_file_mappings_df = self.neon_raw_data_file_mappings_df
451
- neon_raw_file_paths = neon_raw_data_file_mappings_df["rawDataFilePath"]
452
- nmdc_data_object_ids = self._id_minter(
453
- "nmdc:DataObject", len(neon_raw_file_paths)
454
- )
455
- neon_to_nmdc_data_object_ids = dict(
456
- zip(neon_raw_file_paths, nmdc_data_object_ids)
457
- )
443
+ raw_df = self.neon_raw_data_file_mappings_df
444
+ raw_file_paths = raw_df["rawDataFilePath"]
445
+ dataobject_ids = self._id_minter("nmdc:DataObject", len(raw_file_paths))
446
+ neon_to_nmdc_dataobject_ids = dict(zip(raw_file_paths, dataobject_ids))
458
447
 
459
- for neon_id, nmdc_id in neon_to_nmdc_biosample_ids.items():
460
- biosample_row = benthic_samples[benthic_samples["sampleID"] == neon_id]
448
+ for neon_id, biosample_id in neon_to_nmdc_biosample_ids.items():
449
+ row = benthic_samples[benthic_samples["sampleID"] == neon_id]
450
+ if row.empty:
451
+ continue
461
452
 
453
+ # Example of how you might call _translate_biosample:
462
454
  database.biosample_set.append(
463
- self._translate_biosample(neon_id, nmdc_id, biosample_row)
455
+ self._translate_biosample(neon_id, biosample_id, row)
464
456
  )
465
457
 
466
- for neon_id, nmdc_id in neon_to_nmdc_extraction_ids.items():
467
- extraction_row = benthic_samples[benthic_samples["sampleID"] == neon_id]
458
+ for neon_id, extraction_id in neon_to_nmdc_extraction_ids.items():
459
+ row = benthic_samples[benthic_samples["sampleID"] == neon_id]
460
+ if row.empty:
461
+ continue
468
462
 
469
- extraction_input = neon_to_nmdc_biosample_ids.get(neon_id)
470
- processed_sample_id = neon_to_nmdc_extraction_processed_ids.get(neon_id)
463
+ biosample_id = neon_to_nmdc_biosample_ids.get(neon_id)
464
+ extraction_ps_id = neon_to_nmdc_extraction_processed_ids.get(neon_id)
471
465
 
472
- if extraction_input is not None and processed_sample_id is not None:
466
+ if biosample_id and extraction_ps_id:
473
467
  database.material_processing_set.append(
474
468
  self._translate_extraction_process(
475
- nmdc_id,
476
- extraction_input,
477
- processed_sample_id,
478
- extraction_row,
469
+ extraction_id, biosample_id, extraction_ps_id, row
479
470
  )
480
471
  )
481
-
482
- genomics_sample_id = _get_value_or_none(
483
- extraction_row, "genomicsSampleID"
484
- )
485
-
472
+ genomics_sample_id = _get_value_or_none(row, "genomicsSampleID")
486
473
  database.processed_sample_set.append(
487
474
  self._translate_processed_sample(
488
- processed_sample_id,
475
+ extraction_ps_id,
489
476
  f"Extracted DNA from {genomics_sample_id}",
490
477
  )
491
478
  )
492
479
 
493
- query = """
480
+ query2 = """
494
481
  SELECT dnaSampleID, GROUP_CONCAT(rawDataFilePath, '|') AS rawDataFilePaths
495
- FROM neonRawDataFile
482
+ FROM mms_benthicRawDataFiles
496
483
  GROUP BY dnaSampleID
497
484
  """
498
- neon_raw_data_files = pd.read_sql_query(query, self.conn)
499
- neon_raw_data_files_dict = (
500
- neon_raw_data_files.set_index("dnaSampleID")["rawDataFilePaths"]
485
+ raw_data_files_df = pd.read_sql_query(query2, self.conn)
486
+ dna_files_dict = (
487
+ raw_data_files_df.set_index("dnaSampleID")["rawDataFilePaths"]
501
488
  .str.split("|")
502
489
  .to_dict()
503
490
  )
504
- filtered_neon_raw_data_files_dict = {
505
- key: value
506
- for key, value in neon_raw_data_files_dict.items()
507
- if len(value) <= 2
508
- }
509
491
 
510
- for neon_id, nmdc_id in neon_to_nmdc_lib_prep_ids.items():
511
- lib_prep_row = benthic_samples[benthic_samples["sampleID"] == neon_id]
492
+ dna_sample_to_manifest_id: dict[str, str] = {}
512
493
 
513
- lib_prep_input = neon_to_nmdc_extraction_processed_ids.get(neon_id)
514
- processed_sample_id = neon_to_nmdc_lib_prep_processed_ids.get(neon_id)
494
+ for neon_id, libprep_id in neon_to_nmdc_libprep_ids.items():
495
+ row = benthic_samples[benthic_samples["sampleID"] == neon_id]
496
+ if row.empty:
497
+ continue
515
498
 
516
- if lib_prep_input is not None and processed_sample_id is not None:
517
- database.material_processing_set.append(
518
- self._translate_library_preparation(
519
- nmdc_id,
520
- lib_prep_input,
521
- processed_sample_id,
522
- lib_prep_row,
523
- )
499
+ extr_ps_id = neon_to_nmdc_extraction_processed_ids.get(neon_id)
500
+ libprep_ps_id = neon_to_nmdc_libprep_processed_ids.get(neon_id)
501
+ if not extr_ps_id or not libprep_ps_id:
502
+ continue
503
+
504
+ database.material_processing_set.append(
505
+ self._translate_library_preparation(
506
+ libprep_id, extr_ps_id, libprep_ps_id, row
524
507
  )
508
+ )
525
509
 
526
- dna_sample_id = _get_value_or_none(lib_prep_row, "dnaSampleID")
510
+ dna_sample_id = _get_value_or_none(row, "dnaSampleID")
511
+ database.processed_sample_set.append(
512
+ self._translate_processed_sample(
513
+ libprep_ps_id,
514
+ f"Library preparation for {dna_sample_id}",
515
+ )
516
+ )
527
517
 
528
- database.processed_sample_set.append(
529
- self._translate_processed_sample(
530
- processed_sample_id,
531
- f"Library preparation for {dna_sample_id}",
518
+ filepaths_for_dna: list[str] = dna_files_dict.get(dna_sample_id, [])
519
+ if not filepaths_for_dna:
520
+ # no raw files => skip
521
+ ntseq_id = neon_to_nmdc_ntseq_ids.get(neon_id)
522
+ if ntseq_id:
523
+ continue
524
+ continue
525
+
526
+ # If multiple => we create a Manifest
527
+ manifest_id: Optional[str] = None
528
+ if len(filepaths_for_dna) > 2:
529
+ if dna_sample_id not in dna_sample_to_manifest_id:
530
+ new_man_id = self._id_minter("nmdc:Manifest", 1)[0]
531
+ dna_sample_to_manifest_id[dna_sample_id] = new_man_id
532
+ database.manifest_set.append(self._translate_manifest(new_man_id))
533
+ manifest_id = dna_sample_to_manifest_id[dna_sample_id]
534
+
535
+ has_input_value = self.samp_procsm_dict.get(neon_id)
536
+ if not has_input_value:
537
+ continue
538
+
539
+ dataobject_ids_for_run: list[str] = []
540
+ for fp in filepaths_for_dna:
541
+ if fp not in neon_to_nmdc_dataobject_ids:
542
+ continue
543
+ do_id = neon_to_nmdc_dataobject_ids[fp]
544
+
545
+ do_type = None
546
+ if "_R1.fastq.gz" in fp:
547
+ do_type = "Metagenome Raw Read 1"
548
+ elif "_R2.fastq.gz" in fp:
549
+ do_type = "Metagenome Raw Read 2"
550
+
551
+ database.data_object_set.append(
552
+ self._translate_data_object(
553
+ do_id=do_id,
554
+ url=fp,
555
+ do_type=do_type,
556
+ manifest_id=manifest_id,
532
557
  )
533
558
  )
534
-
535
- has_output = None
536
- has_output_do_ids = []
537
-
538
- if dna_sample_id in filtered_neon_raw_data_files_dict:
539
- has_output = filtered_neon_raw_data_files_dict[dna_sample_id]
540
- for item in has_output:
541
- if item in neon_to_nmdc_data_object_ids:
542
- has_output_do_ids.append(neon_to_nmdc_data_object_ids[item])
543
-
544
- checksum = None
545
- do_type = None
546
-
547
- checksum = neon_raw_data_file_mappings_df[
548
- neon_raw_data_file_mappings_df["rawDataFilePath"] == item
549
- ]["checkSum"].values[0]
550
- if "_R1.fastq.gz" in item:
551
- do_type = "Metagenome Raw Read 1"
552
- elif "_R2.fastq.gz" in item:
553
- do_type = "Metagenome Raw Read 2"
554
-
555
- database.data_object_set.append(
556
- self._translate_data_object(
557
- neon_to_nmdc_data_object_ids.get(item),
558
- item,
559
- do_type,
560
- checksum,
561
- )
562
- )
563
-
564
- database.data_generation_set.append(
565
- self._translate_nucleotide_sequencing(
566
- neon_to_nmdc_omprc_ids.get(neon_id),
567
- processed_sample_id,
568
- has_output_do_ids,
569
- lib_prep_row,
570
- )
559
+ dataobject_ids_for_run.append(do_id)
560
+
561
+ ntseq_id = neon_to_nmdc_ntseq_ids.get(neon_id)
562
+ if ntseq_id:
563
+ database.data_generation_set.append(
564
+ self._translate_nucleotide_sequencing(
565
+ ntseq_id,
566
+ has_input_value, # <--- from self.samp_procsm_dict
567
+ dataobject_ids_for_run,
568
+ row,
571
569
  )
570
+ )
572
571
 
573
572
  return database
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: nmdc_runtime
3
- Version: 2.4.0
3
+ Version: 2.5.0
4
4
  Summary: A runtime system for NMDC data management and orchestration
5
5
  Home-page: https://github.com/microbiomedata/nmdc-runtime
6
6
  Author: Donny Winston
@@ -17,6 +17,7 @@ Dynamic: classifier
17
17
  Dynamic: description
18
18
  Dynamic: description-content-type
19
19
  Dynamic: home-page
20
+ Dynamic: license-file
20
21
  Dynamic: requires-python
21
22
  Dynamic: summary
22
23
 
@@ -37,8 +38,7 @@ houses the LinkML schema specification, as well as generated artifacts (e.g. JSO
37
38
  * [nmdc-server](https://github.com/microbiomedata/nmdc-server)
38
39
  houses code specific to the data portal -- its database, back-end API, and front-end application.
39
40
 
40
- * [workflow_documentation](https://docs.microbiomedata.org/workflows/)
41
- references workflow code spread across several repositories, that take source data and produce computed data.
41
+ * Workflows — documented in the [workflows](https://docs.microbiomedata.org/workflows/) section of the NMDC documentation website — take source data and produce computed data.
42
42
 
43
43
  * This repo (nmdc-runtime)
44
44
  * houses code that takes source data and computed data, and transforms it
@@ -156,6 +156,9 @@ Tests can be found in `tests` and are run with the following commands:
156
156
  ```bash
157
157
  make up-test
158
158
  make test
159
+
160
+ # Run a specific test file, e.g., tests/test_api/test_endpoints.py
161
+ make test ARGS="tests/test_api/test_endpoints.py"
159
162
  ```
160
163
 
161
164
  As you create Dagster solids and pipelines, add tests in `tests/` to check that your code behaves as
@@ -164,6 +167,16 @@ desired and does not break over time.
164
167
  [For hints on how to write tests for solids and pipelines in Dagster, see their documentation
165
168
  tutorial on Testing](https://docs.dagster.io/tutorial/testable).
166
169
 
170
+ ### RAM usage
171
+
172
+ The `dagster-daemon` and `dagster-dagit` containers can consume a lot of RAM. If tests are failing and the console of
173
+ the `test` container shows "Error 137" (typically a sign that a process was killed for running out of memory), here is something you can try as a workaround: In Docker Desktop, go to
174
+ "Settings > Resources > Advanced," and increase the memory limit. One of our team members has
175
+ found **12 GB** to be sufficient for running the tests.
176
+
177
+ > Dedicating 12 GB of RAM to Docker may be prohibitive for some prospective developers.
178
+ > There is an open [issue](https://github.com/microbiomedata/nmdc-runtime/issues/928) about the memory requirement.
179
+
167
180
  ## Publish to PyPI
168
181
 
169
182
  This repository contains a GitHub Actions workflow that publishes a Python package to [PyPI](https://pypi.org/project/nmdc-runtime/).
@@ -37,8 +37,8 @@ nmdc_runtime/minter/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
37
37
  nmdc_runtime/minter/entrypoints/fastapi_app.py,sha256=JC4thvzfFwRc1mhWQ-kHy3yvs0SYxF6ktE7LXNCwqlI,4031
38
38
  nmdc_runtime/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
39
  nmdc_runtime/site/graphs.py,sha256=SA4Ibb1TenVgiYD4wNrMiOCwkMKrv6BTcQ8mbG4VXPs,15666
40
- nmdc_runtime/site/ops.py,sha256=p4F5SrDbFdKOrAHu1TUhWQA33QB7hdoQmCCuU-00Eqo,46445
41
- nmdc_runtime/site/repository.py,sha256=pfx7WAVgdNaPhtfF2pak-tllqPMf4-yUeOXSpr4hu30,43861
40
+ nmdc_runtime/site/ops.py,sha256=tg-zRlVSUSJ7B0cJbBsUwmMRmpIUmK5tsL8ABnY0wnY,46626
41
+ nmdc_runtime/site/repository.py,sha256=kVCoIMF2rgAMUAf9a6jk0WbejFpmWgxh6nN4U37Mgc8,43919
42
42
  nmdc_runtime/site/resources.py,sha256=sqtRWb4ewU61U-JZTphsC4wBvYT5B0wj33WU70vjq_k,19677
43
43
  nmdc_runtime/site/util.py,sha256=-DnNv15r6XmFZrDG3d3Z-bFn2pFo0xpyyudhKlYPJKc,1559
44
44
  nmdc_runtime/site/backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -57,13 +57,13 @@ nmdc_runtime/site/export/study_metadata.py,sha256=yR5pXL6JG8d7cAtqcF-60Hp7bLD3dJ
57
57
  nmdc_runtime/site/normalization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
58
  nmdc_runtime/site/normalization/gold.py,sha256=iISDD4qs4d6uLhv631WYNeQVOzY5DO201ZpPtxHdkVk,1311
59
59
  nmdc_runtime/site/repair/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
60
- nmdc_runtime/site/repair/database_updater.py,sha256=EMuY8MfwQEfdejJHp0Y-Gb1eb1zOgKgfJxbtm6wM3YU,10943
60
+ nmdc_runtime/site/repair/database_updater.py,sha256=eTNAPtgAc_xQodADBfgomwow9-14j5rBqQWF8R7BheY,11525
61
61
  nmdc_runtime/site/translation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
62
  nmdc_runtime/site/translation/emsl.py,sha256=-aCTJTSCNaK-Koh8BE_4fTf5nyxP1KkquR6lloLEJl0,1245
63
63
  nmdc_runtime/site/translation/gold.py,sha256=R3W99sdQb7Pgu_esN7ruIC-tyREQD_idJ4xCzkqWuGw,1622
64
64
  nmdc_runtime/site/translation/gold_translator.py,sha256=Zw4IjxMWJ1wdLyxX44-faq-Nfmr42-Na1gw-Xn4tg6I,32272
65
65
  nmdc_runtime/site/translation/jgi.py,sha256=qk878KhIw674TkrVfbl2x1QJrKi3zlvE0vesIpe9slM,876
66
- nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=QIDqYLuf-NlGY9_88gy_5qTswkei3OfgJ5AOFpEXzJo,23985
66
+ nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=VxN7yCziQE-ZP9mtrzqI-yaS9taEgTy0EnIEattYeKo,23727
67
67
  nmdc_runtime/site/translation/neon_soil_translator.py,sha256=Rol0g67nVBGSBySUzpfdW4Fwes7bKtvnlv2g5cB0aTI,38550
68
68
  nmdc_runtime/site/translation/neon_surface_water_translator.py,sha256=k06eULMTYx0sQ00UlyeNJvCJMcX-neClnES1G6zpPKg,30517
69
69
  nmdc_runtime/site/translation/neon_utils.py,sha256=d00o7duKKugpLHmsEifNbp4WjeC4GOqcgw0b5qlCg4I,5549
@@ -75,9 +75,9 @@ nmdc_runtime/site/validation/emsl.py,sha256=OG20mv_3E2rkQqTQtYO0_SVRqFb-Z_zKCiAV
75
75
  nmdc_runtime/site/validation/gold.py,sha256=Z5ZzYdjERbrJ2Tu06d0TDTBSfwaFdL1Z23Rl-YkZ2Ow,803
76
76
  nmdc_runtime/site/validation/jgi.py,sha256=LdJfhqBVHWCDp0Kzyk8eJZMwEI5NQ-zuTda31BcGwOA,1299
77
77
  nmdc_runtime/site/validation/util.py,sha256=GGbMDSwR090sr_E_fHffCN418gpYESaiot6XghS7OYk,3349
78
- nmdc_runtime-2.4.0.dist-info/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
79
- nmdc_runtime-2.4.0.dist-info/METADATA,sha256=CeZZbucd3jrD0ZqGdreH2x7ALrM9pt4ksGV2olkkpPI,7401
80
- nmdc_runtime-2.4.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
81
- nmdc_runtime-2.4.0.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
82
- nmdc_runtime-2.4.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
83
- nmdc_runtime-2.4.0.dist-info/RECORD,,
78
+ nmdc_runtime-2.5.0.dist-info/licenses/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
79
+ nmdc_runtime-2.5.0.dist-info/METADATA,sha256=tli66QKJC-48TzLXbI9iHMzTLyugbRBKj9CJEeKHXLY,8139
80
+ nmdc_runtime-2.5.0.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
81
+ nmdc_runtime-2.5.0.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
82
+ nmdc_runtime-2.5.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
83
+ nmdc_runtime-2.5.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (78.0.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5