nmdc-runtime 1.3.1__py3-none-any.whl → 2.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. nmdc_runtime/Dockerfile +177 -0
  2. nmdc_runtime/api/analytics.py +90 -0
  3. nmdc_runtime/api/boot/capabilities.py +9 -0
  4. nmdc_runtime/api/boot/object_types.py +126 -0
  5. nmdc_runtime/api/boot/triggers.py +84 -0
  6. nmdc_runtime/api/boot/workflows.py +116 -0
  7. nmdc_runtime/api/core/auth.py +212 -0
  8. nmdc_runtime/api/core/idgen.py +200 -0
  9. nmdc_runtime/api/core/metadata.py +777 -0
  10. nmdc_runtime/api/core/util.py +114 -0
  11. nmdc_runtime/api/db/mongo.py +436 -0
  12. nmdc_runtime/api/db/s3.py +37 -0
  13. nmdc_runtime/api/endpoints/capabilities.py +25 -0
  14. nmdc_runtime/api/endpoints/find.py +634 -0
  15. nmdc_runtime/api/endpoints/jobs.py +206 -0
  16. nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
  17. nmdc_runtime/api/endpoints/lib/linked_instances.py +193 -0
  18. nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
  19. nmdc_runtime/api/endpoints/metadata.py +260 -0
  20. nmdc_runtime/api/endpoints/nmdcschema.py +515 -0
  21. nmdc_runtime/api/endpoints/object_types.py +38 -0
  22. nmdc_runtime/api/endpoints/objects.py +277 -0
  23. nmdc_runtime/api/endpoints/operations.py +78 -0
  24. nmdc_runtime/api/endpoints/queries.py +701 -0
  25. nmdc_runtime/api/endpoints/runs.py +98 -0
  26. nmdc_runtime/api/endpoints/search.py +38 -0
  27. nmdc_runtime/api/endpoints/sites.py +205 -0
  28. nmdc_runtime/api/endpoints/triggers.py +25 -0
  29. nmdc_runtime/api/endpoints/users.py +214 -0
  30. nmdc_runtime/api/endpoints/util.py +817 -0
  31. nmdc_runtime/api/endpoints/wf_file_staging.py +307 -0
  32. nmdc_runtime/api/endpoints/workflows.py +353 -0
  33. nmdc_runtime/api/entrypoint.sh +7 -0
  34. nmdc_runtime/api/main.py +495 -0
  35. nmdc_runtime/api/middleware.py +43 -0
  36. nmdc_runtime/api/models/capability.py +14 -0
  37. nmdc_runtime/api/models/id.py +92 -0
  38. nmdc_runtime/api/models/job.py +57 -0
  39. nmdc_runtime/api/models/lib/helpers.py +78 -0
  40. nmdc_runtime/api/models/metadata.py +11 -0
  41. nmdc_runtime/api/models/nmdc_schema.py +146 -0
  42. nmdc_runtime/api/models/object.py +180 -0
  43. nmdc_runtime/api/models/object_type.py +20 -0
  44. nmdc_runtime/api/models/operation.py +66 -0
  45. nmdc_runtime/api/models/query.py +246 -0
  46. nmdc_runtime/api/models/query_continuation.py +111 -0
  47. nmdc_runtime/api/models/run.py +161 -0
  48. nmdc_runtime/api/models/site.py +87 -0
  49. nmdc_runtime/api/models/trigger.py +13 -0
  50. nmdc_runtime/api/models/user.py +207 -0
  51. nmdc_runtime/api/models/util.py +260 -0
  52. nmdc_runtime/api/models/wfe_file_stages.py +122 -0
  53. nmdc_runtime/api/models/workflow.py +15 -0
  54. nmdc_runtime/api/openapi.py +178 -0
  55. nmdc_runtime/api/swagger_ui/assets/EllipsesButton.js +146 -0
  56. nmdc_runtime/api/swagger_ui/assets/EndpointSearchWidget.js +369 -0
  57. nmdc_runtime/api/swagger_ui/assets/script.js +252 -0
  58. nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
  59. nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
  60. nmdc_runtime/config.py +56 -0
  61. nmdc_runtime/minter/adapters/repository.py +22 -2
  62. nmdc_runtime/minter/config.py +30 -4
  63. nmdc_runtime/minter/domain/model.py +55 -1
  64. nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
  65. nmdc_runtime/mongo_util.py +89 -0
  66. nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
  67. nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
  68. nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
  69. nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
  70. nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
  71. nmdc_runtime/site/dagster.yaml +53 -0
  72. nmdc_runtime/site/entrypoint-daemon.sh +29 -0
  73. nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
  74. nmdc_runtime/site/entrypoint-dagit.sh +29 -0
  75. nmdc_runtime/site/export/ncbi_xml.py +1331 -0
  76. nmdc_runtime/site/export/ncbi_xml_utils.py +405 -0
  77. nmdc_runtime/site/export/study_metadata.py +27 -4
  78. nmdc_runtime/site/graphs.py +294 -45
  79. nmdc_runtime/site/ops.py +1008 -230
  80. nmdc_runtime/site/repair/database_updater.py +451 -0
  81. nmdc_runtime/site/repository.py +368 -133
  82. nmdc_runtime/site/resources.py +154 -80
  83. nmdc_runtime/site/translation/gold_translator.py +235 -83
  84. nmdc_runtime/site/translation/neon_benthic_translator.py +212 -188
  85. nmdc_runtime/site/translation/neon_soil_translator.py +82 -58
  86. nmdc_runtime/site/translation/neon_surface_water_translator.py +698 -0
  87. nmdc_runtime/site/translation/neon_utils.py +24 -7
  88. nmdc_runtime/site/translation/submission_portal_translator.py +616 -162
  89. nmdc_runtime/site/translation/translator.py +73 -3
  90. nmdc_runtime/site/util.py +26 -7
  91. nmdc_runtime/site/validation/emsl.py +1 -0
  92. nmdc_runtime/site/validation/gold.py +1 -0
  93. nmdc_runtime/site/validation/util.py +16 -12
  94. nmdc_runtime/site/workspace.yaml +13 -0
  95. nmdc_runtime/static/NMDC_logo.svg +1073 -0
  96. nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
  97. nmdc_runtime/static/README.md +5 -0
  98. nmdc_runtime/static/favicon.ico +0 -0
  99. nmdc_runtime/util.py +236 -192
  100. nmdc_runtime-2.12.0.dist-info/METADATA +45 -0
  101. nmdc_runtime-2.12.0.dist-info/RECORD +131 -0
  102. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/WHEEL +1 -2
  103. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info}/entry_points.txt +0 -1
  104. nmdc_runtime/containers.py +0 -14
  105. nmdc_runtime/core/db/Database.py +0 -15
  106. nmdc_runtime/core/exceptions/__init__.py +0 -23
  107. nmdc_runtime/core/exceptions/base.py +0 -47
  108. nmdc_runtime/core/exceptions/token.py +0 -13
  109. nmdc_runtime/domain/users/queriesInterface.py +0 -18
  110. nmdc_runtime/domain/users/userSchema.py +0 -37
  111. nmdc_runtime/domain/users/userService.py +0 -14
  112. nmdc_runtime/infrastructure/database/db.py +0 -3
  113. nmdc_runtime/infrastructure/database/models/user.py +0 -10
  114. nmdc_runtime/lib/__init__.py +0 -1
  115. nmdc_runtime/lib/extract_nmdc_data.py +0 -41
  116. nmdc_runtime/lib/load_nmdc_data.py +0 -121
  117. nmdc_runtime/lib/nmdc_dataframes.py +0 -829
  118. nmdc_runtime/lib/nmdc_etl_class.py +0 -402
  119. nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
  120. nmdc_runtime/site/drsobjects/ingest.py +0 -93
  121. nmdc_runtime/site/drsobjects/registration.py +0 -131
  122. nmdc_runtime/site/terminusdb/generate.py +0 -198
  123. nmdc_runtime/site/terminusdb/ingest.py +0 -44
  124. nmdc_runtime/site/terminusdb/schema.py +0 -1671
  125. nmdc_runtime/site/translation/emsl.py +0 -42
  126. nmdc_runtime/site/translation/gold.py +0 -53
  127. nmdc_runtime/site/translation/jgi.py +0 -31
  128. nmdc_runtime/site/translation/util.py +0 -132
  129. nmdc_runtime/site/validation/jgi.py +0 -42
  130. nmdc_runtime-1.3.1.dist-info/METADATA +0 -181
  131. nmdc_runtime-1.3.1.dist-info/RECORD +0 -81
  132. nmdc_runtime-1.3.1.dist-info/top_level.txt +0 -1
  133. /nmdc_runtime/{client → api}/__init__.py +0 -0
  134. /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
  135. /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
  136. /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
  137. /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
  138. /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
  139. /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
  140. /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
  141. /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
  142. /nmdc_runtime/site/{terminusdb → repair}/__init__.py +0 -0
  143. {nmdc_runtime-1.3.1.dist-info → nmdc_runtime-2.12.0.dist-info/licenses}/LICENSE +0 -0
@@ -1,6 +1,6 @@
1
1
  import re
2
2
  import sqlite3
3
- from typing import List
3
+ from typing import List, Union
4
4
 
5
5
  import pandas as pd
6
6
 
@@ -10,7 +10,6 @@ from nmdc_runtime.site.util import get_basename
10
10
  from nmdc_runtime.site.translation.neon_utils import (
11
11
  _get_value_or_none,
12
12
  _create_controlled_identified_term_value,
13
- _create_controlled_term_value,
14
13
  _create_geolocation_value,
15
14
  _create_quantity_value,
16
15
  _create_timestamp_value,
@@ -26,6 +25,7 @@ class NeonSoilDataTranslator(Translator):
26
25
  sls_data: dict,
27
26
  neon_envo_mappings_file: pd.DataFrame,
28
27
  neon_raw_data_file_mappings_file: pd.DataFrame,
28
+ neon_nmdc_instrument_map_df: pd.DataFrame = pd.DataFrame(),
29
29
  *args,
30
30
  **kwargs,
31
31
  ) -> None:
@@ -99,6 +99,23 @@ class NeonSoilDataTranslator(Translator):
99
99
  "neonRawDataFile", self.conn, if_exists="replace", index=False
100
100
  )
101
101
 
102
+ self.neon_nmdc_instrument_map_df = neon_nmdc_instrument_map_df
103
+
104
+ def _get_instrument_id(self, instrument_model: Union[str | None]) -> str:
105
+ if not instrument_model:
106
+ raise ValueError(
107
+ f"instrument_model '{instrument_model}' could not be found in the NEON-NMDC instrument mapping TSV file."
108
+ )
109
+
110
+ df = self.neon_nmdc_instrument_map_df
111
+ matching_row = df[
112
+ df["NEON sequencingMethod"].str.contains(instrument_model, case=False)
113
+ ]
114
+
115
+ if not matching_row.empty:
116
+ nmdc_instrument_id = matching_row["NMDC instrument_set id"].values[0]
117
+ return nmdc_instrument_id
118
+
102
119
  def _translate_biosample(
103
120
  self, neon_id: str, nmdc_id: str, biosample_row: pd.DataFrame
104
121
  ) -> nmdc.Biosample:
@@ -116,7 +133,6 @@ class NeonSoilDataTranslator(Translator):
116
133
  """
117
134
  return nmdc.Biosample(
118
135
  id=nmdc_id,
119
- part_of="nmdc:sty-11-34xj1150",
120
136
  env_broad_scale=_create_controlled_identified_term_value(
121
137
  "ENVO:00000446", "terrestrial biome"
122
138
  ),
@@ -136,7 +152,7 @@ class NeonSoilDataTranslator(Translator):
136
152
  collection_date=_create_timestamp_value(
137
153
  biosample_row["collectDate"].values[0]
138
154
  ),
139
- temp=_create_quantity_value(biosample_row["soilTemp"].values[0], "Celsius"),
155
+ temp=_create_quantity_value(biosample_row["soilTemp"].values[0], "Cel"),
140
156
  depth=nmdc.QuantityValue(
141
157
  has_minimum_numeric_value=_get_value_or_none(
142
158
  biosample_row, "sampleTopDepth"
@@ -145,26 +161,27 @@ class NeonSoilDataTranslator(Translator):
145
161
  biosample_row, "sampleBottomDepth"
146
162
  ),
147
163
  has_unit="m",
164
+ type="nmdc:QuantityValue",
148
165
  ),
149
166
  samp_collec_device=_get_value_or_none(biosample_row, "soilSamplingDevice"),
150
167
  soil_horizon=_get_value_or_none(biosample_row, "horizon"),
151
168
  analysis_type=_get_value_or_none(biosample_row, "sequenceAnalysisType"),
152
169
  env_package=_create_text_value(biosample_row["sampleType"].values[0]),
153
170
  nitro=_create_quantity_value(
154
- biosample_row["nitrogenPercent"].values[0], "percent"
171
+ biosample_row["nitrogenPercent"].values[0], "%"
155
172
  ),
156
173
  org_carb=_create_quantity_value(
157
- biosample_row["organicCPercent"].values[0], "percent"
174
+ biosample_row["organicCPercent"].values[0], "%"
158
175
  ),
159
176
  carb_nitro_ratio=_create_quantity_value(
160
- biosample_row["CNratio"].values[0], None
177
+ biosample_row["CNratio"].values[0], "ratio"
161
178
  ),
162
179
  ph=_create_double_value(biosample_row["soilInWaterpH"].values[0]),
163
- water_content=[
164
- f"{biosample_row['soilMoisture'].values[0]} g of water/g of dry soil"
165
- ]
166
- if not biosample_row["soilMoisture"].isna().any()
167
- else None,
180
+ water_content=(
181
+ [f"{biosample_row['soilMoisture'].values[0]} g of water/g of dry soil"]
182
+ if not biosample_row["soilMoisture"].isna().any()
183
+ else None
184
+ ),
168
185
  ammonium_nitrogen=_create_quantity_value(
169
186
  biosample_row["kclAmmoniumNConc"].values[0], "mg/L"
170
187
  ),
@@ -172,6 +189,7 @@ class NeonSoilDataTranslator(Translator):
172
189
  biosample_row["kclNitrateNitriteNConc"].values[0], "mg/L"
173
190
  ),
174
191
  type="nmdc:Biosample",
192
+ associated_studies=["nmdc:sty-11-34xj1150"],
175
193
  )
176
194
 
177
195
  def _translate_pooling_process(
@@ -198,6 +216,7 @@ class NeonSoilDataTranslator(Translator):
198
216
  has_input=bsm_input_values_list,
199
217
  start_date=_get_value_or_none(pooling_row, "startDate"),
200
218
  end_date=_get_value_or_none(pooling_row, "collectDate"),
219
+ type="nmdc:Pooling",
201
220
  )
202
221
 
203
222
  def _translate_processed_sample(
@@ -214,12 +233,14 @@ class NeonSoilDataTranslator(Translator):
214
233
  :param sample_id: Value from `genomicsSampleID` or `dnaSampleID` column.
215
234
  :return: ProcessedSample objects to be stored in `processed_sample_set`.
216
235
  """
217
- return nmdc.ProcessedSample(id=processed_sample_id, name=sample_id)
236
+ return nmdc.ProcessedSample(
237
+ id=processed_sample_id, name=sample_id, type="nmdc:ProcessedSample"
238
+ )
218
239
 
219
240
  def _translate_data_object(
220
241
  self, do_id: str, url: str, do_type: str, checksum: str
221
242
  ) -> nmdc.DataObject:
222
- """Create nmdc DataObject which is the output of an OmicsProcessing process. This
243
+ """Create nmdc DataObject which is the output of a NucleotideSequencing process. This
223
244
  object mainly contains information about the sequencing file that was generated as
224
245
  the result of running a Bioinformatics workflow on a certain ProcessedSample, which
225
246
  is the result of a LibraryPreparation process.
@@ -242,6 +263,7 @@ class NeonSoilDataTranslator(Translator):
242
263
  description=f"sequencing results for {basename}",
243
264
  type="nmdc:DataObject",
244
265
  md5_checksum=checksum,
266
+ data_category=nmdc.DataCategoryEnum.instrument_data.text,
245
267
  data_object_type=do_type,
246
268
  )
247
269
 
@@ -280,10 +302,9 @@ class NeonSoilDataTranslator(Translator):
280
302
  input_mass=_create_quantity_value(
281
303
  _get_value_or_none(extraction_row, "sampleMass"), "g"
282
304
  ),
283
- quality_control_report=nmdc.QualityControlReport(
284
- status=_get_value_or_none(extraction_row, "qaqcStatus")
285
- ),
305
+ qc_status=_get_value_or_none(extraction_row, "qaqcStatus"),
286
306
  processing_institution=processing_institution,
307
+ type="nmdc:Extraction",
287
308
  )
288
309
 
289
310
  def _translate_library_preparation(
@@ -296,13 +317,13 @@ class NeonSoilDataTranslator(Translator):
296
317
  """
297
318
  Create LibraryPreparation process object. The input to LibraryPreparation process
298
319
  is the output ProcessedSample from an Extraction process. The output of LibraryPreparation
299
- process is fed as input to an OmicsProcessing object.
320
+ process is fed as input to an NucleotideSequencing object.
300
321
 
301
322
  :param library_preparation_id: Minted id for LibraryPreparation process.
302
323
  :param library_preparation_input: Input to LibraryPreparation process is output from
303
324
  Extraction process.
304
325
  :param processed_sample_id: Minted ProcessedSample id which is output of LibraryPreparation
305
- is also input to OmicsProcessing.
326
+ is also input to NucleotideSequencing.
306
327
  :param library_preparation_row: Metadata required to populate LibraryPreparation.
307
328
  :return: Object that using LibraryPreparation process model.
308
329
  """
@@ -321,31 +342,32 @@ class NeonSoilDataTranslator(Translator):
321
342
  start_date=_get_value_or_none(library_preparation_row, "collectDate"),
322
343
  end_date=_get_value_or_none(library_preparation_row, "processedDate"),
323
344
  processing_institution=processing_institution,
345
+ type="nmdc:LibraryPreparation",
324
346
  )
325
347
 
326
- def _translate_omics_processing(
348
+ def _translate_nucleotide_sequencing(
327
349
  self,
328
- omics_processing_id: str,
350
+ nucleotide_sequencing_id: str,
329
351
  processed_sample_id: str,
330
352
  raw_data_file_data: str,
331
- omics_processing_row: pd.DataFrame,
332
- ) -> nmdc.OmicsProcessing:
333
- """Create nmdc OmicsProcessing object. This class typically models the run of a
334
- Bioinformatics workflow on sequence data from a biosample. The input to an OmicsProcessing
335
- process is the output from a LibraryPreparation process, and the output of OmicsProcessing
353
+ nucleotide_sequencing_row: pd.DataFrame,
354
+ ):
355
+ """Create nmdc NucleotideSequencing object. This class typically models the run of a
356
+ Bioinformatics workflow on sequence data from a biosample. The input to an NucleotideSequencing
357
+ process is the output from a LibraryPreparation process, and the output of NucleotideSequencing
336
358
  is a DataObject which has the FASTQ sequence file URLs embedded in them.
337
359
 
338
- :param omics_processing_id: Minted id for an OmicsProcessing process.
360
+ :param nucleotide_sequencing_id: Minted id for an NucleotideSequencing process.
339
361
  :param processed_sample_id: ProcessedSample that is the output of LibraryPreparation.
340
362
  :param raw_data_file_data: R1/R2 DataObjects which have links to workflow processed output
341
363
  files embedded in them.
342
- :param omics_processing_row: DataFrame with metadata for an OmicsProcessing workflow
364
+ :param nucleotide_sequencing_row: DataFrame with metadata for an NucleotideSequencing workflow
343
365
  process/run.
344
- :return: OmicsProcessing object that models a Bioinformatics workflow process/run.
366
+ :return: NucleotideSequencing object that models a Bioinformatics workflow process/run.
345
367
  """
346
368
  processing_institution = None
347
369
  sequencing_facility = _get_value_or_none(
348
- omics_processing_row, "sequencingFacilityID"
370
+ nucleotide_sequencing_row, "sequencingFacilityID"
349
371
  )
350
372
  if sequencing_facility is not None:
351
373
  if re.search("Battelle", sequencing_facility, re.IGNORECASE):
@@ -353,19 +375,21 @@ class NeonSoilDataTranslator(Translator):
353
375
  elif re.search("Argonne", sequencing_facility, re.IGNORECASE):
354
376
  processing_institution = "ANL"
355
377
 
356
- return nmdc.OmicsProcessing(
357
- id=omics_processing_id,
378
+ return nmdc.NucleotideSequencing(
379
+ id=nucleotide_sequencing_id,
358
380
  has_input=processed_sample_id,
359
381
  has_output=raw_data_file_data,
360
382
  processing_institution=processing_institution,
361
- ncbi_project_name=_get_value_or_none(omics_processing_row, "ncbiProjectID"),
362
- omics_type=_create_controlled_term_value(
363
- omics_processing_row["investigation_type"].values[0]
383
+ ncbi_project_name=_get_value_or_none(
384
+ nucleotide_sequencing_row, "ncbiProjectID"
364
385
  ),
365
- instrument_name=f"{_get_value_or_none(omics_processing_row, 'sequencingMethod')} {_get_value_or_none(omics_processing_row, 'instrument_model')}",
366
- part_of="nmdc:sty-11-34xj1150",
367
- name=f"Terrestrial soil microbial communities - {_get_value_or_none(omics_processing_row, 'dnaSampleID')}",
368
- type="nmdc:OmicsProcessing",
386
+ instrument_used=self._get_instrument_id(
387
+ _get_value_or_none(nucleotide_sequencing_row, "instrument_model")
388
+ ),
389
+ name=f"Terrestrial soil microbial communities - {_get_value_or_none(nucleotide_sequencing_row, 'dnaSampleID')}",
390
+ type="nmdc:NucleotideSequencing",
391
+ associated_studies=["nmdc:sty-11-34xj1150"],
392
+ analyte_category="metagenome",
369
393
  )
370
394
 
371
395
  def get_database(self) -> nmdc.Database:
@@ -373,10 +397,9 @@ class NeonSoilDataTranslator(Translator):
373
397
  nmdc object creation methods as well as the nmdc type (QuantityValue, GeolocationValue, etc.)
374
398
  creation methods, to make an nmdc Database object. It populates multiple sets in the Mongo database -
375
399
  * `biosample_set`: uses `_translate_biosample()`
376
- * `pooling_set`: uses `_translate_pooling_process()`
377
- * `extraction_set`: uses `_translate_extraction_process()`
378
- * `library_preparation_set`: uses `_translate_library_preparation()`
379
- * `omics_processing_set`: uses `_translate_omics_processing()`
400
+ * `material_processing_set`: uses `_translate_pooling_process()`, `_translate_extraction_process()`,
401
+ `_translate_library_preparation()`
402
+ * `data_generation_set`: uses `_translate_nucleotide_sequencing()`
380
403
  * `processed_sample_set`: uses `_translate_processed_sample()`
381
404
  * `data_object_set`: uses `_translate_data_object()`
382
405
  The core Biosample information is in the `sls_soilCoreCollection` table. However, we
@@ -607,14 +630,13 @@ class NeonSoilDataTranslator(Translator):
607
630
  mms_metagenomeDnaExtraction.processedDate,
608
631
  mms_metagenomeSequencing.sequencingFacilityID,
609
632
  mms_metagenomeSequencing.ncbiProjectID,
610
- mms_metagenomeSequencing.investigation_type,
611
633
  mms_metagenomeSequencing.sequencingMethod,
612
634
  mms_metagenomeSequencing.instrument_model
613
635
  FROM mms_metagenomeSequencing
614
636
  LEFT JOIN mms_metagenomeDnaExtraction ON mms_metagenomeDnaExtraction.dnaSampleID = mms_metagenomeSequencing.dnaSampleID
615
637
  """
616
638
  library_preparation_table = pd.read_sql_query(query, self.conn)
617
- omics_processing_table = pd.read_sql_query(query, self.conn)
639
+ nucleotide_sequencing_table = pd.read_sql_query(query, self.conn)
618
640
 
619
641
  nmdc_pooling_ids = self._id_minter("nmdc:Pooling", len(pooling_ids_dict))
620
642
  neon_to_nmdc_pooling_ids = dict(
@@ -653,12 +675,12 @@ class NeonSoilDataTranslator(Translator):
653
675
  zip(library_prepration_ids, nmdc_library_preparation_processed_sample_ids)
654
676
  )
655
677
 
656
- omics_processing_ids = omics_processing_table["dnaSampleID"]
657
- nmdc_omics_processing_ids = self._id_minter(
658
- "nmdc:OmicsProcessing", len(omics_processing_ids)
678
+ nucleotide_sequencing_ids = nucleotide_sequencing_table["dnaSampleID"]
679
+ nmdc_nucleotide_sequencing_ids = self._id_minter(
680
+ "nmdc:NucleotideSequencing", len(nucleotide_sequencing_ids)
659
681
  )
660
- neon_to_nmdc_omics_processing_ids = dict(
661
- zip(omics_processing_ids, nmdc_omics_processing_ids)
682
+ neon_to_nmdc_nucleotide_sequencing_ids = dict(
683
+ zip(nucleotide_sequencing_ids, nmdc_nucleotide_sequencing_ids)
662
684
  )
663
685
 
664
686
  neon_raw_data_file_mappings_df = self.neon_raw_data_file_mappings_df
@@ -701,7 +723,7 @@ class NeonSoilDataTranslator(Translator):
701
723
  # if the number of biosamples that are input to a pooling process
702
724
  # is one or less, then ignore it and go straight to extraction
703
725
  if len(bsm_values_list) > 1:
704
- database.pooling_set.append(
726
+ database.material_processing_set.append(
705
727
  self._translate_pooling_process(
706
728
  pooling_process_id,
707
729
  processed_sample_id,
@@ -734,7 +756,7 @@ class NeonSoilDataTranslator(Translator):
734
756
  # handler for creating extraction process records
735
757
  # for both pooled and non-pooled samples
736
758
  if "|" in genomics_pooled_id_list:
737
- database.extraction_set.append(
759
+ database.material_processing_set.append(
738
760
  self._translate_extraction_process(
739
761
  extraction_id,
740
762
  extraction_input,
@@ -755,7 +777,7 @@ class NeonSoilDataTranslator(Translator):
755
777
 
756
778
  extraction_input = neon_to_nmdc_biosample_ids[neon_biosample_id]
757
779
 
758
- database.extraction_set.append(
780
+ database.material_processing_set.append(
759
781
  self._translate_extraction_process(
760
782
  extraction_id,
761
783
  extraction_input,
@@ -772,7 +794,9 @@ class NeonSoilDataTranslator(Translator):
772
794
  dna_sample_id
773
795
  ]
774
796
 
775
- omics_processing_id = neon_to_nmdc_omics_processing_ids[dna_sample_id]
797
+ nucleotide_sequencing_id = neon_to_nmdc_nucleotide_sequencing_ids[
798
+ dna_sample_id
799
+ ]
776
800
 
777
801
  genomics_sample_id = library_preparation_table[
778
802
  library_preparation_table["dnaSampleID"] == dna_sample_id
@@ -787,7 +811,7 @@ class NeonSoilDataTranslator(Translator):
787
811
  library_preparation_table["dnaSampleID"] == dna_sample_id
788
812
  ]
789
813
 
790
- database.library_preparation_set.append(
814
+ database.material_processing_set.append(
791
815
  self._translate_library_preparation(
792
816
  library_preparation_id,
793
817
  library_preparation_input,
@@ -809,9 +833,9 @@ class NeonSoilDataTranslator(Translator):
809
833
  if item in neon_to_nmdc_data_object_ids:
810
834
  has_output_do_ids.append(neon_to_nmdc_data_object_ids[item])
811
835
 
812
- database.omics_processing_set.append(
813
- self._translate_omics_processing(
814
- omics_processing_id,
836
+ database.data_generation_set.append(
837
+ self._translate_nucleotide_sequencing(
838
+ nucleotide_sequencing_id,
815
839
  processed_sample_id,
816
840
  has_output_do_ids,
817
841
  library_preparation_row,