nmdc-runtime 1.1.0__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nmdc_runtime/containers.py +0 -1
- nmdc_runtime/lib/nmdc_etl_class.py +0 -1
- nmdc_runtime/lib/transform_nmdc_data.py +0 -1
- nmdc_runtime/site/graphs.py +97 -5
- nmdc_runtime/site/ops.py +100 -4
- nmdc_runtime/site/repository.py +121 -6
- nmdc_runtime/site/translation/gold_translator.py +1 -1
- nmdc_runtime/site/translation/neon_benthic_translator.py +548 -0
- nmdc_runtime/site/translation/{neon_translator.py → neon_soil_translator.py} +59 -192
- nmdc_runtime/site/translation/neon_utils.py +146 -0
- {nmdc_runtime-1.1.0.dist-info → nmdc_runtime-1.3.0.dist-info}/METADATA +2 -2
- {nmdc_runtime-1.1.0.dist-info → nmdc_runtime-1.3.0.dist-info}/RECORD +16 -14
- {nmdc_runtime-1.1.0.dist-info → nmdc_runtime-1.3.0.dist-info}/LICENSE +0 -0
- {nmdc_runtime-1.1.0.dist-info → nmdc_runtime-1.3.0.dist-info}/WHEEL +0 -0
- {nmdc_runtime-1.1.0.dist-info → nmdc_runtime-1.3.0.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-1.1.0.dist-info → nmdc_runtime-1.3.0.dist-info}/top_level.txt +0 -0
|
@@ -1,17 +1,34 @@
|
|
|
1
1
|
import re
|
|
2
|
-
import math
|
|
3
2
|
import sqlite3
|
|
4
|
-
from typing import
|
|
3
|
+
from typing import List
|
|
5
4
|
|
|
6
5
|
import pandas as pd
|
|
7
6
|
|
|
8
7
|
from nmdc_schema import nmdc
|
|
9
8
|
from nmdc_runtime.site.translation.translator import Translator
|
|
10
9
|
from nmdc_runtime.site.util import get_basename
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
10
|
+
from nmdc_runtime.site.translation.neon_utils import (
|
|
11
|
+
_get_value_or_none,
|
|
12
|
+
_create_controlled_identified_term_value,
|
|
13
|
+
_create_controlled_term_value,
|
|
14
|
+
_create_geolocation_value,
|
|
15
|
+
_create_quantity_value,
|
|
16
|
+
_create_timestamp_value,
|
|
17
|
+
_create_text_value,
|
|
18
|
+
_create_double_value,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class NeonSoilDataTranslator(Translator):
|
|
23
|
+
def __init__(
|
|
24
|
+
self,
|
|
25
|
+
mms_data: dict,
|
|
26
|
+
sls_data: dict,
|
|
27
|
+
neon_envo_mappings_file: pd.DataFrame,
|
|
28
|
+
neon_raw_data_file_mappings_file: pd.DataFrame,
|
|
29
|
+
*args,
|
|
30
|
+
**kwargs,
|
|
31
|
+
) -> None:
|
|
15
32
|
super().__init__(*args, **kwargs)
|
|
16
33
|
|
|
17
34
|
self.conn = sqlite3.connect("neon.db")
|
|
@@ -73,155 +90,15 @@ class NeonDataTranslator(Translator):
|
|
|
73
90
|
f"You are missing one of the soil periodic tables: {neon_sls_data_tables}"
|
|
74
91
|
)
|
|
75
92
|
|
|
76
|
-
neon_envo_mappings_file
|
|
77
|
-
neon_envo_terms = pd.read_csv(neon_envo_mappings_file, delimiter="\t")
|
|
78
|
-
neon_envo_terms.to_sql(
|
|
93
|
+
neon_envo_mappings_file.to_sql(
|
|
79
94
|
"neonEnvoTerms", self.conn, if_exists="replace", index=False
|
|
80
95
|
)
|
|
81
96
|
|
|
82
|
-
|
|
83
|
-
self.neon_raw_data_file_mappings_df = pd.read_csv(
|
|
84
|
-
neon_raw_data_file_mappings_file, delimiter="\t"
|
|
85
|
-
)
|
|
97
|
+
self.neon_raw_data_file_mappings_df = neon_raw_data_file_mappings_file
|
|
86
98
|
self.neon_raw_data_file_mappings_df.to_sql(
|
|
87
99
|
"neonRawDataFile", self.conn, if_exists="replace", index=False
|
|
88
100
|
)
|
|
89
101
|
|
|
90
|
-
def _get_value_or_none(
|
|
91
|
-
self, data: pd.DataFrame, column_name: str
|
|
92
|
-
) -> Union[str, float, None]:
|
|
93
|
-
"""
|
|
94
|
-
Get the value from the specified column in the data DataFrame.
|
|
95
|
-
If the column value is NaN, return None. However, there are handlers
|
|
96
|
-
for a select set of columns - horizon, qaqcStatus, sampleTopDepth,
|
|
97
|
-
and sampleBottomDepth.
|
|
98
|
-
|
|
99
|
-
:param data: DataFrame to read the column value from.
|
|
100
|
-
:return: Either a string, float or None depending on the column/column values.
|
|
101
|
-
"""
|
|
102
|
-
if column_name in data and not data[column_name].isna().any():
|
|
103
|
-
if column_name == "horizon":
|
|
104
|
-
return f"{data[column_name].values[0]} horizon"
|
|
105
|
-
elif column_name == "qaqcStatus":
|
|
106
|
-
return data[column_name].values[0].lower()
|
|
107
|
-
elif column_name == "sampleTopDepth":
|
|
108
|
-
return float(data[column_name].values[0]) / 100
|
|
109
|
-
elif column_name == "sampleBottomDepth":
|
|
110
|
-
return float(data[column_name].values[0]) / 100
|
|
111
|
-
else:
|
|
112
|
-
return data[column_name].values[0]
|
|
113
|
-
|
|
114
|
-
return None
|
|
115
|
-
|
|
116
|
-
def _create_controlled_identified_term_value(
|
|
117
|
-
self, id: str = None, name: str = None
|
|
118
|
-
) -> nmdc.ControlledIdentifiedTermValue:
|
|
119
|
-
"""
|
|
120
|
-
Create a ControlledIdentifiedTermValue object with the specified id and name.
|
|
121
|
-
|
|
122
|
-
:param id: CURIE (with defined prefix expansion) or full URI of term.
|
|
123
|
-
:param name: Name of term.
|
|
124
|
-
:return: ControlledIdentifiedTermValue with mandatorily specified value for `id`.
|
|
125
|
-
"""
|
|
126
|
-
if id is None or name is None:
|
|
127
|
-
return None
|
|
128
|
-
return nmdc.ControlledIdentifiedTermValue(
|
|
129
|
-
term=nmdc.OntologyClass(id=id, name=name)
|
|
130
|
-
)
|
|
131
|
-
|
|
132
|
-
def _create_controlled_term_value(
|
|
133
|
-
self, name: str = None
|
|
134
|
-
) -> nmdc.ControlledTermValue:
|
|
135
|
-
"""
|
|
136
|
-
Create a ControlledIdentifiedTermValue object with the specified id and name.
|
|
137
|
-
|
|
138
|
-
:param name: Name of term. This may or may not have an `id` associated with it,
|
|
139
|
-
hence the decision to record it in `has_raw_value` meaning, record as it is
|
|
140
|
-
in the data source.
|
|
141
|
-
:return: ControlledTermValue object with name in `has_raw_value`.
|
|
142
|
-
"""
|
|
143
|
-
if id is None or name is None:
|
|
144
|
-
return None
|
|
145
|
-
return nmdc.ControlledTermValue(has_raw_value=name)
|
|
146
|
-
|
|
147
|
-
def _create_timestamp_value(self, value: str = None) -> nmdc.TimestampValue:
|
|
148
|
-
"""
|
|
149
|
-
Create a TimestampValue object with the specified value.
|
|
150
|
-
|
|
151
|
-
:param value: Timestamp value recorded in ISO-8601 format.
|
|
152
|
-
Example: 2021-07-07T20:14Z.
|
|
153
|
-
:return: ISO-8601 timestamp wrapped in TimestampValue object.
|
|
154
|
-
"""
|
|
155
|
-
if value is None:
|
|
156
|
-
return None
|
|
157
|
-
return nmdc.TimestampValue(has_raw_value=value)
|
|
158
|
-
|
|
159
|
-
def _create_quantity_value(
|
|
160
|
-
self, numeric_value: Union[str, int, float] = None, unit: str = None
|
|
161
|
-
) -> nmdc.QuantityValue:
|
|
162
|
-
"""
|
|
163
|
-
Create a QuantityValue object with the specified numeric value and unit.
|
|
164
|
-
|
|
165
|
-
:param numeric_value: Numeric value from a dataframe column that typically
|
|
166
|
-
records numerical values.
|
|
167
|
-
:param unit: Unit corresponding to the numeric value. Example: biogeochemical
|
|
168
|
-
measurement value like organic Carbon Nitrogen ratio.
|
|
169
|
-
:return: Numeric value and unit stored together in nested QuantityValue object.
|
|
170
|
-
"""
|
|
171
|
-
if numeric_value is None or math.isnan(numeric_value):
|
|
172
|
-
return None
|
|
173
|
-
return nmdc.QuantityValue(has_numeric_value=float(numeric_value), has_unit=unit)
|
|
174
|
-
|
|
175
|
-
def _create_text_value(self, value: str = None) -> nmdc.TextValue:
|
|
176
|
-
"""
|
|
177
|
-
Create a TextValue object with the specified value.
|
|
178
|
-
|
|
179
|
-
:param value: column that we expect to primarily have text values.
|
|
180
|
-
:return: Text wrapped in TextValue object.
|
|
181
|
-
"""
|
|
182
|
-
if value is None:
|
|
183
|
-
return None
|
|
184
|
-
return nmdc.TextValue(has_raw_value=value)
|
|
185
|
-
|
|
186
|
-
def _create_double_value(self, value: str = None) -> nmdc.Double:
|
|
187
|
-
"""
|
|
188
|
-
Create a Double object with the specified value.
|
|
189
|
-
|
|
190
|
-
:param value: Values from a column which typically records numeric
|
|
191
|
-
(double) values like pH.
|
|
192
|
-
:return: String (possibly) cast/converted to nmdc Double object.
|
|
193
|
-
"""
|
|
194
|
-
if value is None or math.isnan(value):
|
|
195
|
-
return None
|
|
196
|
-
return nmdc.Double(value)
|
|
197
|
-
|
|
198
|
-
def _create_geolocation_value(
|
|
199
|
-
self, latitude: str = None, longitude: str = None
|
|
200
|
-
) -> nmdc.GeolocationValue:
|
|
201
|
-
"""
|
|
202
|
-
Create a GeolocationValue object with latitude and longitude from the
|
|
203
|
-
biosample DataFrame. Takes in values from the NEON API table with
|
|
204
|
-
latitude (decimalLatitude) and longitude (decimalLongitude) values and
|
|
205
|
-
puts it in the respective slots in the GeolocationValue class object.
|
|
206
|
-
|
|
207
|
-
:param latitude: Value corresponding to `decimalLatitude` column.
|
|
208
|
-
:param longitude: Value corresponding to `decimalLongitude` column.
|
|
209
|
-
:return: Latitude and Longitude values wrapped in nmdc GeolocationValue
|
|
210
|
-
object.
|
|
211
|
-
"""
|
|
212
|
-
if (
|
|
213
|
-
latitude is None
|
|
214
|
-
or math.isnan(latitude)
|
|
215
|
-
or longitude is None
|
|
216
|
-
or math.isnan(longitude)
|
|
217
|
-
):
|
|
218
|
-
return None
|
|
219
|
-
|
|
220
|
-
return nmdc.GeolocationValue(
|
|
221
|
-
latitude=nmdc.DecimalDegree(latitude),
|
|
222
|
-
longitude=nmdc.DecimalDegree(longitude),
|
|
223
|
-
)
|
|
224
|
-
|
|
225
102
|
def _translate_biosample(
|
|
226
103
|
self, neon_id: str, nmdc_id: str, biosample_row: pd.DataFrame
|
|
227
104
|
) -> nmdc.Biosample:
|
|
@@ -240,64 +117,58 @@ class NeonDataTranslator(Translator):
|
|
|
240
117
|
return nmdc.Biosample(
|
|
241
118
|
id=nmdc_id,
|
|
242
119
|
part_of="nmdc:sty-11-34xj1150",
|
|
243
|
-
env_broad_scale=
|
|
120
|
+
env_broad_scale=_create_controlled_identified_term_value(
|
|
244
121
|
"ENVO:00000446", "terrestrial biome"
|
|
245
122
|
),
|
|
246
|
-
env_local_scale=
|
|
123
|
+
env_local_scale=_create_controlled_identified_term_value(
|
|
247
124
|
biosample_row["envo_id"].values[0],
|
|
248
125
|
biosample_row["envo_label"].values[0],
|
|
249
126
|
),
|
|
250
|
-
env_medium=
|
|
127
|
+
env_medium=_create_controlled_identified_term_value(
|
|
251
128
|
"ENVO:00001998", "soil"
|
|
252
129
|
),
|
|
253
130
|
name=neon_id,
|
|
254
|
-
lat_lon=
|
|
131
|
+
lat_lon=_create_geolocation_value(
|
|
255
132
|
biosample_row["decimalLatitude"].values[0],
|
|
256
133
|
biosample_row["decimalLongitude"].values[0],
|
|
257
134
|
),
|
|
258
135
|
elev=nmdc.Float(biosample_row["elevation"].values[0]),
|
|
259
|
-
collection_date=
|
|
136
|
+
collection_date=_create_timestamp_value(
|
|
260
137
|
biosample_row["collectDate"].values[0]
|
|
261
138
|
),
|
|
262
|
-
temp=
|
|
263
|
-
biosample_row["soilTemp"].values[0], "Celsius"
|
|
264
|
-
),
|
|
139
|
+
temp=_create_quantity_value(biosample_row["soilTemp"].values[0], "Celsius"),
|
|
265
140
|
depth=nmdc.QuantityValue(
|
|
266
|
-
has_minimum_numeric_value=
|
|
141
|
+
has_minimum_numeric_value=_get_value_or_none(
|
|
267
142
|
biosample_row, "sampleTopDepth"
|
|
268
143
|
),
|
|
269
|
-
has_maximum_numeric_value=
|
|
144
|
+
has_maximum_numeric_value=_get_value_or_none(
|
|
270
145
|
biosample_row, "sampleBottomDepth"
|
|
271
146
|
),
|
|
272
147
|
has_unit="m",
|
|
273
148
|
),
|
|
274
|
-
samp_collec_device=
|
|
275
|
-
|
|
276
|
-
),
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
biosample_row, "sequenceAnalysisType"
|
|
280
|
-
),
|
|
281
|
-
env_package=self._create_text_value(biosample_row["sampleType"].values[0]),
|
|
282
|
-
nitro=self._create_quantity_value(
|
|
149
|
+
samp_collec_device=_get_value_or_none(biosample_row, "soilSamplingDevice"),
|
|
150
|
+
soil_horizon=_get_value_or_none(biosample_row, "horizon"),
|
|
151
|
+
analysis_type=_get_value_or_none(biosample_row, "sequenceAnalysisType"),
|
|
152
|
+
env_package=_create_text_value(biosample_row["sampleType"].values[0]),
|
|
153
|
+
nitro=_create_quantity_value(
|
|
283
154
|
biosample_row["nitrogenPercent"].values[0], "percent"
|
|
284
155
|
),
|
|
285
|
-
org_carb=
|
|
156
|
+
org_carb=_create_quantity_value(
|
|
286
157
|
biosample_row["organicCPercent"].values[0], "percent"
|
|
287
158
|
),
|
|
288
|
-
carb_nitro_ratio=
|
|
159
|
+
carb_nitro_ratio=_create_quantity_value(
|
|
289
160
|
biosample_row["CNratio"].values[0], None
|
|
290
161
|
),
|
|
291
|
-
ph=
|
|
162
|
+
ph=_create_double_value(biosample_row["soilInWaterpH"].values[0]),
|
|
292
163
|
water_content=[
|
|
293
164
|
f"{biosample_row['soilMoisture'].values[0]} g of water/g of dry soil"
|
|
294
165
|
]
|
|
295
166
|
if not biosample_row["soilMoisture"].isna().any()
|
|
296
167
|
else None,
|
|
297
|
-
ammonium_nitrogen=
|
|
168
|
+
ammonium_nitrogen=_create_quantity_value(
|
|
298
169
|
biosample_row["kclAmmoniumNConc"].values[0], "mg/L"
|
|
299
170
|
),
|
|
300
|
-
tot_nitro_content=
|
|
171
|
+
tot_nitro_content=_create_quantity_value(
|
|
301
172
|
biosample_row["kclNitrateNitriteNConc"].values[0], "mg/L"
|
|
302
173
|
),
|
|
303
174
|
type="nmdc:Biosample",
|
|
@@ -325,8 +196,8 @@ class NeonDataTranslator(Translator):
|
|
|
325
196
|
id=nmdc_id,
|
|
326
197
|
has_output=processed_sample_id,
|
|
327
198
|
has_input=bsm_input_values_list,
|
|
328
|
-
start_date=
|
|
329
|
-
end_date=
|
|
199
|
+
start_date=_get_value_or_none(pooling_row, "startDate"),
|
|
200
|
+
end_date=_get_value_or_none(pooling_row, "collectDate"),
|
|
330
201
|
)
|
|
331
202
|
|
|
332
203
|
def _translate_processed_sample(
|
|
@@ -393,7 +264,7 @@ class NeonDataTranslator(Translator):
|
|
|
393
264
|
:return: Extraction process object.
|
|
394
265
|
"""
|
|
395
266
|
processing_institution = None
|
|
396
|
-
laboratory_name =
|
|
267
|
+
laboratory_name = _get_value_or_none(extraction_row, "laboratoryName")
|
|
397
268
|
if laboratory_name is not None:
|
|
398
269
|
if re.search("Battelle", laboratory_name, re.IGNORECASE):
|
|
399
270
|
processing_institution = "Battelle"
|
|
@@ -404,13 +275,13 @@ class NeonDataTranslator(Translator):
|
|
|
404
275
|
id=extraction_id,
|
|
405
276
|
has_input=extraction_input,
|
|
406
277
|
has_output=processed_sample_id,
|
|
407
|
-
start_date=
|
|
408
|
-
end_date=
|
|
409
|
-
|
|
410
|
-
|
|
278
|
+
start_date=_get_value_or_none(extraction_row, "collectDate"),
|
|
279
|
+
end_date=_get_value_or_none(extraction_row, "processedDate"),
|
|
280
|
+
input_mass=_create_quantity_value(
|
|
281
|
+
_get_value_or_none(extraction_row, "sampleMass"), "g"
|
|
411
282
|
),
|
|
412
283
|
quality_control_report=nmdc.QualityControlReport(
|
|
413
|
-
status=
|
|
284
|
+
status=_get_value_or_none(extraction_row, "qaqcStatus")
|
|
414
285
|
),
|
|
415
286
|
processing_institution=processing_institution,
|
|
416
287
|
)
|
|
@@ -436,9 +307,7 @@ class NeonDataTranslator(Translator):
|
|
|
436
307
|
:return: Object that using LibraryPreparation process model.
|
|
437
308
|
"""
|
|
438
309
|
processing_institution = None
|
|
439
|
-
laboratory_name =
|
|
440
|
-
library_preparation_row, "laboratoryName"
|
|
441
|
-
)
|
|
310
|
+
laboratory_name = _get_value_or_none(library_preparation_row, "laboratoryName")
|
|
442
311
|
if laboratory_name is not None:
|
|
443
312
|
if re.search("Battelle", laboratory_name, re.IGNORECASE):
|
|
444
313
|
processing_institution = "Battelle"
|
|
@@ -449,8 +318,8 @@ class NeonDataTranslator(Translator):
|
|
|
449
318
|
id=library_preparation_id,
|
|
450
319
|
has_input=library_preparation_input,
|
|
451
320
|
has_output=processed_sample_id,
|
|
452
|
-
start_date=
|
|
453
|
-
end_date=
|
|
321
|
+
start_date=_get_value_or_none(library_preparation_row, "collectDate"),
|
|
322
|
+
end_date=_get_value_or_none(library_preparation_row, "processedDate"),
|
|
454
323
|
processing_institution=processing_institution,
|
|
455
324
|
)
|
|
456
325
|
|
|
@@ -475,7 +344,7 @@ class NeonDataTranslator(Translator):
|
|
|
475
344
|
:return: OmicsProcessing object that models a Bioinformatics workflow process/run.
|
|
476
345
|
"""
|
|
477
346
|
processing_institution = None
|
|
478
|
-
sequencing_facility =
|
|
347
|
+
sequencing_facility = _get_value_or_none(
|
|
479
348
|
omics_processing_row, "sequencingFacilityID"
|
|
480
349
|
)
|
|
481
350
|
if sequencing_facility is not None:
|
|
@@ -489,15 +358,13 @@ class NeonDataTranslator(Translator):
|
|
|
489
358
|
has_input=processed_sample_id,
|
|
490
359
|
has_output=raw_data_file_data,
|
|
491
360
|
processing_institution=processing_institution,
|
|
492
|
-
ncbi_project_name=
|
|
493
|
-
|
|
494
|
-
),
|
|
495
|
-
omics_type=self._create_controlled_term_value(
|
|
361
|
+
ncbi_project_name=_get_value_or_none(omics_processing_row, "ncbiProjectID"),
|
|
362
|
+
omics_type=_create_controlled_term_value(
|
|
496
363
|
omics_processing_row["investigation_type"].values[0]
|
|
497
364
|
),
|
|
498
|
-
instrument_name=f"{
|
|
365
|
+
instrument_name=f"{_get_value_or_none(omics_processing_row, 'sequencingMethod')} {_get_value_or_none(omics_processing_row, 'instrument_model')}",
|
|
499
366
|
part_of="nmdc:sty-11-34xj1150",
|
|
500
|
-
name=f"Terrestrial soil microbial communities - {
|
|
367
|
+
name=f"Terrestrial soil microbial communities - {_get_value_or_none(omics_processing_row, 'dnaSampleID')}",
|
|
501
368
|
type="nmdc:OmicsProcessing",
|
|
502
369
|
)
|
|
503
370
|
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import math
|
|
2
|
+
from typing import Union
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from nmdc_schema import nmdc
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _get_value_or_none(data: pd.DataFrame, column_name: str) -> Union[str, float, None]:
    """
    Get the first value of the specified column in the data DataFrame.

    ``None`` is returned when the column is absent, has no rows, or
    contains any NaN/missing entry. A select set of columns gets special
    handling:

    * ``horizon`` - the value is suffixed with " horizon".
    * ``qaqcStatus`` - the value is lower-cased.
    * ``sampleTopDepth`` / ``sampleBottomDepth`` - the value is cast to
      float and divided by 100.

    :param data: DataFrame to read the column value from.
    :param column_name: Name of the column whose first value is wanted.
    :return: Either a string, float or None depending on the column/column values.
    """
    if column_name not in data:
        return None

    # Look the column up once instead of re-indexing the frame per access.
    column = data[column_name]
    if column.empty or column.isna().any():
        return None

    value = column.values[0]
    if column_name == "horizon":
        return f"{value} horizon"
    if column_name == "qaqcStatus":
        return value.lower()
    if column_name in ("sampleTopDepth", "sampleBottomDepth"):
        # Both depth columns share the same scaling.
        # NOTE(review): presumably a centimetre -> metre conversion; confirm
        # against the unit used by the caller.
        return float(value) / 100
    return value
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _create_controlled_identified_term_value(
    id: str = None, name: str = None
) -> Union[nmdc.ControlledIdentifiedTermValue, None]:
    """
    Create a ControlledIdentifiedTermValue object with the specified id and name.

    :param id: CURIE (with defined prefix expansion) or full URI of term.
    :param name: Name of term.
    :return: ControlledIdentifiedTermValue wrapping an OntologyClass built
        from `id` and `name`, or None when either of them is missing
        (None or NaN).
    """
    for part in (id, name):
        # pandas hands back NaN (a float) for missing cells; treat it like
        # None, as the numeric helpers in this module already do.
        if part is None or (isinstance(part, float) and math.isnan(part)):
            return None
    return nmdc.ControlledIdentifiedTermValue(term=nmdc.OntologyClass(id=id, name=name))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _create_controlled_term_value(
    name: str = None,
) -> Union[nmdc.ControlledTermValue, None]:
    """
    Create a ControlledTermValue object with the specified name.

    :param name: Name of term. This may or may not have an `id` associated with it,
        hence the decision to record it in `has_raw_value` meaning, record as it is
        in the data source.
    :return: ControlledTermValue object with name in `has_raw_value`, or None
        when `name` is missing (None or NaN).
    """
    # pandas hands back NaN (a float) for missing cells; treat it like None.
    if name is None or (isinstance(name, float) and math.isnan(name)):
        return None
    return nmdc.ControlledTermValue(has_raw_value=name)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _create_timestamp_value(value: str = None) -> Union[nmdc.TimestampValue, None]:
    """
    Create a TimestampValue object with the specified value.

    :param value: Timestamp value recorded in ISO-8601 format.
        Example: 2021-07-07T20:14Z.
    :return: ISO-8601 timestamp wrapped in TimestampValue object, or None
        when `value` is missing (None or NaN).
    """
    # pandas hands back NaN (a float) for missing cells; treat it like None.
    if value is None or (isinstance(value, float) and math.isnan(value)):
        return None
    return nmdc.TimestampValue(has_raw_value=value)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _create_quantity_value(
    numeric_value: Union[str, int, float] = None, unit: str = None
) -> Union[nmdc.QuantityValue, None]:
    """
    Create a QuantityValue object with the specified numeric value and unit.

    :param numeric_value: Numeric value from a dataframe column that typically
        records numerical values. Numeric strings are accepted as well.
    :param unit: Unit corresponding to the numeric value. Example: biogeochemical
        measurement value like organic Carbon Nitrogen ratio.
    :return: Numeric value and unit stored together in nested QuantityValue
        object, or None when `numeric_value` is None or NaN.
    :raises ValueError: If `numeric_value` is a string that is not a number.
    """
    if numeric_value is None:
        return None

    # Convert before the NaN test: math.isnan() raises TypeError on the
    # string input that the signature explicitly allows.
    number = float(numeric_value)
    if math.isnan(number):
        return None
    return nmdc.QuantityValue(has_numeric_value=number, has_unit=unit)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _create_text_value(value: str = None) -> Union[nmdc.TextValue, None]:
    """
    Create a TextValue object with the specified value.

    :param value: column that we expect to primarily have text values.
    :return: Text wrapped in TextValue object, or None when `value` is
        missing (None or NaN).
    """
    # pandas hands back NaN (a float) for missing cells; treat it like None.
    if value is None or (isinstance(value, float) and math.isnan(value)):
        return None
    return nmdc.TextValue(has_raw_value=value)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _create_double_value(value: str = None) -> Union[nmdc.Double, None]:
    """
    Create a Double object with the specified value.

    :param value: Values from a column which typically records numeric
        (double) values like pH. May be a number or a numeric string.
    :return: Value cast/converted to nmdc Double object, or None when
        `value` is None or NaN.
    :raises ValueError: If `value` is a string that is not a number.
    """
    if value is None:
        return None

    # Convert before the NaN test: math.isnan() raises TypeError on the
    # string input that the signature advertises.
    number = float(value)
    if math.isnan(number):
        return None
    return nmdc.Double(number)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _create_geolocation_value(
    latitude: str = None, longitude: str = None
) -> Union[nmdc.GeolocationValue, None]:
    """
    Create a GeolocationValue object with latitude and longitude from the
    biosample DataFrame. Takes in values from the NEON API table with
    latitude (decimalLatitude) and longitude (decimalLongitude) values and
    puts it in the respective slots in the GeolocationValue class object.

    :param latitude: Value corresponding to `decimalLatitude` column.
    :param longitude: Value corresponding to `decimalLongitude` column.
    :return: Latitude and Longitude values wrapped in nmdc GeolocationValue
        object, or None when either coordinate is None or NaN.
    :raises ValueError: If a coordinate is a string that is not a number.
    """
    if latitude is None or longitude is None:
        return None

    # Convert before the NaN test: math.isnan() raises TypeError on the
    # string inputs that the signature advertises.
    lat = float(latitude)
    lon = float(longitude)
    if math.isnan(lat) or math.isnan(lon):
        return None

    return nmdc.GeolocationValue(
        latitude=nmdc.DecimalDegree(lat),
        longitude=nmdc.DecimalDegree(lon),
    )
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
nmdc_runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
nmdc_runtime/containers.py,sha256=
|
|
2
|
+
nmdc_runtime/containers.py,sha256=8m_S1wiFu8VOWvY7tyqzf-02X9gXY83YGc8FgjWzLGA,418
|
|
3
3
|
nmdc_runtime/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
nmdc_runtime/util.py,sha256=o74ZKOmSD79brPFAcQFsYpA6wh9287m0hDhDlIpn9VM,19872
|
|
5
5
|
nmdc_runtime/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -23,8 +23,8 @@ nmdc_runtime/lib/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,
|
|
|
23
23
|
nmdc_runtime/lib/extract_nmdc_data.py,sha256=xDFPoYsgkauN48R4v-tJIF0cP_p3J-sBjnyHd0InD9Y,1177
|
|
24
24
|
nmdc_runtime/lib/load_nmdc_data.py,sha256=KO2cIqkY3cBCVcFIwsGokZNOKntOejZVG8ecq43NjFM,3934
|
|
25
25
|
nmdc_runtime/lib/nmdc_dataframes.py,sha256=rVTczY2Jey1yE3x3nZ-RTgtdc2XkzLtKhB_PM3FIb-E,28849
|
|
26
|
-
nmdc_runtime/lib/nmdc_etl_class.py,sha256=
|
|
27
|
-
nmdc_runtime/lib/transform_nmdc_data.py,sha256=
|
|
26
|
+
nmdc_runtime/lib/nmdc_etl_class.py,sha256=tVh3rKVMkBHQE65_LhKeIjCsaCZQk_HJzbc9K4xUNCs,13522
|
|
27
|
+
nmdc_runtime/lib/transform_nmdc_data.py,sha256=hij4lR3IMQRJQdL-rsP_I-m_WyFPsBMchV2MNFUkh0M,39906
|
|
28
28
|
nmdc_runtime/minter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
29
|
nmdc_runtime/minter/bootstrap.py,sha256=5Ej6pJVBRryRIi0ZwEloY78Zky7iE2okF6tPwRI2axM,822
|
|
30
30
|
nmdc_runtime/minter/config.py,sha256=mq_s0xjLZK-zwjwk3IGgnk9ZIvvejyyZ7_qZkLt3V-c,1409
|
|
@@ -35,9 +35,9 @@ nmdc_runtime/minter/domain/model.py,sha256=WMOuKub3dVzkOt_EZSRDLeTsJPqFbKx01SMQ5
|
|
|
35
35
|
nmdc_runtime/minter/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
36
|
nmdc_runtime/minter/entrypoints/fastapi_app.py,sha256=JC4thvzfFwRc1mhWQ-kHy3yvs0SYxF6ktE7LXNCwqlI,4031
|
|
37
37
|
nmdc_runtime/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
|
-
nmdc_runtime/site/graphs.py,sha256=
|
|
39
|
-
nmdc_runtime/site/ops.py,sha256=
|
|
40
|
-
nmdc_runtime/site/repository.py,sha256=
|
|
38
|
+
nmdc_runtime/site/graphs.py,sha256=siHlRnD2eS9nw3Ne049TcGG6I6IYFvjgWQuuSHzEOqc,9492
|
|
39
|
+
nmdc_runtime/site/ops.py,sha256=YzDm7Dm2sELptwTew8DTOcS3nYBH_JegXhu3wzZuuiY,32482
|
|
40
|
+
nmdc_runtime/site/repository.py,sha256=UgY9eMnNgZxa-Y0QeDyENh4KHtxuBWkYCjxltM4mTzA,30938
|
|
41
41
|
nmdc_runtime/site/resources.py,sha256=pQSwg1dRpL_D91gYLzzaOIDZ3qa69rPqSlsq5dS9i_M,17783
|
|
42
42
|
nmdc_runtime/site/util.py,sha256=6hyVPpb6ZkWEG8Nm7uQxnZ-QmuPOG9hgWvl0mUBr5JU,1303
|
|
43
43
|
nmdc_runtime/site/backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -60,9 +60,11 @@ nmdc_runtime/site/terminusdb/schema.py,sha256=3e39rHUSZsNbN_F0SHHNsvcEGRWtYa6O9K
|
|
|
60
60
|
nmdc_runtime/site/translation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
61
61
|
nmdc_runtime/site/translation/emsl.py,sha256=l6Q9Jj3RNJFQNYAU_TtKTJ7cyFcR93xBRs_lLdX0bMQ,1244
|
|
62
62
|
nmdc_runtime/site/translation/gold.py,sha256=R3W99sdQb7Pgu_esN7ruIC-tyREQD_idJ4xCzkqWuGw,1622
|
|
63
|
-
nmdc_runtime/site/translation/gold_translator.py,sha256=
|
|
63
|
+
nmdc_runtime/site/translation/gold_translator.py,sha256=8i5FxrgAG4rLbM0mcCSBaZEzyReht6xwmpm4xeX4HwI,26451
|
|
64
64
|
nmdc_runtime/site/translation/jgi.py,sha256=bh73r0uq5BT3ywXwIa1OEKKtz9LbFsSng472tdr-xtg,875
|
|
65
|
-
nmdc_runtime/site/translation/
|
|
65
|
+
nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=e_7tXFrP0PpdhqUCxXmOaFViSuG36IIMDqyj3FHLcgQ,23069
|
|
66
|
+
nmdc_runtime/site/translation/neon_soil_translator.py,sha256=x-FfNKsIv0efgxty9v4wOxNu5nrrS-N8phx12IqfLOI,37624
|
|
67
|
+
nmdc_runtime/site/translation/neon_utils.py,sha256=k8JYMnm-L981BTOdAMomR1CulS_Hz5v7aYxrJ94KEJc,5086
|
|
66
68
|
nmdc_runtime/site/translation/submission_portal_translator.py,sha256=lHcrfPR5wk3BcZ0Uw5zUyWu5XRVikgOzdzSb5nFVS9I,27964
|
|
67
69
|
nmdc_runtime/site/translation/translator.py,sha256=xM9dM-nTgSWwu5HFoUVNHf8kqk9iiH4PgWdSx4OKxEk,601
|
|
68
70
|
nmdc_runtime/site/translation/util.py,sha256=w_l3SiExGsl6cXRqto0a_ssDmHkP64ITvrOVfPxmNpY,4366
|
|
@@ -71,9 +73,9 @@ nmdc_runtime/site/validation/emsl.py,sha256=TgckqKkFquHDLso77sn-jZRu5ZaBevGCt5p8
|
|
|
71
73
|
nmdc_runtime/site/validation/gold.py,sha256=kJ1L081SZb-8qKpF731r5aQOueM206SUfUYMTTNTFMc,802
|
|
72
74
|
nmdc_runtime/site/validation/jgi.py,sha256=lBo-FCtEYedT74CpW-Kdj512Ib963ik-4YIYmY5puDo,1298
|
|
73
75
|
nmdc_runtime/site/validation/util.py,sha256=GGbMDSwR090sr_E_fHffCN418gpYESaiot6XghS7OYk,3349
|
|
74
|
-
nmdc_runtime-1.
|
|
75
|
-
nmdc_runtime-1.
|
|
76
|
-
nmdc_runtime-1.
|
|
77
|
-
nmdc_runtime-1.
|
|
78
|
-
nmdc_runtime-1.
|
|
79
|
-
nmdc_runtime-1.
|
|
76
|
+
nmdc_runtime-1.3.0.dist-info/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
|
|
77
|
+
nmdc_runtime-1.3.0.dist-info/METADATA,sha256=XiOhRGoaBESF48sWor9SMpTdCL8X9yPKIh6mnA9xZtY,7424
|
|
78
|
+
nmdc_runtime-1.3.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
79
|
+
nmdc_runtime-1.3.0.dist-info/entry_points.txt,sha256=nfH6-K9tDKv7va8ENfShsBnxVQoYJdEe7HHdwtkbh1Y,289
|
|
80
|
+
nmdc_runtime-1.3.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
|
|
81
|
+
nmdc_runtime-1.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|