nmdc-schema 11.12.0rc2__py3-none-any.whl → 11.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nmdc_schema/__init__.py +1 -0
- nmdc_schema/migration_recursion.py +2 -0
- nmdc_schema/migrators/cli/run_migrator.py +10 -14
- nmdc_schema/migrators/helpers.py +24 -0
- nmdc_schema/migrators/migrator_from_11_11_0_to_11_12_0.py +35 -0
- nmdc_schema/migrators/partials/migrator_from_11_11_0_to_11_12_0/__init__.py +28 -0
- nmdc_schema/migrators/partials/migrator_from_11_11_0_to_11_12_0/migrator_from_11_11_0_to_11_12_0_part_1.py +187 -0
- nmdc_schema/migrators/partials/migrator_from_11_11_0_to_11_12_0/migrator_from_11_11_0_to_11_12_0_part_2.py +113 -0
- nmdc_schema/nmdc-pydantic.py +194 -12
- nmdc_schema/nmdc.py +149 -7
- nmdc_schema/nmdc.schema.json +148 -4
- nmdc_schema/nmdc_materialized_patterns.json +291 -51
- nmdc_schema/nmdc_materialized_patterns.schema.json +148 -4
- nmdc_schema/nmdc_materialized_patterns.yaml +243 -41
- nmdc_schema/nmdc_schema_validation_plugin.py +103 -0
- {nmdc_schema-11.12.0rc2.dist-info → nmdc_schema-11.12.1.dist-info}/METADATA +1 -1
- {nmdc_schema-11.12.0rc2.dist-info → nmdc_schema-11.12.1.dist-info}/RECORD +20 -15
- {nmdc_schema-11.12.0rc2.dist-info → nmdc_schema-11.12.1.dist-info}/entry_points.txt +0 -1
- {nmdc_schema-11.12.0rc2.dist-info → nmdc_schema-11.12.1.dist-info}/WHEEL +0 -0
- {nmdc_schema-11.12.0rc2.dist-info → nmdc_schema-11.12.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -2649,9 +2649,9 @@
|
|
|
2649
2649
|
]
|
|
2650
2650
|
},
|
|
2651
2651
|
"misc_param": {
|
|
2652
|
-
"description": "
|
|
2652
|
+
"description": "Structured miscellaneous property assertions for this Biosample. Use when a value cannot cleanly fit an existing, policy-governed slot.",
|
|
2653
2653
|
"items": {
|
|
2654
|
-
"$ref": "#/$defs/
|
|
2654
|
+
"$ref": "#/$defs/PropertyAssertion"
|
|
2655
2655
|
},
|
|
2656
2656
|
"type": [
|
|
2657
2657
|
"array",
|
|
@@ -8540,7 +8540,7 @@
|
|
|
8540
8540
|
},
|
|
8541
8541
|
"Instrument": {
|
|
8542
8542
|
"additionalProperties": false,
|
|
8543
|
-
"description": "A material entity that is designed to perform a function in a scientific investigation, but is not a reagent.",
|
|
8543
|
+
"description": "A material entity that is designed to perform a function in a scientific investigation, but is not a reagent. This class models the make and model of the instrument, not the specific instance of the instrument.",
|
|
8544
8544
|
"properties": {
|
|
8545
8545
|
"alternative_identifiers": {
|
|
8546
8546
|
"description": "A list of alternative identifiers for the entity.",
|
|
@@ -13202,10 +13202,113 @@
|
|
|
13202
13202
|
"title": "ProfilePositionEnum",
|
|
13203
13203
|
"type": "string"
|
|
13204
13204
|
},
|
|
13205
|
+
"PropertyAssertion": {
|
|
13206
|
+
"additionalProperties": false,
|
|
13207
|
+
"description": "A structured record of data that doesn't fit nicely within the constraints of other NMDC AttributeValues. Uses primitive ranges only.",
|
|
13208
|
+
"properties": {
|
|
13209
|
+
"has_attribute_id": {
|
|
13210
|
+
"description": "CURIE or IRI for the property (MIxS slot, NMDC slot, ENVO/OBI term, etc.).",
|
|
13211
|
+
"type": [
|
|
13212
|
+
"string",
|
|
13213
|
+
"null"
|
|
13214
|
+
]
|
|
13215
|
+
},
|
|
13216
|
+
"has_attribute_label": {
|
|
13217
|
+
"description": "Human-readable label for the property (e.g., MIxS label, ENVO term label).",
|
|
13218
|
+
"type": [
|
|
13219
|
+
"string",
|
|
13220
|
+
"null"
|
|
13221
|
+
]
|
|
13222
|
+
},
|
|
13223
|
+
"has_boolean_value": {
|
|
13224
|
+
"description": "Links a quantity value to a boolean",
|
|
13225
|
+
"type": [
|
|
13226
|
+
"boolean",
|
|
13227
|
+
"null"
|
|
13228
|
+
]
|
|
13229
|
+
},
|
|
13230
|
+
"has_datetime_value": {
|
|
13231
|
+
"description": "Date-time value for the property in ISO-8601 format.",
|
|
13232
|
+
"type": [
|
|
13233
|
+
"string",
|
|
13234
|
+
"null"
|
|
13235
|
+
]
|
|
13236
|
+
},
|
|
13237
|
+
"has_maximum_numeric_value": {
|
|
13238
|
+
"description": "The maximum value part, expressed as number, of the quantity value when the value covers a range.",
|
|
13239
|
+
"type": [
|
|
13240
|
+
"number",
|
|
13241
|
+
"null"
|
|
13242
|
+
]
|
|
13243
|
+
},
|
|
13244
|
+
"has_minimum_numeric_value": {
|
|
13245
|
+
"description": "The minimum value part, expressed as number, of the quantity value when the value covers a range.",
|
|
13246
|
+
"type": [
|
|
13247
|
+
"number",
|
|
13248
|
+
"null"
|
|
13249
|
+
]
|
|
13250
|
+
},
|
|
13251
|
+
"has_numeric_value": {
|
|
13252
|
+
"description": "Links a quantity value to a number",
|
|
13253
|
+
"type": [
|
|
13254
|
+
"number",
|
|
13255
|
+
"null"
|
|
13256
|
+
]
|
|
13257
|
+
},
|
|
13258
|
+
"has_quantity_kind_id": {
|
|
13259
|
+
"description": "Optional CURIE or IRI for the physical quantity kind (e.g., qudt:QuantityKind).",
|
|
13260
|
+
"type": [
|
|
13261
|
+
"string",
|
|
13262
|
+
"null"
|
|
13263
|
+
]
|
|
13264
|
+
},
|
|
13265
|
+
"has_raw_value": {
|
|
13266
|
+
"description": "Original contributor string representation (unparsed)",
|
|
13267
|
+
"type": "string"
|
|
13268
|
+
},
|
|
13269
|
+
"has_unit": {
|
|
13270
|
+
"description": "UCUM unit code (required only when numeric value is present)",
|
|
13271
|
+
"type": [
|
|
13272
|
+
"string",
|
|
13273
|
+
"null"
|
|
13274
|
+
]
|
|
13275
|
+
},
|
|
13276
|
+
"has_value_term_id": {
|
|
13277
|
+
"description": "CURIE or IRI for categorical values (ENVO, PATO, METPO, etc.).",
|
|
13278
|
+
"type": [
|
|
13279
|
+
"string",
|
|
13280
|
+
"null"
|
|
13281
|
+
]
|
|
13282
|
+
},
|
|
13283
|
+
"type": {
|
|
13284
|
+
"description": "the class_uri of the class that has been instantiated",
|
|
13285
|
+
"enum": [
|
|
13286
|
+
"nmdc:PropertyAssertion"
|
|
13287
|
+
],
|
|
13288
|
+
"type": "string"
|
|
13289
|
+
}
|
|
13290
|
+
},
|
|
13291
|
+
"required": [
|
|
13292
|
+
"has_raw_value",
|
|
13293
|
+
"type"
|
|
13294
|
+
],
|
|
13295
|
+
"title": "PropertyAssertion",
|
|
13296
|
+
"type": "object"
|
|
13297
|
+
},
|
|
13205
13298
|
"Protocol": {
|
|
13206
13299
|
"additionalProperties": false,
|
|
13207
13300
|
"description": "",
|
|
13208
13301
|
"properties": {
|
|
13302
|
+
"analysis_type": {
|
|
13303
|
+
"description": "Select all the data types associated or available for this biosample",
|
|
13304
|
+
"items": {
|
|
13305
|
+
"$ref": "#/$defs/AnalysisTypeEnum"
|
|
13306
|
+
},
|
|
13307
|
+
"type": [
|
|
13308
|
+
"array",
|
|
13309
|
+
"null"
|
|
13310
|
+
]
|
|
13311
|
+
},
|
|
13209
13312
|
"description": {
|
|
13210
13313
|
"description": "a human-readable description of a thing",
|
|
13211
13314
|
"type": [
|
|
@@ -13220,6 +13323,10 @@
|
|
|
13220
13323
|
"null"
|
|
13221
13324
|
]
|
|
13222
13325
|
},
|
|
13326
|
+
"protocol_for": {
|
|
13327
|
+
"$ref": "#/$defs/ProtocolForEnum",
|
|
13328
|
+
"description": "The type of planned process that the protocol describes."
|
|
13329
|
+
},
|
|
13223
13330
|
"type": {
|
|
13224
13331
|
"description": "the class_uri of the class that has been instantiated",
|
|
13225
13332
|
"enum": [
|
|
@@ -13258,6 +13365,43 @@
|
|
|
13258
13365
|
"title": "ProtocolCategoryEnum",
|
|
13259
13366
|
"type": "string"
|
|
13260
13367
|
},
|
|
13368
|
+
"ProtocolForEnum": {
|
|
13369
|
+
"description": "The permitted values for describing the type of planned process that a protocol describes.",
|
|
13370
|
+
"enum": [
|
|
13371
|
+
"AnnotatingWorkflow",
|
|
13372
|
+
"ChemicalConversionProcess",
|
|
13373
|
+
"ChromatographicSeparationProcess",
|
|
13374
|
+
"CollectingBiosamplesFromSite",
|
|
13375
|
+
"DataEmitterProcess",
|
|
13376
|
+
"DataGeneration",
|
|
13377
|
+
"DissolvingProcess",
|
|
13378
|
+
"Extraction",
|
|
13379
|
+
"FiltrationProcess",
|
|
13380
|
+
"LibraryPreparation",
|
|
13381
|
+
"MagsAnalysis",
|
|
13382
|
+
"MassSpectrometry",
|
|
13383
|
+
"MaterialProcessing",
|
|
13384
|
+
"MetabolomicsAnalysis",
|
|
13385
|
+
"MetagenomeAnnotation",
|
|
13386
|
+
"MetagenomeAssembly",
|
|
13387
|
+
"MetaproteomicsAnalysis",
|
|
13388
|
+
"MetatranscriptomeAnnotation",
|
|
13389
|
+
"MetatranscriptomeAssembly",
|
|
13390
|
+
"MetatranscriptomeExpressionAnalysis",
|
|
13391
|
+
"MixingProcess",
|
|
13392
|
+
"NomAnalysis",
|
|
13393
|
+
"NucleotideSequencing",
|
|
13394
|
+
"PlannedProcess",
|
|
13395
|
+
"Pooling",
|
|
13396
|
+
"ReadBasedTaxonomyAnalysis",
|
|
13397
|
+
"ReadQcAnalysis",
|
|
13398
|
+
"StorageProcess",
|
|
13399
|
+
"SubSamplingProcess",
|
|
13400
|
+
"WorkflowExecution"
|
|
13401
|
+
],
|
|
13402
|
+
"title": "ProtocolForEnum",
|
|
13403
|
+
"type": "string"
|
|
13404
|
+
},
|
|
13261
13405
|
"QuadPosEnum": {
|
|
13262
13406
|
"description": "",
|
|
13263
13407
|
"enum": [
|
|
@@ -15665,5 +15809,5 @@
|
|
|
15665
15809
|
},
|
|
15666
15810
|
"title": "NMDC",
|
|
15667
15811
|
"type": "object",
|
|
15668
|
-
"version": "11.12.0rc2"
|
|
15812
|
+
"version": "11.12.1"
|
|
15669
15813
|
}
|
|
@@ -13,7 +13,7 @@ notes:
|
|
|
13
13
|
- not importing any MIxS terms where the relationship between the name (SCN) and the
|
|
14
14
|
id isn't 1:1
|
|
15
15
|
id: https://w3id.org/nmdc/nmdc
|
|
16
|
-
version: 11.12.0rc2
|
|
16
|
+
version: 11.12.1
|
|
17
17
|
license: https://creativecommons.org/publicdomain/zero/1.0/
|
|
18
18
|
prefixes:
|
|
19
19
|
BFO:
|
|
@@ -325,39 +325,6 @@ emit_prefixes:
|
|
|
325
325
|
default_prefix: nmdc
|
|
326
326
|
default_range: string
|
|
327
327
|
types:
|
|
328
|
-
external_identifier:
|
|
329
|
-
name: external_identifier
|
|
330
|
-
description: A CURIE representing an external identifier
|
|
331
|
-
from_schema: https://w3id.org/nmdc/nmdc
|
|
332
|
-
see_also:
|
|
333
|
-
- https://microbiomedata.github.io/nmdc-schema/identifiers/
|
|
334
|
-
typeof: uriorcurie
|
|
335
|
-
uri: xsd:anyURI
|
|
336
|
-
pattern: ^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$
|
|
337
|
-
bytes:
|
|
338
|
-
name: bytes
|
|
339
|
-
description: An integer value that corresponds to a size in bytes
|
|
340
|
-
from_schema: https://w3id.org/nmdc/nmdc
|
|
341
|
-
see_also:
|
|
342
|
-
- UO:0000233
|
|
343
|
-
base: int
|
|
344
|
-
uri: xsd:long
|
|
345
|
-
decimal_degree:
|
|
346
|
-
name: decimal_degree
|
|
347
|
-
description: A decimal degree expresses latitude or longitude as decimal fractions.
|
|
348
|
-
from_schema: https://w3id.org/nmdc/nmdc
|
|
349
|
-
see_also:
|
|
350
|
-
- https://en.wikipedia.org/wiki/Decimal_degrees
|
|
351
|
-
base: float
|
|
352
|
-
uri: xsd:decimal
|
|
353
|
-
language_code:
|
|
354
|
-
name: language_code
|
|
355
|
-
description: A language code conforming to ISO_639-1
|
|
356
|
-
from_schema: https://w3id.org/nmdc/nmdc
|
|
357
|
-
see_also:
|
|
358
|
-
- https://en.wikipedia.org/wiki/ISO_639-1
|
|
359
|
-
base: str
|
|
360
|
-
uri: xsd:language
|
|
361
328
|
string:
|
|
362
329
|
name: string
|
|
363
330
|
description: A character string
|
|
@@ -588,6 +555,39 @@ types:
|
|
|
588
555
|
base: str
|
|
589
556
|
uri: xsd:string
|
|
590
557
|
repr: str
|
|
558
|
+
external_identifier:
|
|
559
|
+
name: external_identifier
|
|
560
|
+
description: A CURIE representing an external identifier
|
|
561
|
+
from_schema: https://w3id.org/nmdc/nmdc
|
|
562
|
+
see_also:
|
|
563
|
+
- https://microbiomedata.github.io/nmdc-schema/identifiers/
|
|
564
|
+
typeof: uriorcurie
|
|
565
|
+
uri: xsd:anyURI
|
|
566
|
+
pattern: ^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$
|
|
567
|
+
bytes:
|
|
568
|
+
name: bytes
|
|
569
|
+
description: An integer value that corresponds to a size in bytes
|
|
570
|
+
from_schema: https://w3id.org/nmdc/nmdc
|
|
571
|
+
see_also:
|
|
572
|
+
- UO:0000233
|
|
573
|
+
base: int
|
|
574
|
+
uri: xsd:long
|
|
575
|
+
decimal_degree:
|
|
576
|
+
name: decimal_degree
|
|
577
|
+
description: A decimal degree expresses latitude or longitude as decimal fractions.
|
|
578
|
+
from_schema: https://w3id.org/nmdc/nmdc
|
|
579
|
+
see_also:
|
|
580
|
+
- https://en.wikipedia.org/wiki/Decimal_degrees
|
|
581
|
+
base: float
|
|
582
|
+
uri: xsd:decimal
|
|
583
|
+
language_code:
|
|
584
|
+
name: language_code
|
|
585
|
+
description: A language code conforming to ISO_639-1
|
|
586
|
+
from_schema: https://w3id.org/nmdc/nmdc
|
|
587
|
+
see_also:
|
|
588
|
+
- https://en.wikipedia.org/wiki/ISO_639-1
|
|
589
|
+
base: str
|
|
590
|
+
uri: xsd:language
|
|
591
591
|
enums:
|
|
592
592
|
CalibrationTargetEnum:
|
|
593
593
|
name: CalibrationTargetEnum
|
|
@@ -5548,6 +5548,74 @@ enums:
|
|
|
5548
5548
|
NomAnalysis:
|
|
5549
5549
|
text: NomAnalysis
|
|
5550
5550
|
description: A failure has occurred in analyzing NOM data.
|
|
5551
|
+
ProtocolForEnum:
|
|
5552
|
+
name: ProtocolForEnum
|
|
5553
|
+
description: The permitted values for describing the type of planned process that
|
|
5554
|
+
a protocol describes.
|
|
5555
|
+
comments:
|
|
5556
|
+
- These are the non-abstract class descendants of PlannedProcess.
|
|
5557
|
+
from_schema: https://w3id.org/nmdc/nmdc
|
|
5558
|
+
permissible_values:
|
|
5559
|
+
AnnotatingWorkflow:
|
|
5560
|
+
text: AnnotatingWorkflow
|
|
5561
|
+
ChemicalConversionProcess:
|
|
5562
|
+
text: ChemicalConversionProcess
|
|
5563
|
+
ChromatographicSeparationProcess:
|
|
5564
|
+
text: ChromatographicSeparationProcess
|
|
5565
|
+
CollectingBiosamplesFromSite:
|
|
5566
|
+
text: CollectingBiosamplesFromSite
|
|
5567
|
+
DataEmitterProcess:
|
|
5568
|
+
text: DataEmitterProcess
|
|
5569
|
+
DataGeneration:
|
|
5570
|
+
text: DataGeneration
|
|
5571
|
+
DissolvingProcess:
|
|
5572
|
+
text: DissolvingProcess
|
|
5573
|
+
Extraction:
|
|
5574
|
+
text: Extraction
|
|
5575
|
+
FiltrationProcess:
|
|
5576
|
+
text: FiltrationProcess
|
|
5577
|
+
LibraryPreparation:
|
|
5578
|
+
text: LibraryPreparation
|
|
5579
|
+
MagsAnalysis:
|
|
5580
|
+
text: MagsAnalysis
|
|
5581
|
+
MassSpectrometry:
|
|
5582
|
+
text: MassSpectrometry
|
|
5583
|
+
MaterialProcessing:
|
|
5584
|
+
text: MaterialProcessing
|
|
5585
|
+
MetabolomicsAnalysis:
|
|
5586
|
+
text: MetabolomicsAnalysis
|
|
5587
|
+
MetagenomeAnnotation:
|
|
5588
|
+
text: MetagenomeAnnotation
|
|
5589
|
+
MetagenomeAssembly:
|
|
5590
|
+
text: MetagenomeAssembly
|
|
5591
|
+
MetaproteomicsAnalysis:
|
|
5592
|
+
text: MetaproteomicsAnalysis
|
|
5593
|
+
MetatranscriptomeAnnotation:
|
|
5594
|
+
text: MetatranscriptomeAnnotation
|
|
5595
|
+
MetatranscriptomeAssembly:
|
|
5596
|
+
text: MetatranscriptomeAssembly
|
|
5597
|
+
MetatranscriptomeExpressionAnalysis:
|
|
5598
|
+
text: MetatranscriptomeExpressionAnalysis
|
|
5599
|
+
MixingProcess:
|
|
5600
|
+
text: MixingProcess
|
|
5601
|
+
NomAnalysis:
|
|
5602
|
+
text: NomAnalysis
|
|
5603
|
+
NucleotideSequencing:
|
|
5604
|
+
text: NucleotideSequencing
|
|
5605
|
+
PlannedProcess:
|
|
5606
|
+
text: PlannedProcess
|
|
5607
|
+
Pooling:
|
|
5608
|
+
text: Pooling
|
|
5609
|
+
ReadBasedTaxonomyAnalysis:
|
|
5610
|
+
text: ReadBasedTaxonomyAnalysis
|
|
5611
|
+
ReadQcAnalysis:
|
|
5612
|
+
text: ReadQcAnalysis
|
|
5613
|
+
StorageProcess:
|
|
5614
|
+
text: StorageProcess
|
|
5615
|
+
SubSamplingProcess:
|
|
5616
|
+
text: SubSamplingProcess
|
|
5617
|
+
WorkflowExecution:
|
|
5618
|
+
text: WorkflowExecution
|
|
5551
5619
|
SampleTypeEnum:
|
|
5552
5620
|
name: SampleTypeEnum
|
|
5553
5621
|
from_schema: https://w3id.org/nmdc/nmdc
|
|
@@ -6541,7 +6609,7 @@ slots:
|
|
|
6541
6609
|
annotations:
|
|
6542
6610
|
storage_units:
|
|
6543
6611
|
tag: storage_units
|
|
6544
|
-
value: '%|mmol/L|umol/L'
|
|
6612
|
+
value: '%|mmol/L|umol/L|mg/L|g/L'
|
|
6545
6613
|
description: When solutions A (containing substance X) and B are combined together,
|
|
6546
6614
|
this slot captures the concentration of X in the combination
|
|
6547
6615
|
from_schema: https://w3id.org/nmdc/nmdc
|
|
@@ -6926,8 +6994,7 @@ slots:
|
|
|
6926
6994
|
mappings:
|
|
6927
6995
|
- qud:unit
|
|
6928
6996
|
- schema:unitCode
|
|
6929
|
-
range:
|
|
6930
|
-
required: true
|
|
6997
|
+
range: string
|
|
6931
6998
|
type:
|
|
6932
6999
|
name: type
|
|
6933
7000
|
description: the class_uri of the class that has been instantiated
|
|
@@ -7024,6 +7091,56 @@ slots:
|
|
|
7024
7091
|
range: string
|
|
7025
7092
|
multivalued: true
|
|
7026
7093
|
pattern: ^[Hh][Tt][Tt][Pp][Ss]?:\/\/(?!.*[Dd][Oo][Ii]\.[Oo][Rr][Gg]).*$
|
|
7094
|
+
has_attribute_label:
|
|
7095
|
+
name: has_attribute_label
|
|
7096
|
+
description: Human-readable label for the property (e.g., MIxS label, ENVO term
|
|
7097
|
+
label).
|
|
7098
|
+
comments:
|
|
7099
|
+
- This provides a human-friendly name for the asserted property. For example,
|
|
7100
|
+
"bicarbonate ion concentration" or "total phosphorus". The label helps with
|
|
7101
|
+
readability and data discovery.
|
|
7102
|
+
from_schema: https://w3id.org/nmdc/nmdc
|
|
7103
|
+
range: string
|
|
7104
|
+
has_attribute_id:
|
|
7105
|
+
name: has_attribute_id
|
|
7106
|
+
description: CURIE or IRI for the property (MIxS slot, NMDC slot, ENVO/OBI term,
|
|
7107
|
+
etc.).
|
|
7108
|
+
comments:
|
|
7109
|
+
- This provides a resolvable identifier for the property being asserted. Examples
|
|
7110
|
+
include MIXS:0000117 for total phosphorus, or ENVO:01001357 for bicarbonate
|
|
7111
|
+
ion concentration.
|
|
7112
|
+
- Prefer using standard ontology terms (ENVO, PATO, OBI, etc.) or MIxS identifiers
|
|
7113
|
+
when available to enhance interoperability.
|
|
7114
|
+
from_schema: https://w3id.org/nmdc/nmdc
|
|
7115
|
+
range: uriorcurie
|
|
7116
|
+
has_quantity_kind_id:
|
|
7117
|
+
name: has_quantity_kind_id
|
|
7118
|
+
description: Optional CURIE or IRI for the physical quantity kind (e.g., qudt:QuantityKind).
|
|
7119
|
+
comments:
|
|
7120
|
+
- This slot enables precise semantic description of what physical quantity is
|
|
7121
|
+
being measured, independent of the specific units used. For example, qudt:MassConcentration
|
|
7122
|
+
or qudt:Temperature.
|
|
7123
|
+
- Using quantity kind identifiers from QUDT or similar vocabularies improves data
|
|
7124
|
+
integration and enables automated unit conversion.
|
|
7125
|
+
from_schema: https://w3id.org/nmdc/nmdc
|
|
7126
|
+
range: uriorcurie
|
|
7127
|
+
has_value_term_id:
|
|
7128
|
+
name: has_value_term_id
|
|
7129
|
+
description: CURIE or IRI for categorical values (ENVO, PATO, METPO, etc.).
|
|
7130
|
+
comments:
|
|
7131
|
+
- Use this slot when the value of the property is a controlled vocabulary term
|
|
7132
|
+
rather than a numeric or free-text value. For example, ENVO:00002297 for "desert
|
|
7133
|
+
ecosystem" or PATO:0001199 for "dry".
|
|
7134
|
+
from_schema: https://w3id.org/nmdc/nmdc
|
|
7135
|
+
range: uriorcurie
|
|
7136
|
+
has_datetime_value:
|
|
7137
|
+
name: has_datetime_value
|
|
7138
|
+
description: Date-time value for the property in ISO-8601 format.
|
|
7139
|
+
comments:
|
|
7140
|
+
- Use this slot for temporal properties. The value should follow ISO-8601 format
|
|
7141
|
+
(e.g., "2025-06-12T14:30:00Z").
|
|
7142
|
+
from_schema: https://w3id.org/nmdc/nmdc
|
|
7143
|
+
range: string
|
|
7027
7144
|
processing_institution_workflow_metadata:
|
|
7028
7145
|
name: processing_institution_workflow_metadata
|
|
7029
7146
|
description: Information about how workflow results were generated when the processing
|
|
@@ -13524,9 +13641,11 @@ slots:
|
|
|
13524
13641
|
occurrence:
|
|
13525
13642
|
tag: occurrence
|
|
13526
13643
|
value: m
|
|
13527
|
-
description:
|
|
13528
|
-
|
|
13644
|
+
description: Structured miscellaneous property assertions for this Biosample.
|
|
13645
|
+
Use when a value cannot cleanly fit an existing, policy-governed slot.
|
|
13529
13646
|
title: miscellaneous parameter
|
|
13647
|
+
todos:
|
|
13648
|
+
- This slot should not be available in the submission portal.
|
|
13530
13649
|
examples:
|
|
13531
13650
|
- value: Bicarbonate ion concentration;2075 micromole per kilogram
|
|
13532
13651
|
from_schema: https://w3id.org/nmdc/nmdc
|
|
@@ -13535,7 +13654,7 @@ slots:
|
|
|
13535
13654
|
is_a: core field
|
|
13536
13655
|
string_serialization: '{text};{float} {unit}'
|
|
13537
13656
|
slot_uri: MIXS:0000752
|
|
13538
|
-
range:
|
|
13657
|
+
range: PropertyAssertion
|
|
13539
13658
|
multivalued: true
|
|
13540
13659
|
inlined_as_list: true
|
|
13541
13660
|
n_alkanes:
|
|
@@ -19756,6 +19875,11 @@ slots:
|
|
|
19756
19875
|
- For example, low read count from a sequencer, malformed fastq files, etc.
|
|
19757
19876
|
from_schema: https://w3id.org/nmdc/nmdc
|
|
19758
19877
|
range: FailureWhatEnum
|
|
19878
|
+
protocol_for:
|
|
19879
|
+
name: protocol_for
|
|
19880
|
+
description: The type of planned process that the protocol describes.
|
|
19881
|
+
from_schema: https://w3id.org/nmdc/nmdc
|
|
19882
|
+
range: ProtocolForEnum
|
|
19759
19883
|
emsl_store_temp:
|
|
19760
19884
|
name: emsl_store_temp
|
|
19761
19885
|
description: The temperature at which the sample should be stored upon delivery
|
|
@@ -22608,6 +22732,8 @@ classes:
|
|
|
22608
22732
|
has_unit:
|
|
22609
22733
|
name: has_unit
|
|
22610
22734
|
description: The unit of the quantity
|
|
22735
|
+
range: UnitEnum
|
|
22736
|
+
required: true
|
|
22611
22737
|
has_numeric_value:
|
|
22612
22738
|
name: has_numeric_value
|
|
22613
22739
|
description: The number part of the quantity
|
|
@@ -22720,6 +22846,72 @@ classes:
|
|
|
22720
22846
|
name: longitude
|
|
22721
22847
|
required: true
|
|
22722
22848
|
class_uri: nmdc:GeolocationValue
|
|
22849
|
+
PropertyAssertion:
|
|
22850
|
+
name: PropertyAssertion
|
|
22851
|
+
description: A structured record of data that doesn't fit nicely within the constraints
|
|
22852
|
+
of other NMDC AttributeValues. Uses primitive ranges only.
|
|
22853
|
+
comments:
|
|
22854
|
+
- This class enables flexible metadata capture for properties that don't align
|
|
22855
|
+
with existing, policy-governed slots in NMDC schema.
|
|
22856
|
+
- 'Interoperability note: This approach is aligned with the BERtron schema''s
|
|
22857
|
+
''properties'' slot pattern (see https://github.com/ber-data/bertron-schema),
|
|
22858
|
+
which uses any_of to support both TextValue and QuantityValue ranges. NMDC''s
|
|
22859
|
+
PropertyAssertion provides a more granular approach by using primitive types
|
|
22860
|
+
with optional semantic annotations, enabling better validation while maintaining
|
|
22861
|
+
flexibility for diverse metadata types.'
|
|
22862
|
+
- PropertyAssertion supports both categorical values (via has_value_term_id) and
|
|
22863
|
+
numeric values (via has_numeric_value with optional min/max ranges), along with
|
|
22864
|
+
temporal (has_datetime_value) and boolean (has_boolean_value) data types.
|
|
22865
|
+
examples:
|
|
22866
|
+
- description: Data provided from submission that doesn't conform to required
|
|
22867
|
+
abs_air_humidity units
|
|
22868
|
+
object:
|
|
22869
|
+
type: nmdc:PropertyAssertion
|
|
22870
|
+
has_raw_value: 50 kPa
|
|
22871
|
+
has_attribute_id: MIXS:0000122
|
|
22872
|
+
has_attribute_label: absolute air humidity
|
|
22873
|
+
has_numeric_value: 50
|
|
22874
|
+
has_unit: kPa
|
|
22875
|
+
- description: Data provided from submission that doesn't conform to required
|
|
22876
|
+
UCUM unit
|
|
22877
|
+
object:
|
|
22878
|
+
type: nmdc:PropertyAssertion
|
|
22879
|
+
has_raw_value: 5.5 mL/L
|
|
22880
|
+
has_attribute_id: MIXS:0000119
|
|
22881
|
+
has_attribute_label: dissolved oxygen
|
|
22882
|
+
has_numeric_value: 5.5
|
|
22883
|
+
has_unit: mL/L
|
|
22884
|
+
- description: Unit provided is invalid for UCUM and standard
|
|
22885
|
+
object:
|
|
22886
|
+
type: nmdc:PropertyAssertion
|
|
22887
|
+
has_raw_value: 250 W/m2
|
|
22888
|
+
has_attribute_id: MIXS:0000112
|
|
22889
|
+
has_attribute_label: solar irradiance
|
|
22890
|
+
has_numeric_value: 250
|
|
22891
|
+
has_unit: W/m2
|
|
22892
|
+
from_schema: https://w3id.org/nmdc/nmdc
|
|
22893
|
+
is_a: AttributeValue
|
|
22894
|
+
slots:
|
|
22895
|
+
- has_attribute_label
|
|
22896
|
+
- has_attribute_id
|
|
22897
|
+
- has_quantity_kind_id
|
|
22898
|
+
- has_value_term_id
|
|
22899
|
+
- has_boolean_value
|
|
22900
|
+
- has_datetime_value
|
|
22901
|
+
- has_numeric_value
|
|
22902
|
+
- has_minimum_numeric_value
|
|
22903
|
+
- has_maximum_numeric_value
|
|
22904
|
+
- has_unit
|
|
22905
|
+
slot_usage:
|
|
22906
|
+
has_raw_value:
|
|
22907
|
+
name: has_raw_value
|
|
22908
|
+
description: Original contributor string representation (unparsed)
|
|
22909
|
+
required: true
|
|
22910
|
+
has_unit:
|
|
22911
|
+
name: has_unit
|
|
22912
|
+
description: UCUM unit code (required only when numeric value is present)
|
|
22913
|
+
required: false
|
|
22914
|
+
class_uri: nmdc:PropertyAssertion
|
|
22723
22915
|
NamedThing:
|
|
22724
22916
|
name: NamedThing
|
|
22725
22917
|
description: a databased entity or concept/class
|
|
@@ -22824,7 +23016,8 @@ classes:
|
|
|
22824
23016
|
Instrument:
|
|
22825
23017
|
name: Instrument
|
|
22826
23018
|
description: A material entity that is designed to perform a function in a scientific
|
|
22827
|
-
investigation, but is not a reagent.
|
|
23019
|
+
investigation, but is not a reagent. This class models the make and model of
|
|
23020
|
+
the instrument, not the specific instance of the instrument.
|
|
22828
23021
|
from_schema: https://w3id.org/nmdc/nmdc
|
|
22829
23022
|
aliases:
|
|
22830
23023
|
- device
|
|
@@ -22842,6 +23035,13 @@ classes:
|
|
|
22842
23035
|
syntax: '{id_nmdc_prefix}:inst-{id_shoulder}-{id_blade}$'
|
|
22843
23036
|
interpolated: true
|
|
22844
23037
|
class_uri: nmdc:Instrument
|
|
23038
|
+
unique_keys:
|
|
23039
|
+
main:
|
|
23040
|
+
unique_key_name: main
|
|
23041
|
+
unique_key_slots:
|
|
23042
|
+
- vendor
|
|
23043
|
+
- model
|
|
23044
|
+
description: A unique instrument is defined by its vendor and model.
|
|
22845
23045
|
PlannedProcess:
|
|
22846
23046
|
name: PlannedProcess
|
|
22847
23047
|
title: Planned Process
|
|
@@ -22867,6 +23067,8 @@ classes:
|
|
|
22867
23067
|
- name
|
|
22868
23068
|
- type
|
|
22869
23069
|
- description
|
|
23070
|
+
- protocol_for
|
|
23071
|
+
- analysis_type
|
|
22870
23072
|
class_uri: nmdc:Protocol
|
|
22871
23073
|
CreditAssociation:
|
|
22872
23074
|
name: CreditAssociation
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
from typing import Any, Optional, Iterator, Union
|
|
2
|
+
|
|
3
|
+
from linkml.validator.plugins import ValidationPlugin
|
|
4
|
+
from linkml.validator.report import ValidationResult, Severity
|
|
5
|
+
from linkml.validator.validation_context import ValidationContext
|
|
6
|
+
from linkml_runtime import SchemaView
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _yield_quantity_value_objects(data: Any, path: Optional[list[Union[str, int]]] = None):
|
|
10
|
+
"""Recursively yield QuantityValue objects from data."""
|
|
11
|
+
if path is None:
|
|
12
|
+
path = []
|
|
13
|
+
if isinstance(data, dict):
|
|
14
|
+
if data.get("type") == "nmdc:QuantityValue":
|
|
15
|
+
yield path, data
|
|
16
|
+
else:
|
|
17
|
+
# Recursively search nested dictionaries
|
|
18
|
+
for key, value in data.items():
|
|
19
|
+
yield from _yield_quantity_value_objects(value, path + [key])
|
|
20
|
+
elif isinstance(data, list):
|
|
21
|
+
# Handle lists of objects
|
|
22
|
+
for i, item in enumerate(data):
|
|
23
|
+
yield from _yield_quantity_value_objects(item, path + [i])
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class NmdcSchemaValidationPlugin(ValidationPlugin):
    """A validation plugin which validates instances using NMDC-specific validation logic.

    This plugin is designed to be used as a part of LinkML's validation framework and in conjunction
    with the `JsonSchemaValidationPlugin` provided by LinkML. This plugin performs the following
    additional checks:

    1. Ensure that every object whose `type` is `nmdc:QuantityValue` has a `has_unit` value that
       is in agreement with the `storage_units` annotation on the slot that holds it (slots
       without that annotation are not restricted)
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # Maps slot name -> allowed units (or None when the slot has no usable
        # `storage_units` annotation), so each slot is looked up only once.
        self._slot_storage_units_cache: dict[str, Optional[list[str]]] = {}

    def _slot_storage_units(self, schema_view: SchemaView, slot_name: str) -> Optional[list[str]]:
        """Get allowed storage_units for a slot.

        :param schema_view: The SchemaView used to look up the slot definition
        :param slot_name: Name of the slot to look up
        :return: The pipe-separated `storage_units` annotation value split into a list, or
            ``None`` when the slot is unknown or carries no such annotation
        """
        if slot_name in self._slot_storage_units_cache:
            return self._slot_storage_units_cache[slot_name]

        slot = schema_view.get_slot(slot_name)
        if not slot or not slot.annotations:
            self._slot_storage_units_cache[slot_name] = None
            return None

        storage_units = None
        if "storage_units" in slot.annotations:
            annotation_obj = slot.annotations["storage_units"]
            if annotation_obj and hasattr(annotation_obj, "value"):
                # Split on pipes for multiple units
                storage_units = str(annotation_obj.value).split("|")

        self._slot_storage_units_cache[slot_name] = storage_units
        return storage_units

    def process(self, instance: Any, context: ValidationContext) -> Iterator[ValidationResult]:
        """Perform NMDC-specific schema validation on the provided instance

        :param instance: The instance to validate
        :param context: The validation context which provides a SchemaView artifact
        :return: Iterator over validation results
        :rtype: Iterator[ValidationResult]
        """
        for data_path, quantity_value in _yield_quantity_value_objects(instance):
            # Get the `has_unit` property from the QuantityValue instance. If it is missing, yield a
            # validation error and continue. This is slightly redundant with JSON Schema validation,
            # but it ensures that we do not attempt further validation on the instance.
            unit = quantity_value.get("has_unit")
            str_data_path = '/'.join(str(p) for p in data_path)
            if unit is None:
                yield ValidationResult(
                    type="nmdc-schema validation",
                    severity=Severity.ERROR,
                    instance=instance,
                    instantiates=context.target_class,
                    message=f"QuantityValue at /{str_data_path} is missing required 'has_unit' property",
                )
                continue

            # Find the slot name by looking for the last string in the data path. If one cannot be
            # found, skip further validation because we cannot determine the relevant slot.
            try:
                slot_name = next(key for key in reversed(data_path) if isinstance(key, str))
            except StopIteration:
                continue

            storage_units = self._slot_storage_units(context.schema_view, slot_name)
            if storage_units and unit not in storage_units:
                yield ValidationResult(
                    type="nmdc-schema validation",
                    severity=Severity.ERROR,
                    instance=instance,
                    instantiates=context.target_class,
                    message=(
                        f"QuantityValue at /{str_data_path} has unit '{unit}' which is not allowed for "
                        f"slot '{slot_name}' (allowed: {', '.join(storage_units)})"
                    )
                )
|