nmdc-schema 11.12.0rc2__py3-none-any.whl → 11.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2649,9 +2649,9 @@
2649
2649
  ]
2650
2650
  },
2651
2651
  "misc_param": {
2652
- "description": "Any other measurement performed or parameter collected, that is not listed here",
2652
+ "description": "Structured miscellaneous property assertions for this Biosample. Use when a value cannot cleanly fit an existing, policy-governed slot.",
2653
2653
  "items": {
2654
- "$ref": "#/$defs/TextValue"
2654
+ "$ref": "#/$defs/PropertyAssertion"
2655
2655
  },
2656
2656
  "type": [
2657
2657
  "array",
@@ -8540,7 +8540,7 @@
8540
8540
  },
8541
8541
  "Instrument": {
8542
8542
  "additionalProperties": false,
8543
- "description": "A material entity that is designed to perform a function in a scientific investigation, but is not a reagent.",
8543
+ "description": "A material entity that is designed to perform a function in a scientific investigation, but is not a reagent. This class models the make and model of the instrument, not the specific instance of the instrument.",
8544
8544
  "properties": {
8545
8545
  "alternative_identifiers": {
8546
8546
  "description": "A list of alternative identifiers for the entity.",
@@ -13202,10 +13202,113 @@
13202
13202
  "title": "ProfilePositionEnum",
13203
13203
  "type": "string"
13204
13204
  },
13205
+ "PropertyAssertion": {
13206
+ "additionalProperties": false,
13207
+ "description": "A structured record of data that doesn't fit nicely within the constraints of other NMDC AttributeValues. Uses primitive ranges only.",
13208
+ "properties": {
13209
+ "has_attribute_id": {
13210
+ "description": "CURIE or IRI for the property (MIxS slot, NMDC slot, ENVO/OBI term, etc.).",
13211
+ "type": [
13212
+ "string",
13213
+ "null"
13214
+ ]
13215
+ },
13216
+ "has_attribute_label": {
13217
+ "description": "Human-readable label for the property (e.g., MIxS label, ENVO term label).",
13218
+ "type": [
13219
+ "string",
13220
+ "null"
13221
+ ]
13222
+ },
13223
+ "has_boolean_value": {
13224
+ "description": "Links a quantity value to a boolean",
13225
+ "type": [
13226
+ "boolean",
13227
+ "null"
13228
+ ]
13229
+ },
13230
+ "has_datetime_value": {
13231
+ "description": "Date-time value for the property in ISO-8601 format.",
13232
+ "type": [
13233
+ "string",
13234
+ "null"
13235
+ ]
13236
+ },
13237
+ "has_maximum_numeric_value": {
13238
+ "description": "The maximum value part, expressed as number, of the quantity value when the value covers a range.",
13239
+ "type": [
13240
+ "number",
13241
+ "null"
13242
+ ]
13243
+ },
13244
+ "has_minimum_numeric_value": {
13245
+ "description": "The minimum value part, expressed as number, of the quantity value when the value covers a range.",
13246
+ "type": [
13247
+ "number",
13248
+ "null"
13249
+ ]
13250
+ },
13251
+ "has_numeric_value": {
13252
+ "description": "Links a quantity value to a number",
13253
+ "type": [
13254
+ "number",
13255
+ "null"
13256
+ ]
13257
+ },
13258
+ "has_quantity_kind_id": {
13259
+ "description": "Optional CURIE or IRI for the physical quantity kind (e.g., qudt:QuantityKind).",
13260
+ "type": [
13261
+ "string",
13262
+ "null"
13263
+ ]
13264
+ },
13265
+ "has_raw_value": {
13266
+ "description": "Original contributor string representation (unparsed)",
13267
+ "type": "string"
13268
+ },
13269
+ "has_unit": {
13270
+ "description": "UCUM unit code (required only when numeric value is present)",
13271
+ "type": [
13272
+ "string",
13273
+ "null"
13274
+ ]
13275
+ },
13276
+ "has_value_term_id": {
13277
+ "description": "CURIE or IRI for categorical values (ENVO, PATO, METPO, etc.).",
13278
+ "type": [
13279
+ "string",
13280
+ "null"
13281
+ ]
13282
+ },
13283
+ "type": {
13284
+ "description": "the class_uri of the class that has been instantiated",
13285
+ "enum": [
13286
+ "nmdc:PropertyAssertion"
13287
+ ],
13288
+ "type": "string"
13289
+ }
13290
+ },
13291
+ "required": [
13292
+ "has_raw_value",
13293
+ "type"
13294
+ ],
13295
+ "title": "PropertyAssertion",
13296
+ "type": "object"
13297
+ },
13205
13298
  "Protocol": {
13206
13299
  "additionalProperties": false,
13207
13300
  "description": "",
13208
13301
  "properties": {
13302
+ "analysis_type": {
13303
+ "description": "Select all the data types associated or available for this biosample",
13304
+ "items": {
13305
+ "$ref": "#/$defs/AnalysisTypeEnum"
13306
+ },
13307
+ "type": [
13308
+ "array",
13309
+ "null"
13310
+ ]
13311
+ },
13209
13312
  "description": {
13210
13313
  "description": "a human-readable description of a thing",
13211
13314
  "type": [
@@ -13220,6 +13323,10 @@
13220
13323
  "null"
13221
13324
  ]
13222
13325
  },
13326
+ "protocol_for": {
13327
+ "$ref": "#/$defs/ProtocolForEnum",
13328
+ "description": "The type of planned process that the protocol describes."
13329
+ },
13223
13330
  "type": {
13224
13331
  "description": "the class_uri of the class that has been instantiated",
13225
13332
  "enum": [
@@ -13258,6 +13365,43 @@
13258
13365
  "title": "ProtocolCategoryEnum",
13259
13366
  "type": "string"
13260
13367
  },
13368
+ "ProtocolForEnum": {
13369
+ "description": "The permitted values for describing the type of planned process that a protocol describes.",
13370
+ "enum": [
13371
+ "AnnotatingWorkflow",
13372
+ "ChemicalConversionProcess",
13373
+ "ChromatographicSeparationProcess",
13374
+ "CollectingBiosamplesFromSite",
13375
+ "DataEmitterProcess",
13376
+ "DataGeneration",
13377
+ "DissolvingProcess",
13378
+ "Extraction",
13379
+ "FiltrationProcess",
13380
+ "LibraryPreparation",
13381
+ "MagsAnalysis",
13382
+ "MassSpectrometry",
13383
+ "MaterialProcessing",
13384
+ "MetabolomicsAnalysis",
13385
+ "MetagenomeAnnotation",
13386
+ "MetagenomeAssembly",
13387
+ "MetaproteomicsAnalysis",
13388
+ "MetatranscriptomeAnnotation",
13389
+ "MetatranscriptomeAssembly",
13390
+ "MetatranscriptomeExpressionAnalysis",
13391
+ "MixingProcess",
13392
+ "NomAnalysis",
13393
+ "NucleotideSequencing",
13394
+ "PlannedProcess",
13395
+ "Pooling",
13396
+ "ReadBasedTaxonomyAnalysis",
13397
+ "ReadQcAnalysis",
13398
+ "StorageProcess",
13399
+ "SubSamplingProcess",
13400
+ "WorkflowExecution"
13401
+ ],
13402
+ "title": "ProtocolForEnum",
13403
+ "type": "string"
13404
+ },
13261
13405
  "QuadPosEnum": {
13262
13406
  "description": "",
13263
13407
  "enum": [
@@ -15665,5 +15809,5 @@
15665
15809
  },
15666
15810
  "title": "NMDC",
15667
15811
  "type": "object",
15668
- "version": "11.12.0rc2"
15812
+ "version": "11.12.1"
15669
15813
  }
@@ -13,7 +13,7 @@ notes:
13
13
  - not importing any MIxS terms where the relationship between the name (SCN) and the
14
14
  id isn't 1:1
15
15
  id: https://w3id.org/nmdc/nmdc
16
- version: 11.12.0rc2
16
+ version: 11.12.1
17
17
  license: https://creativecommons.org/publicdomain/zero/1.0/
18
18
  prefixes:
19
19
  BFO:
@@ -325,39 +325,6 @@ emit_prefixes:
325
325
  default_prefix: nmdc
326
326
  default_range: string
327
327
  types:
328
- external_identifier:
329
- name: external_identifier
330
- description: A CURIE representing an external identifier
331
- from_schema: https://w3id.org/nmdc/nmdc
332
- see_also:
333
- - https://microbiomedata.github.io/nmdc-schema/identifiers/
334
- typeof: uriorcurie
335
- uri: xsd:anyURI
336
- pattern: ^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$
337
- bytes:
338
- name: bytes
339
- description: An integer value that corresponds to a size in bytes
340
- from_schema: https://w3id.org/nmdc/nmdc
341
- see_also:
342
- - UO:0000233
343
- base: int
344
- uri: xsd:long
345
- decimal_degree:
346
- name: decimal_degree
347
- description: A decimal degree expresses latitude or longitude as decimal fractions.
348
- from_schema: https://w3id.org/nmdc/nmdc
349
- see_also:
350
- - https://en.wikipedia.org/wiki/Decimal_degrees
351
- base: float
352
- uri: xsd:decimal
353
- language_code:
354
- name: language_code
355
- description: A language code conforming to ISO_639-1
356
- from_schema: https://w3id.org/nmdc/nmdc
357
- see_also:
358
- - https://en.wikipedia.org/wiki/ISO_639-1
359
- base: str
360
- uri: xsd:language
361
328
  string:
362
329
  name: string
363
330
  description: A character string
@@ -588,6 +555,39 @@ types:
588
555
  base: str
589
556
  uri: xsd:string
590
557
  repr: str
558
+ external_identifier:
559
+ name: external_identifier
560
+ description: A CURIE representing an external identifier
561
+ from_schema: https://w3id.org/nmdc/nmdc
562
+ see_also:
563
+ - https://microbiomedata.github.io/nmdc-schema/identifiers/
564
+ typeof: uriorcurie
565
+ uri: xsd:anyURI
566
+ pattern: ^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$
567
+ bytes:
568
+ name: bytes
569
+ description: An integer value that corresponds to a size in bytes
570
+ from_schema: https://w3id.org/nmdc/nmdc
571
+ see_also:
572
+ - UO:0000233
573
+ base: int
574
+ uri: xsd:long
575
+ decimal_degree:
576
+ name: decimal_degree
577
+ description: A decimal degree expresses latitude or longitude as decimal fractions.
578
+ from_schema: https://w3id.org/nmdc/nmdc
579
+ see_also:
580
+ - https://en.wikipedia.org/wiki/Decimal_degrees
581
+ base: float
582
+ uri: xsd:decimal
583
+ language_code:
584
+ name: language_code
585
+ description: A language code conforming to ISO_639-1
586
+ from_schema: https://w3id.org/nmdc/nmdc
587
+ see_also:
588
+ - https://en.wikipedia.org/wiki/ISO_639-1
589
+ base: str
590
+ uri: xsd:language
591
591
  enums:
592
592
  CalibrationTargetEnum:
593
593
  name: CalibrationTargetEnum
@@ -5548,6 +5548,74 @@ enums:
5548
5548
  NomAnalysis:
5549
5549
  text: NomAnalysis
5550
5550
  description: A failure has occurred in analyzing NOM data.
5551
+ ProtocolForEnum:
5552
+ name: ProtocolForEnum
5553
+ description: The permitted values for describing the type of planned process that
5554
+ a protocol describes.
5555
+ comments:
5556
+ - These are the non-abstract class descendants of PlannedProcess.
5557
+ from_schema: https://w3id.org/nmdc/nmdc
5558
+ permissible_values:
5559
+ AnnotatingWorkflow:
5560
+ text: AnnotatingWorkflow
5561
+ ChemicalConversionProcess:
5562
+ text: ChemicalConversionProcess
5563
+ ChromatographicSeparationProcess:
5564
+ text: ChromatographicSeparationProcess
5565
+ CollectingBiosamplesFromSite:
5566
+ text: CollectingBiosamplesFromSite
5567
+ DataEmitterProcess:
5568
+ text: DataEmitterProcess
5569
+ DataGeneration:
5570
+ text: DataGeneration
5571
+ DissolvingProcess:
5572
+ text: DissolvingProcess
5573
+ Extraction:
5574
+ text: Extraction
5575
+ FiltrationProcess:
5576
+ text: FiltrationProcess
5577
+ LibraryPreparation:
5578
+ text: LibraryPreparation
5579
+ MagsAnalysis:
5580
+ text: MagsAnalysis
5581
+ MassSpectrometry:
5582
+ text: MassSpectrometry
5583
+ MaterialProcessing:
5584
+ text: MaterialProcessing
5585
+ MetabolomicsAnalysis:
5586
+ text: MetabolomicsAnalysis
5587
+ MetagenomeAnnotation:
5588
+ text: MetagenomeAnnotation
5589
+ MetagenomeAssembly:
5590
+ text: MetagenomeAssembly
5591
+ MetaproteomicsAnalysis:
5592
+ text: MetaproteomicsAnalysis
5593
+ MetatranscriptomeAnnotation:
5594
+ text: MetatranscriptomeAnnotation
5595
+ MetatranscriptomeAssembly:
5596
+ text: MetatranscriptomeAssembly
5597
+ MetatranscriptomeExpressionAnalysis:
5598
+ text: MetatranscriptomeExpressionAnalysis
5599
+ MixingProcess:
5600
+ text: MixingProcess
5601
+ NomAnalysis:
5602
+ text: NomAnalysis
5603
+ NucleotideSequencing:
5604
+ text: NucleotideSequencing
5605
+ PlannedProcess:
5606
+ text: PlannedProcess
5607
+ Pooling:
5608
+ text: Pooling
5609
+ ReadBasedTaxonomyAnalysis:
5610
+ text: ReadBasedTaxonomyAnalysis
5611
+ ReadQcAnalysis:
5612
+ text: ReadQcAnalysis
5613
+ StorageProcess:
5614
+ text: StorageProcess
5615
+ SubSamplingProcess:
5616
+ text: SubSamplingProcess
5617
+ WorkflowExecution:
5618
+ text: WorkflowExecution
5551
5619
  SampleTypeEnum:
5552
5620
  name: SampleTypeEnum
5553
5621
  from_schema: https://w3id.org/nmdc/nmdc
@@ -6541,7 +6609,7 @@ slots:
6541
6609
  annotations:
6542
6610
  storage_units:
6543
6611
  tag: storage_units
6544
- value: '%|mmol/L|umol/L'
6612
+ value: '%|mmol/L|umol/L|mg/L|g/L'
6545
6613
  description: When solutions A (containing substance X) and B are combined together,
6546
6614
  this slot captures the concentration of X in the combination
6547
6615
  from_schema: https://w3id.org/nmdc/nmdc
@@ -6926,8 +6994,7 @@ slots:
6926
6994
  mappings:
6927
6995
  - qud:unit
6928
6996
  - schema:unitCode
6929
- range: UnitEnum
6930
- required: true
6997
+ range: string
6931
6998
  type:
6932
6999
  name: type
6933
7000
  description: the class_uri of the class that has been instantiated
@@ -7024,6 +7091,56 @@ slots:
7024
7091
  range: string
7025
7092
  multivalued: true
7026
7093
  pattern: ^[Hh][Tt][Tt][Pp][Ss]?:\/\/(?!.*[Dd][Oo][Ii]\.[Oo][Rr][Gg]).*$
7094
+ has_attribute_label:
7095
+ name: has_attribute_label
7096
+ description: Human-readable label for the property (e.g., MIxS label, ENVO term
7097
+ label).
7098
+ comments:
7099
+ - This provides a human-friendly name for the asserted property. For example,
7100
+ "bicarbonate ion concentration" or "total phosphorus". The label helps with
7101
+ readability and data discovery.
7102
+ from_schema: https://w3id.org/nmdc/nmdc
7103
+ range: string
7104
+ has_attribute_id:
7105
+ name: has_attribute_id
7106
+ description: CURIE or IRI for the property (MIxS slot, NMDC slot, ENVO/OBI term,
7107
+ etc.).
7108
+ comments:
7109
+ - This provides a resolvable identifier for the property being asserted. Examples
7110
+ include MIXS:0000117 for total phosphorus, or ENVO:01001357 for bicarbonate
7111
+ ion concentration.
7112
+ - Prefer using standard ontology terms (ENVO, PATO, OBI, etc.) or MIxS identifiers
7113
+ when available to enhance interoperability.
7114
+ from_schema: https://w3id.org/nmdc/nmdc
7115
+ range: uriorcurie
7116
+ has_quantity_kind_id:
7117
+ name: has_quantity_kind_id
7118
+ description: Optional CURIE or IRI for the physical quantity kind (e.g., qudt:QuantityKind).
7119
+ comments:
7120
+ - This slot enables precise semantic description of what physical quantity is
7121
+ being measured, independent of the specific units used. For example, qudt:MassConcentration
7122
+ or qudt:Temperature.
7123
+ - Using quantity kind identifiers from QUDT or similar vocabularies improves data
7124
+ integration and enables automated unit conversion.
7125
+ from_schema: https://w3id.org/nmdc/nmdc
7126
+ range: uriorcurie
7127
+ has_value_term_id:
7128
+ name: has_value_term_id
7129
+ description: CURIE or IRI for categorical values (ENVO, PATO, METPO, etc.).
7130
+ comments:
7131
+ - Use this slot when the value of the property is a controlled vocabulary term
7132
+ rather than a numeric or free-text value. For example, ENVO:00002297 for "desert
7133
+ ecosystem" or PATO:0001199 for "dry".
7134
+ from_schema: https://w3id.org/nmdc/nmdc
7135
+ range: uriorcurie
7136
+ has_datetime_value:
7137
+ name: has_datetime_value
7138
+ description: Date-time value for the property in ISO-8601 format.
7139
+ comments:
7140
+ - Use this slot for temporal properties. The value should follow ISO-8601 format
7141
+ (e.g., "2025-06-12T14:30:00Z").
7142
+ from_schema: https://w3id.org/nmdc/nmdc
7143
+ range: string
7027
7144
  processing_institution_workflow_metadata:
7028
7145
  name: processing_institution_workflow_metadata
7029
7146
  description: Information about how workflow results were generated when the processing
@@ -13524,9 +13641,11 @@ slots:
13524
13641
  occurrence:
13525
13642
  tag: occurrence
13526
13643
  value: m
13527
- description: Any other measurement performed or parameter collected, that is not
13528
- listed here
13644
+ description: Structured miscellaneous property assertions for this Biosample.
13645
+ Use when a value cannot cleanly fit an existing, policy-governed slot.
13529
13646
  title: miscellaneous parameter
13647
+ todos:
13648
+ - This slot should not be available in the submission portal.
13530
13649
  examples:
13531
13650
  - value: Bicarbonate ion concentration;2075 micromole per kilogram
13532
13651
  from_schema: https://w3id.org/nmdc/nmdc
@@ -13535,7 +13654,7 @@ slots:
13535
13654
  is_a: core field
13536
13655
  string_serialization: '{text};{float} {unit}'
13537
13656
  slot_uri: MIXS:0000752
13538
- range: TextValue
13657
+ range: PropertyAssertion
13539
13658
  multivalued: true
13540
13659
  inlined_as_list: true
13541
13660
  n_alkanes:
@@ -19756,6 +19875,11 @@ slots:
19756
19875
  - For example, low read count from a sequencer, malformed fastq files, etc.
19757
19876
  from_schema: https://w3id.org/nmdc/nmdc
19758
19877
  range: FailureWhatEnum
19878
+ protocol_for:
19879
+ name: protocol_for
19880
+ description: The type of planned process that the protocol describes.
19881
+ from_schema: https://w3id.org/nmdc/nmdc
19882
+ range: ProtocolForEnum
19759
19883
  emsl_store_temp:
19760
19884
  name: emsl_store_temp
19761
19885
  description: The temperature at which the sample should be stored upon delivery
@@ -22608,6 +22732,8 @@ classes:
22608
22732
  has_unit:
22609
22733
  name: has_unit
22610
22734
  description: The unit of the quantity
22735
+ range: UnitEnum
22736
+ required: true
22611
22737
  has_numeric_value:
22612
22738
  name: has_numeric_value
22613
22739
  description: The number part of the quantity
@@ -22720,6 +22846,72 @@ classes:
22720
22846
  name: longitude
22721
22847
  required: true
22722
22848
  class_uri: nmdc:GeolocationValue
22849
+ PropertyAssertion:
22850
+ name: PropertyAssertion
22851
+ description: A structured record of data that doesn't fit nicely within the constraints
22852
+ of other NMDC AttributeValues. Uses primitive ranges only.
22853
+ comments:
22854
+ - This class enables flexible metadata capture for properties that don't align
22855
+ with existing, policy-governed slots in NMDC schema.
22856
+ - 'Interoperability note: This approach is aligned with the BERtron schema''s
22857
+ ''properties'' slot pattern (see https://github.com/ber-data/bertron-schema),
22858
+ which uses any_of to support both TextValue and QuantityValue ranges. NMDC''s
22859
+ PropertyAssertion provides a more granular approach by using primitive types
22860
+ with optional semantic annotations, enabling better validation while maintaining
22861
+ flexibility for diverse metadata types.'
22862
+ - PropertyAssertion supports both categorical values (via has_value_term_id) and
22863
+ numeric values (via has_numeric_value with optional min/max ranges), along with
22864
+ temporal (has_datetime_value) and boolean (has_boolean_value) data types.
22865
+ examples:
22866
+ - description: Data provided from submission that doesn't conform to required
22867
+ abs_air_humidity units
22868
+ object:
22869
+ type: nmdc:PropertyAssertion
22870
+ has_raw_value: 50 kPa
22871
+ has_attribute_id: MIXS:0000122
22872
+ has_attribute_label: absolute air humidity
22873
+ has_numeric_value: 50
22874
+ has_unit: kPa
22875
+ - description: Data provided from submission that doesn't conform to required
22876
+ UCUM unit
22877
+ object:
22878
+ type: nmdc:PropertyAssertion
22879
+ has_raw_value: 5.5 mL/L
22880
+ has_attribute_id: MIXS:0000119
22881
+ has_attribute_label: dissolved oxygen
22882
+ has_numeric_value: 5.5
22883
+ has_unit: mL/L
22884
+ - description: Unit provided is invalid for UCUM and standard
22885
+ object:
22886
+ type: nmdc:PropertyAssertion
22887
+ has_raw_value: 250 W/m2
22888
+ has_attribute_id: MIXS:0000112
22889
+ has_attribute_label: solar irradiance
22890
+ has_numeric_value: 250
22891
+ has_unit: W/m2
22892
+ from_schema: https://w3id.org/nmdc/nmdc
22893
+ is_a: AttributeValue
22894
+ slots:
22895
+ - has_attribute_label
22896
+ - has_attribute_id
22897
+ - has_quantity_kind_id
22898
+ - has_value_term_id
22899
+ - has_boolean_value
22900
+ - has_datetime_value
22901
+ - has_numeric_value
22902
+ - has_minimum_numeric_value
22903
+ - has_maximum_numeric_value
22904
+ - has_unit
22905
+ slot_usage:
22906
+ has_raw_value:
22907
+ name: has_raw_value
22908
+ description: Original contributor string representation (unparsed)
22909
+ required: true
22910
+ has_unit:
22911
+ name: has_unit
22912
+ description: UCUM unit code (required only when numeric value is present)
22913
+ required: false
22914
+ class_uri: nmdc:PropertyAssertion
22723
22915
  NamedThing:
22724
22916
  name: NamedThing
22725
22917
  description: a databased entity or concept/class
@@ -22824,7 +23016,8 @@ classes:
22824
23016
  Instrument:
22825
23017
  name: Instrument
22826
23018
  description: A material entity that is designed to perform a function in a scientific
22827
- investigation, but is not a reagent.
23019
+ investigation, but is not a reagent. This class models the make and model of
23020
+ the instrument, not the specific instance of the instrument.
22828
23021
  from_schema: https://w3id.org/nmdc/nmdc
22829
23022
  aliases:
22830
23023
  - device
@@ -22842,6 +23035,13 @@ classes:
22842
23035
  syntax: '{id_nmdc_prefix}:inst-{id_shoulder}-{id_blade}$'
22843
23036
  interpolated: true
22844
23037
  class_uri: nmdc:Instrument
23038
+ unique_keys:
23039
+ main:
23040
+ unique_key_name: main
23041
+ unique_key_slots:
23042
+ - vendor
23043
+ - model
23044
+ description: A unique instrument is defined by its vendor and model.
22845
23045
  PlannedProcess:
22846
23046
  name: PlannedProcess
22847
23047
  title: Planned Process
@@ -22867,6 +23067,8 @@ classes:
22867
23067
  - name
22868
23068
  - type
22869
23069
  - description
23070
+ - protocol_for
23071
+ - analysis_type
22870
23072
  class_uri: nmdc:Protocol
22871
23073
  CreditAssociation:
22872
23074
  name: CreditAssociation
@@ -0,0 +1,103 @@
1
+ from typing import Any, Optional, Iterator, Union
2
+
3
+ from linkml.validator.plugins import ValidationPlugin
4
+ from linkml.validator.report import ValidationResult, Severity
5
+ from linkml.validator.validation_context import ValidationContext
6
+ from linkml_runtime import SchemaView
7
+
8
+
9
def _yield_quantity_value_objects(data: Any, path: Optional[list[Union[str, int]]] = None):
    """Walk ``data`` depth-first and yield ``(path, obj)`` for every QuantityValue found.

    A QuantityValue is any dict whose ``"type"`` entry equals ``"nmdc:QuantityValue"``.
    Once such a dict is found it is yielded as-is and its contents are not searched further.

    :param data: Arbitrary nested structure of dicts, lists and scalars to search
    :param path: Keys/indices leading to ``data`` from the root; defaults to an empty path
    :return: Generator of ``(path, quantity_value_dict)`` tuples, where ``path`` is a list of
        dict keys and list indices
    """
    location = [] if path is None else path

    if isinstance(data, dict):
        if data.get("type") == "nmdc:QuantityValue":
            yield location, data
            return
        # Not a QuantityValue itself: descend into each value, keyed by its dict key.
        children = data.items()
    elif isinstance(data, list):
        # Descend into each element, keyed by its position in the list.
        children = enumerate(data)
    else:
        # Scalars (and anything else) cannot contain a QuantityValue.
        return

    for step, child in children:
        yield from _yield_quantity_value_objects(child, location + [step])
24
+
25
+
26
class NmdcSchemaValidationPlugin(ValidationPlugin):
    """LinkML validation plugin that applies NMDC-specific checks to an instance.

    It is meant to run inside LinkML's validation framework alongside the
    `JsonSchemaValidationPlugin` that LinkML provides. The additional checks are:

    1. Every `nmdc:QuantityValue` found in the instance must carry a `has_unit` property, and
       when the slot holding it has a `storage_units` annotation, that unit must be one of the
       pipe-separated units listed there.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # Memo of slot name -> allowed units (or None), filled lazily by _slot_storage_units.
        self._slot_storage_units_cache = {}

    def _slot_storage_units(self, schema_view: SchemaView, slot_name: str) -> Optional[list[str]]:
        """Return the units listed in a slot's `storage_units` annotation.

        Results (including ``None``) are memoized per slot name.

        :param schema_view: SchemaView used to look up the slot definition
        :param slot_name: Name of the slot to inspect
        :return: The annotation value split on ``|``, or ``None`` when the slot is unknown, has
            no annotations, or has no usable `storage_units` annotation
        """
        cache = self._slot_storage_units_cache
        if slot_name in cache:
            return cache[slot_name]

        allowed = None
        slot = schema_view.get_slot(slot_name)
        if slot and slot.annotations and "storage_units" in slot.annotations:
            annotation = slot.annotations["storage_units"]
            if annotation and hasattr(annotation, "value"):
                # Several permitted units are encoded as one pipe-delimited string.
                allowed = str(annotation.value).split("|")

        cache[slot_name] = allowed
        return allowed

    def process(self, instance: Any, context: ValidationContext) -> Iterator[ValidationResult]:
        """Run the NMDC-specific checks against ``instance``.

        :param instance: The instance to validate
        :param context: The validation context, which supplies the SchemaView and target class
        :return: Iterator over the validation results (one per problem found)
        :rtype: Iterator[ValidationResult]
        """
        for location, quantity in _yield_quantity_value_objects(instance):
            unit = quantity.get("has_unit")
            pointer = "/".join(map(str, location))

            # A missing unit overlaps with JSON Schema validation, but reporting it here lets us
            # stop before attempting the storage_units comparison for this value.
            if unit is None:
                yield ValidationResult(
                    type="nmdc-schema validation",
                    severity=Severity.ERROR,
                    instance=instance,
                    instantiates=context.target_class,
                    message=f"QuantityValue at /{pointer} is missing required 'has_unit' property",
                )
                continue

            # The owning slot is taken to be the closest string segment of the path (integer
            # segments are list indices). Without one, the slot cannot be determined.
            slot_name = next(
                (segment for segment in reversed(location) if isinstance(segment, str)),
                None,
            )
            if slot_name is None:
                continue

            allowed = self._slot_storage_units(context.schema_view, slot_name)
            if not allowed or unit in allowed:
                # Either the slot does not restrict units, or the unit is permitted.
                continue

            yield ValidationResult(
                type="nmdc-schema validation",
                severity=Severity.ERROR,
                instance=instance,
                instantiates=context.target_class,
                message=(
                    f"QuantityValue at /{pointer} has unit '{unit}' which is not allowed for "
                    f"slot '{slot_name}' (allowed: {', '.join(allowed)})"
                ),
            )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nmdc_schema
3
- Version: 11.12.0rc2
3
+ Version: 11.12.1
4
4
  Summary: Schema resources for the National Microbiome Data Collaborative (NMDC)
5
5
  License: MIT
6
6
  License-File: LICENSE