nmdc-schema 11.14.0__py3-none-any.whl → 11.14.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nmdc_schema/migrators/migrator_from_11_13_0_to_11_14_0.py +104 -26
- nmdc_schema/nmdc-pydantic.py +8244 -7091
- nmdc_schema/nmdc.py +17 -177
- nmdc_schema/nmdc.schema.json +14 -82
- nmdc_schema/nmdc_materialized_patterns.json +13982 -14101
- nmdc_schema/nmdc_materialized_patterns.schema.json +13 -82
- nmdc_schema/nmdc_materialized_patterns.yaml +10496 -10584
- {nmdc_schema-11.14.0.dist-info → nmdc_schema-11.14.0rc1.dist-info}/METADATA +3 -4
- {nmdc_schema-11.14.0.dist-info → nmdc_schema-11.14.0rc1.dist-info}/RECORD +12 -15
- {nmdc_schema-11.14.0.dist-info → nmdc_schema-11.14.0rc1.dist-info}/entry_points.txt +8 -0
- nmdc_schema/migrators/partials/migrator_from_11_13_0_to_11_14_0/__init__.py +0 -28
- nmdc_schema/migrators/partials/migrator_from_11_13_0_to_11_14_0/migrator_from_11_13_0_to_11_14_0_part_1.py +0 -113
- nmdc_schema/migrators/partials/migrator_from_11_13_0_to_11_14_0/migrator_from_11_13_0_to_11_14_0_part_2.py +0 -40
- {nmdc_schema-11.14.0.dist-info → nmdc_schema-11.14.0rc1.dist-info}/WHEEL +0 -0
- {nmdc_schema-11.14.0.dist-info → nmdc_schema-11.14.0rc1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,35 +1,113 @@
|
|
|
1
|
+
from linkml.validator import Validator
|
|
2
|
+
|
|
1
3
|
from nmdc_schema.migrators.migrator_base import MigratorBase
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from nmdc_schema.migrators.helpers import create_schema_view, get_classes_with_slots_by_range, \
|
|
7
|
+
get_database_collections_for_class
|
|
8
|
+
from nmdc_schema import NmdcSchemaValidationPlugin
|
|
9
|
+
|
|
10
|
+
project_root = Path(__file__).parent.parent.parent
|
|
11
|
+
sys.path.insert(0, str(project_root))
|
|
5
12
|
|
|
6
13
|
|
|
7
14
|
class Migrator(MigratorBase):
|
|
8
|
-
r"""
|
|
9
|
-
|
|
10
|
-
|
|
15
|
+
r"""A check-only migrator that raises an exception if any QuantityValue's has_unit slot
|
|
16
|
+
is not valid against the slot's storage_unit or UnitEnum constraints."""
|
|
17
|
+
|
|
18
|
+
_from_version = '11.13.0'
|
|
19
|
+
_to_version = '11.14.0'
|
|
11
20
|
|
|
12
|
-
|
|
13
|
-
|
|
21
|
+
def __init__(self, *args, **kwargs):
|
|
22
|
+
super().__init__(*args, **kwargs)
|
|
23
|
+
|
|
24
|
+
self.schema_view = create_schema_view()
|
|
25
|
+
self.validator = Validator(
|
|
26
|
+
self.schema_view.schema,
|
|
27
|
+
# This is intentionally *only* using the NMDC plugin, because this migrator is *only*
|
|
28
|
+
# concerned with validating the QuantityValue-related constraints that the plugin
|
|
29
|
+
# implements.
|
|
30
|
+
validation_plugins=[
|
|
31
|
+
NmdcSchemaValidationPlugin()
|
|
32
|
+
]
|
|
33
|
+
)
|
|
14
34
|
|
|
15
35
|
def upgrade(self, commit_changes: bool = False) -> None:
|
|
16
|
-
r
|
|
36
|
+
r'''
|
|
17
37
|
Migrates the database from conforming to the original schema, to conforming to the new schema.
|
|
38
|
+
'''
|
|
39
|
+
|
|
40
|
+
# Get the schema classes which have slots with range QuantityValue
|
|
41
|
+
classes_with_qv_slots = get_classes_with_slots_by_range(self.schema_view, 'QuantityValue')
|
|
42
|
+
|
|
43
|
+
# Get the Database collection names that can hold these classes
|
|
44
|
+
eligible_collection_names = set()
|
|
45
|
+
for class_name in classes_with_qv_slots.keys():
|
|
46
|
+
collection_names = get_database_collections_for_class(self.schema_view, class_name)
|
|
47
|
+
eligible_collection_names.update(collection_names)
|
|
48
|
+
|
|
49
|
+
# Apply migrator through collections
|
|
50
|
+
self.logger.info("Checking QuantityValue units against UnitEnum and storage_units constraints")
|
|
51
|
+
for collection_name in eligible_collection_names:
|
|
52
|
+
self.logger.info(f" Checking collection '{collection_name}'")
|
|
53
|
+
self.adapter.do_for_each_document(collection_name, self.confirm_units_fit_unitenum_and_storage_units)
|
|
54
|
+
|
|
55
|
+
def confirm_units_fit_unitenum_and_storage_units(self, document: dict) -> None:
|
|
56
|
+
r'''
|
|
57
|
+
Raise an exception if the QuantityValue's has_unit slot is not valid against slot's storage_unit or UnitEnum constraints.
|
|
58
|
+
|
|
59
|
+
>>> m = Migrator()
|
|
60
|
+
|
|
61
|
+
# Test: valid QuantityValue with proper units in biosample
|
|
62
|
+
>>> valid_biosample = {
|
|
63
|
+
... "id": "test1",
|
|
64
|
+
... "type": "nmdc:Biosample",
|
|
65
|
+
... "bulk_elect_conductivity": {
|
|
66
|
+
... "type": "nmdc:QuantityValue",
|
|
67
|
+
... "has_unit": "mS/cm",
|
|
68
|
+
... "has_numeric_value": 25.0
|
|
69
|
+
... }
|
|
70
|
+
... }
|
|
71
|
+
>>> m.confirm_units_fit_unitenum_and_storage_units(valid_biosample) # Should not raise
|
|
72
|
+
|
|
73
|
+
# Test: unit not allowed for bulk_elect_conductivity's storage_units
|
|
74
|
+
>>> invalid_biosample_storage = {
|
|
75
|
+
... "id": "test2",
|
|
76
|
+
... "type": "nmdc:Biosample",
|
|
77
|
+
... "bulk_elect_conductivity": {
|
|
78
|
+
... "type": "nmdc:QuantityValue",
|
|
79
|
+
... "has_unit": "Cel",
|
|
80
|
+
... "has_numeric_value": 25.0
|
|
81
|
+
... }
|
|
82
|
+
... }
|
|
83
|
+
>>> m.confirm_units_fit_unitenum_and_storage_units(invalid_biosample_storage)
|
|
84
|
+
Traceback (most recent call last):
|
|
85
|
+
...
|
|
86
|
+
ValueError: In test2:
|
|
87
|
+
QuantityValue at /bulk_elect_conductivity has unit 'Cel' which is not allowed for slot 'bulk_elect_conductivity' (allowed: mS/cm)
|
|
88
|
+
|
|
89
|
+
# Test: unit not allowed for substances_volume's storage_units
|
|
90
|
+
>>> invalid_chem_storage = {
|
|
91
|
+
... "id": "test3",
|
|
92
|
+
... "type": "nmdc:ChemicalConversionProcess",
|
|
93
|
+
... "substances_volume": {
|
|
94
|
+
... "type": "nmdc:QuantityValue",
|
|
95
|
+
... "has_unit": "J/K", # Wrong unit type for substances_volume
|
|
96
|
+
... "has_numeric_value": 25.0
|
|
97
|
+
... }
|
|
98
|
+
... }
|
|
99
|
+
>>> m.confirm_units_fit_unitenum_and_storage_units(invalid_chem_storage)
|
|
100
|
+
Traceback (most recent call last):
|
|
101
|
+
...
|
|
102
|
+
ValueError: In test3:
|
|
103
|
+
QuantityValue at /substances_volume has unit 'J/K' which is not allowed for slot 'substances_volume' (allowed: mL)
|
|
104
|
+
'''
|
|
18
105
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
num_migrators = len(migrator_classes)
|
|
28
|
-
for idx, migrator_class in enumerate(migrator_classes):
|
|
29
|
-
self.logger.info(f"Running migrator {idx + 1} of {num_migrators}")
|
|
30
|
-
self.logger.debug(
|
|
31
|
-
f"Migrating from {migrator_class.get_origin_version()} "
|
|
32
|
-
f"to {migrator_class.get_destination_version()}"
|
|
33
|
-
)
|
|
34
|
-
migrator = migrator_class(adapter=self.adapter, logger=self.logger)
|
|
35
|
-
migrator.upgrade(commit_changes=commit_changes)
|
|
106
|
+
document_id = document.get('id', '<unknown id>')
|
|
107
|
+
document_type = document.get('type')
|
|
108
|
+
if not document_type:
|
|
109
|
+
raise ValueError(f"Unable to infer target_class for document with id '{document_id}'")
|
|
110
|
+
target_class = document_type.replace('nmdc:', '')
|
|
111
|
+
report = self.validator.validate(document, target_class)
|
|
112
|
+
if report.results:
|
|
113
|
+
raise ValueError(f"In {document_id}:\n" + "\n".join(" " + result.message for result in report.results))
|